def get_args(argv=None) -> argparse.Namespace:
    """Parse command-line arguments for evaluation.

    Args:
        argv: Optional sequence of argument strings. When ``None`` (the
            default), arguments are read from ``sys.argv`` exactly as
            before; passing a list makes the function testable.

    Returns:
        argparse.Namespace with all evaluation options.
    """
    parser = argparse.ArgumentParser(description="Evaluate model on test audio (file-level pooling).")
    # Required inputs.
    parser.add_argument("--model_path", type=str, required=True, help="Path to .keras or .tflite model")
    parser.add_argument("--model_config", type=str, default="", help="Path to model config JSON")
    parser.add_argument("--data_path_test", type=str, required=True, help="Path to test dataset root")
    # Inference controls.
    parser.add_argument("--max_files", type=int, default=-1, help="Max test files per class")
    parser.add_argument("--batch_size", type=int, default=16, help="Batch size for chunk inference")
    parser.add_argument("--overlap", type=float, default=0.0, help="Chunk overlap (seconds)")
    parser.add_argument("--pooling", type=str, default="avg", choices=["avg", "max", "lme"])
    # Outputs and reports. NOTE: store_true actions default to False automatically.
    parser.add_argument("--save_csv", type=str, default="", help="Optional path to save predictions CSV")
    parser.add_argument("--confusion_matrix", action="store_true", help="Print confusion matrix")
    parser.add_argument("--save_cm_plot", type=str, default="", help="Save confusion matrix plot to file")
    parser.add_argument(
        "--optimize_thresholds", action="store_true", help="Find per-class optimal F1 thresholds"
    )
    parser.add_argument("--benchmark", type=str, default="", help="Save structured JSON benchmark report to this path")
    parser.add_argument(
        "--benchmark_latency",
        action="store_true",
        help="Measure per-chunk inference latency (mean, median, p95, p99)",
    )
    parser.add_argument(
        "--species_report",
        type=str,
        default="",
        # %% is argparse help escaping for a literal percent sign.
        help="Save per-species AP report with 95%% bootstrap CI to this CSV path",
    )
    parser.add_argument(
        "--n_bootstrap",
        type=int,
        default=1000,
        help="Number of bootstrap resamples for species AP confidence intervals (default: 1000)",
    )
    parser.add_argument("--det_curve", action="store_true", help="Print ASCII DET curve")
    parser.add_argument("--save_det_plot", type=str, default="", help="Save DET curve plot to file")
    parser.add_argument(
        "--report_html",
        type=str,
        default="",
        help="Generate a self-contained HTML evaluation report",
    )
    # parse_args(None) falls back to sys.argv, preserving original behavior.
    return parser.parse_args(argv)