birdnet package

Subpackages

Submodules

birdnet.argparse_helper module

birdnet.backends module

birdnet.base module

birdnet.benchmark_script module

birdnet.globals module

birdnet.helper module

birdnet.local_data module

birdnet.logging_utils module

birdnet.model_loader module

Module for loading models. Provides functions to load official and custom models.

birdnet.model_loader.load(model_type, version, backend, /, *, precision='fp32', lang='en_us', **model_kwargs)
Return type:

ModelBase

birdnet.model_loader.load_custom(model_type, version, backend, model, species_list, /, *, precision='fp32', check_validity=True, **model_kwargs)
Return type:

ModelBase

birdnet.model_loader.load_perch_v2(device)
Return type:

AcousticModelPerchV2

birdnet.shm module

birdnet.utils module

Module contents

class birdnet.AcousticDataEncodingResult(tensor, input_durations, segment_duration_s, overlap_duration_s, speed, model_path, model_fmin, model_fmax, model_sr, model_precision, model_version)

Bases: AcousticEncodingResultBase

Attributes:
emb_dim

Return the embedding dimensionality.

embeddings

Return the raw embedding tensor produced by the encoder.

embeddings_masked

Return the mask that marks relevant segments across files.

hop_duration_s
input_durations

Durations of each input in seconds.

inputs

Identifiers for each input processed by the result.

max_n_segments

Return the maximum segment count reserved per input.

memory_size_MiB

Return the total result memory usage including embeddings buffers.

model_fmax

Upper bound of the model’s bandpass filter.

model_fmin

Lower bound of the model’s bandpass filter.

model_path
model_precision
model_sr

Sampling rate expected by the model.

model_version
n_inputs

Number of inputs in the result payload.

overlap_duration_s

Overlap duration between sliding windows in seconds.

segment_duration_s

Segment duration as configured on the inference pipeline.

speed

Speed multiplier that was applied to the inputs.

Methods

to_arrow_table()

Produce a PyArrow table that serializes each embedding with timing metadata.

to_csv(path, *[, encoding, buffer_size_kb, ...])

Dump the structured embeddings to a CSV file for downstream analysis.

to_dataframe()

Convert the structured array into a pandas DataFrame.

to_parquet(path, *[, compression, ...])

Write the contents to disk as an Arrow Parquet file.

to_structured_array()

Convert the embeddings and timing metadata into a structured array.

unprocessable_inputs()

Return the indices of inputs that could not be processed.

load

save

class birdnet.AcousticDataPredictionResult(tensor, species_list, input_durations, segment_duration_s, overlap_duration_s, speed, model_path, model_fmin, model_fmax, model_sr, model_precision, model_version)

Bases: AcousticPredictionResultBase

Attributes:
hop_duration_s
input_durations

Durations of each input in seconds.

inputs

Identifiers for each input processed by the result.

max_n_segments
memory_size_MiB

Memory usage for the base result metadata.

model_fmax

Upper bound of the model’s bandpass filter.

model_fmin

Lower bound of the model’s bandpass filter.

model_path
model_precision
model_sr

Sampling rate expected by the model.

model_version
n_inputs

Number of inputs in the result payload.

n_species
overlap_duration_s

Overlap duration between sliding windows in seconds.

segment_duration_s

Segment duration as configured on the inference pipeline.

species_ids
species_list
species_masked
species_probs
speed

Speed multiplier that was applied to the inputs.

top_k
unprocessable_inputs

Methods

to_dataframe()

Convert the structured array into a pandas DataFrame.

to_parquet(path, *[, compression, ...])

Write the contents to disk as an Arrow Parquet file.

load

save

to_arrow_table

to_csv

to_structured_array

class birdnet.AcousticEncodingResultBase(inputs, input_durations, model_path, model_fmin, model_fmax, model_sr, model_precision, model_version, segment_duration_s, overlap_duration_s, speed, tensor)

Bases: AcousticResultBase

Attributes:
emb_dim

Return the embedding dimensionality.

embeddings

Return the raw embedding tensor produced by the encoder.

embeddings_masked

Return the mask that marks relevant segments across files.

hop_duration_s
input_durations

Durations of each input in seconds.

inputs

Identifiers for each input processed by the result.

max_n_segments

Return the maximum segment count reserved per input.

memory_size_MiB

Return the total result memory usage including embeddings buffers.

model_fmax

Upper bound of the model’s bandpass filter.

model_fmin

Lower bound of the model’s bandpass filter.

model_path
model_precision
model_sr

Sampling rate expected by the model.

model_version
n_inputs

Number of inputs in the result payload.

overlap_duration_s

Overlap duration between sliding windows in seconds.

segment_duration_s

Segment duration as configured on the inference pipeline.

speed

Speed multiplier that was applied to the inputs.

Methods

to_arrow_table()

Produce a PyArrow table that serializes each embedding with timing metadata.

to_csv(path, *[, encoding, buffer_size_kb, ...])

Dump the structured embeddings to a CSV file for downstream analysis.

to_dataframe()

Convert the structured array into a pandas DataFrame.

to_parquet(path, *[, compression, ...])

Write the contents to disk as an Arrow Parquet file.

to_structured_array()

Convert the embeddings and timing metadata into a structured array.

unprocessable_inputs()

Return the indices of inputs that could not be processed.

load

save

property emb_dim: int

Return the embedding dimensionality.

Returns:

int: Number of coefficients per embedding vector.

property embeddings: ndarray

Return the raw embedding tensor produced by the encoder.

Returns:

np.ndarray: Embeddings with shape (n_inputs, n_segments, emb_dim).

property embeddings_masked: ndarray

Return the mask that marks relevant segments across files.

Returns:

np.ndarray: Boolean mask of the same shape as embeddings.

property max_n_segments: int

Return the maximum segment count reserved per input.

Returns:

int: Number of overlapping windows available per file.

property memory_size_MiB: float

Return the total result memory usage including embeddings buffers.

Returns:

float: Memory size in mebibytes.

to_arrow_table()

Produce a PyArrow table that serializes each embedding with timing metadata.

Return type:

Table

Returns:

pa.Table: Table containing dictionary-encoded inputs and embeddings lists.

to_csv(path, *, encoding='utf-8', buffer_size_kb=1024, silent=False)

Dump the structured embeddings to a CSV file for downstream analysis.

Return type:

None

Args:

path: File path where the CSV will be written (must end with .csv).
encoding: Text encoding for the output file.
buffer_size_kb: Buffer size used when writing the file.
silent: Suppress progress messages when True.

to_structured_array()

Convert the embeddings and timing metadata into a structured array.

Return type:

ndarray

Returns:

np.ndarray: Array with fields for input path, start/end times, and embedding.

unprocessable_inputs()

Return the indices of inputs that could not be processed.

Return type:

ndarray

Returns:

np.ndarray: Boolean mask or indices for skipped inputs.

class birdnet.AcousticEncodingSession(species_list, model_path, model_segment_size_s, model_sample_rate, model_is_custom, model_sig_fmin, model_sig_fmax, model_version, model_backend_type, model_backend_custom_kwargs, model_emb_dim, *, n_producers, n_workers, batch_size, prefetch_ratio, overlap_duration_s, speed, bandpass_fmin, bandpass_fmax, half_precision, max_audio_duration_min, show_stats, progress_callback, device, max_n_files)

Bases: AcousticSessionBase

Methods

cancel

end

run

run_arrays

run(inputs)
Return type:

AcousticFileEncodingResult

run_arrays(inputs)
Return type:

AcousticDataEncodingResult

class birdnet.AcousticFileEncodingResult(tensor, files, file_durations, segment_duration_s, overlap_duration_s, speed, model_path, model_fmin, model_fmax, model_sr, model_precision, model_version)

Bases: AcousticEncodingResultBase

Attributes:
emb_dim

Return the embedding dimensionality.

embeddings

Return the raw embedding tensor produced by the encoder.

embeddings_masked

Return the mask that marks relevant segments across files.

hop_duration_s
input_durations

Durations of each input in seconds.

inputs

Identifiers for each input processed by the result.

max_n_segments

Return the maximum segment count reserved per input.

memory_size_MiB

Return the total result memory usage including embeddings buffers.

model_fmax

Upper bound of the model’s bandpass filter.

model_fmin

Lower bound of the model’s bandpass filter.

model_path
model_precision
model_sr

Sampling rate expected by the model.

model_version
n_inputs

Number of inputs in the result payload.

overlap_duration_s

Overlap duration between sliding windows in seconds.

segment_duration_s

Segment duration as configured on the inference pipeline.

speed

Speed multiplier that was applied to the inputs.

Methods

to_arrow_table()

Produce a PyArrow table that serializes each embedding with timing metadata.

to_csv(path, *[, encoding, buffer_size_kb, ...])

Dump the structured embeddings to a CSV file for downstream analysis.

to_dataframe()

Convert the structured array into a pandas DataFrame.

to_parquet(path, *[, compression, ...])

Write the contents to disk as an Arrow Parquet file.

to_structured_array()

Convert the embeddings and timing metadata into a structured array.

unprocessable_inputs()

Return the indices of inputs that could not be processed.

load

save

class birdnet.AcousticFilePredictionResult(tensor, files, species_list, file_durations, segment_duration_s, overlap_duration_s, speed, model_path, model_fmin, model_fmax, model_sr, model_precision, model_version)

Bases: AcousticPredictionResultBase

Attributes:
hop_duration_s
input_durations

Durations of each input in seconds.

inputs

Identifiers for each input processed by the result.

max_n_segments
memory_size_MiB

Memory usage for the base result metadata.

model_fmax

Upper bound of the model’s bandpass filter.

model_fmin

Lower bound of the model’s bandpass filter.

model_path
model_precision
model_sr

Sampling rate expected by the model.

model_version
n_inputs

Number of inputs in the result payload.

n_species
overlap_duration_s

Overlap duration between sliding windows in seconds.

segment_duration_s

Segment duration as configured on the inference pipeline.

species_ids
species_list
species_masked
species_probs
speed

Speed multiplier that was applied to the inputs.

top_k
unprocessable_inputs

Methods

to_dataframe()

Convert the structured array into a pandas DataFrame.

to_parquet(path, *[, compression, ...])

Write the contents to disk as an Arrow Parquet file.

get_unprocessed_files

load

save

to_arrow_table

to_csv

to_structured_array

get_unprocessed_files()
Return type:

set[Path]

class birdnet.AcousticModelPerchV2(model_path, species_list, is_custom_model, backend_type, backend_kwargs)

Bases: AcousticModelBase

Attributes:
backend_kwargs
backend_type
is_custom_model
model_path
n_species
species_list

Methods

encode(inp, /, *[, n_producers, n_workers, ...])

Run encoding with the Perch V2 model on files or paths to obtain embeddings.

encode_arrays(inp, /, *[, n_producers, ...])

Run encoding with the Perch V2 model directly on in-memory audio arrays.

encode_session(*[, n_producers, n_workers, ...])

Create an encoding session with explicit resource configuration.

get_version()

Return the string label that identifies the acoustic model version.

predict(inp, /, *[, top_k, n_producers, ...])

Run prediction with the Perch V2 model on files or paths with configurable inference options.

predict_arrays(inp, /, *[, top_k, ...])

Run prediction with the Perch V2 model directly on in-memory audio arrays.

predict_session(*[, top_k, n_producers, ...])

Create a prediction session allowing manual control over the inference lifecycle.

get_embeddings_dim

get_sample_rate

get_segment_size_s

get_segment_size_samples

get_sig_fmax

get_sig_fmin

load

load_custom

encode(inp, /, *, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, speed=1.0, bandpass_fmin=0, bandpass_fmax=15000, half_precision=False, max_audio_duration_min=None, show_stats=None, progress_callback=None, device='CPU')

Run encoding with the Perch V2 model on files or paths to obtain embeddings.

Return type:

AcousticEncodingResultBase

Args:

inp: Path(s) or string(s) pointing to audio files to encode.
n_producers: Threads tasked with producing audio batches.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
speed: Resampling multiplier to accommodate different recording speeds.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.
device: Target device(s) for running the backend.

Returns:

AcousticEncodingResultBase: Object containing embeddings for each file.

encode_arrays(inp, /, *, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, speed=1.0, bandpass_fmin=0, bandpass_fmax=15000, half_precision=False, max_audio_duration_min=None, show_stats=None, progress_callback=None, device='CPU')

Run encoding with the Perch V2 model directly on in-memory audio arrays.

Return type:

AcousticEncodingResultBase

Args:

inp: Tuple(s) of (audio ndarray, sampling rate).
n_producers: Threads generating batches from the arrays.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
speed: Resampling multiplier to accommodate different recording speeds.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.
device: Target device(s) for running the backend.

Returns:

AcousticEncodingResultBase: Object containing embeddings for each input array.

encode_session(*, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, speed=1.0, bandpass_fmin=0, bandpass_fmax=15000, half_precision=False, max_audio_duration_min=None, show_stats=None, progress_callback=None, device='CPU', max_n_files=65536)

Create an encoding session with explicit resource configuration.

Return type:

AcousticEncodingSession

Args:

species_list: Ordered species collection used during the session.
model_path: Path to the acoustic model binary.
n_producers: Threads tasked with producing audio batches.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
speed: Resampling multiplier to accommodate different recording speeds.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.
device: Target device(s) for running the backend.
max_n_files: Upper bound on files to limit resource consumption.

Returns:

AcousticEncodingSession: Session capable of running encodings.

classmethod get_embeddings_dim()
Return type:

int

classmethod get_sample_rate()
Return type:

int

classmethod get_segment_size_s()
Return type:

float

classmethod get_segment_size_samples()
Return type:

int

classmethod get_sig_fmax()
Return type:

int

classmethod get_sig_fmin()
Return type:

int

classmethod get_version()

Return the string label that identifies the acoustic model version.

Return type:

Literal['2.4']

Returns:

str: The registered version string identifying the supported model version.

classmethod load(model_path, species_list, backend_type, backend_kwargs)
Return type:

AcousticModelPerchV2

classmethod load_custom(model_path, species_list, backend_type, backend_kwargs, check_validity)
Return type:

AcousticModelPerchV2

predict(inp, /, *, top_k=5, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, bandpass_fmin=0, bandpass_fmax=15000, speed=1.0, apply_sigmoid=False, sigmoid_sensitivity=None, default_confidence_threshold=0.1, custom_confidence_thresholds=None, custom_species_list=None, half_precision=False, max_audio_duration_min=None, device='CPU', show_stats=None, progress_callback=None)

Run prediction with the Perch V2 model on files or paths with configurable inference options.

Return type:

AcousticPredictionResultBase

Args:

inp: Path(s) or string(s) pointing to audio files to analyze.
top_k: Number of highest-confidence results to return per segment.
n_producers: Threads tasked with producing audio batches.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
speed: Resampling multiplier to accommodate different recording speeds.
apply_sigmoid: Whether to transform logits with a sigmoid.
sigmoid_sensitivity: Optional scale for the sigmoid function.
default_confidence_threshold: Base threshold to emit a detection.
custom_confidence_thresholds: Species-specific override thresholds.
custom_species_list: Path or iterable defining a subset of species.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
device: Target device(s) for running the backend.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.

Returns:

AcousticPredictionResultBase: Object containing detected species and confidence scores.

predict_arrays(inp, /, *, top_k=5, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, bandpass_fmin=0, bandpass_fmax=15000, speed=1.0, apply_sigmoid=False, sigmoid_sensitivity=None, default_confidence_threshold=0.1, custom_confidence_thresholds=None, custom_species_list=None, half_precision=False, max_audio_duration_min=None, device='CPU', show_stats=None, progress_callback=None)

Run prediction with the Perch V2 model directly on in-memory audio arrays.

Return type:

AcousticPredictionResultBase

Args:

inp: Tuple(s) of (audio ndarray, sampling rate).
top_k: Number of highest-confidence results to return per segment.
n_producers: Threads generating batches from the arrays.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
speed: Resampling multiplier to accommodate different recording speeds.
apply_sigmoid: Whether to transform logits with a sigmoid.
sigmoid_sensitivity: Optional scale for the sigmoid function.
default_confidence_threshold: Base threshold to emit a detection.
custom_confidence_thresholds: Species-specific override thresholds.
custom_species_list: Path or iterable defining a subset of species.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
device: Target device(s) for running the backend.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.

Returns:

AcousticPredictionResultBase: Object containing detected species and confidence scores.

predict_session(*, top_k=5, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, speed=1.0, bandpass_fmin=0, bandpass_fmax=15000, apply_sigmoid=False, sigmoid_sensitivity=None, default_confidence_threshold=0.1, custom_confidence_thresholds=None, custom_species_list=None, half_precision=False, max_audio_duration_min=None, show_stats=None, progress_callback=None, device='CPU', max_n_files=65536)

Create a prediction session allowing manual control over the inference lifecycle.

Return type:

AcousticPredictionSession

Args:

species_list: Ordered species collection used during the session.
model_path: Path to the acoustic model binary.
top_k: Number of highest-confidence results to return per segment.
n_producers: Threads tasked with producing audio batches.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
speed: Resampling multiplier to accommodate different recording speeds.
apply_sigmoid: Whether to transform logits with a sigmoid.
sigmoid_sensitivity: Optional scale for the sigmoid function.
default_confidence_threshold: Base threshold to emit a detection.
custom_confidence_thresholds: Species-specific override thresholds.
custom_species_list: Path or iterable defining a subset of species.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.
device: Target device(s) for running the backend.
max_n_files: Upper bound on files to limit resource consumption.

Returns:

AcousticPredictionSession: Session capable of running predictions.

class birdnet.AcousticModelV2_4(model_path, species_list, is_custom_model, backend_type, backend_kwargs)

Bases: AcousticModelBase

Attributes:
backend_kwargs
backend_type
is_custom_model
model_path
n_species
species_list

Methods

encode(inp, /, *[, n_producers, n_workers, ...])

Run encoding with the BirdNET 2.4 model on files or paths to obtain embeddings.

encode_arrays(inp, /, *[, n_producers, ...])

Run encoding with the BirdNET 2.4 model directly on in-memory audio arrays.

encode_session(*[, n_producers, n_workers, ...])

Create an encoding session with explicit resource configuration.

get_version()

Return the string label that identifies the acoustic model version.

predict(inp, /, *[, top_k, n_producers, ...])

Run prediction with the BirdNET 2.4 model on files or paths with configurable inference options.

predict_arrays(inp, /, *[, top_k, ...])

Run prediction with the BirdNET 2.4 model directly on in-memory audio arrays.

predict_session(*[, top_k, n_producers, ...])

Create a prediction session allowing manual control over the inference lifecycle.

get_embeddings_dim

get_sample_rate

get_segment_size_s

get_segment_size_samples

get_sig_fmax

get_sig_fmin

load

load_custom

encode(inp, /, *, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, speed=1.0, bandpass_fmin=0, bandpass_fmax=15000, half_precision=False, max_audio_duration_min=None, show_stats=None, progress_callback=None, device='CPU')

Run encoding with the BirdNET 2.4 model on files or paths to obtain embeddings.

Return type:

AcousticEncodingResultBase

Args:

inp: Path(s) or string(s) pointing to audio files to encode.
n_producers: Threads tasked with producing audio batches.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
speed: Resampling multiplier to accommodate different recording speeds.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.
device: Target device(s) for running the backend.

Returns:

AcousticEncodingResultBase: Object containing embeddings for each file.

encode_arrays(inp, /, *, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, speed=1.0, bandpass_fmin=0, bandpass_fmax=15000, half_precision=False, max_audio_duration_min=None, show_stats=None, progress_callback=None, device='CPU')

Run encoding with the BirdNET 2.4 model directly on in-memory audio arrays.

Return type:

AcousticEncodingResultBase

Args:

inp: Tuple(s) of (audio ndarray, sampling rate).
n_producers: Threads generating batches from the arrays.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
speed: Resampling multiplier to accommodate different recording speeds.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.
device: Target device(s) for running the backend.

Returns:

AcousticEncodingResultBase: Object containing embeddings for each input array.

encode_session(*, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, speed=1.0, bandpass_fmin=0, bandpass_fmax=15000, half_precision=False, max_audio_duration_min=None, show_stats=None, progress_callback=None, device='CPU', max_n_files=65536)

Create an encoding session with explicit resource configuration.

Return type:

AcousticEncodingSession

Args:

species_list: Ordered species collection used during the session.
model_path: Path to the acoustic model binary.
n_producers: Threads tasked with producing audio batches.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
speed: Resampling multiplier to accommodate different recording speeds.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.
device: Target device(s) for running the backend.
max_n_files: Upper bound on files to limit resource consumption.

Returns:

AcousticEncodingSession: Session capable of running encodings.

classmethod get_embeddings_dim()
Return type:

int

classmethod get_sample_rate()
Return type:

int

classmethod get_segment_size_s()
Return type:

float

classmethod get_segment_size_samples()
Return type:

int

classmethod get_sig_fmax()
Return type:

int

classmethod get_sig_fmin()
Return type:

int

classmethod get_version()

Return the string label that identifies the acoustic model version.

Return type:

Literal['2.4']

Returns:

str: The registered version string identifying the supported model version.

classmethod load(model_path, species_list, backend_type, backend_kwargs)
Return type:

AcousticModelV2_4

classmethod load_custom(model_path, species_list, backend_type, backend_kwargs, check_validity)
Return type:

AcousticModelV2_4

predict(inp, /, *, top_k=5, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, bandpass_fmin=0, bandpass_fmax=15000, speed=1.0, apply_sigmoid=True, sigmoid_sensitivity=1.0, default_confidence_threshold=0.1, custom_confidence_thresholds=None, custom_species_list=None, half_precision=False, max_audio_duration_min=None, device='CPU', show_stats=None, progress_callback=None)

Run prediction with the BirdNET 2.4 model on files or paths with configurable inference options.

Return type:

AcousticPredictionResultBase

Args:

inp: Path(s) or string(s) pointing to audio files to analyze.
top_k: Number of highest-confidence results to return per segment.
n_producers: Threads tasked with producing audio batches.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
speed: Resampling multiplier to accommodate different recording speeds.
apply_sigmoid: Whether to transform logits with a sigmoid.
sigmoid_sensitivity: Optional scale for the sigmoid function.
default_confidence_threshold: Base threshold to emit a detection.
custom_confidence_thresholds: Species-specific override thresholds.
custom_species_list: Path or iterable defining a subset of species.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
device: Target device(s) for running the backend.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.

Returns:

AcousticPredictionResultBase: Object containing detected species and confidence scores.

predict_arrays(inp, /, *, top_k=5, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, bandpass_fmin=0, bandpass_fmax=15000, speed=1.0, apply_sigmoid=True, sigmoid_sensitivity=1.0, default_confidence_threshold=0.1, custom_confidence_thresholds=None, custom_species_list=None, half_precision=False, max_audio_duration_min=None, device='CPU', show_stats=None, progress_callback=None)

Run prediction with the BirdNET 2.4 model directly on in-memory audio arrays.

Return type:

AcousticPredictionResultBase

Args:

inp: Tuple(s) of (audio ndarray, sampling rate).
top_k: Number of highest-confidence results to return per segment.
n_producers: Threads generating batches from the arrays.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
speed: Resampling multiplier to accommodate different recording speeds.
apply_sigmoid: Whether to transform logits with a sigmoid.
sigmoid_sensitivity: Optional scale for the sigmoid function.
default_confidence_threshold: Base threshold to emit a detection.
custom_confidence_thresholds: Species-specific override thresholds.
custom_species_list: Path or iterable defining a subset of species.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
device: Target device(s) for running the backend.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.

Returns:

AcousticPredictionResultBase: Object containing detected species and confidence scores.

predict_session(*, top_k=5, n_producers=1, n_workers=None, batch_size=1, prefetch_ratio=1, overlap_duration_s=0, speed=1.0, bandpass_fmin=0, bandpass_fmax=15000, apply_sigmoid=True, sigmoid_sensitivity=1.0, default_confidence_threshold=0.1, custom_confidence_thresholds=None, custom_species_list=None, half_precision=False, max_audio_duration_min=None, show_stats=None, progress_callback=None, device='CPU', max_n_files=65536)

Create a prediction session allowing manual control over the inference lifecycle.

Return type:

AcousticPredictionSession

Args:

species_list: Ordered species collection used during the session.
model_path: Path to the acoustic model binary.
top_k: Number of highest-confidence results to return per segment.
n_producers: Threads tasked with producing audio batches.
n_workers: Optional worker count for backend processing.
batch_size: Number of records evaluated per inference call.
prefetch_ratio: How many batches to decode ahead of processing.
overlap_duration_s: Seconds of overlap between sliding windows.
bandpass_fmin: Lower bound for the bandpass filter in Hz.
bandpass_fmax: Upper bound for the bandpass filter in Hz.
speed: Resampling multiplier to accommodate different recording speeds.
apply_sigmoid: Whether to transform logits with a sigmoid.
sigmoid_sensitivity: Optional scale for the sigmoid function.
default_confidence_threshold: Base threshold to emit a detection.
custom_confidence_thresholds: Species-specific override thresholds.
custom_species_list: Path or iterable defining a subset of species.
half_precision: Use float16 where supported for inference.
max_audio_duration_min: Maximum total duration per call.
show_stats: Level of statistics logging to emit.
progress_callback: Optional callback to report progress.
device: Target device(s) for running the backend.
max_n_files: Upper bound on files to limit resource consumption.

Returns:

AcousticPredictionSession: Session capable of running predictions.

class birdnet.AcousticPredictionResultBase(inputs, input_durations, model_path, model_fmin, model_fmax, model_sr, model_precision, model_version, species_list, segment_duration_s, overlap_duration_s, speed, tensor)

Bases: AcousticResultBase

Attributes:
hop_duration_s
input_durations

Durations of each input in seconds.

inputs

Identifiers for each input processed by the result.

max_n_segments
memory_size_MiB

Memory usage for the base result metadata.

model_fmax

Upper bound of the model’s bandpass filter.

model_fmin

Lower bound of the model’s bandpass filter.

model_path
model_precision
model_sr

Sampling rate expected by the model.

model_version
n_inputs

Number of inputs in the result payload.

n_species
overlap_duration_s

Overlap duration between sliding windows in seconds.

segment_duration_s

Segment duration as configured on the inference pipeline.

species_ids
species_list
species_masked
species_probs
speed

Speed multiplier that was applied to the inputs.

top_k
unprocessable_inputs

Methods

to_dataframe()

Convert the structured array into a pandas DataFrame.

to_parquet(path, *[, compression, ...])

Write the contents to disk as an Arrow Parquet file.

load

save

to_arrow_table

to_csv

to_structured_array

property max_n_segments: int
property memory_size_MiB: float

Memory usage for the base result metadata.

Returns:

float: Memory used by metadata buffers in mebibytes.

property n_species: int
property species_ids: ndarray
property species_list: ndarray
property species_masked: ndarray
property species_probs: ndarray
to_arrow_table()
Return type:

Table

to_csv(path, *, encoding='utf-8', buffer_size_kb=1024, silent=False)
Return type:

None

to_structured_array()
Return type:

ndarray

property top_k: int
property unprocessable_inputs: ndarray
class birdnet.AcousticPredictionSession(species_list, model_path, model_segment_size_s, model_sample_rate, model_is_custom, model_sig_fmin, model_sig_fmax, model_version, model_backend_type, model_backend_custom_kwargs, *, top_k, n_producers, n_workers, batch_size=1, prefetch_ratio=1, overlap_duration_s, speed, bandpass_fmin, bandpass_fmax, apply_sigmoid, sigmoid_sensitivity, default_confidence_threshold, custom_confidence_thresholds, custom_species_list, half_precision=True, max_audio_duration_min, show_stats, progress_callback, device, max_n_files)

Bases: AcousticSessionBase

Methods

cancel

end

run

run_arrays

run(inputs)
Return type:

AcousticFilePredictionResult

run_arrays(inputs)
Return type:

AcousticDataPredictionResult

class birdnet.AcousticProgressStats(finished, buffer_stats, producer_stats, worker_stats, wall_time_s, memory_usage_MiB, memory_usage_max_MiB, cpu_usage_pct, cpu_usage_max_pct, progress_pct, est_remaining_time_s, processed_segments, processed_batches, total_segments, speed_xrt, speed_seg_per_s)

Bases: object

Attributes:
est_remaining_time_hhmmss
buffer_stats: BufferStats
cpu_usage_max_pct: float
cpu_usage_pct: float
property est_remaining_time_hhmmss: str | None
est_remaining_time_s: float | None
finished: bool
memory_usage_MiB: float
memory_usage_max_MiB: float
processed_batches: int
processed_segments: int
producer_stats: ProducerStats
progress_pct: float
speed_seg_per_s: float | None
speed_xrt: float | None
total_segments: int | None
wall_time_s: float
worker_stats: WorkerStats | None
class birdnet.GeoModelV2_4(model_path, species_list, is_custom_model, backend_type, backend_kwargs)

Bases: GeoModelBase

Attributes:
backend_kwargs
backend_type
is_custom_model
model_path
n_species
species_list

Methods

get_model_type

get_version

load

load_custom

predict

predict_session

classmethod get_model_type()
Return type:

Literal['acoustic', 'geo']

classmethod get_version()
Return type:

Literal['2.4']

classmethod load(model_path, species_list, backend_type, backend_kwargs)
Return type:

GeoModelV2_4

classmethod load_custom(model_path, species_list, backend_type, backend_kwargs, check_validity)
Return type:

GeoModelV2_4

predict(latitude, longitude, /, *, week=None, min_confidence=0.03, half_precision=False, device='CPU')
Return type:

GeoPredictionResult

predict_session(*, min_confidence=0.03, half_precision=False, device='CPU')
Return type:

GeoPredictionSession

class birdnet.GeoPredictionResult(model_path, model_version, model_precision, latitude, longitude, week, species_masked, species_ids, species_probs, species_list)

Bases: ResultBase

Attributes:
latitude
longitude
memory_size_MiB
model_path
model_precision
model_version
n_species
species_ids
species_list
species_masked
species_probs
week

Methods

load

save

to_arrow_table

to_csv

to_dataframe

to_set

to_structured_array

to_txt

property latitude: int
property longitude: int
property memory_size_MiB: float
property n_species: int
property species_ids: ndarray
property species_list: ndarray
property species_masked: ndarray
property species_probs: ndarray
to_arrow_table(sort_by='species')
Return type:

Table

to_csv(csv_out_path, sort_by='species', encoding='utf8')
Return type:

None

to_dataframe(sort_by='species')
Return type:

DataFrame

to_set()
Return type:

set[str]

to_structured_array(sort_by='species')
Return type:

ndarray

to_txt(txt_out_path, sort_by='species', encoding='utf8')
Return type:

None

property week: int
class birdnet.GeoPredictionSession(species_list, model_path, model_is_custom, model_version, model_backend_type, model_backend_custom_kwargs, *, min_confidence, half_precision, device)

Bases: GeoSessionBase

Methods

run

run(latitude, longitude, /, *, week=None)
Return type:

GeoPredictionResult

birdnet.get_package_logger()
Return type:

Logger

birdnet.load(model_type, version, backend, /, *, precision='fp32', lang='en_us', **model_kwargs)
Return type:

ModelBase

birdnet.load_custom(model_type, version, backend, model, species_list, /, *, precision='fp32', check_validity=True, **model_kwargs)
Return type:

ModelBase

birdnet.load_perch_v2(device)
Return type:

AcousticModelPerchV2