
Commit a950341

create new audio duration checking with the ability to account for early end of file
drop defunct audio functions
1 parent: c7bd158

File tree: 3 files changed (+42, -137 lines)


buzzcode/analyze_audio.py

Lines changed: 5 additions & 5 deletions
@@ -10,11 +10,10 @@
 from buzzcode.embedders import load_embedder_model, load_embedder_config
 from buzzcode.analysis import load_model, translate_results, suffix_result, suffix_partial, solve_memory, melt_coverage, \
     get_gaps, smooth_gaps, gaps_to_chunklist, stitch_partial
-from buzzcode.audio import stream_to_queue
+from buzzcode.audio import stream_to_queue, get_duration
 import pandas as pd
 import os
 import re
-import librosa
 import multiprocessing
 from queue import Empty
 import json
@@ -124,7 +123,7 @@ def worker_logger():
             warnings.warn(f'file too small, skipping: {path_audio}')
             continue

-        duration_audio = librosa.get_duration(path=path_audio)
+        duration_audio = get_duration(path_audio)

        paths_chunks = search_dir(os.path.dirname(base_out), [os.path.basename(base_out) + suffix_partial])

@@ -358,7 +357,8 @@ def analyze_assignment(assignment):
        base_out = re.sub(dir_audio, dir_out, base_out)

        printlog(f"combining result chunks for {re.sub(dir_out, '', base_out)}", 1)
-        stitch_partial(base_out, c['duration_audio'], framelength)
+        # re-calculate duration to catch bad EOF
+        stitch_partial(base_out, get_duration(c['path_audio']), framelength)

    timer_total.stop()
    closing_message = f"{datetime.now()} - analysis complete; total time: {timer_total.get_total()}s"
@@ -370,4 +370,4 @@ def analyze_assignment(assignment):


 if __name__ == "__main__":
-    analyze_batch(modelname='new_light', gpu=False, vram=1, cpus=4, memory_allot=10, verbosity=2)
+    analyze_batch(modelname='model_general', gpu=False, vram=1, cpus=4, memory_allot=10, verbosity=2)
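
The duration is re-calculated before stitching because a bad end of file only surfaces while streaming: the value computed at planning time can overstate the usable audio, and the second get_duration() call picks up any marker written in the meantime. A simplified sketch of that ordering, using the functions from this commit with placeholder paths, a single-chunk plan, and an assumed resample rate and frame length:

from queue import Queue

from buzzcode.analysis import stitch_partial
from buzzcode.audio import get_duration, stream_to_queue

path_audio = './audio_in/recording.mp3'   # placeholder input
base_out = './output/recording'           # placeholder output stem
framelength = 0.96                        # assumed analysis frame length, in seconds

# Planning: duration comes from the container header (no marker exists yet).
duration_planned = get_duration(path_audio)
chunklist = [(0.0, duration_planned)]     # real code plans chunks via gaps_to_chunklist

# Streaming: a short read near the end of the file makes stream_to_queue call mark_eof().
# An in-process Queue stands in here for the multiprocessing queue the workers share.
q_assignments = Queue()
stream_to_queue(path_audio, chunklist, q_assignments, resample_rate=16000)  # assumed rate

# Stitching: re-calculate the duration so a recorded early EOF shrinks the expected coverage.
stitch_partial(base_out, get_duration(path_audio), framelength)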

buzzcode/audio.py

Lines changed: 35 additions & 132 deletions
@@ -1,36 +1,28 @@
-import multiprocessing
+import glob
 import os
-import sys
-import warnings
+import re
+
 import librosa
-import soundfile as sf
 import numpy as np
+import soundfile as sf

+from buzzcode.config import tag_eof

-def load_audio(path_audio, time_start=0, time_stop=None, resample_rate=None):
-    track = sf.SoundFile(path_audio)
-
-    can_seek = track.seekable()  # True
-    if not can_seek:
-        raise ValueError("Input file not compatible with seeking")
-
-    if time_stop is None:
-        time_stop = librosa.get_duration(path=path_audio)

-    sr = track.samplerate
-    start_frame = round(sr * time_start)
-    frames_to_read = round(sr * (time_stop - time_start))
-    track.seek(start_frame)
-    audio_data = track.read(frames_to_read)
+def get_duration(path_audio):
+    track = sf.SoundFile(path_audio)

-    if audio_data.shape[1] > 1:  # if multi-channel, convert to mono
-        audio_data = np.mean(audio_data, axis=-1)
+    base_eof = os.path.splitext(path_audio)[0] + '_EOF'
+    paths_eof = glob.glob(base_eof + '*')

-    if resample_rate is not None:
-        audio_data = librosa.resample(y=audio_data, orig_sr=sr, target_sr=resample_rate)
-        sr = resample_rate
+    if paths_eof:
+        if len(paths_eof) > 1:
+            raise ValueError(f"multiple EOF files found for {path_audio}")
+        frame_final = int(re.search(base_eof + "_(.*)", paths_eof[0]).group(1))
+    else:
+        frame_final = track.frames

-    return audio_data, sr
+    return frame_final / track.samplerate


 def frame_audio(audio_data, framelength, samplerate, framehop_s):
@@ -49,104 +41,13 @@ def frame_audio(audio_data, framelength, samplerate, framehop_s):
     return frames


-def extract_frequencies(audio_data, sr=44100, n_freq=3, freq_range=(300, 600)):
-    # n_fft = 4096
-    n_fft = 8192
-    freqs = np.array((np.arange(0, 1 + n_fft / 2) * sr) / n_fft)
-
-    index_min = np.argmin(abs(freqs - freq_range[0]))
-    index_max = np.argmin(abs(freqs - freq_range[1]))
-
-    spectra = abs(librosa.stft(audio_data, n_fft=n_fft))
-
-    spectrum = np.mean(spectra[index_min:index_max], axis=1)  # also consider mode or median
-
-    spectrum_ispeak = np.array([spectrum[i-1] < spectrum[i] > spectrum[i+1] for i in range(1, len(spectrum) - 1)])
-
-    peaks_indices = np.array([e[0] + 1 for e in enumerate(spectrum_ispeak) if e[1]])
-    peak_amplitudes = spectrum[peaks_indices]
-
-    # I need to check that index + index_min properly translates the indices back to the freqs
-    try:
-        # it's technically possible to get an error here if there are fewer than n_freq peaks in the spectrum
-        max_indices = peaks_indices[np.argpartition(peak_amplitudes, -n_freq)[-n_freq:]]
-        max_freqs = sorted(freqs[max_indices + index_min])
-    except (IndexError, ValueError):
-        max_indices = peaks_indices
-        max_freqs = sorted(freqs[max_indices + index_min])
-        max_freqs = np.append(max_freqs, [0 for _ in range(n_freq - len(max_freqs))])
-
-    return max_freqs
-
-
-def snip_audio(sniplist, cpus, conflict_out='skip'):
-    """ takes sniplist as list of tuples (path_raw, path_snip, start, end) and cuts those snips out of larger raw
-    audio files."""
-    raws = list(set([t[0] for t in sniplist]))
-    snips = list(set([t[1] for t in sniplist]))
-
-    control_dict = {}
-    for raw in raws:
-        rawsnips = [t for t in sniplist if t[0] == raw]
-        rawsnips = sorted(rawsnips, key=lambda x: x[2])  # sort for sequential seeking
-        control_dict.update({raw: rawsnips})
-
-    q_raw = multiprocessing.Queue()
+def mark_eof(path_audio, frame_final):
+    path_eof = os.path.splitext(path_audio)[0] + tag_eof + '_' + str(frame_final)
+    open(path_eof, 'a').close()

-    for raw in raws:
-        q_raw.put(raw)

-    for i in range(cpus):
-        q_raw.put("terminate")
-
-    dirs_out = list(set([os.path.dirname(snip) for snip in snips]))
-    for d in dirs_out:
-        os.makedirs(d, exist_ok=True)
-
-    def worker_snipper(worker_id):
-        print(f'snipper {worker_id}: starting')
-
-        # Raw loop
-        #
-        while True:
-            raw_assigned = q_raw.get()
-            if raw_assigned == 'terminate':
-                print(f"snipper {worker_id}: received terminate signal; exiting")
-                sys.exit(0)
-
-            print(f'snipper {worker_id}: starting on raw {raw_assigned}')
-            sniplist_assigned = control_dict[raw_assigned]
-
-            track = sf.SoundFile(raw_assigned)
-            samplerate_native = track.samplerate
-
-            # snip loop
-            #
-            for path_raw, path_snip, start, end in sniplist_assigned:
-                if os.path.exists(path_snip) and conflict_out == 'skip':
-                    continue
-
-                # print(f'snipper {worker_id}: snipping {path_snip}')
-                start_frame = round(samplerate_native * start)
-                frames_to_read = round(samplerate_native * (end - start))
-                track.seek(start_frame)
-
-                audio_data = track.read(frames_to_read)
-
-                sf.write(path_snip, audio_data, samplerate_native)
-
-    process_list = [multiprocessing.Process(target=worker_snipper, args=[c]) for c in range(cpus)]
-    for p in process_list:
-        p.start()
-
-    for p in process_list:
-        p.join()
-
-    print('snipping finished!')
-
-
-def stream_to_queue(path_audio, chunklist, q_assignments, resample_rate, smallread_tolerance=0.98):
-    def chunk_to_assignment(chunk, track, samplerate_native):
+def stream_to_queue(path_audio, chunklist, q_assignments, resample_rate, mark_bad_eof=True):
+    def queue_assignment(chunk, track, samplerate_native):
         sample_from = int(chunk[0] * samplerate_native)
         sample_to = int(chunk[1] * samplerate_native)
         read_size = sample_to - sample_from
@@ -156,18 +57,14 @@ def chunk_to_assignment(chunk, track, samplerate_native):
        if track.channels > 1:
            samples = np.mean(samples, axis=1)

-        # we've found that many of our files give an incorrect .frames count, or else headers are broken
-        # this results in a silent failure where no samples are returned
+        # we've found that many of our .mp3 files give an incorrect .frames count, or else headers are broken
+        # this appears to be because our recorders ran out of battery while recording
+        # SoundFile does not handle this gracefully, so we catch it here.
        n_samples = len(samples)
-        if n_samples == 0:
-            warnings.warn(f"no data read for chunk {chunk} in file {path_audio}")
-
-        elif (n_samples/read_size) < smallread_tolerance:  # there's always a tiny smallread at end of file
+        if n_samples < read_size:
            perc = int((n_samples / read_size) * 100)
-
-            warnings.warn(
-                f"unexpectedly small read for chunk {chunk} for file {path_audio}. "
-                f"Received {perc}% of samples requested ({read_size}/{n_samples})")
+            f"unexpectedly small read for chunk {chunk} for file {path_audio}. "
+            f"Received {perc}% of samples requested ({read_size}/{n_samples})"

        samples = librosa.resample(y=samples, orig_sr=samplerate_native, target_sr=resample_rate)

@@ -182,7 +79,13 @@ def chunk_to_assignment(chunk, track, samplerate_native):
    track = sf.SoundFile(path_audio)
    samplerate_native = track.samplerate

-    for chunk in chunklist:  # TODO: check for bad audio because samples returned is too small?
-        chunk_to_assignment(chunk, track, samplerate_native)
+    for chunk in chunklist:
+        queue_assignment(chunk, track, samplerate_native)
+
+    audio_duration = track.frames / track.samplerate
+    chunk_near_end = abs(chunk[1] - audio_duration) < 10
+
+    if mark_bad_eof and chunk_near_end:
+        mark_eof(path_audio, track.tell())

    track.close()
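
On the reading side, get_duration() only falls back to the container's frame count when no marker exists; otherwise the final frame is parsed straight out of the marker's filename and divided by the sample rate. A standalone sketch of that lookup, assuming marker files named <stem><suffix>_<final frame> as above (get_duration globs for an '_EOF' suffix, while mark_eof composes names from tag_eof):

import glob
import os
import re

def final_frame_from_marker(path_audio, marker_suffix='_EOF'):
    # Mirrors the lookup in get_duration(): glob for <stem><suffix>* and parse
    # the trailing frame number; None means "trust the container's frame count".
    base = os.path.splitext(path_audio)[0] + marker_suffix
    markers = glob.glob(glob.escape(base) + '*')
    if not markers:
        return None
    if len(markers) > 1:
        raise ValueError(f"multiple EOF files found for {path_audio}")
    return int(re.search(re.escape(base) + r'_(\d+)', markers[0]).group(1))

# Hypothetical example: a 44.1 kHz recording whose marker records frame 1323000
# would report 1323000 / 44100 = 30.0 seconds, regardless of the header's claim.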

buzzcode/config.py

Lines changed: 2 additions & 0 deletions
@@ -16,3 +16,5 @@
 # Analysis
 dir_audio_in = './audio_in'

+# tags
+tag_eof = '_finalframe'
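
With this tag, mark_eof() leaves an empty sibling file next to the recording whose name encodes the last readable frame. A small sketch of the composition for a hypothetical recording.mp3 that stopped being readable at frame 1323000:

import os

tag_eof = '_finalframe'        # value added in this commit

path_audio = 'recording.mp3'   # hypothetical recording
frame_final = 1323000          # hypothetical last readable frame

# Same composition as mark_eof(): <stem> + tag + '_' + final frame, created empty.
path_eof = os.path.splitext(path_audio)[0] + tag_eof + '_' + str(frame_final)
open(path_eof, 'a').close()
print(path_eof)                # recording_finalframe_1323000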
