Skip to content

Commit c98044f

Browse files
authored
Merge pull request #34 from dotX12/dev
Dev to master
2 parents 5c1ec65 + ad14d2a commit c98044f

File tree

2 files changed

+102
-33
lines changed

2 files changed

+102
-33
lines changed

shazamio/algorithm.py

Lines changed: 34 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
from copy import copy
22
from typing import List, Optional, Any
3-
4-
from numpy import hanning, log, maximum, fft, array
3+
import numpy as np
54

65
from .enums import FrequencyBand
76
from .signature import DecodedMessage, FrequencyPeak
87

9-
HANNING_MATRIX = hanning(2050)[1:-1] # Wipe trailing and leading zeroes
8+
HANNING_MATRIX = np.hanning(2050)[1:-1] # Wipe trailing and leading zeroes
109

1110

1211
class RingBuffer(list):
@@ -154,10 +153,10 @@ def do_fft(self, batch_of_128_s16le_mono_samples):
154153
# The array is pre-multiplied to apply a windowing function before the DFT
155154
# (a slightly rounded Hanning window without zeros at the edges)
156155

157-
fft_results: array = fft.rfft(HANNING_MATRIX * excerpt_from_ring_buffer)
156+
fft_results: np.array = np.fft.rfft(HANNING_MATRIX * excerpt_from_ring_buffer)
158157

159158
fft_results = (fft_results.real**2 + fft_results.imag**2) / (1 << 17)
160-
fft_results = maximum(fft_results, 0.0000000001)
159+
fft_results = np.maximum(fft_results, 0.0000000001)
161160

162161
self.fft_outputs.append(fft_results)
163162

@@ -168,39 +167,40 @@ def do_peak_spreading_and_recognition(self):
168167
self.do_peak_recognition()
169168

170169
def do_peak_spreading(self):
    """Spread the peaks of the most recent FFT output in frequency and time.

    Reads the newest row of ``self.fft_outputs`` (the entry just before
    ``self.fft_outputs.position``) and then:

    1. Frequency-domain spreading: every bin except the last two is
       replaced by the maximum of itself and the two bins that follow it.
       The last two bins have no full 3-bin window and are kept as they are.
    2. Time-domain spreading: the rows of ``self.spread_fft_output`` that
       sit 1, 3 and 6 slots before its current ``position`` (modulo
       ``buffer_size``) are raised, bin by bin, to a running maximum that
       starts from the frequency-spread row and is carried through each of
       those rows in turn.

    The frequency-spread row is finally appended to
    ``self.spread_fft_output``. The source row in ``self.fft_outputs`` is
    not modified, and nothing is returned.
    """
    origin_last_fft = np.asarray(
        self.fft_outputs[self.fft_outputs.position - 1]
    )

    # Frequency-domain spreading. Slicing the untouched source row gives the
    # same result as the sequential loop (which only ever looks ahead), and
    # the [:-2] bound covers every bin that has two successors -- the
    # previous [:-3] bound skipped the last such bin.
    spread_last_fft = origin_last_fft.copy()
    spread_last_fft[:-2] = np.maximum(
        np.maximum(origin_last_fft[:-2], origin_last_fft[1:-1]),
        origin_last_fft[2:],
    )

    # Time-domain spreading: carry a running maximum back through the rows
    # written 1, 3 and 6 steps ago, updating each of them in place.
    spread_fft_output = self.spread_fft_output
    running_max = spread_last_fft

    for former_fft_num in (-1, -3, -6):
        former_index = (
            spread_fft_output.position + former_fft_num
        ) % spread_fft_output.buffer_size

        running_max = np.maximum(spread_fft_output[former_index], running_max)
        spread_fft_output[former_index] = running_max.tolist()

    # Save output locally, as a plain list like the rows updated above.
    spread_fft_output.append(spread_last_fft.tolist())
204204

205205
def do_peak_recognition(self):
206206

@@ -256,14 +256,15 @@ def do_peak_recognition(self):
256256
fft_number = self.spread_fft_output.num_written - 46
257257

258258
peak_magnitude = (
259-
log(max(1 / 64, fft_minus_46[bin_position])) * 1477.3 + 6144
259+
np.log(max(1 / 64, fft_minus_46[bin_position])) * 1477.3
260+
+ 6144
260261
)
261262
peak_magnitude_before = (
262-
log(max(1 / 64, fft_minus_46[bin_position - 1])) * 1477.3
263+
np.log(max(1 / 64, fft_minus_46[bin_position - 1])) * 1477.3
263264
+ 6144
264265
)
265266
peak_magnitude_after = (
266-
log(max(1 / 64, fft_minus_46[bin_position + 1])) * 1477.3
267+
np.log(max(1 / 64, fft_minus_46[bin_position + 1])) * 1477.3
267268
+ 6144
268269
)
269270

tests/test_peak_spreading_numpy.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
from pydub import AudioSegment
2+
from typing import List
3+
4+
from shazamio.algorithm import SignatureGenerator
5+
from types import MethodType
6+
7+
8+
def do_peak_spreading_non_numpy(self):
    """Pure-Python reference implementation of peak spreading.

    Copies the newest row of ``self.fft_outputs`` and walks its 1025 bins:
    each bin below 1023 becomes the maximum of itself and the next two bins,
    then a running maximum is carried through the rows of
    ``self.spread_fft_output`` found 1, 3 and 6 slots before its current
    position, updating those rows in place. The spread copy is appended to
    ``self.spread_fft_output`` at the end.
    """
    history = self.spread_fft_output
    newest_fft: List[float] = self.fft_outputs[self.fft_outputs.position - 1]
    spread_row: List[float] = list(newest_fft)

    for bin_index in range(1025):
        # Frequency-domain spreading: look ahead over a 3-bin window.
        if bin_index < 1023:
            window = spread_row[bin_index : bin_index + 3]
            spread_row[bin_index] = max(window)

        # Time-domain spreading: push the running maximum into earlier rows.
        carried = spread_row[bin_index]
        for lookback in [-1, -3, -6]:
            earlier_row = history[
                (history.position + lookback) % history.buffer_size
            ]
            carried = max(earlier_row[bin_index], carried)
            earlier_row[bin_index] = carried

    # Save output locally
    history.append(spread_row)
39+
40+
41+
async def test_do_peak_spreading_numpy():
    """Check that the NumPy peak spreading yields the same signature as the
    pure-Python reference implementation on the bundled sample audio."""
    audio = AudioSegment.from_file(file="examples/data/dora.ogg")

    # Normalise the clip: 16-bit samples, 16 kHz, mono.
    audio = audio.set_sample_width(2)
    audio = audio.set_frame_rate(16000)
    audio = audio.set_channels(1)

    def first_signature(generator):
        # Feed the whole clip, cap the duration, then poll until a
        # signature comes back.
        generator.feed_input(audio.get_array_of_samples())
        generator.MAX_TIME_SECONDS = 12

        result = generator.get_next_signature()
        while not result:
            result = generator.get_next_signature()
        return result

    # Reference run: swap in the pure-Python spreading on this instance only.
    signature_generator_non_numpy = SignatureGenerator()
    signature_generator_non_numpy.do_peak_spreading = MethodType(
        do_peak_spreading_non_numpy, signature_generator_non_numpy
    )
    signature_non_numpy = first_signature(signature_generator_non_numpy)

    # Run under test: the stock NumPy-based implementation.
    signature_generator = SignatureGenerator()
    signature = first_signature(signature_generator)

    assert signature.encode_to_binary() == signature_non_numpy.encode_to_binary()

0 commit comments

Comments
 (0)