diff --git a/tests/audio/empty.csv b/tests/audio/empty.csv
deleted file mode 100644
index fa19fd08..00000000
--- a/tests/audio/empty.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-path,duration
-tests/audio/nz/f5_script2_ipad_balcony1_room_tone.wav,30.0
-"",0.0
-tests/audio/nz/f5_script2_ipad_balcony1_room_tone.wav,30.0
diff --git a/tests/audio/ir/h179_Bar_1txts.wav b/tests/audio/ir/h179_Bar_1txts.wav
deleted file mode 100644
index f4e12fee..00000000
--- a/tests/audio/ir/h179_Bar_1txts.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:206aee1e87c966ead8a5755b2b90470eaa7c19d55ddbbbeca263ce203c8cab3a
-size 50237
diff --git a/tests/audio/irs.csv b/tests/audio/irs.csv
deleted file mode 100644
index 0df280ff..00000000
--- a/tests/audio/irs.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-path
-tests/audio/ir/h179_Bar_1txts.wav
diff --git a/tests/audio/loudness/1770-2_Comp_18LKFS_FrequencySweep.wav b/tests/audio/loudness/1770-2_Comp_18LKFS_FrequencySweep.wav
deleted file mode 100644
index 5ab78519..00000000
--- a/tests/audio/loudness/1770-2_Comp_18LKFS_FrequencySweep.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f6a86a369c0c5102bb9ccc350ea37f38f0686184e2435f413efec6ed422e1988
-size 11522770
diff --git a/tests/audio/loudness/1770-2_Comp_23LKFS_10000Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_23LKFS_10000Hz_2ch.wav
deleted file mode 100644
index a0c81363..00000000
--- a/tests/audio/loudness/1770-2_Comp_23LKFS_10000Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5366f09b684fae85cb24ba5ea842d3fb890633901ef5dfb21f1a48b1275c5a82
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_23LKFS_1000Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_23LKFS_1000Hz_2ch.wav
deleted file mode 100644
index e8353663..00000000
--- a/tests/audio/loudness/1770-2_Comp_23LKFS_1000Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dbb1d5234bcffad3e8f3ebf29663d2d04d8ed8629b50dd41fdc6be9d20b53924
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_23LKFS_100Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_23LKFS_100Hz_2ch.wav
deleted file mode 100644
index bf74d0c0..00000000
--- a/tests/audio/loudness/1770-2_Comp_23LKFS_100Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3b3a47b804e0d10b4ede90f329e4c045f9cdb42ae071269c72b02cbf269f4c35
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_23LKFS_2000Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_23LKFS_2000Hz_2ch.wav
deleted file mode 100644
index 3baec59d..00000000
--- a/tests/audio/loudness/1770-2_Comp_23LKFS_2000Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:864c3b6b474db45fe4eecb08f967fc9b7e137f13ec4413807d76bf6f24c0447f
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_23LKFS_25Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_23LKFS_25Hz_2ch.wav
deleted file mode 100644
index 4ddcd9bf..00000000
--- a/tests/audio/loudness/1770-2_Comp_23LKFS_25Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:efc2ecba4d60cc916b9a07210f0dee17d9eefc87df992b83c0be650bda3c05f5
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_23LKFS_500Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_23LKFS_500Hz_2ch.wav
deleted file mode 100644
index 16f23ee5..00000000
--- a/tests/audio/loudness/1770-2_Comp_23LKFS_500Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ad7e4fea350130f5229395af4b81b6ff1b81eb525c064dab724d920186eb2069
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_24LKFS_10000Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_24LKFS_10000Hz_2ch.wav
deleted file mode 100644
index 2c6370e8..00000000
--- a/tests/audio/loudness/1770-2_Comp_24LKFS_10000Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:100fe78b0e618a6a02aa3633e9003e4931e23359c284a782d44d806a1ceafb2e
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_24LKFS_1000Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_24LKFS_1000Hz_2ch.wav
deleted file mode 100644
index 3d03e7e9..00000000
--- a/tests/audio/loudness/1770-2_Comp_24LKFS_1000Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4a5bd6197e0ff8993389518be97f9ecaf49d5edcee8cd588a807b3905064da54
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_24LKFS_100Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_24LKFS_100Hz_2ch.wav
deleted file mode 100644
index 89223593..00000000
--- a/tests/audio/loudness/1770-2_Comp_24LKFS_100Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ca6aa1f2526b7f69e7e7d2f535a461cfa4f5fbb6bf5f9c751814904943bee60c
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_24LKFS_2000Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_24LKFS_2000Hz_2ch.wav
deleted file mode 100644
index 2fb018f2..00000000
--- a/tests/audio/loudness/1770-2_Comp_24LKFS_2000Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a0de7f176f410a7dafd98c6fdd8c8330348c40afa8542113cfa39601408b0258
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_24LKFS_25Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_24LKFS_25Hz_2ch.wav
deleted file mode 100644
index 349e1aab..00000000
--- a/tests/audio/loudness/1770-2_Comp_24LKFS_25Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8b4b450513ff4ca97fa6d7e13ee3c7ddae6d09e3cbb1fa7661114ff83e08b5f5
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_24LKFS_500Hz_2ch.wav b/tests/audio/loudness/1770-2_Comp_24LKFS_500Hz_2ch.wav
deleted file mode 100644
index a8f67bbb..00000000
--- a/tests/audio/loudness/1770-2_Comp_24LKFS_500Hz_2ch.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d524338200c4ac4dd9c0b58a8a5edd4f06e3fea677f02b42aa2aed4e6459c106
-size 5568048
diff --git a/tests/audio/loudness/1770-2_Comp_AbsGateTest.wav b/tests/audio/loudness/1770-2_Comp_AbsGateTest.wav
deleted file mode 100644
index 931a7c99..00000000
--- a/tests/audio/loudness/1770-2_Comp_AbsGateTest.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3abc54a67fc1a743d7a4b86a0c8b37e14e51928a6bfb099de3c2476afb74c120
-size 768044
diff --git a/tests/audio/loudness/1770-2_Comp_RelGateTest.wav b/tests/audio/loudness/1770-2_Comp_RelGateTest.wav
deleted file mode 100644
index ecc9a016..00000000
--- a/tests/audio/loudness/1770-2_Comp_RelGateTest.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:39b6ec1f20ec5b4f07da7c5473f0aae06a7e63c7ea81e0a13b9773cbf0c71a81
-size 768044
diff --git a/tests/audio/loudness/1770-2_Conf_Mono_Voice+Music-23LKFS.wav b/tests/audio/loudness/1770-2_Conf_Mono_Voice+Music-23LKFS.wav
deleted file mode 100644
index 7db65779..00000000
--- a/tests/audio/loudness/1770-2_Conf_Mono_Voice+Music-23LKFS.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f8b318474b158c9f2ee842f0cfadf58ffcf504ffe8daa172220190d12dd0785b
-size 7997742
diff --git a/tests/audio/loudness/1770-2_Conf_Mono_Voice+Music-24LKFS.wav b/tests/audio/loudness/1770-2_Conf_Mono_Voice+Music-24LKFS.wav
deleted file mode 100644
index 8f0dde75..00000000
--- a/tests/audio/loudness/1770-2_Conf_Mono_Voice+Music-24LKFS.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fd7b534e2097601b2393b7e0b84392ee640d869e3d54bd5554a999c3e2da881d
-size 7997766
diff --git a/tests/audio/loudness/1770-2_Conf_Stereo_VinL+R-23LKFS.wav b/tests/audio/loudness/1770-2_Conf_Stereo_VinL+R-23LKFS.wav
deleted file mode 100644
index 93873096..00000000
--- a/tests/audio/loudness/1770-2_Conf_Stereo_VinL+R-23LKFS.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3ff26c997d838aff36b4319f9e0a673c0b51fe26722ecb0153887b66f38c296f
-size 15966152
diff --git a/tests/audio/loudness/1770-2_Conf_Stereo_VinL+R-24LKFS.wav b/tests/audio/loudness/1770-2_Conf_Stereo_VinL+R-24LKFS.wav
deleted file mode 100644
index 907d682b..00000000
--- a/tests/audio/loudness/1770-2_Conf_Stereo_VinL+R-24LKFS.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:eaa3eff1f4aec58dbcd0d0ece25efb8576bed6c11c293bce237566cc78970ed0
-size 15966176
diff --git a/tests/audio/loudness/piano.wav b/tests/audio/loudness/piano.wav
deleted file mode 100644
index fa36142c..00000000
--- a/tests/audio/loudness/piano.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:04640d222ed95757842678946888330e03bce99adb5d0b7f99acbad582073664
-size 1764046
diff --git a/tests/audio/loudness/sine_1000.wav b/tests/audio/loudness/sine_1000.wav
deleted file mode 100644
index 26f356ab..00000000
--- a/tests/audio/loudness/sine_1000.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1193df8c81ac76fc5c03c08843441e705b51b5703aa99f818bc7dd5a154bc00c
-size 93768
diff --git a/tests/audio/noises.csv b/tests/audio/noises.csv
deleted file mode 100644
index d2ecedd3..00000000
--- a/tests/audio/noises.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-path
-tests/audio/nz/f5_script2_ipad_balcony1_room_tone.wav
diff --git a/tests/audio/nz/f5_script2_ipad_balcony1_room_tone.wav b/tests/audio/nz/f5_script2_ipad_balcony1_room_tone.wav
deleted file mode 100644
index ff8be174..00000000
--- a/tests/audio/nz/f5_script2_ipad_balcony1_room_tone.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:857661eabc5fd5d773884c4dc972e6af2c3a54ede8012066a1415c0c56259603
-size 2646044
diff --git a/tests/audio/spk.csv b/tests/audio/spk.csv
deleted file mode 100644
index 8730b5c5..00000000
--- a/tests/audio/spk.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-path,loudness
-tests/audio/spk/f10_script4_produced.wav,-16
diff --git a/tests/audio/spk/f10_script4_produced.mp3 b/tests/audio/spk/f10_script4_produced.mp3
deleted file mode 100644
index 47289901..00000000
--- a/tests/audio/spk/f10_script4_produced.mp3
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:09698ecf1815ebce1bf85d736fa743409a2e49573f2947146526210caa0036c5
-size 1142300
diff --git a/tests/audio/spk/f10_script4_produced.wav b/tests/audio/spk/f10_script4_produced.wav
deleted file mode 100644
index 7ad78ce1..00000000
--- a/tests/audio/spk/f10_script4_produced.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1aa7b29136b528df2d6c34b4207f75636a39e10aa98c83e9e0e5272333416814
-size 12588204
diff --git a/tests/core/test_audio_signal.py b/tests/core/test_audio_signal.py
deleted file mode 100644
index 830f0ebf..00000000
--- a/tests/core/test_audio_signal.py
+++ /dev/null
@@ -1,606 +0,0 @@
-import pathlib
-import tempfile
-
-import librosa
-import numpy as np
-import pytest
-import rich
-import torch
-
-import audiotools
-from audiotools import AudioSignal
-
-
-def test_io():
-    audio_path = "tests/audio/spk/f10_script4_produced.wav"
-    signal = AudioSignal(pathlib.Path(audio_path))
-
-    with tempfile.NamedTemporaryFile(suffix=".wav") as f:
-        signal.write(f.name)
-        signal_from_file = AudioSignal(f.name)
-
-    mp3_signal = AudioSignal(audio_path.replace("wav", "mp3"))
-    print(mp3_signal)
-
-    assert signal == signal_from_file
-    print(signal)
-    print(signal.markdown())
-
-    mp3_signal = AudioSignal.excerpt(
-        audio_path.replace("wav", "mp3"), offset=5, duration=5
-    )
-    assert mp3_signal.signal_duration == 5.0
-    assert mp3_signal.duration == 5.0
-    assert mp3_signal.length == mp3_signal.signal_length
-
-    rich.print(signal)
-
-    array = np.random.randn(2, 16000)
-    signal = AudioSignal(array, sample_rate=16000)
-    assert np.allclose(signal.numpy(), array)
-
-    signal = AudioSignal(array, 44100)
-    assert signal.sample_rate == 44100
-    signal.shape
-
-    with pytest.raises(ValueError):
-        signal = AudioSignal(5, sample_rate=16000)
-
-    signal = AudioSignal(audio_path, offset=10, duration=10)
-    assert np.allclose(signal.signal_duration, 10.0)
-    assert np.allclose(signal.duration, 10.0)
-
-    signal = AudioSignal.excerpt(audio_path, offset=5, duration=5)
-    assert signal.signal_duration == 5.0
-    assert signal.duration == 5.0
-
-    assert "offset" in signal.metadata
-    assert "duration" in signal.metadata
-
-    signal = AudioSignal(torch.randn(1000), 44100)
-    assert signal.audio_data.ndim == 3
-    assert torch.all(signal.samples == signal.audio_data)
-
-    audio_path = "tests/audio/spk/f10_script4_produced.wav"
-    assert AudioSignal(audio_path).hash() == AudioSignal(audio_path).hash()
-    assert (
-        AudioSignal(audio_path).hash() != AudioSignal(audio_path).normalize(-20).hash()
-    )
-
-    with pytest.raises(RuntimeError):
-        AudioSignal(audio_path, offset=100000, duration=3)
-
-
-def test_copy_and_clone():
-    audio_path = "tests/audio/spk/f10_script4_produced.wav"
-    signal = AudioSignal(audio_path)
-    signal.stft()
-    signal.loudness()
-
-    copied = signal.copy()
-    deep_copied = signal.deepcopy()
-    cloned = signal.clone()
-
-    for a in ["audio_data", "stft_data", "_loudness"]:
-        a1 = getattr(signal, a)
-        a2 = getattr(cloned, a)
-        a3 = getattr(copied, a)
-        a4 = getattr(deep_copied, a)
-
-        assert id(a1) != id(a2)
-        assert id(a1) == id(a3)
-        assert id(a1) != id(a4)
-
-        assert np.allclose(a1, a2)
-        assert np.allclose(a1, a3)
-        assert np.allclose(a1, a4)
-
-    for a in ["path_to_file", "metadata"]:
-        a1 = getattr(signal, a)
-        a2 = getattr(cloned, a)
-        a3 = getattr(copied, a)
-        a4 = getattr(deep_copied, a)
-
-        assert id(a1) == id(a2) if isinstance(a1, str) else id(a1) != id(a2)
-        assert id(a1) == id(a3)
-        assert id(a1) == id(a4) if isinstance(a1, str) else id(a1) != id(a2)
-
-    # for clone, id should differ if path is list, and should differ always for metadata
-    # if path is string, id should remain same...
- - assert signal.original_signal_length == copied.original_signal_length - assert signal.original_signal_length == deep_copied.original_signal_length - assert signal.original_signal_length == cloned.original_signal_length - - signal = signal.detach() - - -@pytest.mark.parametrize("loudness_cutoff", [-np.inf, -160, -80, -40, -20]) -def test_salient_excerpt(loudness_cutoff): - MAP = {-np.inf: 0.0, -160: 0.0, -80: 0.001, -40: 0.01, -20: 0.1} - with tempfile.NamedTemporaryFile(suffix=".wav") as f: - sr = 44100 - signal = AudioSignal(torch.zeros(sr * 60), sr) - - signal[..., sr * 20 : sr * 21] = MAP[loudness_cutoff] * torch.randn(44100) - - signal.write(f.name) - signal = AudioSignal.salient_excerpt( - f.name, loudness_cutoff=loudness_cutoff, duration=1, num_tries=None - ) - - assert "offset" in signal.metadata - assert "duration" in signal.metadata - assert signal.loudness() >= loudness_cutoff - - signal = AudioSignal.salient_excerpt( - f.name, loudness_cutoff=np.inf, duration=1, num_tries=10 - ) - signal = AudioSignal.salient_excerpt( - f.name, - loudness_cutoff=None, - duration=1, - ) - - -def test_arithmetic(): - def _make_signals(): - array = np.random.randn(2, 16000) - sig1 = AudioSignal(array, sample_rate=16000) - - array = np.random.randn(2, 16000) - sig2 = AudioSignal(array, sample_rate=16000) - return sig1, sig2 - - # Addition (with a copy) - sig1, sig2 = _make_signals() - sig3 = sig1 + sig2 - assert torch.allclose(sig3.audio_data, sig1.audio_data + sig2.audio_data) - - # Addition (rmul) - sig1, _ = _make_signals() - sig3 = 5.0 + sig1 - assert torch.allclose(sig3.audio_data, sig1.audio_data + 5.0) - - # In place addition - sig3, sig2 = _make_signals() - sig1 = sig3.deepcopy() - sig3 += sig2 - assert torch.allclose(sig3.audio_data, sig1.audio_data + sig2.audio_data) - - # Subtraction (with a copy) - sig1, sig2 = _make_signals() - sig3 = sig1 - sig2 - assert torch.allclose(sig3.audio_data, sig1.audio_data - sig2.audio_data) - - # In place subtraction - sig3, sig2 = _make_signals() - sig1 = sig3.deepcopy() - sig3 -= sig2 - assert torch.allclose(sig3.audio_data, sig1.audio_data - sig2.audio_data) - - # Multiplication (element-wise) - sig1, sig2 = _make_signals() - sig3 = sig1 * sig2 - assert torch.allclose(sig3.audio_data, sig1.audio_data * sig2.audio_data) - - # Multiplication (gain) - sig1, _ = _make_signals() - sig3 = sig1 * 5.0 - assert torch.allclose(sig3.audio_data, sig1.audio_data * 5.0) - - # Multiplication (rmul) - sig1, _ = _make_signals() - sig3 = 5.0 * sig1 - assert torch.allclose(sig3.audio_data, sig1.audio_data * 5.0) - - # Multiplication (in-place) - sig3, sig2 = _make_signals() - sig1 = sig3.deepcopy() - sig3 *= sig2 - assert torch.allclose(sig3.audio_data, sig1.audio_data * sig2.audio_data) - - -def test_equality(): - array = np.random.randn(2, 16000) - sig1 = AudioSignal(array, sample_rate=16000) - sig2 = AudioSignal(array, sample_rate=16000) - - assert sig1 == sig2 - - array = np.random.randn(2, 16000) - sig3 = AudioSignal(array, sample_rate=16000) - - assert sig1 != sig3 - - assert not np.allclose(sig1.numpy(), sig3.numpy()) - - -def test_indexing(): - array = np.random.randn(4, 2, 16000) - sig1 = AudioSignal(array, sample_rate=16000) - - assert np.allclose(sig1[0].audio_data, array[0]) - assert np.allclose(sig1[0, :, 8000].audio_data, array[0, :, 8000]) - - # Test with the associated STFT data. 
- array = np.random.randn(4, 2, 16000) - sig1 = AudioSignal(array, sample_rate=16000) - sig1.loudness() - sig1.stft() - - indexed = sig1[0] - - assert np.allclose(indexed.audio_data, array[0]) - assert np.allclose(indexed.stft_data, sig1.stft_data[0]) - assert np.allclose(indexed._loudness, sig1._loudness[0]) - - indexed = sig1[0:2] - - assert np.allclose(indexed.audio_data, array[0:2]) - assert np.allclose(indexed.stft_data, sig1.stft_data[0:2]) - assert np.allclose(indexed._loudness, sig1._loudness[0:2]) - - # Test using a boolean tensor to index batch - mask = torch.tensor([True, False, True, False]) - indexed = sig1[mask] - - assert np.allclose(indexed.audio_data, sig1.audio_data[mask]) - assert np.allclose(indexed.stft_data, sig1.stft_data[mask]) - assert np.allclose(indexed._loudness, sig1._loudness[mask]) - - # Set parts of signal using tensor - other_array = torch.from_numpy(np.random.randn(4, 2, 16000)) - sig1 = AudioSignal(array, sample_rate=16000) - sig1[0, :, 6000:8000] = other_array[0, :, 6000:8000] - - assert np.allclose(sig1[0, :, 6000:8000].audio_data, other_array[0, :, 6000:8000]) - - # Set parts of signal using AudioSignal - sig2 = AudioSignal(other_array, sample_rate=16000) - - sig1 = AudioSignal(array, sample_rate=16000) - sig1[0, :, 6000:8000] = sig2[0, :, 6000:8000] - - assert np.allclose( - sig1[0, :, 6000:8000].audio_data, sig2[0, :, 6000:8000].audio_data - ) - - # Check that loudnesses and stft_data get set as well, if only the batch - # dim is indexed. - sig2 = AudioSignal(other_array, sample_rate=16000) - sig2.stft() - sig2.loudness() - - sig1 = AudioSignal(array, sample_rate=16000) - sig1.stft() - sig1.loudness() - - # Test using a boolean tensor to index batch - mask = torch.tensor([True, False, True, False]) - sig1[mask] = sig2[mask] - - for k in ["stft_data", "audio_data", "_loudness"]: - a1 = getattr(sig1, k) - a2 = getattr(sig2, k) - - assert np.allclose(a1[mask], a2[mask]) - - -def test_zeros(): - x = AudioSignal.zeros(0.5, 44100) - assert x.signal_duration == 0.5 - assert x.duration == 0.5 - assert x.sample_rate == 44100 - - -@pytest.mark.parametrize("shape", ["sine", "square", "sawtooth", "triangle", "beep"]) -def test_waves(shape: str): - # error case - if shape == "beep": - with pytest.raises(ValueError): - AudioSignal.wave(440, 0.5, 44100, shape=shape) - - return - - x = AudioSignal.wave(440, 0.5, 44100, shape=shape) - assert x.duration == 0.5 - assert x.sample_rate == 44100 - - # test the default shape arg - x = AudioSignal.wave(440, 0.5, 44100) - assert x.duration == 0.5 - assert x.sample_rate == 44100 - - -def test_zero_pad(): - array = np.random.randn(4, 2, 16000) - sig1 = AudioSignal(array, sample_rate=16000) - - sig1.zero_pad(100, 100) - zeros = torch.zeros(4, 2, 100) - assert torch.allclose(sig1.audio_data[..., :100], zeros) - assert torch.allclose(sig1.audio_data[..., -100:], zeros) - - -def test_zero_pad_to(): - array = np.random.randn(4, 2, 16000) - sig1 = AudioSignal(array, sample_rate=16000) - - sig1.zero_pad_to(16100) - zeros = torch.zeros(4, 2, 100) - assert torch.allclose(sig1.audio_data[..., -100:], zeros) - assert sig1.signal_length == 16100 - - sig1 = AudioSignal(array, sample_rate=16000) - sig1.zero_pad_to(15000) - assert sig1.signal_length == 16000 - - sig1 = AudioSignal(array, sample_rate=16000) - sig1.zero_pad_to(16100, mode="before") - zeros = torch.zeros(4, 2, 100) - assert torch.allclose(sig1.audio_data[..., :100], zeros) - assert sig1.signal_length == 16100 - - sig1 = AudioSignal(array, sample_rate=16000) - 
sig1.zero_pad_to(15000, mode="before") - assert sig1.signal_length == 16000 - - -def test_truncate(): - array = np.random.randn(4, 2, 16000) - sig1 = AudioSignal(array, sample_rate=16000) - - sig1.truncate_samples(100) - assert sig1.signal_length == 100 - assert np.allclose(sig1.audio_data, array[..., :100]) - - -def test_trim(): - array = np.random.randn(4, 2, 16000) - sig1 = AudioSignal(array, sample_rate=16000) - - sig1.trim(100, 100) - assert sig1.signal_length == 16000 - 200 - assert np.allclose(sig1.audio_data, array[..., 100:-100]) - - array = np.random.randn(4, 2, 16000) - sig1 = AudioSignal(array, sample_rate=16000) - sig1.trim(0, 0) - assert np.allclose(sig1.audio_data, array) - - -def test_to_from_ops(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path) - signal.stft() - signal.loudness() - signal = signal.to("cpu") - - assert signal.audio_data.device == torch.device("cpu") - assert isinstance(signal.numpy(), np.ndarray) - - signal.cpu() - # signal.cuda() - signal.float() - - -def test_device(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path) - signal.to("cpu") - - assert signal.device == torch.device("cpu") - - signal.stft() - signal.audio_data = None - assert signal.device == torch.device("cpu") - - -@pytest.mark.parametrize("window_length", [2048, 512]) -@pytest.mark.parametrize("hop_length", [512, 128]) -@pytest.mark.parametrize("window_type", ["sqrt_hann", "hann", None]) -def test_stft(window_length, hop_length, window_type): - if hop_length >= window_length: - hop_length = window_length // 2 - audio_path = "tests/audio/spk/f10_script4_produced.wav" - stft_params = audiotools.STFTParams( - window_length=window_length, hop_length=hop_length, window_type=window_type - ) - for _stft_params in [None, stft_params]: - signal = AudioSignal(audio_path, duration=10, stft_params=_stft_params) - with pytest.raises(RuntimeError): - signal.istft() - - stft_data = signal.stft() - - assert torch.allclose(signal.stft_data, stft_data) - copied_signal = signal.deepcopy() - copied_signal.stft() - copied_signal = copied_signal.istft() - - assert copied_signal == signal - - mag = signal.magnitude - phase = signal.phase - - recon_stft = mag * torch.exp(1j * phase) - assert torch.allclose(recon_stft, signal.stft_data) - - signal.stft_data = None - mag = signal.magnitude - signal.stft_data = None - phase = signal.phase - - recon_stft = mag * torch.exp(1j * phase) - assert torch.allclose(recon_stft, signal.stft_data) - - # Test with match_stride=True, ignoring the beginning and end. 
- s = signal.stft_params - if s.hop_length == s.window_length // 4: - og_signal = signal.clone() - stft_data = signal.stft(match_stride=True) - recon_data = signal.istft(match_stride=True) - discard = window_length * 2 - - right_pad, _ = signal.compute_stft_padding( - s.window_length, s.hop_length, match_stride=True - ) - length = signal.signal_length + right_pad - assert stft_data.shape[-1] == length // s.hop_length - - assert torch.allclose( - recon_data.audio_data[..., discard:-discard], - og_signal.audio_data[..., discard:-discard], - atol=1e-6, - ) - - -def test_log_magnitude(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - for _ in range(10): - signal = AudioSignal.excerpt(audio_path, duration=5.0) - magnitude = signal.magnitude.numpy()[0, 0] - librosa_log_mag = librosa.amplitude_to_db(magnitude) - log_mag = signal.log_magnitude().numpy()[0, 0] - - assert np.allclose(log_mag, librosa_log_mag) - - -@pytest.mark.parametrize("n_mels", [40, 80, 128]) -@pytest.mark.parametrize("window_length", [2048, 512]) -@pytest.mark.parametrize("hop_length", [512, 128]) -@pytest.mark.parametrize("window_type", ["sqrt_hann", "hann", None]) -def test_mel_spectrogram(n_mels, window_length, hop_length, window_type): - if hop_length >= window_length: - hop_length = window_length // 2 - audio_path = "tests/audio/spk/f10_script4_produced.wav" - stft_params = audiotools.STFTParams( - window_length=window_length, hop_length=hop_length, window_type=window_type - ) - for _stft_params in [None, stft_params]: - signal = AudioSignal(audio_path, duration=10, stft_params=_stft_params) - mel_spec = signal.mel_spectrogram(n_mels=n_mels) - assert mel_spec.shape[2] == n_mels - - -@pytest.mark.parametrize("n_mfcc", [20, 40]) -@pytest.mark.parametrize("n_mels", [40, 80, 128]) -@pytest.mark.parametrize("window_length", [2048, 512]) -@pytest.mark.parametrize("hop_length", [512, 128]) -def test_mfcc(n_mfcc, n_mels, window_length, hop_length): - if hop_length >= window_length: - hop_length = window_length // 2 - audio_path = "tests/audio/spk/f10_script4_produced.wav" - stft_params = audiotools.STFTParams( - window_length=window_length, hop_length=hop_length - ) - for _stft_params in [None, stft_params]: - signal = AudioSignal(audio_path, duration=10, stft_params=_stft_params) - mfcc = signal.mfcc(n_mfcc=n_mfcc, n_mels=n_mels) - assert mfcc.shape[2] == n_mfcc - - -def test_to_mono(): - array = np.random.randn(4, 2, 16000) - sr = 16000 - - signal = AudioSignal(array, sample_rate=sr) - assert signal.num_channels == 2 - - signal = signal.to_mono() - assert signal.num_channels == 1 - - -def test_float(): - array = np.random.randn(4, 1, 16000).astype("float64") - sr = 1600 - signal = AudioSignal(array, sample_rate=sr) - - signal = signal.float() - assert signal.audio_data.dtype == torch.float - - -@pytest.mark.parametrize("sample_rate", [8000, 16000, 22050, 44100, 48000]) -def test_resample(sample_rate): - array = np.random.randn(4, 2, 16000) - sr = 16000 - - signal = AudioSignal(array, sample_rate=sr) - - signal = signal.resample(sample_rate) - assert signal.sample_rate == sample_rate - assert signal.signal_length == sample_rate - - -def test_batching(): - signals = [] - batch_size = 16 - - # All same length, same sample rate. 
- for _ in range(batch_size): - array = np.random.randn(2, 16000) - signal = AudioSignal(array, sample_rate=16000) - signals.append(signal) - - batched_signal = AudioSignal.batch(signals) - assert batched_signal.batch_size == batch_size - - signals = [] - # All different lengths, same sample rate, pad signals - for _ in range(batch_size): - L = np.random.randint(8000, 32000) - array = np.random.randn(2, L) - signal = AudioSignal(array, sample_rate=16000) - signals.append(signal) - - with pytest.raises(RuntimeError): - batched_signal = AudioSignal.batch(signals) - - signal_lengths = [x.signal_length for x in signals] - max_length = max(signal_lengths) - batched_signal = AudioSignal.batch(signals, pad_signals=True) - - assert batched_signal.signal_length == max_length - assert batched_signal.batch_size == batch_size - - signals = [] - # All different lengths, same sample rate, truncate signals - for _ in range(batch_size): - L = np.random.randint(8000, 32000) - array = np.random.randn(2, L) - signal = AudioSignal(array, sample_rate=16000) - signals.append(signal) - - with pytest.raises(RuntimeError): - batched_signal = AudioSignal.batch(signals) - - signal_lengths = [x.signal_length for x in signals] - min_length = min(signal_lengths) - batched_signal = AudioSignal.batch(signals, truncate_signals=True) - - assert batched_signal.signal_length == min_length - assert batched_signal.batch_size == batch_size - - signals = [] - # All different lengths, different sample rate, pad signals - for _ in range(batch_size): - L = np.random.randint(8000, 32000) - sr = np.random.choice([8000, 16000, 32000]) - array = np.random.randn(2, L) - signal = AudioSignal(array, sample_rate=int(sr)) - signals.append(signal) - - with pytest.raises(RuntimeError): - batched_signal = AudioSignal.batch(signals) - - signal_lengths = [x.signal_length for x in signals] - max_length = max(signal_lengths) - for i, x in enumerate(signals): - x.path_to_file = i - batched_signal = AudioSignal.batch(signals, resample=True, pad_signals=True) - - assert batched_signal.signal_length == max_length - assert batched_signal.batch_size == batch_size - assert batched_signal.path_to_file == list(range(len(signals))) - assert batched_signal.path_to_input_file == batched_signal.path_to_file diff --git a/tests/core/test_display.py b/tests/core/test_display.py deleted file mode 100644 index f0088221..00000000 --- a/tests/core/test_display.py +++ /dev/null @@ -1,43 +0,0 @@ -from pathlib import Path - -import numpy as np -from torch.utils.tensorboard import SummaryWriter - -from audiotools import AudioSignal - - -def test_specshow(): - array = np.zeros((1, 16000)) - AudioSignal(array, sample_rate=16000).specshow() - AudioSignal(array, sample_rate=16000).specshow(preemphasis=True) - AudioSignal(array, sample_rate=16000).specshow(title="test", preemphasis=True) - AudioSignal(array, sample_rate=16000).specshow(format=False, preemphasis=True) - AudioSignal(array, sample_rate=16000).specshow( - format=False, preemphasis=False, y_axis="mel" - ) - - -def test_waveplot(): - array = np.zeros((1, 16000)) - AudioSignal(array, sample_rate=16000).waveplot() - - -def test_wavespec(): - array = np.zeros((1, 16000)) - AudioSignal(array, sample_rate=16000).wavespec() - - -def test_write_audio_to_tb(): - signal = AudioSignal("tests/audio/spk/f10_script4_produced.mp3", duration=5) - - Path("./scratch").mkdir(parents=True, exist_ok=True) - writer = SummaryWriter("./scratch/") - signal.write_audio_to_tb("tag", writer) - - -def test_save_image(): - signal = 
AudioSignal( - "tests/audio/spk/f10_script4_produced.wav", duration=10, offset=10 - ) - Path("./scratch").mkdir(parents=True, exist_ok=True) - signal.save_image("./scratch/image.png") diff --git a/tests/core/test_dsp.py b/tests/core/test_dsp.py deleted file mode 100644 index c6edf3c7..00000000 --- a/tests/core/test_dsp.py +++ /dev/null @@ -1,184 +0,0 @@ -import numpy as np -import pytest -import torch - -from audiotools import AudioSignal -from audiotools.core.util import sample_from_dist - - -@pytest.mark.parametrize("window_duration", [0.1, 0.25, 0.5, 1.0]) -@pytest.mark.parametrize("sample_rate", [8000, 16000, 22050, 44100]) -@pytest.mark.parametrize("duration", [0.5, 1.0, 2.0, 10.0]) -def test_overlap_add(duration, sample_rate, window_duration): - np.random.seed(0) - if duration > window_duration: - spk_signal = AudioSignal.batch( - [ - AudioSignal.excerpt( - "tests/audio/spk/f10_script4_produced.wav", duration=duration - ) - for _ in range(16) - ] - ) - spk_signal.resample(sample_rate) - - noise = torch.randn(16, 1, int(duration * sample_rate)) - nz_signal = AudioSignal(noise, sample_rate=sample_rate) - - def _test(signal): - hop_duration = window_duration / 2 - windowed_signal = signal.deepcopy().collect_windows( - window_duration, hop_duration - ) - recombined = windowed_signal.overlap_and_add(hop_duration) - - assert recombined == signal - assert np.allclose(recombined.audio_data, signal.audio_data, 1e-3) - - _test(nz_signal) - _test(spk_signal) - - -@pytest.mark.parametrize("window_duration", [0.1, 0.25, 0.5, 1.0]) -@pytest.mark.parametrize("sample_rate", [8000, 16000, 22050, 44100]) -@pytest.mark.parametrize("duration", [0.5, 1.0, 2.0, 10.0]) -def test_inplace_overlap_add(duration, sample_rate, window_duration): - np.random.seed(0) - if duration > window_duration: - spk_signal = AudioSignal.batch( - [ - AudioSignal.excerpt( - "tests/audio/spk/f10_script4_produced.wav", duration=duration - ) - for _ in range(16) - ] - ) - spk_signal.resample(sample_rate) - - noise = torch.randn(16, 1, int(duration * sample_rate)) - nz_signal = AudioSignal(noise, sample_rate=sample_rate) - - def _test(signal): - hop_duration = window_duration / 2 - windowed_signal = signal.deepcopy().collect_windows( - window_duration, hop_duration - ) - # Compare in-place with unfold results - for i, window in enumerate( - signal.deepcopy().windows(window_duration, hop_duration) - ): - assert np.allclose(window.audio_data, windowed_signal.audio_data[i]) - - _test(nz_signal) - _test(spk_signal) - - -def test_low_pass(): - sample_rate = 44100 - f = 440 - t = torch.arange(0, 1, 1 / sample_rate) - sine_wave = torch.sin(2 * np.pi * f * t) - window = AudioSignal.get_window("hann", sine_wave.shape[-1], sine_wave.device) - sine_wave = sine_wave * window - signal = AudioSignal(sine_wave.unsqueeze(0), sample_rate=sample_rate) - out = signal.deepcopy().low_pass(220) - assert out.audio_data.abs().max() < 1e-4 - - out = signal.deepcopy().low_pass(880) - assert (out - signal).audio_data.abs().max() < 1e-3 - - batch = AudioSignal.batch([signal.deepcopy(), signal.deepcopy(), signal.deepcopy()]) - - cutoffs = [220, 880, 220] - out = batch.deepcopy().low_pass(cutoffs) - - assert out.audio_data[0].abs().max() < 1e-4 - assert out.audio_data[2].abs().max() < 1e-4 - assert (out - batch).audio_data[1].abs().max() < 1e-3 - - -def test_high_pass(): - sample_rate = 44100 - f = 440 - t = torch.arange(0, 1, 1 / sample_rate) - sine_wave = torch.sin(2 * np.pi * f * t) - window = AudioSignal.get_window("hann", sine_wave.shape[-1], 
sine_wave.device) - sine_wave = sine_wave * window - signal = AudioSignal(sine_wave.unsqueeze(0), sample_rate=sample_rate) - out = signal.deepcopy().high_pass(220) - assert (signal - out).audio_data.abs().max() < 1e-4 - - -def test_mask_frequencies(): - sample_rate = 44100 - fs = torch.as_tensor([500.0, 2000.0, 8000.0, 32000.0])[None] - t = torch.arange(0, 1, 1 / sample_rate)[:, None] - sine_wave = torch.sin(2 * np.pi * t @ fs).sum(dim=-1) - sine_wave = AudioSignal(sine_wave, sample_rate) - masked_sine_wave = sine_wave.mask_frequencies(fmin_hz=1500, fmax_hz=10000) - - fs2 = torch.as_tensor([500.0, 32000.0])[None] - sine_wave2 = torch.sin(2 * np.pi * t @ fs).sum(dim=-1) - sine_wave2 = AudioSignal(sine_wave2, sample_rate) - - assert torch.allclose(masked_sine_wave.audio_data, sine_wave2.audio_data) - - -def test_mask_timesteps(): - sample_rate = 44100 - f = 440 - t = torch.linspace(0, 1, sample_rate) - sine_wave = torch.sin(2 * np.pi * f * t) - sine_wave = AudioSignal(sine_wave, sample_rate) - - masked_sine_wave = sine_wave.mask_timesteps(tmin_s=0.25, tmax_s=0.75) - masked_sine_wave.istft() - - mask = ((0.3 < t) & (t < 0.7))[None, None] - assert torch.allclose( - masked_sine_wave.audio_data[mask], - torch.zeros_like(masked_sine_wave.audio_data[mask]), - ) - - -def test_shift_phase(): - sample_rate = 44100 - f = 440 - t = torch.linspace(0, 1, sample_rate) - sine_wave = torch.sin(2 * np.pi * f * t) - sine_wave = AudioSignal(sine_wave, sample_rate) - sine_wave2 = sine_wave.clone() - - shifted_sine_wave = sine_wave.shift_phase(np.pi) - shifted_sine_wave.istft() - - sine_wave2.phase = sine_wave2.phase + np.pi - sine_wave2.istft() - - assert torch.allclose(shifted_sine_wave.audio_data, sine_wave2.audio_data) - - -def test_corrupt_phase(): - sample_rate = 44100 - f = 440 - t = torch.linspace(0, 1, sample_rate) - sine_wave = torch.sin(2 * np.pi * f * t) - sine_wave = AudioSignal(sine_wave, sample_rate) - sine_wave2 = sine_wave.clone() - - shifted_sine_wave = sine_wave.corrupt_phase(scale=np.pi) - shifted_sine_wave.istft() - - assert (sine_wave2.phase - shifted_sine_wave.phase).abs().mean() > 0.0 - assert ((sine_wave2.phase - shifted_sine_wave.phase).std() / np.pi) < 1.0 - - -def test_preemphasis(): - x = AudioSignal.excerpt("tests/audio/spk/f10_script4_produced.wav", duration=5) - import matplotlib.pyplot as plt - - x.specshow(preemphasis=False) - - x.specshow(preemphasis=True) - - x.preemphasis() diff --git a/tests/core/test_effects.py b/tests/core/test_effects.py deleted file mode 100644 index c0d6765b..00000000 --- a/tests/core/test_effects.py +++ /dev/null @@ -1,359 +0,0 @@ -import numpy as np -import pytest -import torch -import torchaudio - -from audiotools import AudioSignal - - -def test_normalize(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=10) - signal = signal.normalize() - assert np.allclose(signal.loudness(), -24, atol=1e-1) - - array = np.random.randn(1, 2, 32000) - array = array / np.abs(array).max() - - signal = AudioSignal(array, sample_rate=16000) - for db_incr in np.arange(10, 75, 5): - db = -80 + db_incr - signal = signal.normalize(db) - loudness = signal.loudness() - assert np.allclose(loudness, db, atol=1e-1) - - batch_size = 16 - db = -60 + torch.linspace(10, 30, batch_size) - - array = np.random.randn(batch_size, 2, 32000) - array = array / np.abs(array).max() - signal = AudioSignal(array, sample_rate=16000) - - signal = signal.normalize(db) - assert np.allclose(signal.loudness(), db, 1e-1) - - -def 
test_volume_change(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=10) - - boost = 3 - before_db = signal.loudness().clone() - signal = signal.volume_change(boost) - after_db = signal.loudness() - assert np.allclose(before_db + boost, after_db) - - signal._loudness = None - after_db = signal.loudness() - assert np.allclose(before_db + boost, after_db, 1e-1) - - -def test_mix(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=10) - - audio_path = "tests/audio/nz/f5_script2_ipad_balcony1_room_tone.wav" - nz = AudioSignal(audio_path, offset=10, duration=10) - - spk.deepcopy().mix(nz, snr=-10) - snr = spk.loudness() - nz.loudness() - assert np.allclose(snr, -10, atol=1) - - # Test in batch - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=10) - - audio_path = "tests/audio/nz/f5_script2_ipad_balcony1_room_tone.wav" - nz = AudioSignal(audio_path, offset=10, duration=10) - - batch_size = 4 - tgt_snr = torch.linspace(-10, 10, batch_size) - - spk_batch = AudioSignal.batch([spk.deepcopy() for _ in range(batch_size)]) - nz_batch = AudioSignal.batch([nz.deepcopy() for _ in range(batch_size)]) - - spk_batch.deepcopy().mix(nz_batch, snr=tgt_snr) - snr = spk_batch.loudness() - nz_batch.loudness() - assert np.allclose(snr, tgt_snr, atol=1) - - # Test with "EQing" the other signal - db = 0 + 0 * torch.rand(10) - spk_batch.deepcopy().mix(nz_batch, snr=tgt_snr, other_eq=db) - snr = spk_batch.loudness() - nz_batch.loudness() - assert np.allclose(snr, tgt_snr, atol=1) - - -def test_convolve(): - np.random.seed(6) # Found a failing seed - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=10) - - impulse = np.zeros((1, 16000)) - impulse[..., 0] = 1 - ir = AudioSignal(impulse, 16000) - batch_size = 4 - - spk_batch = AudioSignal.batch([spk.deepcopy() for _ in range(batch_size)]) - ir_batch = AudioSignal.batch( - [ir.deepcopy().zero_pad(np.random.randint(1000), 0) for _ in range(batch_size)], - pad_signals=True, - ) - - convolved = spk_batch.deepcopy().convolve(ir_batch) - assert convolved == spk_batch - - # Short duration - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=0.1) - - impulse = np.zeros((1, 16000)) - impulse[..., 0] = 1 - ir = AudioSignal(impulse, 16000) - batch_size = 4 - - spk_batch = AudioSignal.batch([spk.deepcopy() for _ in range(batch_size)]) - ir_batch = AudioSignal.batch( - [ir.deepcopy().zero_pad(np.random.randint(1000), 0) for _ in range(batch_size)], - pad_signals=True, - ) - - convolved = spk_batch.deepcopy().convolve(ir_batch) - assert convolved == spk_batch - - -def test_pipeline(): - # An actual IR, no batching - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=5) - - audio_path = "tests/audio/ir/h179_Bar_1txts.wav" - ir = AudioSignal(audio_path) - spk.deepcopy().convolve(ir) - - audio_path = "tests/audio/nz/f5_script2_ipad_balcony1_room_tone.wav" - nz = AudioSignal(audio_path, offset=10, duration=5) - - batch_size = 16 - tgt_snr = torch.linspace(20, 30, batch_size) - - (spk @ ir).mix(nz, snr=tgt_snr) - - -def test_codec(): - torchaudio_version_070 = "0.7" in torchaudio.__version__ - if torchaudio_version_070: - return - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, 
duration=10) - - with pytest.raises(ValueError): - spk.apply_codec("unknown preset") - - out = spk.deepcopy().apply_codec("Ogg") - out = spk.deepcopy().apply_codec("8-bit") - - -def test_pitch_shift(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=1) - - single = spk.deepcopy().pitch_shift(5) - - batch_size = 4 - spk_batch = AudioSignal.batch([spk.deepcopy() for _ in range(batch_size)]) - - batched = spk_batch.deepcopy().pitch_shift(5) - - assert np.allclose(batched[0].audio_data, single[0].audio_data) - - -def test_time_stretch(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=1) - - single = spk.deepcopy().time_stretch(0.8) - - batch_size = 4 - spk_batch = AudioSignal.batch([spk.deepcopy() for _ in range(batch_size)]) - - batched = spk_batch.deepcopy().time_stretch(0.8) - - assert np.allclose(batched[0].audio_data, single[0].audio_data) - - -@pytest.mark.parametrize("n_bands", [1, 2, 4, 8, 12, 16]) -def test_mel_filterbank(n_bands): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=1) - fbank = spk.deepcopy().mel_filterbank(n_bands) - - assert torch.allclose(fbank.sum(-1), spk.audio_data, atol=1e-6) - - # Check if it works in batches. - spk_batch = AudioSignal.batch( - [ - AudioSignal.excerpt("tests/audio/spk/f10_script4_produced.wav", duration=2) - for _ in range(16) - ] - ) - fbank = spk_batch.deepcopy().mel_filterbank(n_bands) - summed = fbank.sum(-1) - assert torch.allclose(summed, spk_batch.audio_data, atol=1e-6) - - -@pytest.mark.parametrize("n_bands", [1, 2, 4, 8, 12, 16]) -def test_equalizer(n_bands): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=10) - - db = -3 + 1 * torch.rand(n_bands) - spk.deepcopy().equalizer(db) - - db = -3 + 1 * np.random.rand(n_bands) - spk.deepcopy().equalizer(db) - - audio_path = "tests/audio/ir/h179_Bar_1txts.wav" - ir = AudioSignal(audio_path) - db = -3 + 1 * torch.rand(n_bands) - - spk.deepcopy().convolve(ir.equalizer(db)) - - spk_batch = AudioSignal.batch( - [ - AudioSignal.excerpt("tests/audio/spk/f10_script4_produced.wav", duration=2) - for _ in range(16) - ] - ) - - db = torch.zeros(spk_batch.batch_size, n_bands) - output = spk_batch.deepcopy().equalizer(db) - - assert output == spk_batch - - -def test_clip_distortion(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=2) - clipped = spk.deepcopy().clip_distortion(0.05) - - spk_batch = AudioSignal.batch( - [ - AudioSignal.excerpt("tests/audio/spk/f10_script4_produced.wav", duration=2) - for _ in range(16) - ] - ) - percs = torch.from_numpy(np.random.uniform(size=(16,))).float() - clipped_batch = spk_batch.deepcopy().clip_distortion(percs) - - assert clipped.audio_data.abs().max() < 1.0 - assert clipped_batch.audio_data.abs().max() < 1.0 - - -@pytest.mark.parametrize("quant_ch", [2, 4, 8, 16, 32, 64, 128]) -def test_quantization(quant_ch): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=2) - - quantized = spk.deepcopy().quantization(quant_ch) - - # Need to round audio_data off because torch ops with straight - # through estimator are sometimes a bit off past 3 decimal places. 
- found_quant_ch = len(np.unique(np.around(quantized.audio_data, decimals=3))) - assert found_quant_ch <= quant_ch - - spk_batch = AudioSignal.batch( - [ - AudioSignal.excerpt("tests/audio/spk/f10_script4_produced.wav", duration=2) - for _ in range(16) - ] - ) - - quant_ch = np.random.choice([2, 4, 8, 16, 32, 64, 128], size=(16,), replace=True) - quantized = spk_batch.deepcopy().quantization(quant_ch) - - for i, q_ch in enumerate(quant_ch): - found_quant_ch = len(np.unique(np.around(quantized.audio_data[i], decimals=3))) - assert found_quant_ch <= q_ch - - -@pytest.mark.parametrize("quant_ch", [2, 4, 8, 16, 32, 64, 128]) -def test_mulaw_quantization(quant_ch): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - spk = AudioSignal(audio_path, offset=10, duration=2) - - quantized = spk.deepcopy().mulaw_quantization(quant_ch) - - # Need to round audio_data off because torch ops with straight - # through estimator are sometimes a bit off past 3 decimal places. - found_quant_ch = len(np.unique(np.around(quantized.audio_data, decimals=3))) - assert found_quant_ch <= quant_ch - - spk_batch = AudioSignal.batch( - [ - AudioSignal.excerpt("tests/audio/spk/f10_script4_produced.wav", duration=2) - for _ in range(16) - ] - ) - - quant_ch = np.random.choice([2, 4, 8, 16, 32, 64, 128], size=(16,), replace=True) - quantized = spk_batch.deepcopy().mulaw_quantization(quant_ch) - - for i, q_ch in enumerate(quant_ch): - found_quant_ch = len(np.unique(np.around(quantized.audio_data[i], decimals=3))) - assert found_quant_ch <= q_ch - - -def test_impulse_response_augmentation(): - audio_path = "tests/audio/ir/h179_Bar_1txts.wav" - batch_size = 16 - ir = AudioSignal(audio_path) - ir_batch = AudioSignal.batch([ir for _ in range(batch_size)]) - early_response, late_field, window = ir_batch.decompose_ir() - - assert early_response.shape == late_field.shape - assert late_field.shape == window.shape - - drr = ir_batch.measure_drr() - - alpha = AudioSignal.solve_alpha(early_response, late_field, window, drr) - assert np.allclose(alpha, np.ones_like(alpha), 1e-5) - - target_drr = 5 - out = ir_batch.deepcopy().alter_drr(target_drr) - drr = out.measure_drr() - assert np.allclose(drr, np.ones_like(drr) * target_drr) - - target_drr = np.random.rand(batch_size).astype("float32") * 50 - altered_ir = ir_batch.deepcopy().alter_drr(target_drr) - drr = altered_ir.measure_drr() - assert np.allclose(drr.flatten(), target_drr.flatten()) - - -def test_apply_ir(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - ir_path = "tests/audio/ir/h179_Bar_1txts.wav" - - spk = AudioSignal(audio_path, offset=10, duration=2) - ir = AudioSignal(ir_path) - db = 0 + 0 * torch.rand(10) - output = spk.deepcopy().apply_ir(ir, drr=10, ir_eq=db) - - assert np.allclose(ir.measure_drr().flatten(), 10) - - output = spk.deepcopy().apply_ir(ir, drr=10, ir_eq=db, use_original_phase=True) - - -def test_ensure_max_of_audio(): - spk = AudioSignal(torch.randn(1, 1, 44100), 44100) - - max_vals = [1.0] + [np.random.rand() for _ in range(10)] - for val in max_vals: - after = spk.deepcopy().ensure_max_of_audio(val) - assert after.audio_data.abs().max() <= val + 1e-3 - - # Make sure it does nothing to a tiny signal - spk = AudioSignal(torch.rand(1, 1, 44100), 44100) - spk.audio_data = spk.audio_data * 0.5 - after = spk.deepcopy().ensure_max_of_audio() - - assert torch.allclose(after.audio_data, spk.audio_data) diff --git a/tests/core/test_ffmpeg.py b/tests/core/test_ffmpeg.py deleted file mode 100644 index d9cb158e..00000000 --- 
a/tests/core/test_ffmpeg.py +++ /dev/null @@ -1,128 +0,0 @@ -import shlex -import subprocess -import tempfile -from pathlib import Path - -import numpy as np -import pyloudnorm -import pytest -import torch - -from audiotools import AudioSignal - - -@pytest.mark.parametrize("sample_rate", [8000, 16000, 22050, 44100, 48000]) -def test_ffmpeg_resample(sample_rate): - array = np.random.randn(4, 2, 16000) - sr = 16000 - - signal = AudioSignal(array, sample_rate=sr) - - signal = signal.ffmpeg_resample(sample_rate) - assert signal.sample_rate == sample_rate - assert signal.signal_length == sample_rate - - -def test_ffmpeg_loudness(): - np.random.seed(0) - array = np.random.randn(16, 2, 16000) - array /= np.abs(array).max() - - gains = np.random.rand(array.shape[0])[:, None, None] - array = array * gains - - meter = pyloudnorm.Meter(16000) - py_loudness = [meter.integrated_loudness(array[i].T) for i in range(array.shape[0])] - - ffmpeg_loudness_iso = AudioSignal(array, 16000).ffmpeg_loudness() - assert np.allclose(py_loudness, ffmpeg_loudness_iso, atol=1) - - # if you normalize and then write, it should still work. - # if ffmpeg is float64, this fails - with tempfile.NamedTemporaryFile(suffix=".wav") as f: - x = AudioSignal(torch.randn(44100 * 10), 44100) - x.ffmpeg_loudness(-24) - x.normalize(-24) - x.write(f.name) - - -def test_ffmpeg_load(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - # convert to mp3 with ffmpeg - og_signal = AudioSignal(audio_path) - with tempfile.NamedTemporaryFile(suffix=".mp3") as f: - command = f"ffmpeg -i {audio_path} {f.name} -y -hide_banner -loglevel error" - subprocess.check_call(shlex.split(command)) - - signal_from_ffmpeg = AudioSignal.load_from_file_with_ffmpeg(f.name) - assert og_signal.signal_length == signal_from_ffmpeg.signal_length - - # test spaces in title - with tempfile.TemporaryDirectory() as tmpdir: - out_path = str(Path(tmpdir) / "Title with spaces.wav") - og_signal.write(out_path) - signal_from_ffmpeg = AudioSignal.load_from_file_with_ffmpeg(out_path) - - assert og_signal.signal_length == signal_from_ffmpeg.signal_length - - # test quotes in title - with tempfile.TemporaryDirectory() as tmpdir: - out_path = str(Path(tmpdir) / "Someone's title with spaces.wav") - og_signal.write(out_path) - signal_from_ffmpeg = AudioSignal.load_from_file_with_ffmpeg(out_path) - - assert og_signal.signal_length == signal_from_ffmpeg.signal_length - - -def test_ffmpeg_audio_offset(): - with tempfile.TemporaryDirectory() as d: - video_path = Path(d) / "test.mp4" - audio_path = Path(d) / "test.wav" - delayed_video = Path(d) / "test_delayed.mp4" - delayed_audio = Path(d) / "test_audio.wav" - remuxed_video = Path(d) / "test_remuxed.mp4" - - # Create a test video - subprocess.run( - shlex.split( - f"ffmpeg -y -f lavfi " - f"-i testsrc=d=5:s=120x120:r=60,format=yuv420p " - f"-f lavfi -i sine=f=440:b=4 " - f"-shortest {video_path} -loglevel error" - ) - ) - - signal = AudioSignal(video_path) - signal.write(audio_path) - - # Create a video with the audio offset by 1 second - subprocess.run( - shlex.split( - f"ffmpeg -y -i {video_path} " - f"-itsoffset 1.0 -i {audio_path} " - f"-c:v copy -c:a aac -map 0:v:0 -map 1:a:0 " - f"{delayed_video} -loglevel error " - ) - ) - signal = AudioSignal.load_from_file_with_ffmpeg(delayed_video) - - # Mux the read signal with the video, and then re-read it - # to make sure it stays the same. 
- signal.write(delayed_audio) - subprocess.run( - shlex.split( - f"ffmpeg -i {delayed_video} " - f"-i {delayed_audio} -c:v " - f"copy -c:a aac -map 0:v:0 " - f"-map 1:a:0 {remuxed_video} -loglevel error" - ) - ) - remuxed = AudioSignal.load_from_file_with_ffmpeg(remuxed_video) - - # Muxing encodes the audio, changing it so the best - # we can do is compare the first nonzero offset (which - # is the encoded delay) - idx_a = signal.audio_data[0, 0].nonzero()[0] - idx_b = remuxed.audio_data[0, 0].nonzero()[0] - # Error of less than 50 samples - assert abs(idx_a - idx_b) < 50 diff --git a/tests/core/test_grad.py b/tests/core/test_grad.py deleted file mode 100644 index ad9bee00..00000000 --- a/tests/core/test_grad.py +++ /dev/null @@ -1,106 +0,0 @@ -from typing import Callable - -import numpy as np -import pytest -import torch -import torchaudio - -from audiotools import AudioSignal - - -def test_audio_grad(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - ir_path = "tests/audio/ir/h179_Bar_1txts.wav" - - def _test_audio_grad(attr: str, target=True, kwargs: dict = {}): - signal = AudioSignal(audio_path) - signal.audio_data.requires_grad = True - - assert signal.audio_data.grad is None - - # Avoid overwriting leaf tensor by cloning signal - attr = getattr(signal.clone(), attr) - result = attr(**kwargs) if isinstance(attr, Callable) else attr - - try: - if isinstance(result, AudioSignal): - # If necessary, propagate spectrogram changes to waveform - if result.stft_data is not None: - result.istft() - if result.audio_data.dtype.is_complex: - result.audio_data.real.sum().backward() - else: - result.audio_data.sum().backward() - else: - if result.dtype.is_complex: - result.real.sum().backward() - else: - result.sum().backward() - - assert signal.audio_data.grad is not None or not target - except RuntimeError: - assert not target - - for a in [ - ["mix", True, {"other": AudioSignal(audio_path), "snr": 0}], - ["convolve", True, {"other": AudioSignal(ir_path)}], - [ - "apply_ir", - True, - {"ir": AudioSignal(ir_path), "drr": 0.1, "ir_eq": torch.randn(6)}, - ], - ["ensure_max_of_audio", True], - ["normalize", True], - ["volume_change", True, {"db": 1}], - ["pitch_shift", False, {"n_semitones": 1}], - ["time_stretch", False, {"factor": 2}], - ["apply_codec", False], - ["equalizer", True, {"db": torch.randn(6)}], - ["clip_distortion", True, {"clip_percentile": 0.5}], - ["quantization", True, {"quantization_channels": 8}], - ["mulaw_quantization", True, {"quantization_channels": 8}], - ["resample", True, {"sample_rate": 16000}], - ["low_pass", True, {"cutoffs": 1000}], - ["high_pass", True, {"cutoffs": 1000}], - ["to_mono", True], - ["zero_pad", True, {"before": 10, "after": 10}], - ["magnitude", True], - ["phase", True], - ["log_magnitude", True], - ["loudness", False], - ["stft", True], - ["clone", True], - ["mel_spectrogram", True], - ["zero_pad_to", True, {"length": 100000}], - ["truncate_samples", True, {"length_in_samples": 1000}], - ["corrupt_phase", True, {"scale": 0.5}], - ["shift_phase", True, {"shift": 1}], - ["mask_low_magnitudes", True, {"db_cutoff": 0}], - ["mask_frequencies", True, {"fmin_hz": 100, "fmax_hz": 1000}], - ["mask_timesteps", True, {"tmin_s": 0.1, "tmax_s": 0.5}], - ["__add__", True, {"other": AudioSignal(audio_path)}], - ["__iadd__", True, {"other": AudioSignal(audio_path)}], - ["__radd__", True, {"other": AudioSignal(audio_path)}], - ["__sub__", True, {"other": AudioSignal(audio_path)}], - ["__isub__", True, {"other": AudioSignal(audio_path)}], - ["__mul__", True, 
{"other": AudioSignal(audio_path)}], - ["__imul__", True, {"other": AudioSignal(audio_path)}], - ["__rmul__", True, {"other": AudioSignal(audio_path)}], - ]: - _test_audio_grad(*a) - - -def test_batch_grad(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - - signal = AudioSignal(audio_path) - signal.audio_data.requires_grad = True - - assert signal.audio_data.grad is None - - batch_size = 16 - batch = AudioSignal.batch([signal.clone() for _ in range(batch_size)]) - - batch.audio_data.sum().backward() - - assert signal.audio_data.grad is not None diff --git a/tests/core/test_loudness.py b/tests/core/test_loudness.py deleted file mode 100644 index ae32f04d..00000000 --- a/tests/core/test_loudness.py +++ /dev/null @@ -1,263 +0,0 @@ -import numpy as np -import pyloudnorm -import soundfile as sf - -from audiotools import AudioSignal -from audiotools import datasets -from audiotools import Meter -from audiotools import transforms - -ATOL = 1e-1 - - -def test_loudness_against_pyln(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=5, duration=10) - signal_loudness = signal.loudness() - - meter = pyloudnorm.Meter( - signal.sample_rate, filter_class="K-weighting", block_size=0.4 - ) - py_loudness = meter.integrated_loudness(signal.numpy()[0].T) - assert np.allclose(signal_loudness, py_loudness) - - -def test_loudness_short(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=0.25) - signal_loudness = signal.loudness() - - -def test_batch_loudness(): - np.random.seed(0) - array = np.random.randn(16, 2, 16000) - array /= np.abs(array).max() - - gains = np.random.rand(array.shape[0])[:, None, None] - array = array * gains - - meter = pyloudnorm.Meter(16000) - py_loudness = [meter.integrated_loudness(array[i].T) for i in range(array.shape[0])] - - meter = Meter(16000) - meter.filter_class - at_loudness_iso = [ - meter.integrated_loudness(array[i].T).item() for i in range(array.shape[0]) - ] - - assert np.allclose(py_loudness, at_loudness_iso, atol=1e-1) - - signal = AudioSignal(array, sample_rate=16000) - at_loudness_batch = signal.loudness() - assert np.allclose(py_loudness, at_loudness_batch, atol=1e-1) - - -# Tests below are copied from pyloudnorm -def test_integrated_loudness(): - data, rate = sf.read("tests/audio/loudness/sine_1000.wav") - meter = Meter(rate) - loudness = meter(data) - - targetLoudness = -3.0523438444331137 - assert np.allclose(loudness, targetLoudness) - - -def test_rel_gate_test(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_RelGateTest.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -10.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_abs_gate_test(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_AbsGateTest.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -69.5 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_24LKFS_25Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_24LKFS_25Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -24.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_24LKFS_100Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_24LKFS_100Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -24.0 - assert np.allclose(loudness, 
targetLoudness, atol=ATOL) - - -def test_24LKFS_500Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_24LKFS_500Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -24.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_24LKFS_1000Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_24LKFS_1000Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -24.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_24LKFS_2000Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_24LKFS_2000Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -24.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_24LKFS_10000Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_24LKFS_10000Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -24.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_23LKFS_25Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_23LKFS_25Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -23.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_23LKFS_100Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_23LKFS_100Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -23.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_23LKFS_500Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_23LKFS_500Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -23.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_23LKFS_1000Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_23LKFS_1000Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -23.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_23LKFS_2000Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_23LKFS_2000Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -23.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_23LKFS_10000Hz_2ch(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_23LKFS_10000Hz_2ch.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -23.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_18LKFS_frequency_sweep(): - data, rate = sf.read("tests/audio/loudness/1770-2_Comp_18LKFS_FrequencySweep.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -18.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_conf_stereo_vinL_R_23LKFS(): - data, rate = sf.read("tests/audio/loudness/1770-2_Conf_Stereo_VinL+R-23LKFS.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -23.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_conf_monovoice_music_24LKFS(): - data, rate = sf.read("tests/audio/loudness/1770-2_Conf_Mono_Voice+Music-24LKFS.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -24.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def 
conf_monovoice_music_24LKFS(): - data, rate = sf.read("tests/audio/loudness/1770-2_Conf_Mono_Voice+Music-24LKFS.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -24.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_conf_monovoice_music_23LKFS(): - data, rate = sf.read("tests/audio/loudness/1770-2_Conf_Mono_Voice+Music-23LKFS.wav") - meter = Meter(rate) - loudness = meter.integrated_loudness(data) - - targetLoudness = -23.0 - assert np.allclose(loudness, targetLoudness, atol=ATOL) - - -def test_fir_accuracy(): - transform = transforms.Compose( - transforms.ClippingDistortion(prob=0.5), - transforms.LowPass(prob=0.5), - transforms.HighPass(prob=0.5), - transforms.Equalizer(prob=0.5), - prob=0.5, - ) - loader = datasets.AudioLoader(sources=["tests/audio/spk.csv"]) - dataset = datasets.AudioDataset( - loader, - 44100, - 10, - 5.0, - transform=transform, - ) - - for i in range(10): - item = dataset[i] - kwargs = item["transform_args"] - signal = item["signal"] - signal = transform(signal, **kwargs) - - signal._loudness = None - iir_db = signal.clone().loudness() - fir_db = signal.clone().loudness(use_fir=True) - - assert np.allclose(iir_db, fir_db, atol=1e-2) diff --git a/tests/core/test_playback.py b/tests/core/test_playback.py deleted file mode 100644 index 3b4cdaa1..00000000 --- a/tests/core/test_playback.py +++ /dev/null @@ -1,25 +0,0 @@ -import numpy as np -from numpy.random import sample - -from audiotools import AudioSignal - - -def test_play(): - array = np.zeros((1, 100)) - AudioSignal(array, sample_rate=16000).play() - - -def test_embed(): - array = np.zeros((1, 100)) - AudioSignal(array, sample_rate=16000).embed() - AudioSignal(array, sample_rate=16000).embed(ext=".wav") - AudioSignal(array, sample_rate=16000).embed(ext=".mp3") - AudioSignal(array, sample_rate=16000).embed(ext=".wav", return_html=True) - - -def test_widget(): - array = np.zeros((1, 10000)) - AudioSignal(array, sample_rate=16000).widget() - AudioSignal(array, sample_rate=16000).widget(ext=".wav") - AudioSignal(array, sample_rate=16000).widget("Some title") - AudioSignal(array, sample_rate=16000).widget("Some title", return_html=True) diff --git a/tests/core/test_util.py b/tests/core/test_util.py deleted file mode 100644 index 3cd91b38..00000000 --- a/tests/core/test_util.py +++ /dev/null @@ -1,150 +0,0 @@ -import os -import random -import tempfile - -import numpy as np -import pytest -import torch - -from audiotools import util -from audiotools.core.audio_signal import AudioSignal - - -def test_check_random_state(): - # seed is None - rng_type = type(np.random.RandomState(10)) - rng = util.random_state(None) - assert type(rng) == rng_type - - # seed is int - rng = util.random_state(10) - assert type(rng) == rng_type - - # seed is RandomState - rng_test = np.random.RandomState(10) - rng = util.random_state(rng_test) - assert type(rng) == rng_type - - # seed is none of the above : error - pytest.raises(ValueError, util.random_state, "random") - - -def test_seed(): - util.seed(0) - torch_result_a = torch.randn(1) - np_result_a = np.random.randn(1) - py_result_a = random.random() - - util.seed(0, set_cudnn=True) - torch_result_b = torch.randn(1) - np_result_b = np.random.randn(1) - py_result_b = random.random() - - assert torch_result_a == torch_result_b - assert np_result_a == np_result_b - assert py_result_a == py_result_b - - -def test_hz_to_bin(): - hz = torch.from_numpy(np.array([100, 200, 300])) - sr = 1000 - n_fft = 2048 - - bins = 
util.hz_to_bin(hz, n_fft, sr) - - assert (((bins / n_fft) * sr) - hz).abs().max() < 1 - - -def test_find_audio(): - wav_files = util.find_audio("tests/", ["wav"]) - for a in wav_files: - assert "wav" in str(a) - - audio_files = util.find_audio("tests/", ["flac"]) - assert not audio_files - - # Make sure it works with single audio files - audio_files = util.find_audio("tests/audio/spk//f10_script4_produced.wav") - - # Make sure it works with globs - audio_files = util.find_audio("tests/**/*.wav") - assert len(audio_files) == len(wav_files) - - -def test_chdir(): - with tempfile.TemporaryDirectory(suffix="tmp") as d: - with util.chdir(d): - assert os.path.samefile(d, os.path.realpath(".")) - - -def test_prepare_batch(): - batch = {"tensor": torch.randn(1), "non_tensor": np.random.randn(1)} - util.prepare_batch(batch) - - batch = torch.randn(1) - util.prepare_batch(batch) - - batch = [torch.randn(1), np.random.randn(1)] - util.prepare_batch(batch) - - -def test_sample_dist(): - state = util.random_state(0) - v1 = state.uniform(0.0, 1.0) - v2 = util.sample_from_dist(("uniform", 0.0, 1.0), 0) - assert v1 == v2 - - assert util.sample_from_dist(("const", 1.0)) == 1.0 - - dist_tuple = ("choice", [8, 16, 32]) - assert util.sample_from_dist(dist_tuple) in [8, 16, 32] - - -def test_collate(): - batch_size = 16 - - def _one_item(): - return { - "signal": AudioSignal(torch.randn(1, 1, 44100), 44100), - "tensor": torch.randn(1), - "string": "Testing", - "dict": { - "nested_signal": AudioSignal(torch.randn(1, 1, 44100), 44100), - }, - } - - items = [_one_item() for _ in range(batch_size)] - collated = util.collate(items) - - assert collated["signal"].batch_size == batch_size - assert collated["tensor"].shape[0] == batch_size - assert len(collated["string"]) == batch_size - assert collated["dict"]["nested_signal"].batch_size == batch_size - - # test collate with splitting (evenly) - batch_size = 16 - n_splits = 4 - - items = [_one_item() for _ in range(batch_size)] - collated = util.collate(items, n_splits=n_splits) - - for x in collated: - assert x["signal"].batch_size == batch_size // n_splits - assert x["tensor"].shape[0] == batch_size // n_splits - assert len(x["string"]) == batch_size // n_splits - assert x["dict"]["nested_signal"].batch_size == batch_size // n_splits - - # test collate with splitting (unevenly) - batch_size = 15 - n_splits = 4 - - items = [_one_item() for _ in range(batch_size)] - collated = util.collate(items, n_splits=n_splits) - - tlen = [4, 4, 4, 3] - - for x, t in zip(collated, tlen): - assert x["signal"].batch_size == t - assert x["tensor"].shape[0] == t - assert len(x["string"]) == t - assert x["dict"]["nested_signal"].batch_size == t diff --git a/tests/core/test_whisper.py b/tests/core/test_whisper.py deleted file mode 100644 index b8d8c193..00000000 --- a/tests/core/test_whisper.py +++ /dev/null @@ -1,38 +0,0 @@ -import os -import random -import tempfile - -import numpy as np -import pytest -import torch - -from audiotools import util -from audiotools.core.audio_signal import AudioSignal - - -def test_whisper_features(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=10) - - input_features = signal.get_whisper_features() - - assert input_features.dtype == torch.float32 - assert input_features.shape == (1, 80, 3000) # (batch, channels, seq_len) - - -def test_whisper_transcript(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=10) - - transcript 
= signal.get_whisper_transcript() - - assert transcript.startswith("<|startoftranscript|>") - - -def test_whisper_embeddings(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=10) - embeddings = signal.get_whisper_embeddings() - - assert embeddings.dtype == torch.float32 - assert embeddings.shape == (1, 1500, 512) # (batch, seq_len, hidden_size) diff --git a/tests/data/test_datasets.py b/tests/data/test_datasets.py deleted file mode 100644 index ca47d1c2..00000000 --- a/tests/data/test_datasets.py +++ /dev/null @@ -1,218 +0,0 @@ -import tempfile -from pathlib import Path - -import numpy as np -import pytest -import torch - -import audiotools -from audiotools.data import transforms as tfm - - -def test_align_lists(): - input_lists = [ - ["a/1.wav", "b/1.wav", "c/1.wav", "d/1.wav"], - ["a/2.wav", "c/2.wav"], - ["c/3.wav"], - ] - target_lists = [ - ["a/1.wav", "b/1.wav", "c/1.wav", "d/1.wav"], - ["a/2.wav", "none", "c/2.wav", "none"], - ["none", "none", "c/3.wav", "none"], - ] - - def _preprocess(lists): - output = [] - for x in lists: - output.append([]) - for y in x: - output[-1].append({"path": y}) - return output - - input_lists = _preprocess(input_lists) - target_lists = _preprocess(target_lists) - - aligned_lists = audiotools.datasets.align_lists(input_lists) - assert target_lists == aligned_lists - - -def test_audio_dataset(): - transform = tfm.Compose( - [ - tfm.VolumeNorm(), - tfm.Silence(prob=0.5), - ], - ) - loader = audiotools.data.datasets.AudioLoader( - sources=["tests/audio/spk.csv"], - transform=transform, - ) - dataset = audiotools.data.datasets.AudioDataset( - loader, - 44100, - n_examples=100, - transform=transform, - ) - dataloader = torch.utils.data.DataLoader( - dataset, - batch_size=16, - num_workers=0, - collate_fn=dataset.collate, - ) - for batch in dataloader: - kwargs = batch["transform_args"] - signal = batch["signal"] - original = signal.clone() - - signal = dataset.transform(signal, **kwargs) - original = dataset.transform(original, **kwargs) - - mask = kwargs["Compose"]["1.Silence"]["mask"] - - zeros_ = torch.zeros_like(signal[mask].audio_data) - original_ = original[~mask].audio_data - - assert torch.allclose(signal[mask].audio_data, zeros_) - assert torch.allclose(signal[~mask].audio_data, original_) - - -def test_aligned_audio_dataset(): - with tempfile.TemporaryDirectory() as d: - dataset_dir = Path(d) - audiotools.util.generate_chord_dataset( - max_voices=8, num_items=3, output_dir=dataset_dir - ) - loaders = [ - audiotools.data.datasets.AudioLoader([dataset_dir / f"track_{i}"]) - for i in range(3) - ] - dataset = audiotools.data.datasets.AudioDataset( - loaders, 44100, n_examples=1000, aligned=True, shuffle_loaders=True - ) - dataloader = torch.utils.data.DataLoader( - dataset, - batch_size=16, - num_workers=0, - collate_fn=dataset.collate, - ) - - # Make sure the voice tracks are aligned. 
- for batch in dataloader: - paths = [] - for i in range(len(loaders)): - _paths = [p.split("/")[-1] for p in batch[i]["path"]] - paths.append(_paths) - paths = np.array(paths) - for i in range(paths.shape[1]): - col = paths[:, i] - col = col[col != "none"] - assert np.all(col == col[0]) - - -def test_loader_without_replacement(): - with tempfile.TemporaryDirectory() as d: - dataset_dir = Path(d) - num_items = 100 - audiotools.util.generate_chord_dataset( - max_voices=1, - num_items=num_items, - output_dir=dataset_dir, - duration=0.01, - ) - loader = audiotools.data.datasets.AudioLoader([dataset_dir], shuffle=False) - dataset = audiotools.data.datasets.AudioDataset(loader, 44100) - - for idx in range(num_items): - item = dataset[idx] - assert item["item_idx"] == idx - - -def test_loader_with_replacement(): - with tempfile.TemporaryDirectory() as d: - dataset_dir = Path(d) - num_items = 100 - audiotools.util.generate_chord_dataset( - max_voices=1, - num_items=num_items, - output_dir=dataset_dir, - duration=0.01, - ) - loader = audiotools.data.datasets.AudioLoader([dataset_dir]) - dataset = audiotools.data.datasets.AudioDataset( - loader, 44100, without_replacement=False - ) - - for idx in range(num_items): - item = dataset[idx] - - -def test_loader_out_of_range(): - with tempfile.TemporaryDirectory() as d: - dataset_dir = Path(d) - num_items = 100 - audiotools.util.generate_chord_dataset( - max_voices=1, - num_items=num_items, - output_dir=dataset_dir, - duration=0.01, - ) - loader = audiotools.data.datasets.AudioLoader([dataset_dir]) - - item = loader( - sample_rate=44100, - duration=0.01, - state=audiotools.util.random_state(0), - source_idx=0, - item_idx=101, - ) - assert item["path"] == "none" - - -def test_dataset_pipeline(): - transform = tfm.Compose( - [ - tfm.RoomImpulseResponse(sources=["tests/audio/irs.csv"]), - tfm.BackgroundNoise(sources=["tests/audio/noises.csv"]), - ] - ) - loader = audiotools.data.datasets.AudioLoader(sources=["tests/audio/spk.csv"]) - dataset = audiotools.data.datasets.AudioDataset( - loader, - 44100, - n_examples=10, - transform=transform, - ) - dataloader = torch.utils.data.DataLoader( - dataset, num_workers=0, batch_size=1, collate_fn=dataset.collate - ) - for batch in dataloader: - batch = audiotools.core.util.prepare_batch(batch, device="cpu") - kwargs = batch["transform_args"] - signal = batch["signal"] - batch = dataset.transform(signal, **kwargs) - - -class NumberDataset: - def __init__(self): - pass - - def __len__(self): - return 10 - - def __getitem__(self, idx): - return {"idx": idx} - - -def test_concat_dataset(): - d1 = NumberDataset() - d2 = NumberDataset() - d3 = NumberDataset() - - d = audiotools.datasets.ConcatDataset([d1, d2, d3]) - x = d.collate([d[i] for i in range(len(d))])["idx"].tolist() - - t = [] - for i in range(10): - t += [i, i, i] - - assert x == t diff --git a/tests/data/test_preprocess.py b/tests/data/test_preprocess.py deleted file mode 100644 index d5e25a8b..00000000 --- a/tests/data/test_preprocess.py +++ /dev/null @@ -1,27 +0,0 @@ -import tempfile -from pathlib import Path - -from audiotools.core.util import find_audio -from audiotools.core.util import read_sources -from audiotools.data import preprocess - - -def test_create_csv(): - with tempfile.NamedTemporaryFile(suffix=".csv") as f: - preprocess.create_csv( - find_audio("./tests/audio/spk", ext=["wav"]), f.name, loudness=True - ) - - -def test_create_csv_with_empty_rows(): - audio_files = find_audio("./tests/audio/spk", ext=["wav"]) - audio_files.insert(0, "") - 
audio_files.insert(2, "") - - with tempfile.NamedTemporaryFile(suffix=".csv") as f: - preprocess.create_csv(audio_files, f.name, loudness=True) - - audio_files = read_sources([f.name], remove_empty=True) - assert len(audio_files[0]) == 1 - audio_files = read_sources([f.name], remove_empty=False) - assert len(audio_files[0]) == 3 diff --git a/tests/data/test_transforms.py b/tests/data/test_transforms.py deleted file mode 100644 index f46a1b0f..00000000 --- a/tests/data/test_transforms.py +++ /dev/null @@ -1,439 +0,0 @@ -from pathlib import Path - -import numpy as np -import pytest -import torch - -import audiotools -from audiotools import AudioSignal -from audiotools import util -from audiotools.data import transforms as tfm -from audiotools.data.datasets import AudioDataset - -non_deterministic_transforms = ["TimeNoise", "FrequencyNoise"] -transforms_to_test = [] -for x in dir(tfm): - if hasattr(getattr(tfm, x), "transform"): - if x not in ["Compose", "Choose", "Repeat", "RepeatUpTo"]: - transforms_to_test.append(x) - - -def _compare_transform(transform_name, signal): - regression_data = Path(f"tests/regression/transforms/{transform_name}.wav") - regression_data.parent.mkdir(exist_ok=True, parents=True) - - if regression_data.exists(): - regression_signal = AudioSignal(regression_data) - assert torch.allclose( - signal.audio_data, regression_signal.audio_data, atol=1e-4 - ) - else: - signal.write(regression_data) - - -@pytest.mark.parametrize("transform_name", transforms_to_test) -def test_transform(transform_name): - seed = 0 - util.seed(seed) - transform_cls = getattr(tfm, transform_name) - - kwargs = {} - if transform_name == "BackgroundNoise": - kwargs["sources"] = ["tests/audio/noises.csv"] - if transform_name == "RoomImpulseResponse": - kwargs["sources"] = ["tests/audio/irs.csv"] - if transform_name == "CrossTalk": - kwargs["sources"] = ["tests/audio/spk.csv"] - - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=2) - signal.metadata["loudness"] = AudioSignal(audio_path).ffmpeg_loudness().item() - transform = transform_cls(prob=1.0, **kwargs) - - kwargs = transform.instantiate(seed, signal) - for k in kwargs[transform_name]: - assert k in transform.keys - - output = transform(signal, **kwargs) - assert isinstance(output, AudioSignal) - - _compare_transform(transform_name, output) - - if transform_name in non_deterministic_transforms: - return - - # Test that if you make a batch of signals and call it, - # the first item in the batch is still the same as above. - batch_size = 4 - signal = AudioSignal(audio_path, offset=10, duration=2) - signal_batch = AudioSignal.batch([signal.clone() for _ in range(batch_size)]) - signal_batch.metadata["loudness"] = AudioSignal(audio_path).ffmpeg_loudness().item() - - states = [seed + idx for idx in list(range(batch_size))] - kwargs = transform.batch_instantiate(states, signal_batch) - batch_output = transform(signal_batch, **kwargs) - - assert batch_output[0] == output - - ## Test that you can apply transform with the same args twice. 
- signal = AudioSignal(audio_path, offset=10, duration=2) - signal.metadata["loudness"] = AudioSignal(audio_path).ffmpeg_loudness().item() - kwargs = transform.instantiate(seed, signal) - output_a = transform(signal.clone(), **kwargs) - output_b = transform(signal.clone(), **kwargs) - - assert output_a == output_b - - -def test_compose_basic(): - seed = 0 - - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=2) - transform = tfm.Compose( - [ - tfm.RoomImpulseResponse(sources=["tests/audio/irs.csv"]), - tfm.BackgroundNoise(sources=["tests/audio/noises.csv"]), - ], - ) - - kwargs = transform.instantiate(seed, signal) - output = transform(signal, **kwargs) - - _compare_transform("Compose", output) - - assert isinstance(transform[0], tfm.RoomImpulseResponse) - assert isinstance(transform[1], tfm.BackgroundNoise) - assert len(transform) == 2 - - # Make sure __iter__ works - for _tfm in transform: - pass - - -class MulTransform(tfm.BaseTransform): - def __init__(self, num, name=None): - self.num = num - super().__init__(name=name, keys=["num"]) - - def _transform(self, signal, num): - signal.audio_data = signal.audio_data * num[:, None, None] - return signal - - def _instantiate(self, state): - return {"num": self.num} - - -def test_compose_with_duplicate_transforms(): - muls = [0.5, 0.25, 0.125] - transform = tfm.Compose([MulTransform(x) for x in muls]) - full_mul = np.prod(muls) - - kwargs = transform.instantiate(0) - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=2) - - output = transform(signal.clone(), **kwargs) - expected_output = signal.audio_data * full_mul - - assert torch.allclose(output.audio_data, expected_output) - - -def test_nested_compose(): - muls = [0.5, 0.25, 0.125] - transform = tfm.Compose( - [ - MulTransform(muls[0]), - tfm.Compose([MulTransform(muls[1]), tfm.Compose([MulTransform(muls[2])])]), - ] - ) - full_mul = np.prod(muls) - - kwargs = transform.instantiate(0) - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=2) - - output = transform(signal.clone(), **kwargs) - expected_output = signal.audio_data * full_mul - - assert torch.allclose(output.audio_data, expected_output) - - -def test_compose_filtering(): - muls = [0.5, 0.25, 0.125] - transform = tfm.Compose([MulTransform(x, name=str(x)) for x in muls]) - - kwargs = transform.instantiate(0) - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=2) - - for s in range(len(muls)): - for _ in range(10): - _muls = np.random.choice(muls, size=s, replace=False).tolist() - full_mul = np.prod(_muls) - with transform.filter(*[str(x) for x in _muls]): - output = transform(signal.clone(), **kwargs) - - expected_output = signal.audio_data * full_mul - assert torch.allclose(output.audio_data, expected_output) - - -def test_sequential_compose(): - muls = [0.5, 0.25, 0.125] - transform = tfm.Compose( - [ - tfm.Compose([MulTransform(muls[0])]), - tfm.Compose([MulTransform(muls[1]), MulTransform(muls[2])]), - ] - ) - full_mul = np.prod(muls) - - kwargs = transform.instantiate(0) - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=2) - - output = transform(signal.clone(), **kwargs) - expected_output = signal.audio_data * full_mul - - assert torch.allclose(output.audio_data, expected_output) - - -def test_choose_basic(): - seed = 0 - 
audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=2) - transform = tfm.Choose( - [ - tfm.RoomImpulseResponse(sources=["tests/audio/irs.csv"]), - tfm.BackgroundNoise(sources=["tests/audio/noises.csv"]), - ] - ) - - kwargs = transform.instantiate(seed, signal) - output = transform(signal.clone(), **kwargs) - - _compare_transform("Choose", output) - - transform = tfm.Choose( - [ - MulTransform(0.0), - MulTransform(2.0), - ] - ) - targets = [signal.clone() * 0.0, signal.clone() * 2.0] - - for seed in range(10): - kwargs = transform.instantiate(seed, signal) - output = transform(signal.clone(), **kwargs) - - assert any([output == target for target in targets]) - - # Test that if you make a batch of signals and call it, - # the first item in the batch is still the same as above. - batch_size = 4 - signal = AudioSignal(audio_path, offset=10, duration=2) - signal_batch = AudioSignal.batch([signal.clone() for _ in range(batch_size)]) - - states = [seed + idx for idx in list(range(batch_size))] - kwargs = transform.batch_instantiate(states, signal_batch) - batch_output = transform(signal_batch, **kwargs) - - for nb in range(batch_size): - assert batch_output[nb] in targets - - -def test_choose_weighted(): - seed = 0 - audio_path = "tests/audio/spk/f10_script4_produced.wav" - transform = tfm.Choose( - [ - MulTransform(0.0), - MulTransform(2.0), - ], - weights=[0.0, 1.0], - ) - - # Test that if you make a batch of signals and call it, - # the first item in the batch is still the same as above. - batch_size = 4 - signal = AudioSignal(audio_path, offset=10, duration=2) - signal_batch = AudioSignal.batch([signal.clone() for _ in range(batch_size)]) - - targets = [signal.clone() * 0.0, signal.clone() * 2.0] - - states = [seed + idx for idx in list(range(batch_size))] - kwargs = transform.batch_instantiate(states, signal_batch) - batch_output = transform(signal_batch, **kwargs) - - for nb in range(batch_size): - assert batch_output[nb] == targets[1] - - -def test_choose_with_compose(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=2) - - transform = tfm.Choose( - [ - tfm.Compose([MulTransform(0.0)]), - tfm.Compose([MulTransform(2.0)]), - ] - ) - - targets = [signal.clone() * 0.0, signal.clone() * 2.0] - - for seed in range(10): - kwargs = transform.instantiate(seed, signal) - output = transform(signal, **kwargs) - - assert output in targets - - -def test_repeat(): - seed = 0 - audio_path = "tests/audio/spk/f10_script4_produced.wav" - signal = AudioSignal(audio_path, offset=10, duration=2) - - kwargs = {} - kwargs["transform"] = tfm.Compose( - tfm.FrequencyMask(), - tfm.TimeMask(), - ) - kwargs["n_repeat"] = 5 - - transform = tfm.Repeat(**kwargs) - kwargs = transform.instantiate(seed, signal) - output = transform(signal.clone(), **kwargs) - - _compare_transform("Repeat", output) - - kwargs = {} - kwargs["transform"] = tfm.Compose( - tfm.FrequencyMask(), - tfm.TimeMask(), - ) - kwargs["max_repeat"] = 10 - - transform = tfm.RepeatUpTo(**kwargs) - kwargs = transform.instantiate(seed, signal) - output = transform(signal.clone(), **kwargs) - - _compare_transform("RepeatUpTo", output) - - # Make sure repeat does what it says - transform = tfm.Repeat(MulTransform(0.5), n_repeat=3) - kwargs = transform.instantiate(seed, signal) - signal = AudioSignal(torch.randn(1, 1, 100).clamp(1e-5), 44100) - output = transform(signal.clone(), **kwargs) - - scale = (output.audio_data / 
signal.audio_data).mean() - assert scale == (0.5**3) - - -class DummyData(torch.utils.data.Dataset): - def __init__(self, audio_path): - super().__init__() - - self.audio_path = audio_path - self.length = 100 - self.transform = tfm.Silence(prob=0.5) - - def __getitem__(self, idx): - state = util.random_state(idx) - signal = AudioSignal.salient_excerpt( - self.audio_path, state=state, duration=1.0 - ).resample(44100) - - item = self.transform.instantiate(state, signal=signal) - item["signal"] = signal - - return item - - def __len__(self): - return self.length - - -def test_masking(): - dataset = DummyData("tests/audio/spk/f10_script4_produced.wav") - dataloader = torch.utils.data.DataLoader( - dataset, - batch_size=16, - num_workers=0, - collate_fn=util.collate, - ) - for batch in dataloader: - signal = batch.pop("signal") - original = signal.clone() - - signal = dataset.transform(signal, **batch) - original = dataset.transform(original, **batch) - mask = batch["Silence"]["mask"] - - zeros_ = torch.zeros_like(signal[mask].audio_data) - original_ = original[~mask].audio_data - - assert torch.allclose(signal[mask].audio_data, zeros_) - assert torch.allclose(original[~mask].audio_data, original_) - - -def test_nested_masking(): - transform = tfm.Compose( - [ - tfm.VolumeNorm(prob=0.5), - tfm.Silence(prob=0.9), - ], - prob=0.9, - ) - - loader = audiotools.data.datasets.AudioLoader(sources=["tests/audio/spk.csv"]) - dataset = audiotools.data.datasets.AudioDataset( - loader, - 44100, - n_examples=100, - transform=transform, - ) - dataloader = torch.utils.data.DataLoader( - dataset, num_workers=0, batch_size=10, collate_fn=dataset.collate - ) - - for batch in dataloader: - batch = util.prepare_batch(batch, device="cpu") - signal = batch["signal"] - kwargs = batch["transform_args"] - with torch.no_grad(): - output = dataset.transform(signal, **kwargs) - - -def test_smoothing_edge_case(): - transform = tfm.Smoothing() - zeros = torch.zeros(1, 1, 44100) - signal = AudioSignal(zeros, 44100) - kwargs = transform.instantiate(0, signal) - output = transform(signal, **kwargs) - - assert torch.allclose(output.audio_data, zeros) - - -def test_global_volume_norm(): - signal = AudioSignal.wave(440, 1, 44100, 1) - - # signal with -inf loudness should be unchanged - signal.metadata["loudness"] = float("-inf") - - transform = tfm.GlobalVolumeNorm(db=("const", -100)) - kwargs = transform.instantiate(0, signal) - - output = transform(signal.clone(), **kwargs) - assert torch.allclose(output.samples, signal.samples) - - # signal without a loudness key should be unchanged - signal.metadata.pop("loudness") - kwargs = transform.instantiate(0, signal) - output = transform(signal.clone(), **kwargs) - assert torch.allclose(output.samples, signal.samples) - - # signal with the actual loudness should be normalized - signal.metadata["loudness"] = signal.ffmpeg_loudness() - kwargs = transform.instantiate(0, signal) - output = transform(signal.clone(), **kwargs) - assert not torch.allclose(output.samples, signal.samples) diff --git a/tests/metrics/test_distance.py b/tests/metrics/test_distance.py deleted file mode 100644 index 1bdb84ff..00000000 --- a/tests/metrics/test_distance.py +++ /dev/null @@ -1,56 +0,0 @@ -import numpy as np -import pytest -from numpy.core.fromnumeric import clip - -from audiotools import AudioSignal -from audiotools import metrics - - -@pytest.mark.parametrize("scaling", [False, True]) -@pytest.mark.parametrize("reduction", ["mean", "sum"]) -@pytest.mark.parametrize("clip_min", [None, -30]) 
-@pytest.mark.parametrize("zero_mean", [False, True]) -def test_sisdr(scaling, reduction, clip_min, zero_mean): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - - x = AudioSignal.excerpt(audio_path, duration=1) - y = x.deepcopy() - - loss = metrics.distance.SISDRLoss( - scaling=scaling, reduction=reduction, clip_min=clip_min, zero_mean=zero_mean - ) - - loss_val_identity = loss(x, y) - lower_thresh = -np.inf if clip_min is None else clip_min - assert np.allclose(loss_val_identity, lower_thresh) - - # Pass as tensors rather than audio signals - loss_val_identity = loss(x.audio_data, y.audio_data) - lower_thresh = -np.inf if clip_min is None else clip_min - assert np.allclose(loss_val_identity, lower_thresh) - - y = AudioSignal.excerpt(audio_path, duration=1) - - loss_val_diff = loss(x, y) - assert loss_val_diff > loss_val_identity - - -def test_l1_loss(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - - x = AudioSignal.excerpt(audio_path, duration=1) - y = x.deepcopy() - - loss = metrics.distance.L1Loss() - - loss_val_identity = loss(x, y) - assert np.allclose(loss_val_identity, 0.0) - - # Pass as tensors rather than audio signals - loss_val_identity = loss(x.audio_data, y.audio_data) - assert np.allclose(loss_val_identity, 0.0) - - y = AudioSignal.excerpt(audio_path, duration=1) - - loss_val_diff = loss(x, y) - assert loss_val_diff > loss_val_identity diff --git a/tests/metrics/test_quality.py b/tests/metrics/test_quality.py deleted file mode 100644 index b26d9cfd..00000000 --- a/tests/metrics/test_quality.py +++ /dev/null @@ -1,57 +0,0 @@ -import numpy as np -import pytest -import torch -import torchaudio - -import audiotools -from audiotools import AudioSignal -from audiotools import metrics -from audiotools.core import audio_signal - - -def test_stoi(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - - x = AudioSignal.excerpt(audio_path, duration=1, state=5) - y = x.deepcopy() - nz = AudioSignal(torch.rand_like(x.audio_data), x.sample_rate) - nz.normalize(-24) - - loss_val_identity = metrics.quality.stoi(x, y) - assert np.allclose(loss_val_identity, 1.0) - - y = AudioSignal.excerpt(audio_path, duration=1, state=0) - - loss_val_diff = metrics.quality.stoi(x, y) - assert loss_val_diff < loss_val_identity - - old_stoi = 1.0 - for snr in [50, 25, 10, 5, 0, -10, -20]: - estimate = x.deepcopy().mix(nz.deepcopy(), snr=snr) - new_stoi = metrics.quality.stoi(estimate, x) - assert new_stoi < old_stoi - old_stoi = new_stoi - - -def test_pesq(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - - x = AudioSignal.excerpt(audio_path, duration=1, offset=5, state=5) - y = x.deepcopy() - nz = AudioSignal(torch.rand_like(x.audio_data), x.sample_rate) - nz.normalize(-24) - - loss_val_identity = metrics.quality.pesq(x, y) - assert loss_val_identity > 3.0 - - y = AudioSignal.excerpt(audio_path, duration=1, offset=5, state=0) - - loss_val_diff = metrics.quality.pesq(x, y) - assert loss_val_diff < loss_val_identity - - old_pesq = loss_val_identity - for snr in [50, 25, 10, 0, -10, -20]: - estimate = x.deepcopy().mix(nz.deepcopy(), snr=snr) - new_pesq = metrics.quality.pesq(estimate, x) - assert new_pesq < old_pesq - old_pesq = new_pesq diff --git a/tests/metrics/test_spectral.py b/tests/metrics/test_spectral.py deleted file mode 100644 index ae9e83de..00000000 --- a/tests/metrics/test_spectral.py +++ /dev/null @@ -1,67 +0,0 @@ -import numpy as np - -from audiotools import AudioSignal -from audiotools import metrics - - -def test_multiscale_stft(): - audio_path = 
"tests/audio/spk/f10_script4_produced.wav" - - x = AudioSignal.excerpt(audio_path, duration=1) - y = x.deepcopy() - - loss = metrics.spectral.MultiScaleSTFTLoss() - - loss_val_identity = loss(x, y) - assert np.allclose(loss_val_identity, 0) - - y = AudioSignal.excerpt(audio_path, duration=1) - - loss_val_diff = loss(x, y) - assert loss_val_diff > loss_val_identity - - # Using SI-SDR Loss - y = x.deepcopy() - loss = metrics.spectral.MultiScaleSTFTLoss(loss_fn=metrics.distance.SISDRLoss()) - - loss_val_identity = loss(x, y) - assert np.allclose(loss_val_identity, -np.inf) - - y = AudioSignal.excerpt(audio_path, duration=1) - - loss_val_diff = loss(x, y) - assert loss_val_diff > loss_val_identity - - -def test_mel_spectrogram_loss(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - - x = AudioSignal.excerpt(audio_path, duration=1) - y = x.deepcopy() - - loss = metrics.spectral.MelSpectrogramLoss() - - loss_val_identity = loss(x, y) - assert np.allclose(loss_val_identity, 0) - - y = AudioSignal.excerpt(audio_path, duration=1) - - loss_val_diff = loss(x, y) - assert loss_val_diff > loss_val_identity - - -def test_phase_loss(): - audio_path = "tests/audio/spk/f10_script4_produced.wav" - - x = AudioSignal.excerpt(audio_path, duration=1) - y = x.deepcopy() - - loss = metrics.spectral.PhaseLoss() - - loss_val_identity = loss(x, y) - assert np.allclose(loss_val_identity, 0) - - y = AudioSignal.excerpt(audio_path, duration=1) - - loss_val_diff = loss(x, y) - assert loss_val_diff > loss_val_identity diff --git a/tests/ml/test_decorators.py b/tests/ml/test_decorators.py deleted file mode 100644 index a034a72f..00000000 --- a/tests/ml/test_decorators.py +++ /dev/null @@ -1,89 +0,0 @@ -import time - -import torch -from torch.utils.tensorboard import SummaryWriter - -from audiotools.ml.decorators import timer -from audiotools.ml.decorators import Tracker -from audiotools.ml.decorators import when - - -def test_all_decorators(): - rank = 0 - max_iters = 100 - - writer = SummaryWriter("/tmp/logs") - tracker = Tracker(writer, log_file="/tmp/log.txt") - - train_data = range(100) - val_data = range(100) - - @tracker.log("train", "value", history=False) - @tracker.track("train", max_iters, tracker.step) - @timer() - def train_loop(): - i = tracker.step - time.sleep(0.01) - return { - "loss": torch.exp(torch.FloatTensor([-i / 100])), - "mel": torch.exp(torch.FloatTensor([-i / 100])), - "stft": torch.exp(torch.FloatTensor([-i / 100])), - "waveform": torch.exp(torch.FloatTensor([-i / 100])), - "not_scalar": torch.arange(10), - } - - @tracker.track("val", len(val_data)) - @timer() - def val_loop(): - i = tracker.step - time.sleep(0.01) - return { - "loss": torch.exp(torch.FloatTensor([-i / 100])), - "mel": torch.exp(torch.FloatTensor([-i / 100])), - "stft": torch.exp(torch.FloatTensor([-i / 100])), - "waveform": torch.exp(torch.FloatTensor([-i / 100])), - "not_scalar": torch.arange(10), - "string": "string", - } - - @when(lambda: tracker.step % 1000 == 0 and rank == 0) - @torch.no_grad() - def save_samples(): - tracker.print("Saving samples to TensorBoard.") - - @when(lambda: tracker.step % 100 == 0 and rank == 0) - def checkpoint(): - save_samples() - if tracker.is_best("val", "mel"): - tracker.print("Best model so far.") - tracker.print("Saving to /runs/exp1") - tracker.done("val", f"Iteration {tracker.step}") - - @when(lambda: tracker.step % 100 == 0) - @tracker.log("val", "mean") - @torch.no_grad() - def validate(): - for _ in range(len(val_data)): - output = val_loop() - return output - - with 
tracker.live: - for tracker.step in range(max_iters): - validate() - checkpoint() - train_loop() - - state_dict = tracker.state_dict() - tracker.load_state_dict(state_dict) - - # If train loop returned not a dict - @tracker.track("train", max_iters, tracker.step) - def train_loop_2(): - i = tracker.step - time.sleep(0.01) - - with tracker.live: - for tracker.step in range(max_iters): - validate() - checkpoint() - train_loop_2() diff --git a/tests/ml/test_experiment.py b/tests/ml/test_experiment.py deleted file mode 100644 index 2a2252d3..00000000 --- a/tests/ml/test_experiment.py +++ /dev/null @@ -1,10 +0,0 @@ -import os -import tempfile - -from audiotools.ml import Experiment - - -def test_experiment(): - with tempfile.TemporaryDirectory() as d: - with Experiment(d) as exp: - exp.snapshot() diff --git a/tests/ml/test_model.py b/tests/ml/test_model.py deleted file mode 100644 index fbb08ccc..00000000 --- a/tests/ml/test_model.py +++ /dev/null @@ -1,85 +0,0 @@ -import tempfile - -import torch -from torch import nn - -from audiotools import ml -from audiotools import util - -SEED = 0 - - -def seed_and_run(model, *args, **kwargs): - util.seed(SEED) - return model(*args, **kwargs) - - -class Model(ml.BaseModel): - def __init__(self, arg1: float = 1.0): - super().__init__() - self.arg1 = arg1 - self.linear = nn.Linear(1, 1) - - def forward(self, x): - return self.linear(x) - - -class OtherModel(ml.BaseModel): - def __init__(self): - super().__init__() - self.linear = nn.Linear(1, 1) - - def forward(self, x): - return self.linear(x) - - -def test_base_model(): - # Save and load - ml.BaseModel.EXTERN += ["test_model"] - - x = torch.randn(10, 1) - model1 = Model() - - assert model1.device == torch.device("cpu") - - out1 = seed_and_run(model1, x) - - with tempfile.NamedTemporaryFile(suffix=".pth") as f: - model1.save( - f.name, - ) - model2 = Model.load(f.name) - out2 = seed_and_run(model2, x) - assert torch.allclose(out1, out2) - - # test re-export - model2.save(f.name) - model3 = Model.load(f.name) - out3 = seed_and_run(model3, x) - assert torch.allclose(out1, out3) - - # make sure legacy/save load works - model1.save(f.name, package=False) - model2 = Model.load(f.name) - out2 = seed_and_run(model2, x) - assert torch.allclose(out1, out2) - - # make sure new way -> legacy save -> legacy load works - model1.save(f.name, package=True) - model2 = Model.load(f.name) - model2.save(f.name, package=False) - model3 = Model.load(f.name) - out3 = seed_and_run(model3, x) - - # save/load without package, but with model2 being a model - # without an argument of arg1 to its instantiation. 
- model1.save(f.name, package=False) - model2 = OtherModel.load(f.name) - out2 = seed_and_run(model2, x) - assert torch.allclose(out1, out2) - - assert torch.allclose(out1, out3) - - with tempfile.TemporaryDirectory() as d: - model1.save_to_folder(d, {"data": 1.0}) - Model.load_from_folder(d) diff --git a/tests/profilers/profile_load.py b/tests/profilers/profile_load.py deleted file mode 100644 index a948a7dc..00000000 --- a/tests/profilers/profile_load.py +++ /dev/null @@ -1,102 +0,0 @@ -import tempfile -import timeit - -import librosa -import torch -import torchaudio - -from audiotools import AudioSignal -from audiotools import util - - -class LibrosaSignal(AudioSignal): - def load_from_file(self, audio_path, offset, duration, device=None): - data, sample_rate = librosa.load( - audio_path, - offset=offset, - duration=duration, - sr=None, - ) - data = torch.from_numpy(data) - while data.ndim < 3: - data = data.unsqueeze(0) - - self.audio_data = data - self.original_signal_length = self.signal_length - - self.sample_rate = sample_rate - self.path_to_file = audio_path - return self.to(device) - - -class TorchSignal(AudioSignal): - def load_from_file(self, audio_path, offset, duration, device=None): - info = util.info(audio_path) - sample_rate = info.sample_rate - - frame_offset = min(int(sample_rate * offset), info.num_frames) - if duration is not None: - num_frames = min(int(sample_rate * duration), info.num_frames) - else: - num_frames = info.num_frames - - # Compatible with torchaudio 0.7.2 and 0.8.1. - torchaudio_version_070 = "0.7" in torchaudio.__version__ - kwargs = { - "offset" if torchaudio_version_070 else "frame_offset": frame_offset, - "num_frames": num_frames, - } - - data, sample_rate = torchaudio.load(audio_path, **kwargs) - while data.ndim < 3: - data = data.unsqueeze(0) - - self.audio_data = data - self.original_signal_length = self.signal_length - - self.sample_rate = sample_rate - self.path_to_file = audio_path - return self.to(device) - - -def profile_salient_excerpt(filename: str, duration: float, num_tries: int): - def func(): - signal = AudioSignal.salient_excerpt( - filename, num_tries=num_tries, duration=duration, loudness_cutoff=-40 - ) - - print(f"-------------------") - print(f"Profiling salient excerpt from {filename} with {num_tries} tries") - time = timeit.timeit(func, number=10) - print(f"Total time: {time}") - print(f"Time per try: {time / num_tries}") - print() - - -# Load 2 second excerpt from a 2 hour file -with tempfile.NamedTemporaryFile(suffix=".wav") as f: - signal = AudioSignal(torch.randn(44100 * 60 * 60), 44100) - signal.write(f.name) - - def func(): - LibrosaSignal.excerpt(f.name, duration=2.0) - - librosa_time = timeit.timeit(func, number=10) - - def func(): - TorchSignal.excerpt(f.name, duration=2.0) - - torch_time = timeit.timeit(func, number=10) - - print(f"Librosa loading took {librosa_time}") - print(f"Torch loading took {torch_time}") - - print(f"Librosa is {torch_time / librosa_time}x faster than Torch") - - signal = AudioSignal.zeros(2 * 60 * 60, 48000) - signal.write(f.name) - - profile_salient_excerpt(f.name, 5.0, 4) - profile_salient_excerpt(f.name, 5.0, 8) - profile_salient_excerpt(f.name, 5.0, 10) - profile_salient_excerpt(f.name, 5.0, 12) diff --git a/tests/profilers/profile_loudness.py b/tests/profilers/profile_loudness.py deleted file mode 100644 index 460a5724..00000000 --- a/tests/profilers/profile_loudness.py +++ /dev/null @@ -1,88 +0,0 @@ -import time - -import numpy as np -import torch -from flatten_dict import flatten -from 
flatten_dict import unflatten -from rich.console import Console -from rich.progress import track -from rich.table import Table - -from audiotools import AudioSignal -from audiotools.core import util -from audiotools.data.datasets import AudioDataset -from audiotools.data.datasets import AudioLoader - - -def collate(list_of_dicts): - # Flatten the dictionaries to avoid recursion. - list_of_dicts = [flatten(d) for d in list_of_dicts] - dict_of_lists = {k: [dic[k] for dic in list_of_dicts] for k in list_of_dicts[0]} - - batch = {} - for k, v in dict_of_lists.items(): - if isinstance(v, list): - if all(isinstance(s, AudioSignal) for s in v): - batch[k] = AudioSignal.batch(v, pad_signals=True) - else: - # Borrow the default collate fn from torch. - batch[k] = torch.utils.data._utils.collate.default_collate(v) - return unflatten(batch) - - -def run(batch_size=64, duration=5.0, device="cuda"): - loader = AudioLoader(sources=["tests/audio/spk.csv"]) - dataset = AudioDataset( - loader, - 44100, - 10 * batch_size, - duration, - ) - dataloader = torch.utils.data.DataLoader( - dataset, num_workers=16, batch_size=batch_size, collate_fn=collate - ) - - timings = [] - end_to_end_time = None - - for batch in track(dataloader, "Generating data"): - batch = util.prepare_batch(batch, device=device) - # This skips the load time of the first batch. - if end_to_end_time is None: - end_to_end_time = time.time() - with torch.no_grad(): - start_time = time.time() - batch["signal"].loudness() - torch.cuda.synchronize() - elapsed = time.time() - start_time - timings.append(elapsed) - - total_time = time.time() - end_to_end_time - loudness_time = np.mean(timings) - - stats = { - "n_batches": len(dataloader), - "batch_size": batch_size, - "duration": duration, - "device": device, - "loudness_time": loudness_time, - "total_time": total_time, - "items_per_sec": (len(dataset) - batch_size) / total_time, - } - - table = Table(expand=False) - - for k, v in stats.items(): - row_args = [ - k, - ] - row_args.append(str(v)) - table.add_row(*row_args) - - console = Console() - console.print(table) - - -if __name__ == "__main__": - run(64, 5.0, "cpu") - run(64, 5.0, "cuda") diff --git a/tests/profilers/profile_speed.py b/tests/profilers/profile_speed.py deleted file mode 100644 index 24994830..00000000 --- a/tests/profilers/profile_speed.py +++ /dev/null @@ -1,79 +0,0 @@ -import time - -import numpy as np -import torch -from rich.console import Console -from rich.progress import track -from rich.table import Table - -from audiotools import AudioSignal -from audiotools.core import util -from audiotools.data import transforms as tfm -from audiotools.data.datasets import AudioDataset -from audiotools.data.datasets import AudioLoader - - -def run(batch_size=64, duration=5.0, device="cuda"): - transform = tfm.Compose( - [ - tfm.RoomImpulseResponse(csv_files=["tests/audio/irs.csv"]), - tfm.BackgroundNoise(csv_files=["tests/audio/noises.csv"]), - ] - ) - loader = AudioLoader(sources=["tests/audio/spk.csv"]) - dataset = AudioDataset( - loader, - 44100, - n_examples=1000, - duration=duration, - transform=transform, - ) - dataloader = torch.utils.data.DataLoader( - dataset, num_workers=16, batch_size=batch_size, collate_fn=dataset.collate - ) - - timings = [] - end_to_end_time = None - - for batch in track(dataloader, "Generating data"): - batch = util.prepare_batch(batch, device=device) - # This skips the load time of the first batch. 
- if end_to_end_time is None: - end_to_end_time = time.time() - with torch.no_grad(): - start_time = time.time() - batch = dataset.transform(batch) - elapsed = time.time() - start_time - timings.append(elapsed) - - total_time = time.time() - end_to_end_time - transform_time = np.mean(timings) - - stats = { - "n_batches": len(dataloader), - "batch_size": batch_size, - "duration": duration, - "device": device, - "transform_time": transform_time, - "total_time": total_time, - "items_per_sec": (len(dataset) - batch_size) / total_time, - } - - table = Table(expand=False) - - for k, v in stats.items(): - row_args = [ - k, - ] - row_args.append(str(v)) - table.add_row(*row_args) - - console = Console() - console.print(table) - - -if __name__ == "__main__": - run(64, 0.5, "cpu") - run(64, 0.5, "cuda") - run(64, 2.0, "cuda") - run(64, 5.0, "cuda") diff --git a/tests/profilers/profile_transforms.py b/tests/profilers/profile_transforms.py deleted file mode 100644 index a2c8087e..00000000 --- a/tests/profilers/profile_transforms.py +++ /dev/null @@ -1,73 +0,0 @@ -import time - -import numpy as np -import torch -from rich.console import Console -from rich.progress import track -from rich.table import Table - -from audiotools import AudioSignal -from audiotools.core import util -from audiotools.data import transforms as tfm -from audiotools.data.datasets import AudioDataset -from audiotools.data.datasets import AudioLoader - -transforms_to_demo = [] -for x in dir(tfm): - if hasattr(getattr(tfm, x), "transform"): - if x not in ["Compose", "Choose", "Repeat", "RepeatUpTo"]: - transforms_to_demo.append(x) - - -def run(batch_size=64, duration=5.0, device="cuda"): - times = {} - - for transform_name in track(transforms_to_demo): - kwargs = {} - if transform_name == "BackgroundNoise": - kwargs["sources"] = ["tests/audio/noises.csv"] - if transform_name == "RoomImpulseResponse": - kwargs["sources"] = ["tests/audio/irs.csv"] - if "Quantization" in transform_name: - kwargs["channels"] = ("choice", [8, 16, 32]) - - transform_cls = getattr(tfm, transform_name) - t = transform_cls(prob=1.0, **kwargs) - - loader = AudioLoader(sources=["tests/audio/spk.csv"]) - dataset = AudioDataset( - loader, - 44100, - batch_size * 10, - duration, - transform=t, - ) - dataloader = torch.utils.data.DataLoader( - dataset, num_workers=0, batch_size=batch_size, collate_fn=dataset.collate - ) - batch = next(iter(dataloader)) - batch = util.prepare_batch(batch, device) - - with torch.no_grad(): - start_time = time.time() - output = t(batch["signal"], **batch["transform_args"]) - torch.cuda.synchronize() - elapsed = time.time() - start_time - - times[transform_name] = elapsed - - table = Table(expand=False) - - for k, v in times.items(): - row_args = [ - k, - ] - row_args.append(str(v)) - table.add_row(*row_args) - - console = Console() - console.print(table) - - -if __name__ == "__main__": - run(64, 5.0, "cuda") diff --git a/tests/regression/transforms/BackgroundNoise.wav b/tests/regression/transforms/BackgroundNoise.wav deleted file mode 100644 index daf9cce1..00000000 --- a/tests/regression/transforms/BackgroundNoise.wav +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d54d33d4ebfa647cd5cfbd0b8c7b9333fc6cce3b0bba7c127373de19a276cb6 -size 176444 diff --git a/tests/regression/transforms/BaseTransform.wav b/tests/regression/transforms/BaseTransform.wav deleted file mode 100644 index 864c96bd..00000000 --- a/tests/regression/transforms/BaseTransform.wav +++ /dev/null @@ -1,3 +0,0 @@ -version 
https://git-lfs.github.com/spec/v1
-oid sha256:4ae9b85271fcc0cdda6ec81cc9e119179647755763d1f12e7015333547d053d9
-size 176444
diff --git a/tests/regression/transforms/Choose.wav b/tests/regression/transforms/Choose.wav
deleted file mode 100644
index 0d87aa11..00000000
--- a/tests/regression/transforms/Choose.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c5bebfd1bd6ec8ab0eb26dcef0cab1b68effb11de80538ecd34bf788e501fd00
-size 176444
diff --git a/tests/regression/transforms/ClippingDistortion.wav b/tests/regression/transforms/ClippingDistortion.wav
deleted file mode 100644
index 3f244af9..00000000
--- a/tests/regression/transforms/ClippingDistortion.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:373df372149380e97f1c87cd8b65749792da023ff7828229626f59dc225622ad
-size 176444
diff --git a/tests/regression/transforms/Compose.wav b/tests/regression/transforms/Compose.wav
deleted file mode 100644
index a62f0343..00000000
--- a/tests/regression/transforms/Compose.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4e67c650af21c88b01c1ddb5d443abef9719d373f2d434973a5a29cab8609ec2
-size 176444
diff --git a/tests/regression/transforms/CorruptPhase.wav b/tests/regression/transforms/CorruptPhase.wav
deleted file mode 100644
index 9c8cbd80..00000000
--- a/tests/regression/transforms/CorruptPhase.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:46f911803d68580b038b2d6b5b91dd4e07e5ccb5bd1b1046bfff4c4234f2f180
-size 176444
diff --git a/tests/regression/transforms/CrossTalk.wav b/tests/regression/transforms/CrossTalk.wav
deleted file mode 100644
index 06ada81e..00000000
--- a/tests/regression/transforms/CrossTalk.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:718ab88ce7cc2a1e184f79d18c5c45a254900a0713a8c855f6ebe892b400d1f9
-size 176444
diff --git a/tests/regression/transforms/Equalizer.wav b/tests/regression/transforms/Equalizer.wav
deleted file mode 100644
index 1b2bd930..00000000
--- a/tests/regression/transforms/Equalizer.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5fb5e279ead5519f96b55e635978dfdca00409ef7804333c230beedad2a79623
-size 176444
diff --git a/tests/regression/transforms/FrequencyMask.wav b/tests/regression/transforms/FrequencyMask.wav
deleted file mode 100644
index 283ed7f9..00000000
--- a/tests/regression/transforms/FrequencyMask.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:be3c30adb4801f599771fa1dcb5f59c1630edb5cc7f78ee175781284914c3b02
-size 176444
diff --git a/tests/regression/transforms/FrequencyNoise.wav b/tests/regression/transforms/FrequencyNoise.wav
deleted file mode 100644
index 06adc439..00000000
--- a/tests/regression/transforms/FrequencyNoise.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:19ea8e2f05a8bf0564c85623ab1b54822bdacff64fbbb6b773d74d8e267db992
-size 176444
diff --git a/tests/regression/transforms/GlobalVolumeNorm.wav b/tests/regression/transforms/GlobalVolumeNorm.wav
deleted file mode 100644
index 17fb6e57..00000000
--- a/tests/regression/transforms/GlobalVolumeNorm.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cd1d2949a6be1d6bd08a787ffba8363c68ddcc36e3a73110e026088f8b8c1b6f
-size 176444
diff --git a/tests/regression/transforms/HighPass.wav b/tests/regression/transforms/HighPass.wav
deleted file mode 100644
index dd831d7a..00000000
--- a/tests/regression/transforms/HighPass.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:be66e2972da9dc2fee575cd36b0b153c1abd89cfbd05d9e221b506bba1677575
-size 176444
diff --git a/tests/regression/transforms/Identity.wav b/tests/regression/transforms/Identity.wav
deleted file mode 100644
index 864c96bd..00000000
--- a/tests/regression/transforms/Identity.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4ae9b85271fcc0cdda6ec81cc9e119179647755763d1f12e7015333547d053d9
-size 176444
diff --git a/tests/regression/transforms/InvertPhase.wav b/tests/regression/transforms/InvertPhase.wav
deleted file mode 100644
index 24d09766..00000000
--- a/tests/regression/transforms/InvertPhase.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9fe26073ef22c08dcffa45731d86bd17b3e1bae3613e81889d137b8ff60c1cb1
-size 176444
diff --git a/tests/regression/transforms/LowPass.wav b/tests/regression/transforms/LowPass.wav
deleted file mode 100644
index 9592447b..00000000
--- a/tests/regression/transforms/LowPass.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0e965b9dd27fa2fcde431c7be9bbb0087111a954cec67dd28060c9a0cb10326f
-size 176444
diff --git a/tests/regression/transforms/MaskLowMagnitudes.wav b/tests/regression/transforms/MaskLowMagnitudes.wav
deleted file mode 100644
index 7b1efa85..00000000
--- a/tests/regression/transforms/MaskLowMagnitudes.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cf442aa2b861653b0388d18c7b4cd54cb8554dcef2d9fb1f4ab755d564d4c465
-size 176444
diff --git a/tests/regression/transforms/MuLawQuantization.wav b/tests/regression/transforms/MuLawQuantization.wav
deleted file mode 100644
index 5b2317de..00000000
--- a/tests/regression/transforms/MuLawQuantization.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4759c209a9d64a1ea33ddbf4365f704751913538c4d6b26f715ce5b2a5ae7ac6
-size 176444
diff --git a/tests/regression/transforms/NoiseFloor.wav b/tests/regression/transforms/NoiseFloor.wav
deleted file mode 100644
index b3a3e9db..00000000
--- a/tests/regression/transforms/NoiseFloor.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:bec9c93df61018139f76d3f480d653a42d4c92d1b10e513efc48a7041be7b0d4
-size 176444
diff --git a/tests/regression/transforms/Quantization.wav b/tests/regression/transforms/Quantization.wav
deleted file mode 100644
index f6813b82..00000000
--- a/tests/regression/transforms/Quantization.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9361bc6f3a13e94fef422cf416f98fd71fe5a211bb649ce30eb5877865ad1449
-size 176444
diff --git a/tests/regression/transforms/Repeat.wav b/tests/regression/transforms/Repeat.wav
deleted file mode 100644
index b87a407c..00000000
--- a/tests/regression/transforms/Repeat.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ea4d8f7a3cd040fd436209169943aa16c85666a8736631c4a4e96626ecde79d2
-size 176444
diff --git a/tests/regression/transforms/RepeatUpTo.wav b/tests/regression/transforms/RepeatUpTo.wav
deleted file mode 100644
index 5f975ef8..00000000
--- a/tests/regression/transforms/RepeatUpTo.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7e4f826c181a383154cdd4cd6208738ac6c044be59f3f0c1c00167fc473c3068
-size 176444
diff --git a/tests/regression/transforms/RescaleAudio.wav b/tests/regression/transforms/RescaleAudio.wav
deleted file mode 100644
index 864c96bd..00000000
--- a/tests/regression/transforms/RescaleAudio.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4ae9b85271fcc0cdda6ec81cc9e119179647755763d1f12e7015333547d053d9
-size 176444
diff --git a/tests/regression/transforms/RoomImpulseResponse.wav b/tests/regression/transforms/RoomImpulseResponse.wav
deleted file mode 100644
index 0d87aa11..00000000
--- a/tests/regression/transforms/RoomImpulseResponse.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c5bebfd1bd6ec8ab0eb26dcef0cab1b68effb11de80538ecd34bf788e501fd00
-size 176444
diff --git a/tests/regression/transforms/ShiftPhase.wav b/tests/regression/transforms/ShiftPhase.wav
deleted file mode 100644
index 6d47cf21..00000000
--- a/tests/regression/transforms/ShiftPhase.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4708845550eed14fc69a125a5ef3a26953afd8438101d4f1862dfbf2c5378935
-size 176444
diff --git a/tests/regression/transforms/Silence.wav b/tests/regression/transforms/Silence.wav
deleted file mode 100644
index 14b225a2..00000000
--- a/tests/regression/transforms/Silence.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cfc6b206e99b298a229480f020e61d2b22a791e08a840fdd40ef62d8bd78b155
-size 176444
diff --git a/tests/regression/transforms/Smoothing.wav b/tests/regression/transforms/Smoothing.wav
deleted file mode 100644
index 838eb3e9..00000000
--- a/tests/regression/transforms/Smoothing.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cd1ee5b00278f342d5926ad9f9b662cdf2176f2f17a626d4ee8f5a37393ca7b7
-size 176444
diff --git a/tests/regression/transforms/SpectralDenoising.wav b/tests/regression/transforms/SpectralDenoising.wav
deleted file mode 100644
index 2412d25e..00000000
--- a/tests/regression/transforms/SpectralDenoising.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ba7fdc5bff221a8c479cc1c5db598d91d02e26b0b4544948f406729b2b4a58b2
-size 176444
diff --git a/tests/regression/transforms/SpectralTransform.wav b/tests/regression/transforms/SpectralTransform.wav
deleted file mode 100644
index 30161647..00000000
--- a/tests/regression/transforms/SpectralTransform.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2e294a2d121916653e64c811d71dc07faefa8b2c8c55fba390efa511b254c836
-size 176444
diff --git a/tests/regression/transforms/TimeMask.wav b/tests/regression/transforms/TimeMask.wav
deleted file mode 100644
index 6cccd25b..00000000
--- a/tests/regression/transforms/TimeMask.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1b591bf382c79f7887b3ea3ffbb1a3c9b79e8290aeccd056c07e1ff4d8730e4d
-size 176444
diff --git a/tests/regression/transforms/TimeNoise.wav b/tests/regression/transforms/TimeNoise.wav
deleted file mode 100644
index 09c966c3..00000000
--- a/tests/regression/transforms/TimeNoise.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b50b14ed6aa3a878398654654da5ef6fb724e75cf9b529e092dfa1c1db535962
-size 176444
diff --git a/tests/regression/transforms/VolumeChange.wav b/tests/regression/transforms/VolumeChange.wav
deleted file mode 100644
index 214e333f..00000000
--- a/tests/regression/transforms/VolumeChange.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:1ff621636bc3199ea91ef342df787bf20fce7ee1a5a879080b59b036e0ed985a
-size 176444
diff --git a/tests/regression/transforms/VolumeNorm.wav b/tests/regression/transforms/VolumeNorm.wav
deleted file mode 100644
index d589b88b..00000000
--- a/tests/regression/transforms/VolumeNorm.wav
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:cd6417903e035d2a0838d3ee550c626fbc228934a12bfd3e1ecf08dc7019713b
-size 176444
diff --git a/tests/test_post.py b/tests/test_post.py
deleted file mode 100644
index 99bcb855..00000000
--- a/tests/test_post.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from pathlib import Path
-
-from audiotools import AudioSignal
-from audiotools import post
-from audiotools import transforms
-
-
-def test_audio_table():
-    tfm = transforms.LowPass()
-
-    audio_dict = {}
-
-    audio_dict["inputs"] = [
-        AudioSignal.excerpt("tests/audio/spk/f10_script4_produced.wav", duration=5)
-        for _ in range(3)
-    ]
-    audio_dict["outputs"] = []
-    for i in range(3):
-        x = audio_dict["inputs"][i]
-
-        kwargs = tfm.instantiate()
-        output = tfm(x.clone(), **kwargs)
-        audio_dict["outputs"].append(output)
-
-    post.audio_table(audio_dict)
diff --git a/tests/test_preference.py b/tests/test_preference.py
deleted file mode 100644
index 6f8dfe24..00000000
--- a/tests/test_preference.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import math
-import string
-import tempfile
-from dataclasses import dataclass
-from pathlib import Path
-
-import gradio as gr
-import numpy as np
-import soundfile as sf
-
-from audiotools import preference as pr
-
-
-@dataclass
-class Config:
-    folder: str = None
-    save_path: str = "results.csv"
-    conditions: list = None
-    reference: str = None
-    seed: int = 0
-
-
-#######################################################
-### Create audio data for debugging listening tests ###
-#######################################################
-
-
-def random_sine(f):
-    fs = 44100  # sampling rate, Hz, must be integer
-    duration = 5.0  # in seconds, may be float
-
-    # generate samples, note conversion to float32 array
-    volume = 0.1
-    num_samples = int(fs * duration)
-    samples = volume * np.sin(2 * math.pi * (f / fs) * np.arange(num_samples))
-
-    return samples, fs
-
-
-def create_data(path):
-    path = Path(path)
-    hz = [110, 140, 180]
-
-    for i in range(6):
-        name = f"condition_{string.ascii_lowercase[i]}"
-        for j in range(3):
-            sample_path = path / name / f"sample_{j}.wav"
-            sample_path.parent.mkdir(exist_ok=True, parents=True)
-            audio, sr = random_sine(hz[j] * (2**i))
-            sf.write(sample_path, audio, sr)
-
-
-def _test_mushra(app, config):
-    """Launches a MUSHRA test, with an optional reference."""
-    save_path = config.save_path
-    samples = gr.State(pr.Samples(config.folder))
-
-    reference = config.reference
-    conditions = config.conditions
-
-    player = pr.Player(app)
-    player.create()
-    if reference is not None:
-        player.add("Play Reference")
-
-    user = pr.create_tracker(app)
-    ratings = []
-
-    with gr.Row():
-        gr.HTML("")
-        with gr.Column(scale=9):
-            gr.HTML(pr.slider_mushra)
-
-    for i in range(len(conditions)):
-        with gr.Row().style(equal_height=True):
-            x = string.ascii_uppercase[i]
-            player.add(f"Play {x}")
-            with gr.Column(scale=9):
-                ratings.append(gr.Slider(value=50, interactive=True))
-
-    def build(user, samples, *ratings):
-        # Filter out samples user has done already, by looking in the CSV.
-        samples.filter_completed(user, save_path)
-
-        # Write results to CSV
-        if samples.current > 0 and len(samples.names) > 0:
-            start_idx = 1 if reference is not None else 0
-            name = samples.names[samples.current - 1]
-            result = {"sample": name, "user": user}
-            for k, r in zip(samples.order[start_idx:], ratings):
-                result[k] = r
-            pr.save_result(result, save_path)
-
-        updates, done, pbar = samples.get_next_sample(reference, conditions)
-        return updates + [gr.update(value=50) for _ in ratings] + [done, samples, pbar]
-
-    progress = gr.HTML()
-    begin = gr.Button("Submit", elem_id="start-survey")
-    begin.click(
-        fn=build,
-        inputs=[user, samples] + ratings,
-        outputs=player.to_list() + ratings + [begin, samples, progress],
-    ).then(None, _js=pr.reset_player)
-
-    samples = pr.Samples(config.folder)
-    for i in range(len(samples) + 1):
-        build("test", samples, 95, 85)
-
-
-def test_preference():
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        config = Config(
-            folder=tmpdir,
-            save_path=tmpdir / "results.csv",
-            conditions=["condition_a", "condition_b"],
-            reference="condition_c",
-        )
-
-        create_data(config.folder)
-        with gr.Blocks() as app:
-            _test_mushra(app, config)
-            _test_mushra(app, config)
-
-    with tempfile.TemporaryDirectory() as tmpdir:
-        tmpdir = Path(tmpdir)
-        config = Config(
-            folder=tmpdir,
-            save_path=tmpdir / "results.csv",
-            conditions=["condition_a", "condition_b"],
-        )
-
-        create_data(config.folder)
-        with gr.Blocks() as app:
-            _test_mushra(app, config)