From fc43b5cad167b013af9e377bcbfa5a7bc671aea3 Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Tue, 2 Jan 2024 16:39:24 -0500 Subject: [PATCH 1/9] First version of opensoundscape.seed() function. Draft function that worked on my initial testing using OpSo 0.9.1, but still needs testing on 0.10.1 --- opensoundscape/seed.py | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 opensoundscape/seed.py diff --git a/opensoundscape/seed.py b/opensoundscape/seed.py new file mode 100644 index 00000000..31b9e55e --- /dev/null +++ b/opensoundscape/seed.py @@ -0,0 +1,11 @@ +""" seed.py: Set random state across different libraries for reproducibility +""" +import numpy as np +import torch +import random + +def seed(seed): + np.random.seed(seed) + random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) \ No newline at end of file From a7c8f07d34a708363c8615e5a77cf197a7e6fbdc Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Wed, 3 Jan 2024 13:53:00 -0500 Subject: [PATCH 2/9] First version of opensoundscape.seed(). 
--- opensoundscape/__init__.py | 1 + opensoundscape/seed.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/opensoundscape/__init__.py b/opensoundscape/__init__.py index 55e8ecbf..bd5bed6a 100644 --- a/opensoundscape/__init__.py +++ b/opensoundscape/__init__.py @@ -26,6 +26,7 @@ from .preprocess.actions import Action from .preprocess.preprocessors import SpectrogramPreprocessor, AudioPreprocessor from .sample import AudioSample +from .seed import seed from .annotations import BoxedAnnotations from .preprocess.utils import show_tensor, show_tensor_grid from .localization import SpatialEvent, SynchronizedRecorderArray diff --git a/opensoundscape/seed.py b/opensoundscape/seed.py index 31b9e55e..0099f4f9 100644 --- a/opensoundscape/seed.py +++ b/opensoundscape/seed.py @@ -4,8 +4,12 @@ import torch import random -def seed(seed): + +def seed(seed, verbose=True): + print(f"Random state set with seed {seed}") + + torch.backends.cudnn.deterministic = True np.random.seed(seed) random.seed(seed) torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) \ No newline at end of file + torch.cuda.manual_seed_all(seed) From d88524f7f66a021ccf5602068a36ee79915b9e18 Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Wed, 3 Jan 2024 13:53:32 -0500 Subject: [PATCH 3/9] Unit testing for opensoundscape.seed(). 
--- tests/test_seed.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 tests/test_seed.py diff --git a/tests/test_seed.py b/tests/test_seed.py new file mode 100644 index 00000000..fe872cc1 --- /dev/null +++ b/tests/test_seed.py @@ -0,0 +1,66 @@ +from opensoundscape import seed +import numpy as np +import torch +import random +from opensoundscape.ml import cnn, cnn_architectures + +import pytest + +pytestmark = pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) + + +def test_torch_rand(input): + seed(input) + tr1 = torch.rand(100) + + seed(input) + tr2 = torch.rand(100) + + seed(input + 1) + tr3 = torch.rand(100) + + assert all(tr1 == tr2) & any(tr1 != tr3) + + +def test_numpy_random_rand(input): + seed(input) + nr1 = np.random.rand(100) + + seed(input) + nr2 = np.random.rand(100) + + seed(input + 1) + nr3 = np.random.rand(100) + + assert all(nr1 == nr2) & any(nr1 != nr3) + + +def test_radom_sample(input): + list1000 = list(range(1, 1000)) + + seed(input) + rs1 = random.sample(list1000, 100) + + seed(input) + rs2 = random.sample(list1000, 100) + + seed(input + 1) + rs3 = random.sample(list1000, 100) + + assert (rs1 == rs2) & (rs1 != rs3) + + +def test_cnn(input): + seed(input) + model_resnet1 = cnn_architectures.resnet18(num_classes=10, weights=None) + lw1 = model_resnet1.layer1[0].conv1.weight + + seed(input) + model_resnet2 = cnn_architectures.resnet18(num_classes=10, weights=None) + lw2 = model_resnet2.layer1[0].conv1.weight + + seed(input + 1) + model_resnet3 = cnn_architectures.resnet18(num_classes=10, weights=None) + lw3 = model_resnet3.layer1[0].conv1.weight + + assert torch.all(lw1 == lw2) & torch.any(lw1 != lw3) From 261851f12ddf0bc04829b616270005a427be0121 Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Wed, 3 Jan 2024 15:48:16 -0500 Subject: [PATCH 4/9] Resolved comments for PR #929 --- opensoundscape/__init__.py | 1 - opensoundscape/seed.py | 15 -------------- 
opensoundscape/utils.py | 25 ++++++++++++++++++++++- tests/{test_seed.py => test_set_seed.py} | 26 ++++++++++++------------ 4 files changed, 37 insertions(+), 30 deletions(-) delete mode 100644 opensoundscape/seed.py rename tests/{test_seed.py => test_set_seed.py} (80%) diff --git a/opensoundscape/__init__.py b/opensoundscape/__init__.py index bd5bed6a..55e8ecbf 100644 --- a/opensoundscape/__init__.py +++ b/opensoundscape/__init__.py @@ -26,7 +26,6 @@ from .preprocess.actions import Action from .preprocess.preprocessors import SpectrogramPreprocessor, AudioPreprocessor from .sample import AudioSample -from .seed import seed from .annotations import BoxedAnnotations from .preprocess.utils import show_tensor, show_tensor_grid from .localization import SpatialEvent, SynchronizedRecorderArray diff --git a/opensoundscape/seed.py b/opensoundscape/seed.py deleted file mode 100644 index 0099f4f9..00000000 --- a/opensoundscape/seed.py +++ /dev/null @@ -1,15 +0,0 @@ -""" seed.py: Set random state across different libraries for reproducibility -""" -import numpy as np -import torch -import random - - -def seed(seed, verbose=True): - print(f"Random state set with seed {seed}") - - torch.backends.cudnn.deterministic = True - np.random.seed(seed) - random.seed(seed) - torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) diff --git a/opensoundscape/utils.py b/opensoundscape/utils.py index a28b9ae1..f29447c1 100644 --- a/opensoundscape/utils.py +++ b/opensoundscape/utils.py @@ -9,7 +9,8 @@ import soundfile import librosa from matplotlib.colors import LinearSegmentedColormap - +import torch +import random class GetDurationError(ValueError): """raised if librosa.get_duration(path=f) causes an error""" @@ -329,3 +330,25 @@ def generate_opacity_colormaps( colormaps.append(cmap) return colormaps + + +import numpy as np +import torch +import random + + +def set_seed(seed, verbose=True): + """Set random state across different libraries for reproducibility + + Args: + seed (int): 
Number to fix random number generators to a specific start. + verbose (bool, optional): Print set seed. Defaults to True. + """ + if verbose: + print(f"Random state set with seed {seed}") + + torch.backends.cudnn.deterministic = True + np.random.seed(seed) + random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) \ No newline at end of file diff --git a/tests/test_seed.py b/tests/test_set_seed.py similarity index 80% rename from tests/test_seed.py rename to tests/test_set_seed.py index fe872cc1..f135ca9e 100644 --- a/tests/test_seed.py +++ b/tests/test_set_seed.py @@ -1,4 +1,4 @@ -from opensoundscape import seed +from opensoundscape.utils import set_seed import numpy as np import torch import random @@ -10,26 +10,26 @@ def test_torch_rand(input): - seed(input) + set_seed(input) tr1 = torch.rand(100) - seed(input) + set_seed(input) tr2 = torch.rand(100) - seed(input + 1) + set_seed(input + 1) tr3 = torch.rand(100) assert all(tr1 == tr2) & any(tr1 != tr3) def test_numpy_random_rand(input): - seed(input) + set_seed(input) nr1 = np.random.rand(100) - seed(input) + set_seed(input) nr2 = np.random.rand(100) - seed(input + 1) + set_seed(input + 1) nr3 = np.random.rand(100) assert all(nr1 == nr2) & any(nr1 != nr3) @@ -38,28 +38,28 @@ def test_numpy_random_rand(input): def test_radom_sample(input): list1000 = list(range(1, 1000)) - seed(input) + set_seed(input) rs1 = random.sample(list1000, 100) - seed(input) + set_seed(input) rs2 = random.sample(list1000, 100) - seed(input + 1) + set_seed(input + 1) rs3 = random.sample(list1000, 100) assert (rs1 == rs2) & (rs1 != rs3) def test_cnn(input): - seed(input) + set_seed(input) model_resnet1 = cnn_architectures.resnet18(num_classes=10, weights=None) lw1 = model_resnet1.layer1[0].conv1.weight - seed(input) + set_seed(input) model_resnet2 = cnn_architectures.resnet18(num_classes=10, weights=None) lw2 = model_resnet2.layer1[0].conv1.weight - seed(input + 1) + set_seed(input + 1) model_resnet3 = 
cnn_architectures.resnet18(num_classes=10, weights=None) lw3 = model_resnet3.layer1[0].conv1.weight From 87cc881ce8d1c5672fa1ffe23c820906ac9ebfdd Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Thu, 4 Jan 2024 09:51:12 -0500 Subject: [PATCH 5/9] Reorganized utils unit testing. --- tests/test_set_seed.py | 66 ------------------------------------------ tests/test_utils.py | 62 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 66 deletions(-) delete mode 100644 tests/test_set_seed.py diff --git a/tests/test_set_seed.py b/tests/test_set_seed.py deleted file mode 100644 index f135ca9e..00000000 --- a/tests/test_set_seed.py +++ /dev/null @@ -1,66 +0,0 @@ -from opensoundscape.utils import set_seed -import numpy as np -import torch -import random -from opensoundscape.ml import cnn, cnn_architectures - -import pytest - -pytestmark = pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) - - -def test_torch_rand(input): - set_seed(input) - tr1 = torch.rand(100) - - set_seed(input) - tr2 = torch.rand(100) - - set_seed(input + 1) - tr3 = torch.rand(100) - - assert all(tr1 == tr2) & any(tr1 != tr3) - - -def test_numpy_random_rand(input): - set_seed(input) - nr1 = np.random.rand(100) - - set_seed(input) - nr2 = np.random.rand(100) - - set_seed(input + 1) - nr3 = np.random.rand(100) - - assert all(nr1 == nr2) & any(nr1 != nr3) - - -def test_radom_sample(input): - list1000 = list(range(1, 1000)) - - set_seed(input) - rs1 = random.sample(list1000, 100) - - set_seed(input) - rs2 = random.sample(list1000, 100) - - set_seed(input + 1) - rs3 = random.sample(list1000, 100) - - assert (rs1 == rs2) & (rs1 != rs3) - - -def test_cnn(input): - set_seed(input) - model_resnet1 = cnn_architectures.resnet18(num_classes=10, weights=None) - lw1 = model_resnet1.layer1[0].conv1.weight - - set_seed(input) - model_resnet2 = cnn_architectures.resnet18(num_classes=10, weights=None) - lw2 = model_resnet2.layer1[0].conv1.weight - - set_seed(input + 1) - model_resnet3 
= cnn_architectures.resnet18(num_classes=10, weights=None) - lw3 = model_resnet3.layer1[0].conv1.weight - - assert torch.all(lw1 == lw2) & torch.any(lw1 != lw3) diff --git a/tests/test_utils.py b/tests/test_utils.py index bfea9cf3..f175852a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,6 +3,9 @@ import pandas as pd import pytz import datetime +import torch +import random +from opensoundscape.ml import cnn, cnn_architectures from opensoundscape import utils @@ -168,3 +171,62 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str): # should copy labels for each file to all clips of that file # duplicate file should have labels from _first_ occurrence in label_df assert np.array_equal(clip_df["a"].values, [0, 0, 0, 0, 2, 2]) + + + +@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) +def test_torch_rand(input): + utils.set_seed(input) + tr1 = torch.rand(100) + + utils.set_seed(input) + tr2 = torch.rand(100) + + utils.set_seed(input + 1) + tr3 = torch.rand(100) + + assert all(tr1 == tr2) & any(tr1 != tr3) + +@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) +def test_numpy_random_rand(input): + utils.set_seed(input) + nr1 = np.random.rand(100) + + utils.set_seed(input) + nr2 = np.random.rand(100) + + utils.set_seed(input + 1) + nr3 = np.random.rand(100) + + assert all(nr1 == nr2) & any(nr1 != nr3) + +@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) +def test_radom_sample(input): + list1000 = list(range(1, 1000)) + + utils.set_seed(input) + rs1 = random.sample(list1000, 100) + + utils.set_seed(input) + rs2 = random.sample(list1000, 100) + + utils.set_seed(input + 1) + rs3 = random.sample(list1000, 100) + + assert (rs1 == rs2) & (rs1 != rs3) + +@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) +def test_cnn(input): + utils.set_seed(input) + model_resnet1 = cnn_architectures.resnet18(num_classes=10, weights=None) + lw1 = model_resnet1.layer1[0].conv1.weight + + 
utils.set_seed(input) + model_resnet2 = cnn_architectures.resnet18(num_classes=10, weights=None) + lw2 = model_resnet2.layer1[0].conv1.weight + + utils.set_seed(input + 1) + model_resnet3 = cnn_architectures.resnet18(num_classes=10, weights=None) + lw3 = model_resnet3.layer1[0].conv1.weight + + assert torch.all(lw1 == lw2) & torch.any(lw1 != lw3) From 75cf950b72aa8f07bc37f97f465ed3a1c0fe9e2f Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Thu, 4 Jan 2024 09:55:06 -0500 Subject: [PATCH 6/9] Added decorator comments. --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index f175852a..26b8f2cd 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -174,7 +174,7 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str): -@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) +@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) # Loops trough each value in list when running pytest def test_torch_rand(input): utils.set_seed(input) tr1 = torch.rand(100) From 22f48d936286000d4dc2c538c2c52551a82649ee Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Thu, 4 Jan 2024 10:17:24 -0500 Subject: [PATCH 7/9] Updated decorator description in comments. --- tests/test_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 26b8f2cd..62d4a4c9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -174,7 +174,11 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str): -@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) # Loops trough each value in list when running pytest +# The @pytest.mark.parametrize decorator loops trough each value in list when running pytest. +# If you add --verbose, it also prints if it passed for each value in the list for each function +# that takes it as input. 
+ +@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) def test_torch_rand(input): utils.set_seed(input) tr1 = torch.rand(100) From 1fc6d3a00197f9141ec5d92efdbf4d5bb4fe81bd Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Thu, 4 Jan 2024 12:05:26 -0500 Subject: [PATCH 8/9] Resolved PR's comments. --- opensoundscape/utils.py | 6 ------ tests/test_utils.py | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/opensoundscape/utils.py b/opensoundscape/utils.py index f29447c1..d377bacd 100644 --- a/opensoundscape/utils.py +++ b/opensoundscape/utils.py @@ -331,12 +331,6 @@ def generate_opacity_colormaps( return colormaps - -import numpy as np -import torch -import random - - def set_seed(seed, verbose=True): """Set random state across different libraries for reproducibility diff --git a/tests/test_utils.py b/tests/test_utils.py index 62d4a4c9..3b9dbd87 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -178,6 +178,9 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str): # If you add --verbose, it also prints if it passed for each value in the list for each function # that takes it as input. +# For all utils.set_seed() tests, assert that results are determistic for the the same seed AND +# for different seeds, in a tensor/array at least one element is different. + @pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) def test_torch_rand(input): utils.set_seed(input) From ab858caf6333d5087e7644ab4ced8e4baa3b0e19 Mon Sep 17 00:00:00 2001 From: LeonardoViotti Date: Mon, 15 Jan 2024 12:59:16 -0500 Subject: [PATCH 9/9] Black formatted opensoundscape/utils.py and tests/test_utils.py. 
--- opensoundscape/utils.py | 4 +++- tests/test_utils.py | 13 ++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/opensoundscape/utils.py b/opensoundscape/utils.py index d377bacd..30a78122 100644 --- a/opensoundscape/utils.py +++ b/opensoundscape/utils.py @@ -12,6 +12,7 @@ import torch import random + class GetDurationError(ValueError): """raised if librosa.get_duration(path=f) causes an error""" @@ -331,6 +332,7 @@ def generate_opacity_colormaps( return colormaps + def set_seed(seed, verbose=True): """Set random state across different libraries for reproducibility @@ -345,4 +347,4 @@ def set_seed(seed, verbose=True): np.random.seed(seed) random.seed(seed) torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) \ No newline at end of file + torch.cuda.manual_seed_all(seed) diff --git a/tests/test_utils.py b/tests/test_utils.py index 3b9dbd87..5ed2eb0f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -171,17 +171,17 @@ def test_make_clip_df_from_label_df(silence_10s_mp3_str, metadata_wav_str): # should copy labels for each file to all clips of that file # duplicate file should have labels from _first_ occurrence in label_df assert np.array_equal(clip_df["a"].values, [0, 0, 0, 0, 2, 2]) - - -# The @pytest.mark.parametrize decorator loops trough each value in list when running pytest. -# If you add --verbose, it also prints if it passed for each value in the list for each function + +# The @pytest.mark.parametrize decorator loops trough each value in list when running pytest. +# If you add --verbose, it also prints if it passed for each value in the list for each function # that takes it as input. # For all utils.set_seed() tests, assert that results are determistic for the the same seed AND # for different seeds, in a tensor/array at least one element is different. 
-@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) + +@pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) def test_torch_rand(input): utils.set_seed(input) tr1 = torch.rand(100) @@ -194,6 +194,7 @@ def test_torch_rand(input): assert all(tr1 == tr2) & any(tr1 != tr3) + @pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) def test_numpy_random_rand(input): utils.set_seed(input) @@ -207,6 +208,7 @@ def test_numpy_random_rand(input): assert all(nr1 == nr2) & any(nr1 != nr3) + @pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) def test_radom_sample(input): list1000 = list(range(1, 1000)) @@ -222,6 +224,7 @@ def test_radom_sample(input): assert (rs1 == rs2) & (rs1 != rs3) + @pytest.mark.parametrize("input", [1, 11, 13, 42, 59, 666, 1234]) def test_cnn(input): utils.set_seed(input)