diff --git a/dataset_loader.py b/dataset_loader.py index e10e0b6..1fae225 100644 --- a/dataset_loader.py +++ b/dataset_loader.py @@ -1,4 +1,3 @@ -from typing import Tuple, Dict import ast import tensorflow as tf @@ -7,7 +6,6 @@ import os import numpy as np from scipy.fft import fft, ifft import soundfile as sf -import librosa import math import pandas as pd import scipy as sp @@ -32,10 +30,7 @@ def __noise_sample_generator(info_file,fs, length_seq, split): else: num=np.random.randint(0,len(segments)) loaded_data, Fs=sf.read(os.path.join(head,load_data_split["recording"].loc[i],segments[num])) - - if fs!=Fs: - print("wrong fs, resampling...") - data=librosa.resample(loaded_data, Fs, fs) + assert fs==Fs, "wrong sampling rate" yield __extend_sample_by_repeating(loaded_data,fs,length_seq) @@ -385,11 +380,7 @@ def generator_train(path_music, path_noises,split, fs=44100, seg_len_s=5, extend random.shuffle(train_samples) for file in train_samples: data, samplerate = sf.read(file) - if samplerate!=fs: - print("!!!!WRONG SAMPLE RATe!!!") - data=np.transpose(data) - data=librosa.resample(data, samplerate, 44100) - data=np.transpose(data) + assert samplerate==fs, "wrong sampling rate" data_clean=data #Stereo to mono if len(data.shape)>1 : @@ -456,7 +447,7 @@ def generator_train(path_music, path_noises,split, fs=44100, seg_len_s=5, extend summed=summed.astype('float32') yield tf.convert_to_tensor(summed), tf.convert_to_tensor(segment) -def load_data(buffer_size, path_music_train, path_music_val, path_noises, fs=44100, seg_len_s=5, extend=True, stereo=False) -> Tuple[tf.data.Dataset, tf.data.Dataset]: +def load_data(buffer_size, path_music_train, path_music_val, path_noises, fs=44100, seg_len_s=5, extend=True, stereo=False) : print("Generating train dataset") trainshape=int(fs*seg_len_s) @@ -470,14 +461,14 @@ def load_data(buffer_size, path_music_train, path_music_val, path_noises, fs=4 return dataset_train.shuffle(buffer_size), dataset_val -def load_data_test(buffer_size, 
path_pianos_test, path_noises, **kwargs) -> Tuple[tf.data.Dataset]: +def load_data_test(buffer_size, path_pianos_test, path_noises, **kwargs): print("Generating test dataset") segments_noisy, segments_clean=generate_test_data(path_pianos_test, path_noises, extend=True, **kwargs) dataset_test=tf.data.Dataset.from_tensor_slices((segments_noisy, segments_clean)) #dataset_test=tf.data.Dataset.from_tensor_slices((segments_noisy[1:3], segments_clean[1:3])) #train_dataset = train.cache().shuffle(buffer_size).take(info.splits["train"].num_examples) return dataset_test -def load_data_formal( path_pianos_test, path_noises, **kwargs) -> Tuple[tf.data.Dataset]: +def load_data_formal( path_pianos_test, path_noises, **kwargs) : print("Generating test dataset") segments_noisy, segments_clean=generate_paired_data_test_formal(path_pianos_test, path_noises, extend=True, **kwargs) print("segments::") @@ -487,7 +478,7 @@ def load_data_formal( path_pianos_test, path_noises, **kwargs) -> Tuple[tf.da #train_dataset = train.cache().shuffle(buffer_size).take(info.splits["train"].num_examples) return dataset_test -def load_real_test_recordings(buffer_size, path_recordings, **kwargs) -> Tuple[tf.data.Dataset]: +def load_real_test_recordings(buffer_size, path_recordings, **kwargs): print("Generating real test dataset") segments_noisy=generate_real_recordings_data(path_recordings, **kwargs) diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..b20f62c --- /dev/null +++ b/environment.yml @@ -0,0 +1,81 @@ +name: historical_denoiser +channels: + - anaconda + - pytorch + - rapidsai + - defaults + - conda-forge + - nvidia + - gurobi + - IBMDecisionOptimization + - mosek +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=4.5=1_gnu + - ca-certificates=2020.10.14=0 + - cudatoolkit=10.1.243=h6bb024c_0 + - cudnn=7.6.5=cuda10.1_0 + - ld_impl_linux-64=2.35.1=h7274673_9 + - libffi=3.3=he6710b0_2 + - libgcc-ng=9.3.0=h5101ec6_17 + - libgomp=9.3.0=h5101ec6_17 + - 
libstdcxx-ng=9.3.0=hd4cf53a_17 + - ncurses=6.2=he6710b0_1 + - openssl=1.1.1k=h27cfd23_0 + - pip=21.0.1=py37h06a4308_0 + - python=3.7.11=h12debd9_0 + - readline=8.1=h27cfd23_0 + - setuptools=52.0.0=py37h06a4308_0 + - sqlite=3.36.0=hc218d9a_0 + - tk=8.6.10=hbc83047_0 + - wheel=0.37.0=pyhd3eb1b0_0 + - xz=5.2.5=h7b6447c_0 + - zlib=1.2.11=h7b6447c_3 + - pip: + - absl-py==0.13.0 + - astunparse==1.6.3 + - cachetools==4.2.2 + - certifi==2021.5.30 + - cffi==1.14.6 + - charset-normalizer==2.0.4 + - gast==0.3.3 + - google-auth==1.35.0 + - google-auth-oauthlib==0.4.5 + - google-pasta==0.2.0 + - grpcio==1.39.0 + - h5py==2.10.0 + - hydra-core==0.11.3 + - idna==3.2 + - importlib-metadata==4.8.1 + - keras-preprocessing==1.1.2 + - markdown==3.3.4 + - numpy==1.18.5 + - oauthlib==3.1.1 + - omegaconf==1.4.1 + - opt-einsum==3.3.0 + - pandas==1.3.2 + - protobuf==3.17.3 + - pyasn1==0.4.8 + - pyasn1-modules==0.2.8 + - pycparser==2.20 + - python-dateutil==2.8.2 + - pytz==2021.1 + - pyyaml==5.4.1 + - requests==2.26.0 + - requests-oauthlib==1.3.0 + - rsa==4.7.2 + - scipy==1.4.1 + - six==1.16.0 + - soundfile==0.10.3.post1 + - tensorboard==2.5.0 + - tensorboard-data-server==0.6.1 + - tensorboard-plugin-wit==1.8.0 + - tensorflow==2.3.0 + - tensorflow-estimator==2.3.0 + - termcolor==1.1.0 + - tqdm==4.62.2 + - typing-extensions==3.10.0.2 + - urllib3==1.26.6 + - werkzeug==2.0.1 + - wrapt==1.12.1 + - zipp==3.5.0 diff --git a/inference.py b/inference.py index 25d855c..1b4b011 100644 --- a/inference.py +++ b/inference.py @@ -10,7 +10,7 @@ def run(args): import soundfile as sf import numpy as np from tqdm import tqdm - import librosa + import scipy.signal path_experiment=str(args.path_experiment) @@ -47,16 +47,17 @@ def run(args): audio=str(args.inference.audio) data, samplerate = sf.read(audio) - if samplerate!=44100: - print("Resampling") - data=np.transpose(data) - data=librosa.resample(data, samplerate, 44100) - data=np.transpose(data) - + print(data.dtype) #Stereo to mono if len(data.shape)>1: 
data=np.mean(data,axis=1) + if samplerate!=44100: + print("Resampling") + + data=scipy.signal.resample(data, int((44100 / samplerate )*len(data))+1) + + segment_size=44101*20 #20s segments diff --git a/inference.sh b/inference.sh index 35bcb63..5ecfe24 100644 --- a/inference.sh +++ b/inference.sh @@ -1,8 +1,8 @@ #!/bin/bash -module load anaconda +#module load anaconda -source activate /scratch/work/molinee2/conda_envs/unet_env +#source activate /scratch/work/molinee2/conda_envs/unet_env python inference.py path_experiment="experiments/trained_model" inference.audio=$1 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7da36ab --- /dev/null +++ b/requirements.txt @@ -0,0 +1,47 @@ +absl-py==0.13.0 +astunparse==1.6.3 +cachetools==4.2.2 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 +gast==0.3.3 +google-auth==1.35.0 +google-auth-oauthlib==0.4.5 +google-pasta==0.2.0 +grpcio==1.39.0 +h5py==2.10.0 +hydra-core==0.11.3 +idna==3.2 +importlib-metadata==4.8.1 +Keras-Preprocessing==1.1.2 +Markdown==3.3.4 +numpy==1.18.5 +oauthlib==3.1.1 +omegaconf==1.4.1 +opt-einsum==3.3.0 +pandas==1.3.2 +protobuf==3.17.3 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +python-dateutil==2.8.2 +pytz==2021.1 +PyYAML==5.4.1 +requests==2.26.0 +requests-oauthlib==1.3.0 +rsa==4.7.2 +scipy==1.4.1 +six==1.16.0 +SoundFile==0.10.3.post1 +tensorboard==2.5.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.0 +tensorflow==2.3.0 +tensorflow-estimator==2.3.0 +termcolor==1.1.0 +tqdm==4.62.2 +typing-extensions==3.10.0.2 +urllib3==1.26.6 +Werkzeug==2.0.1 +wrapt==1.12.1 +zipp==3.5.0 diff --git a/train.py b/train.py index 29f25e4..b2e96d3 100644 --- a/train.py +++ b/train.py @@ -7,7 +7,6 @@ logger = logging.getLogger(__name__) def run(args): import unet import tensorflow as tf - import tensorflow_addons as tfa import dataset_loader from tensorflow.keras.optimizers import Adam import soundfile as sf diff --git a/train.sh b/train.sh index 
50f990b..36d15a1 100644 --- a/train.sh +++ b/train.sh @@ -1,9 +1,9 @@ #!/bin/bash -module load anaconda +#module load anaconda #module load matlab/r2020b # -source activate /scratch/work/molinee2/conda_envs/unet_env +#source activate /scratch/work/molinee2/conda_envs/unet_env n=1 PATH_EXPERIMENT=/scratch/work/molinee2/unet_dir/unet_denoising_github/experiments/${n} diff --git a/trainer.py b/trainer.py index 5018c9f..cdaf222 100644 --- a/trainer.py +++ b/trainer.py @@ -1,15 +1,10 @@ import os import numpy as np -import cv2 -import librosa -import imageio import tensorflow as tf import soundfile as sf -import subprocess from tqdm import tqdm import pandas as pd -from scipy.io import loadmat class Trainer(): def __init__(self, model, optimizer,loss, strategy, path_experiment, args):