import os import hydra import logging logger = logging.getLogger(__name__) def run(args): import unet import tensorflow as tf import soundfile as sf import numpy as np from tqdm import tqdm import librosa path_experiment=str(args.path_experiment) unet_model = unet.build_model_denoise(unet_args=args.unet) ckpt=os.path.join(os.path.dirname(os.path.abspath(__file__)),path_experiment, 'checkpoint') unet_model.load_weights(ckpt) def do_stft(noisy): window_fn = tf.signal.hamming_window win_size=args.stft.win_size hop_size=args.stft.hop_size stft_signal_noisy=tf.signal.stft(noisy,frame_length=win_size, window_fn=window_fn, frame_step=hop_size, pad_end=True) stft_noisy_stacked=tf.stack( values=[tf.math.real(stft_signal_noisy), tf.math.imag(stft_signal_noisy)], axis=-1) return stft_noisy_stacked def do_istft(data): window_fn = tf.signal.hamming_window win_size=args.stft.win_size hop_size=args.stft.hop_size inv_window_fn=tf.signal.inverse_stft_window_fn(hop_size, forward_window_fn=window_fn) pred_cpx=data[...,0] + 1j * data[...,1] pred_time=tf.signal.inverse_stft(pred_cpx, win_size, hop_size, window_fn=inv_window_fn) return pred_time audio=str(args.inference.audio) data, samplerate = sf.read(audio) if samplerate!=44100: print("Resampling") data=np.transpose(data) data=librosa.resample(data, samplerate, 44100) data=np.transpose(data) #Stereo to mono if len(data.shape)>1: data=np.mean(data,axis=1) segment_size=44101*20 #20s segments length_data=len(data) overlapsize=2048 #samples (46 ms) window=np.hanning(2*overlapsize) window_right=window[overlapsize::] window_left=window[0:overlapsize] audio_finished=False pointer=0 denoised_data=np.zeros(shape=(len(data),)) residual_noise=np.zeros(shape=(len(data),)) numchunks=int(np.ceil(length_data/segment_size)) for i in tqdm(range(numchunks)): if pointer+segment_size