<a href="https://colab.research.google.com/github/eloimoliner/denoising-historical-recordings/blob/colab/colab/demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

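# Denoising historical recordings: Colab demo

This notebook downloads the code and a pretrained checkpoint, then denoises the audio files you upload.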

In [2]:
#download the files
! git clone https://github.com/eloimoliner/denoising-historical-recordings.git
! wget https://github.com/eloimoliner/denoising-historical-recordings/releases/download/v0.0/checkpoint.zip
! unzip checkpoint.zip -d denoising-historical-recordings/experiments/trained_model/

Cloning into 'denoising-historical-recordings'...
remote: Enumerating objects: 177, done.[K
remote: Counting objects: 100% (177/177), done.[K
remote: Compressing objects: 100% (153/153), done.[K
remote: Total 177 (delta 63), reused 99 (delta 16), pack-reused 0[K
Receiving objects: 100% (177/177), 97.29 KiB | 4.86 MiB/s, done.
Resolving deltas: 100% (63/63), done.


In [18]:

# Enter the cloned repository: later cells use paths relative to it (e.g. conf/conf.yaml)
%cd denoising-historical-recordings

/content/denoising-historical-recordings


In [15]:
#install dependencies
! pip install hydra-core==0.11.3

Collecting hydra-core==0.11.3
  Downloading hydra_core-0.11.3-py3-none-any.whl (72 kB)
[?25l[K     |████▌                           | 10 kB 19.6 MB/s eta 0:00:01[K     |█████████                       | 20 kB 12.0 MB/s eta 0:00:01[K     |█████████████▋                  | 30 kB 8.9 MB/s eta 0:00:01[K     |██████████████████▏             | 40 kB 7.9 MB/s eta 0:00:01[K     |██████████████████████▊         | 51 kB 5.3 MB/s eta 0:00:01[K     |███████████████████████████▎    | 61 kB 5.4 MB/s eta 0:00:01[K     |███████████████████████████████▉| 71 kB 5.7 MB/s eta 0:00:01[K     |████████████████████████████████| 72 kB 372 kB/s 
[?25hCollecting omegaconf<1.5,>=1.4
  Downloading omegaconf-1.4.1-py3-none-any.whl (14 kB)
Collecting PyYAML
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 39.4 MB/s 
[?25hInstalling collected packages: PyYAML, omegacon

In [17]:
import unet
import tensorflow as tf
import soundfile as sf
import numpy as np
from tqdm import tqdm
import scipy.signal
import hydra
import os

In [62]:
# Workaround to load the hydra conf file: parse conf/conf.yaml directly with
# yaml.safe_load. The path is relative to the current working directory.
import yaml
from pathlib import Path
args = yaml.safe_load(Path('conf/conf.yaml').read_text())
class dotdict(dict):
    """Dictionary whose keys can also be read, set and deleted as attributes."""

    def __getattr__(self, name):
        # Missing keys yield None instead of raising, exactly like dict.get.
        return self.get(name)

    def __setattr__(self, name, value):
        # Attribute assignment stores a regular dictionary item.
        self[name] = value

    def __delattr__(self, name):
        # Deleting an absent key raises KeyError (not AttributeError).
        del self[name]
# Wrap the parsed config, and its "unet" entry, so values are reachable as attributes.
args = dotdict(args)
unet_args = dotdict(args.get("unet"))

In [67]:
# Build the denoising U-Net from the config and restore the checkpoint weights.
path_experiment=str(args.path_experiment)

unet_model = unet.build_model_denoise(unet_args=unet_args)

# Resolve the checkpoint against the current working directory (the repository
# root after the %cd cell) instead of a hard-coded, Colab-only absolute path.
ckpt=os.path.join(os.getcwd(),path_experiment, 'checkpoint')
unet_model.load_weights(ckpt)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f1d0d679dd0>

In [79]:
def do_stft(noisy):
    """Compute the STFT of `noisy` and return its real/imaginary parts stacked.

    The frame length and hop are read from ``args.stft`` ("win_size" and
    "hop_size"); a Hamming window is applied and the end of the signal is
    padded (``pad_end=True``).

    Returns:
        A tensor with one extra trailing axis of size 2: index 0 holds the
        real part of the STFT, index 1 the imaginary part.
    """
    stft_conf = args.stft
    spectrum = tf.signal.stft(
        noisy,
        frame_length=stft_conf["win_size"],
        frame_step=stft_conf["hop_size"],
        window_fn=tf.signal.hamming_window,
        pad_end=True,
    )
    real_part = tf.math.real(spectrum)
    imag_part = tf.math.imag(spectrum)
    return tf.stack(values=[real_part, imag_part], axis=-1)

def do_istft(data):
    """Turn a stacked real/imaginary spectrogram back into a time signal.

    Args:
        data: array or tensor whose last axis holds the real part at index 0
            and the imaginary part at index 1.

    Returns:
        The output of ``tf.signal.inverse_stft`` computed with the frame
        length and hop from ``args.stft`` and the inverse window derived
        from the Hamming analysis window.
    """
    stft_conf = args.stft
    frame_length = stft_conf["win_size"]
    frame_step = stft_conf["hop_size"]

    # Synthesis window matching the Hamming window used for the forward STFT.
    synthesis_window = tf.signal.inverse_stft_window_fn(
        frame_step, forward_window_fn=tf.signal.hamming_window
    )

    # Recombine the two stacked channels into one complex spectrogram.
    complex_spec = data[..., 0] + 1j * data[..., 1]
    return tf.signal.inverse_stft(
        complex_spec, frame_length, frame_step, window_fn=synthesis_window
    )


In [72]:
def denoise_audio(audio):
    """Denoise an audio file with the loaded U-Net and return the result.

    The file is read with soundfile, mixed down to mono by averaging the
    channels, and resampled to 44100 Hz when its sample rate differs. The
    signal is then processed in 5 s segments that overlap by 2048 samples;
    each segment goes through ``do_stft`` -> ``unet_model.predict`` ->
    ``do_istft`` and neighbouring segments are cross-faded with the two
    halves of a Hann window before being summed.

    Args:
        audio: path (or file object) accepted by ``soundfile.read``.

    Returns:
        1-D float64 numpy array holding the denoised mono signal, with the
        same number of samples as the (possibly resampled) input.
    """
    data, samplerate = sf.read(audio)
    print(data.dtype)
    #Stereo to mono
    if len(data.shape)>1:
        data=np.mean(data,axis=1)

    if samplerate!=44100:
        print("Resampling")
        data=scipy.signal.resample(data, int((44100  / samplerate )*len(data))+1)

    segment_size=44100*5  #5 s segments at 44.1 kHz
    overlapsize=2048 #samples (46 ms)
    hop_size=segment_size-overlapsize  #the pointer advances by this much per segment
    length_data=len(data)

    window=np.hanning(2*overlapsize)
    window_left=window[:overlapsize]    #fade-in applied to the start of a segment
    window_right=window[overlapsize:]   #fade-out applied to the end of a segment

    denoised_data=np.zeros(shape=(length_data,))
    if length_data==0:
        return denoised_data

    # Consecutive segments start hop_size (not segment_size) samples apart, so
    # the segment count must be derived from the hop. Counting with
    # segment_size stops early on long files and leaves the tail unprocessed.
    # Integer ceil of (length_data-overlapsize)/hop_size, at least one segment.
    numchunks=max(1, -(-(length_data-overlapsize)//hop_size))

    pointer=0
    for _ in tqdm(range(numchunks)):
        is_last=pointer+segment_size>=length_data

        segment=data[pointer:pointer+segment_size]
        lensegment=len(segment)
        if lensegment<segment_size:
            # Zero-pad the final segment so the model always sees full-size input.
            segment=np.concatenate((segment, np.zeros(shape=(segment_size-lensegment,))), axis=0)

        #dostft
        segment_TF=do_stft(segment)
        segment_TF_ds=tf.data.Dataset.from_tensors(segment_TF)
        pred=unet_model.predict(segment_TF_ds.batch(1))
        pred=pred[0]
        # float64 copy so the in-place windowing matches the float64 accumulator.
        pred_time=np.array(do_istft(pred[0]), dtype=np.float64)[:segment_size]

        if pointer>0:
            # Every segment but the first fades in over the overlap region.
            pred_time[:overlapsize]*=window_left
        if not is_last:
            # Every segment but the last fades out over the overlap region.
            pred_time[segment_size-overlapsize:segment_size]*=window_right

        denoised_data[pointer:pointer+lensegment]+=pred_time[:lensegment]
        pointer+=hop_size
    return denoised_data

In [75]:
# Colab-only: open the browser upload dialog. `uploaded` is keyed by file name
# (values presumably hold the file contents - confirm against the google.colab docs).
from google.colab import files
uploaded=files.upload()

Saving Carmen-Habanera_(Love_is_Like_a_Woo_-_Marguerite_D'Alvarez_noisy_input.wav to Carmen-Habanera_(Love_is_Like_a_Woo_-_Marguerite_D'Alvarez_noisy_input.wav


In [None]:
# Denoise every uploaded file. Results are kept per file name so that uploading
# several files does not discard all but the last one; `denoise_data` still
# holds the most recent result.
denoised_by_file={}
for fn in uploaded.keys():
  print('Denoising uploaded file "{name}"'.format(
      name=fn))
  denoise_data=denoise_audio(fn)
  denoised_by_file[fn]=denoise_data

  0%|          | 0/41 [00:00<?, ?it/s]

Denoising uploaded file "Carmen-Habanera_(Love_is_Like_a_Woo_-_Marguerite_D'Alvarez_noisy_input.wav"
float64


 71%|███████   | 29/41 [09:59<04:09, 20.77s/it]