diff --git a/dataset_loader.py b/dataset_loader.py
index 582af6c..91a6859 100644
--- a/dataset_loader.py
+++ b/dataset_loader.py
@@ -8,9 +8,14 @@
 import soundfile as sf
 import pandas as pd
 import glob
 from tqdm import tqdm
+import pathlib
 
-# generator function. It reads the csv file with pandas and loads the largest audio segments from each recording. If extend=False, it will only read the segments with length>length_seg, trim them and yield them with no further processing. Otherwise, if the segment length is inferior, it will extend the length using concatenative synthesis.
+# generator function. It reads the csv file with pandas and loads the largest
+# audio segments from each recording. If extend=False, it will only read the
+# segments with length>length_seg, trim them and yield them with no further
+# processing. Otherwise, if the segment length is inferior, it will extend the
+# length using concatenative synthesis.
 def __noise_sample_generator(info_file, fs, length_seq, split):
     head = os.path.split(info_file)[0]
     load_data = pd.read_csv(info_file)
@@ -24,20 +29,24 @@ def __noise_sample_generator(info_file, fs, length_seq, split):
     for i in r:
         segments = ast.literal_eval(load_data_split.loc[i, "segments"])
         if split == "test":
-            loaded_data, Fs = sf.read(
-                os.path.join(
-                    head,
-                    load_data_split["recording"].loc[i],
-                    load_data_split["largest_segment"].loc[i],
-                )
+            path = os.path.join(
+                head,
+                load_data_split["recording"].loc[i],
+                load_data_split["largest_segment"].loc[i],
             )
+            if not pathlib.Path(path).is_file():
+                print(f'WARNING! file does not exist: {path}')
+                continue
+            loaded_data, Fs = sf.read(path)
         else:
             num = np.random.randint(0, len(segments))
-            loaded_data, Fs = sf.read(
-                os.path.join(
-                    head, load_data_split["recording"].loc[i], segments[num]
-                )
+            path = os.path.join(
+                head, load_data_split["recording"].loc[i], segments[num]
             )
+            if not pathlib.Path(path).is_file():
+                print(f'WARNING! file does not exist: {path}')
+                continue
+            loaded_data, Fs = sf.read(path)
         assert fs == Fs, "wrong sampling rate"
         yield __extend_sample_by_repeating(loaded_data, fs, length_seq)
 