fix missing paths

2025-05-08 15:00:16 +03:00 · 2025-05-08 15:00:16 +03:00 · 03668d92cb
commit 03668d92cb
parent 9870240572
1 changed files with 20 additions and 11 deletions
--- a/dataset_loader.py
+++ b/dataset_loader.py
@@ -8,9 +8,14 @@ import soundfile as sf
 import pandas as pd
 import glob
 from tqdm import tqdm
+import pathlib


-# generator function. It reads the csv file with pandas and loads the largest audio segments from each recording. If extend=False, it will only read the segments with length>length_seg, trim them and yield them with no further processing. Otherwise, if the segment length is inferior, it will extend the length using concatenative synthesis.
+# generator function. It reads the csv file with pandas and loads the largest
+# audio segments from each recording. If extend=False, it will only read the
+# segments with length>length_seg, trim them and yield them with no further
+# processing. Otherwise, if the segment length is inferior, it will extend the
+# length using concatenative synthesis.
 def __noise_sample_generator(info_file, fs, length_seq, split):
    head = os.path.split(info_file)[0]
    load_data = pd.read_csv(info_file)
@@ -24,20 +29,24 @@ def __noise_sample_generator(info_file, fs, length_seq, split):
        for i in r:
            segments = ast.literal_eval(load_data_split.loc[i, "segments"])
            if split == "test":
-                loaded_data, Fs = sf.read(
-                    os.path.join(
-                        head,
-                        load_data_split["recording"].loc[i],
-                        load_data_split["largest_segment"].loc[i],
-                    )
+                path = os.path.join(
+                    head,
+                    load_data_split["recording"].loc[i],
+                    load_data_split["largest_segment"].loc[i],
                )
+                if not pathlib.Path(path).is_file():
+                    print(f'WARNING! file does not exist: {path}')
+                    continue
+                loaded_data, Fs = sf.read(path)
            else:
                num = np.random.randint(0, len(segments))
-                loaded_data, Fs = sf.read(
-                    os.path.join(
-                        head, load_data_split["recording"].loc[i], segments[num]
-                    )
+                path = os.path.join(
+                    head, load_data_split["recording"].loc[i], segments[num]
                )
+                if not pathlib.Path(path).is_file():
+                    print(f'WARNING! file does not exist: {path}')
+                    continue
+                loaded_data, Fs = sf.read(path)
            assert fs == Fs, "wrong sampling rate"

            yield __extend_sample_by_repeating(loaded_data, fs, length_seq)