fix missing paths

This commit is contained in:
festinuz 2025-05-08 15:00:16 +03:00
parent 9870240572
commit 03668d92cb

View File

@ -8,9 +8,14 @@ import soundfile as sf
import pandas as pd
import glob
from tqdm import tqdm
import pathlib
# generator function. It reads the csv file with pandas and loads the largest audio segments from each recording. If extend=False, it will only read the segments with length>length_seg, trim them and yield them with no further processing. Otherwise, if the segment length is inferior, it will extend the length using concatenative synthesis.
# Generator function. It reads the CSV file with pandas and loads the largest
# audio segments from each recording. If extend=False, it only reads the
# segments with length > length_seq, trims them, and yields them with no
# further processing. Otherwise, if a segment is shorter than that, it extends
# the segment's length using concatenative synthesis.
def __noise_sample_generator(info_file, fs, length_seq, split):
head = os.path.split(info_file)[0]
load_data = pd.read_csv(info_file)
@ -24,20 +29,24 @@ def __noise_sample_generator(info_file, fs, length_seq, split):
for i in r:
segments = ast.literal_eval(load_data_split.loc[i, "segments"])
if split == "test":
loaded_data, Fs = sf.read(
os.path.join(
head,
load_data_split["recording"].loc[i],
load_data_split["largest_segment"].loc[i],
)
path = os.path.join(
head,
load_data_split["recording"].loc[i],
load_data_split["largest_segment"].loc[i],
)
if not pathlib.Path(path).is_file():
print(f'WARNING! file does not exist: {path}')
continue
loaded_data, Fs = sf.read(path)
else:
num = np.random.randint(0, len(segments))
loaded_data, Fs = sf.read(
os.path.join(
head, load_data_split["recording"].loc[i], segments[num]
)
path = os.path.join(
head, load_data_split["recording"].loc[i], segments[num]
)
if not pathlib.Path(path).is_file():
print(f'WARNING! file does not exist: {path}')
continue
loaded_data, Fs = sf.read(path)
assert fs == Fs, "wrong sampling rate"
yield __extend_sample_by_repeating(loaded_data, fs, length_seq)