|
|
@@ -111,6 +111,7 @@ class ESPnetDataset(Dataset):
|
|
|
int_dtype: str = "long",
|
|
|
dest_sample_rate: int = 16000,
|
|
|
speed_perturb: tuple = None,
|
|
|
+ mode: str = "train",
|
|
|
):
|
|
|
assert check_argument_types()
|
|
|
if len(path_name_type_list) == 0:
|
|
|
@@ -125,6 +126,7 @@ class ESPnetDataset(Dataset):
|
|
|
self.int_dtype = int_dtype
|
|
|
self.dest_sample_rate = dest_sample_rate
|
|
|
self.speed_perturb = speed_perturb
|
|
|
+ self.mode = mode
|
|
|
|
|
|
self.loader_dict = {}
|
|
|
self.debug_info = {}
|
|
|
@@ -148,7 +150,8 @@ class ESPnetDataset(Dataset):
|
|
|
loader_type: loader_type. sound, npy, text, etc
|
|
|
"""
|
|
|
if loader_type == "sound":
|
|
|
- loader = SoundScpReader(path, self.dest_sample_rate, normalize=True, always_2d=False, speed_perturb=self.speed_perturb)
|
|
|
+ speed_perturb = self.speed_perturb if self.mode == "train" else None
|
|
|
+ loader = SoundScpReader(path, self.dest_sample_rate, normalize=True, always_2d=False, speed_perturb=speed_perturb)
|
|
|
return AdapterForSoundScpReader(loader, self.float_dtype)
|
|
|
elif loader_type == "kaldi_ark":
|
|
|
loader = kaldiio.load_scp(path)
|