嘉渊 před 2 lety
rodič
revize
e23b7dc34f

+ 1 - 1
egs/aishell/paraformerbert/local/extract_embeds.sh

@@ -14,7 +14,7 @@ nj=32
 
 for data_set in train dev test;do
     scp=$raw_dataset_path/dump/fbank/${data_set}/text
-    local_scp_dir_raw=${raw_dataset_path}/${data_set}
+    local_scp_dir_raw=${raw_dataset_path}/data/embeds/${data_set}
     local_scp_dir=$local_scp_dir_raw/split$nj
     local_records_dir=$local_scp_dir_raw/ark
 

+ 2 - 2
funasr/utils/prepare_data.py

@@ -183,9 +183,9 @@ def prepare_data(args, distributed_option):
         ]
         if args.embed_path is not None:
             args.train_data_path_and_name_and_type[0].append(
-                "{}/embed/kaldi_ark".format(os.path.join(args.embed_path, args.train_set, "embeds.scp")))
+                "{}/embed/kaldi_ark".format(os.path.join(args.embed_path, "embeds", args.train_set, "embeds.scp")))
             args.valid_data_path_and_name_and_type[0].append(
-                "{}/embed/kaldi_ark".format(os.path.join(args.embed_path, args.dev_set, "embeds.scp")))
+                "{}/embed/kaldi_ark".format(os.path.join(args.embed_path, "embeds", args.dev_set, "embeds.scp")))
     else:
         args.train_data_file = os.path.join(args.data_dir, args.train_set, "data.list")
         args.valid_data_file = os.path.join(args.data_dir, args.valid_set, "data.list")