#!/usr/bin/env bash
# subset_data_dir_tr_cv.sh
#
# Randomly split a data directory into a training subset and a dev subset.
#
# Usage: subset_data_dir_tr_cv.sh [options] <train-data> <out-dir>
#
# Reads:
#   <train-data>/wav.scp
#   <train-data>/text
# Writes:
#   <out-dir>/dev/{wav.scp,text}    the first dev_num_utt shuffled lines
#   <out-dir>/train/{wav.scp,text}  all remaining shuffled lines
#
# Options are handled by utils/parse_options.sh, which is not visible here;
# presumably "--dev-num-utt N" overrides the dev_num_utt default below
# (TODO confirm against that helper). The script must be started from a
# directory that contains utils/.
#
# Exit status: 0 on success, non-zero on bad arguments, missing or
# inconsistent input files, or any failing command.

dev_num_utt=1000 # number of lines that go to the dev subset

echo "$0 $*"
. utils/parse_options.sh || exit 1

# Strict mode is switched on only after the sourced helper has run, because
# that helper may not be written to tolerate `set -u`.
set -euo pipefail

if (($# < 2)); then
  echo "Usage: $0 [options] <train-data> <out-dir>" >&2
  exit 1
fi
train_data=$1
out_dir=$2

for name in wav.scp text; do
  if [[ ! -f "${train_data}/${name}" ]]; then
    echo "$0: no such file ${train_data}/${name}" >&2
    exit 1
  fi
done

if [[ ! "${dev_num_utt}" =~ ^[0-9]+$ ]]; then
  echo "$0: dev_num_utt must be a non-negative integer, got '${dev_num_utt}'" >&2
  exit 1
fi
# Force base 10 so a value such as 010 is not read as octal.
dev_num_utt=$((10#${dev_num_utt}))

# The arithmetic wrapper strips the padding some `wc` implementations emit.
num_utt=$(($(wc -l <"${train_data}/wav.scp")))
num_text=$(($(wc -l <"${train_data}/text")))

# Both files are shuffled separately with the same seed, so the two results
# can only line up when the files have the same number of lines (and,
# presumably, list their entries in the same order - not verified here).
if ((num_utt != num_text)); then
  echo "$0: ${train_data}/wav.scp has ${num_utt} lines but ${train_data}/text has ${num_text}" >&2
  exit 1
fi

# Without this guard the training subset would be empty or would overlap
# the dev subset.
if ((dev_num_utt >= num_utt)); then
  echo "$0: dev_num_utt (${dev_num_utt}) must be smaller than the number of lines in ${train_data}/wav.scp (${num_utt})" >&2
  exit 1
fi

mkdir -p "${out_dir}/train" "${out_dir}/dev"

# Scratch space for the shuffled lists; removed on every exit path.
tmp_dir=$(mktemp -d "${out_dir}/train/.split.XXXXXX")
trap 'rm -rf -- "${tmp_dir}"' EXIT

# Shuffle both inputs before writing any output, so the split still works
# when <train-data> is the same directory as <out-dir>/train or <out-dir>/dev.
for name in wav.scp text; do
  utils/shuffle_list.pl --srand 1 "${train_data}/${name}" >"${tmp_dir}/${name}.shuf"
done

for name in wav.scp text; do
  head -n "${dev_num_utt}" "${tmp_dir}/${name}.shuf" >"${out_dir}/dev/${name}"
  # "+K" means "start at line K": everything after the dev lines, without
  # depending on the line count computed above.
  tail -n "+$((dev_num_utt + 1))" "${tmp_dir}/${name}.shuf" >"${out_dir}/train/${name}"
done