#!/usr/bin/env bash
# train_compile_ngram.sh
#
# Trains an n-gram language model and compiles the decoding graph (TLG.fst)
# plus the resource files needed for decoding.
#
# Usage (all paths are relative, so run from the directory containing fst/):
#   bash train_compile_ngram.sh
#
# Requirements: wget, tar, python3, and SRILM for the LM training step.
# Each pipeline step aborts the script on failure, because every later step
# consumes the output of the previous one.

# Several helper scripts below are invoked directly, so they need the
# executable bit.
chmod +x fst/*

# Source the optional environment setup file when it is present.
[ -f path.sh ] && . ./path.sh

# Download the training corpus and lexicon archive.
wget https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/requirements/lm.tar.gz || exit 1

# Extract the archive.
# NOTE(review): presumably unpacks into ./lm, which the steps below read from
# and write to -- confirm against the archive layout.
tar -zxvf lm.tar.gz || exit 1

# Train the language model; make sure that SRILM is installed.
bash fst/train_lms.sh || exit 1

# Generate the lexicon.
python3 fst/generate_lexicon.py lm/corpus.dict lm/lexicon.txt lm/lexicon.out || exit 1

# Compile the lexicon and token FSTs.
fst/compile_dict_token.sh lm lm/tmp lm/lang || exit 1

# Compile the language-model FST and the final decoding graph TLG.fst.
fst/make_decode_graph.sh lm lm/lang || exit 1

# Collect the resource files required for decoding.
fst/collect_resource_file.sh lm lm/resource || exit 1