fstmakecontextsyms.cc 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. // fstbin/fstmakecontextsyms.cc
  2. // Copyright 2009-2011 Microsoft Corporation
  3. // See ../../COPYING for clarification regarding multiple authors
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  12. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  13. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  14. // MERCHANTABILITY OR NON-INFRINGEMENT.
  15. // See the Apache 2 License for the specific language governing permissions and
  16. // limitations under the License.
  17. #include "tree/context-dep.h"
  18. #include "util/common-utils.h"
  19. #include "fst/fstlib.h"
  20. #include "fstext/fstext-utils.h"
  21. #include "fstext/context-fst.h"
  22. /*
  23. Test for this program and fstmakecontextfst:
  24. mkdir -p ~/tmpdir
  25. pushd ~/tmpdir
  26. (echo "<eps> 0"; echo "a 1"; echo "b 2"; echo "#0 3"; echo "#1 4"; echo "#$ 5" ) > phones.txt
  27. ( echo 3; echo 4 ) > disambig.list
  28. fstmakecontextfst --read-disambig-syms=disambig.list <(grep -v '#' phones.txt) 5 ilabels.int > C.fst
  29. fstmakecontextsyms phones.txt ilabels.int > context_syms.txt
  30. fstprint --isymbols=context_syms.txt --osymbols=phones.txt C.fst > C.txt
  31. fstrandgen C.fst | fstprint --isymbols=context_syms.txt --osymbols=phones.txt
  32. Example output:
  33. fstrandgen C.fst | fstprint --isymbols=context_syms.txt --osymbols=phones.txt
  34. 0 1 #-1 b
  35. 1 2 <eps>/b/<eps> #$
  36. 2 3 #1 #1
  37. 3 4 #0 #0
  38. 4 5 #0 #0
  39. 5 6 #0 #0
  40. 6 7 #0 #0
  41. 7 8 #0 #0
  42. 8 9 #1 #1
  43. 9
  44. */
  45. int main(int argc, char *argv[]) {
  46. try {
  47. using namespace kaldi;
  48. using namespace fst;
  49. typedef fst::StdArc::Label Label;
  50. const char *usage = "Create input symbols for CLG\n"
  51. "Usage: fstmakecontextsyms phones-symtab ilabels_input_file [output-symtab.txt]\n"
  52. "E.g.: fstmakecontextsyms phones.txt ilabels.sym > context_symbols.txt\n";
  53. ParseOptions po(usage);
  54. std::string disambig_list_file = "",
  55. phone_separator = "/",
  56. initial_disambig = "#-1";
  57. po.Register("phone-separator", &phone_separator,
  58. "Separator for phones in phone-in-context symbols.");
  59. po.Register("initial-disambig", &initial_disambig,
  60. "Name for special disambiguation symbol that occurs at start "
  61. "of context-dependent phone sequences");
  62. po.Read(argc, argv);
  63. if (po.NumArgs() < 2 || po.NumArgs() > 3) {
  64. po.PrintUsage();
  65. exit(1);
  66. }
  67. std::string phones_symtab_filename = po.GetArg(1),
  68. ilabel_info_filename = po.GetArg(2),
  69. clg_symtab_filename = po.GetOptArg(3);
  70. std::vector<std::vector<kaldi::int32> > ilabel_info;
  71. {
  72. bool binary;
  73. Input ki(ilabel_info_filename, &binary);
  74. ReadILabelInfo(ki.Stream(),
  75. binary, &ilabel_info);
  76. }
  77. fst::SymbolTable *phones_symtab = NULL;
  78. { // read phone symbol table.
  79. std::ifstream is(phones_symtab_filename.c_str());
  80. phones_symtab = fst::SymbolTable::ReadText(is, phones_symtab_filename);
  81. if (!phones_symtab) KALDI_ERR << "Could not read phones symbol-table file "<<phones_symtab_filename;
  82. }
  83. fst::SymbolTable *clg_symtab =
  84. CreateILabelInfoSymbolTable(ilabel_info,
  85. *phones_symtab,
  86. phone_separator,
  87. initial_disambig);
  88. if (clg_symtab_filename == "") {
  89. if (!clg_symtab->WriteText(std::cout))
  90. KALDI_ERR << "Cannot write symbol table to standard output.";
  91. } else {
  92. if (!clg_symtab->WriteText(clg_symtab_filename))
  93. KALDI_ERR << "Cannot open symbol table file "<<clg_symtab_filename<<" for writing.";
  94. }
  95. delete clg_symtab;
  96. delete phones_symtab;
  97. return 0;
  98. } catch(const std::exception &e) {
  99. std::cerr << e.what();
  100. return -1;
  101. }
  102. }