training-graph-compiler.h 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. // decoder/training-graph-compiler.h
  2. // Copyright 2009-2011 Microsoft Corporation
  3. // 2018 Johns Hopkins University (author: Daniel Povey)
  4. // See ../../COPYING for clarification regarding multiple authors
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License");
  7. // you may not use this file except in compliance with the License.
  8. // You may obtain a copy of the License at
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  11. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  12. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  13. // MERCHANTABLITY OR NON-INFRINGEMENT.
  14. // See the Apache 2 License for the specific language governing permissions and
  15. // limitations under the License.
  16. #ifndef KALDI_DECODER_TRAINING_GRAPH_COMPILER_H_
  17. #define KALDI_DECODER_TRAINING_GRAPH_COMPILER_H_
  18. #include "base/kaldi-common.h"
  19. #include "hmm/transition-model.h"
  20. #include "fst/fstlib.h"
  21. #include "fstext/fstext-lib.h"
  22. #include "tree/context-dep.h"
  23. namespace kaldi {
  24. struct TrainingGraphCompilerOptions {
  25. BaseFloat transition_scale;
  26. BaseFloat self_loop_scale;
  27. bool rm_eps;
  28. bool reorder; // (Dan-style graphs)
  29. explicit TrainingGraphCompilerOptions(BaseFloat transition_scale = 1.0,
  30. BaseFloat self_loop_scale = 1.0,
  31. bool b = true) :
  32. transition_scale(transition_scale),
  33. self_loop_scale(self_loop_scale),
  34. rm_eps(false),
  35. reorder(b) { }
  36. void Register(OptionsItf *opts) {
  37. opts->Register("transition-scale", &transition_scale, "Scale of transition "
  38. "probabilities (excluding self-loops)");
  39. opts->Register("self-loop-scale", &self_loop_scale, "Scale of self-loop vs. "
  40. "non-self-loop probability mass ");
  41. opts->Register("reorder", &reorder, "Reorder transition ids for greater decoding efficiency.");
  42. opts->Register("rm-eps", &rm_eps, "Remove [most] epsilons before minimization (only applicable "
  43. "if disambig symbols present)");
  44. }
  45. };
  46. class TrainingGraphCompiler {
  47. public:
  48. TrainingGraphCompiler(const TransitionModel &trans_model, // Maintains reference to this object.
  49. const ContextDependency &ctx_dep, // And this.
  50. fst::VectorFst<fst::StdArc> *lex_fst, // Takes ownership of this object.
  51. // It should not contain disambiguation symbols or subsequential symbol,
  52. // but it should contain optional silence.
  53. const std::vector<int32> &disambig_syms, // disambig symbols in phone symbol table.
  54. const TrainingGraphCompilerOptions &opts);
  55. // CompileGraph compiles a single training graph its input is a
  56. // weighted acceptor (G) at the word level, its output is HCLG.
  57. // Note: G could actually be a transducer, it would also work.
  58. // This function is not const for technical reasons involving the cache.
  59. // if not for "table_compose" we could make it const.
  60. bool CompileGraph(const fst::VectorFst<fst::StdArc> &word_grammar,
  61. fst::VectorFst<fst::StdArc> *out_fst);
  62. // Same as `CompileGraph`, but uses an external LG fst.
  63. bool CompileGraphFromLG(const fst::VectorFst<fst::StdArc> &phone2word_fst,
  64. fst::VectorFst<fst::StdArc> * out_fst);
  65. // CompileGraphs allows you to compile a number of graphs at the same
  66. // time. This consumes more memory but is faster.
  67. bool CompileGraphs(
  68. const std::vector<const fst::VectorFst<fst::StdArc> *> &word_fsts,
  69. std::vector<fst::VectorFst<fst::StdArc> *> *out_fsts);
  70. // This version creates an FST from the text and calls CompileGraph.
  71. bool CompileGraphFromText(const std::vector<int32> &transcript,
  72. fst::VectorFst<fst::StdArc> *out_fst);
  73. // This function creates FSTs from the text and calls CompileGraphs.
  74. bool CompileGraphsFromText(
  75. const std::vector<std::vector<int32> > &word_grammar,
  76. std::vector<fst::VectorFst<fst::StdArc> *> *out_fsts);
  77. ~TrainingGraphCompiler() { delete lex_fst_; }
  78. private:
  79. const TransitionModel &trans_model_;
  80. const ContextDependency &ctx_dep_;
  81. fst::VectorFst<fst::StdArc> *lex_fst_; // lexicon FST (an input; we take
  82. // ownership as we need to modify it).
  83. std::vector<int32> disambig_syms_; // disambig symbols (if any) in the phone
  84. int32 subsequential_symbol_; // search in ../fstext/context-fst.h for more info.
  85. // symbol table.
  86. fst::TableComposeCache<fst::Fst<fst::StdArc> > lex_cache_; // stores matcher..
  87. // this is one of Dan's extensions.
  88. TrainingGraphCompilerOptions opts_;
  89. };
  90. } // end namespace kaldi.
  91. #endif