lattice-faster-decoder.h 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. // decoder/lattice-faster-decoder.h
  2. // Copyright 2009-2013 Microsoft Corporation; Mirko Hannemann;
  3. // 2013-2014 Johns Hopkins University (Author: Daniel Povey)
  4. // 2014 Guoguo Chen
  5. // 2018 Zhehuai Chen
  6. // See ../../COPYING for clarification regarding multiple authors
  7. //
  8. // Licensed under the Apache License, Version 2.0 (the "License");
  9. // you may not use this file except in compliance with the License.
  10. // You may obtain a copy of the License at
  11. //
  12. // http://www.apache.org/licenses/LICENSE-2.0
  13. //
  14. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  16. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  17. // MERCHANTABLITY OR NON-INFRINGEMENT.
  18. // See the Apache 2 License for the specific language governing permissions and
  19. // limitations under the License.
  20. #ifndef KALDI_DECODER_LATTICE_FASTER_DECODER_H_
  21. #define KALDI_DECODER_LATTICE_FASTER_DECODER_H_
  22. //#include "decoder/grammar-fst.h"
  23. #include "fst/fstlib.h"
  24. #include "fst/memory.h"
  25. #include "fstext/fstext-lib.h"
  26. #include "itf/decodable-itf.h"
  27. #include "lat/determinize-lattice-pruned.h"
  28. #include "lat/kaldi-lattice.h"
  29. #include "util/hash-list.h"
  30. #include "util/stl-utils.h"
  31. #include "bias-lm.h"
  32. namespace kaldi {
  33. struct LatticeFasterDecoderConfig {
  34. BaseFloat beam;
  35. int32 max_active;
  36. int32 min_active;
  37. BaseFloat lattice_beam;
  38. int32 prune_interval;
  39. bool determinize_lattice; // not inspected by this class... used in
  40. // command-line program.
  41. BaseFloat beam_delta;
  42. BaseFloat hash_ratio;
  43. // Note: we don't make prune_scale configurable on the command line, it's not
  44. // a very important parameter. It affects the algorithm that prunes the
  45. // tokens as we go.
  46. BaseFloat prune_scale;
  47. // Number of elements in the block for Token and ForwardLink memory
  48. // pool allocation.
  49. int32 memory_pool_tokens_block_size;
  50. int32 memory_pool_links_block_size;
  51. // Most of the options inside det_opts are not actually queried by the
  52. // LatticeFasterDecoder class itself, but by the code that calls it, for
  53. // example in the function DecodeUtteranceLatticeFaster.
  54. fst::DeterminizeLatticePhonePrunedOptions det_opts;
  55. LatticeFasterDecoderConfig(float glob_beam, float lat_beam)
  56. : beam(glob_beam),
  57. max_active(std::numeric_limits<int32>::max()),
  58. min_active(200),
  59. lattice_beam(lat_beam),
  60. prune_interval(25),
  61. determinize_lattice(true),
  62. beam_delta(0.5),
  63. hash_ratio(2.0),
  64. prune_scale(0.1),
  65. memory_pool_tokens_block_size(1 << 8),
  66. memory_pool_links_block_size(1 << 8) {}
  67. LatticeFasterDecoderConfig()
  68. : beam(3.0),
  69. max_active(std::numeric_limits<int32>::max()),
  70. min_active(200),
  71. lattice_beam(3.0),
  72. prune_interval(25),
  73. determinize_lattice(true),
  74. beam_delta(0.5),
  75. hash_ratio(2.0),
  76. prune_scale(0.1),
  77. memory_pool_tokens_block_size(1 << 8),
  78. memory_pool_links_block_size(1 << 8) {}
  79. void Register(OptionsItf *opts) {
  80. det_opts.Register(opts);
  81. opts->Register("beam", &beam, "Decoding beam. Larger->slower, more accurate.");
  82. opts->Register("max-active", &max_active, "Decoder max active states. Larger->slower; "
  83. "more accurate");
  84. opts->Register("min-active", &min_active, "Decoder minimum #active states.");
  85. opts->Register("lattice-beam", &lattice_beam, "Lattice generation beam. Larger->slower, "
  86. "and deeper lattices");
  87. opts->Register("prune-interval", &prune_interval, "Interval (in frames) at "
  88. "which to prune tokens");
  89. opts->Register("determinize-lattice", &determinize_lattice, "If true, "
  90. "determinize the lattice (lattice-determinization, keeping only "
  91. "best pdf-sequence for each word-sequence).");
  92. opts->Register("beam-delta", &beam_delta, "Increment used in decoding-- this "
  93. "parameter is obscure and relates to a speedup in the way the "
  94. "max-active constraint is applied. Larger is more accurate.");
  95. opts->Register("hash-ratio", &hash_ratio, "Setting used in decoder to "
  96. "control hash behavior");
  97. opts->Register("memory-pool-tokens-block-size", &memory_pool_tokens_block_size,
  98. "Memory pool block size suggestion for storing tokens (in elements). "
  99. "Smaller uses less memory but increases cache misses.");
  100. opts->Register("memory-pool-links-block-size", &memory_pool_links_block_size,
  101. "Memory pool block size suggestion for storing links (in elements). "
  102. "Smaller uses less memory but increases cache misses.");
  103. }
  104. void Check() const {
  105. KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0
  106. && min_active <= max_active
  107. && prune_interval > 0 && beam_delta > 0.0 && hash_ratio >= 1.0
  108. && prune_scale > 0.0 && prune_scale < 1.0);
  109. }
  110. };
  111. namespace decoder {
  112. // We will template the decoder on the token type as well as the FST type; this
  113. // is a mechanism so that we can use the same underlying decoder code for
  114. // versions of the decoder that support quickly getting the best path
  115. // (LatticeFasterOnlineDecoder, see lattice-faster-online-decoder.h) and also
  116. // those that do not (LatticeFasterDecoder).
  117. // ForwardLinks are the links from a token to a token on the next frame.
  118. // or sometimes on the current frame (for input-epsilon links).
  119. template <typename Token>
  120. struct ForwardLink {
  121. using Label = fst::StdArc::Label;
  122. Token *next_tok; // the next token [or NULL if represents final-state]
  123. Label ilabel; // ilabel on arc
  124. Label olabel; // olabel on arc
  125. BaseFloat graph_cost; // graph cost of traversing arc (contains LM, etc.)
  126. BaseFloat acoustic_cost; // acoustic cost (pre-scaled) of traversing arc
  127. ForwardLink *next; // next in singly-linked list of forward arcs (arcs
  128. // in the state-level lattice) from a token.
  129. inline ForwardLink(Token *next_tok, Label ilabel, Label olabel,
  130. BaseFloat graph_cost, BaseFloat acoustic_cost,
  131. ForwardLink *next):
  132. next_tok(next_tok), ilabel(ilabel), olabel(olabel),
  133. graph_cost(graph_cost), acoustic_cost(acoustic_cost),
  134. next(next) { }
  135. };
  136. struct StdToken {
  137. using ForwardLinkT = ForwardLink<StdToken>;
  138. using Token = StdToken;
  139. // Standard token type for LatticeFasterDecoder. Each active HCLG
  140. // (decoding-graph) state on each frame has one token.
  141. // tot_cost is the total (LM + acoustic) cost from the beginning of the
  142. // utterance up to this point. (but see cost_offset_, which is subtracted
  143. // to keep it in a good numerical range).
  144. BaseFloat tot_cost;
  145. // exta_cost is >= 0. After calling PruneForwardLinks, this equals the
  146. // minimum difference between the cost of the best path that this link is a
  147. // part of, and the cost of the absolute best path, under the assumption that
  148. // any of the currently active states at the decoding front may eventually
  149. // succeed (e.g. if you were to take the currently active states one by one
  150. // and compute this difference, and then take the minimum).
  151. BaseFloat extra_cost;
  152. // 'links' is the head of singly-linked list of ForwardLinks, which is what we
  153. // use for lattice generation.
  154. ForwardLinkT *links;
  155. //'next' is the next in the singly-linked list of tokens for this frame.
  156. Token *next;
  157. // bias_lm_state is used to record the state of tokens in the bias lm network
  158. LatticeArc::StateId bias_lm_state;
  159. // This function does nothing and should be optimized out; it's needed
  160. // so we can share the regular LatticeFasterDecoderTpl code and the code
  161. // for LatticeFasterOnlineDecoder that supports fast traceback.
  162. inline void SetBackpointer (Token *backpointer) { }
  163. // This constructor just ignores the 'backpointer' argument. That argument is
  164. // needed so that we can use the same decoder code for LatticeFasterDecoderTpl
  165. // and LatticeFasterOnlineDecoderTpl (which needs backpointers to support a
  166. // fast way to obtain the best path).
  167. inline StdToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links,
  168. Token *next, Token *backpointer):
  169. tot_cost(tot_cost), extra_cost(extra_cost), links(links), next(next), bias_lm_state(0) { }
  170. inline void GetLabelSeq(Token *tok, std::vector<int> &phn_id) {}
  171. };
  172. struct BackpointerToken {
  173. using ForwardLinkT = ForwardLink<BackpointerToken>;
  174. using Token = BackpointerToken;
  175. // BackpointerToken is like Token but also
  176. // Standard token type for LatticeFasterDecoder. Each active HCLG
  177. // (decoding-graph) state on each frame has one token.
  178. // tot_cost is the total (LM + acoustic) cost from the beginning of the
  179. // utterance up to this point. (but see cost_offset_, which is subtracted
  180. // to keep it in a good numerical range).
  181. BaseFloat tot_cost;
  182. // exta_cost is >= 0. After calling PruneForwardLinks, this equals
  183. // the minimum difference between the cost of the best path, and the cost of
  184. // this is on, and the cost of the absolute best path, under the assumption
  185. // that any of the currently active states at the decoding front may
  186. // eventually succeed (e.g. if you were to take the currently active states
  187. // one by one and compute this difference, and then take the minimum).
  188. BaseFloat extra_cost;
  189. // 'links' is the head of singly-linked list of ForwardLinks, which is what we
  190. // use for lattice generation.
  191. ForwardLinkT *links;
  192. //'next' is the next in the singly-linked list of tokens for this frame.
  193. BackpointerToken *next;
  194. // Best preceding BackpointerToken (could be a on this frame, connected to
  195. // this via an epsilon transition, or on a previous frame). This is only
  196. // required for an efficient GetBestPath function in
  197. // LatticeFasterOnlineDecoderTpl; it plays no part in the lattice generation
  198. // (the "links" list is what stores the forward links, for that).
  199. Token *backpointer;
  200. // bias_lm_state is used to record the state of tokens in the bias lm network
  201. LatticeArc::StateId bias_lm_state;
  202. inline void SetBackpointer (Token *backpointer) {
  203. this->backpointer = backpointer;
  204. }
  205. inline BackpointerToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links,
  206. Token *next, Token *backpointer):
  207. tot_cost(tot_cost), extra_cost(extra_cost), links(links), next(next),
  208. backpointer(backpointer), bias_lm_state(0) { }
  209. inline void GetLabelSeq(Token *token, std::vector<int> &phn_id) {
  210. ForwardLinkT* link;
  211. Token *tok = token;
  212. while (tok && tok->backpointer) {
  213. for (link = tok->backpointer->links; link != NULL; link = link->next) {
  214. if (link->next_tok == tok) {
  215. phn_id.push_back(link->ilabel - 1);
  216. break;
  217. }
  218. }
  219. tok = tok->backpointer;
  220. }
  221. }
  222. };
  223. } // namespace decoder
  224. /** This is the "normal" lattice-generating decoder.
  225. See \ref lattices_generation \ref decoders_faster and \ref decoders_simple
  226. for more information.
  227. The decoder is templated on the FST type and the token type. The token type
  228. will normally be StdToken, but also may be BackpointerToken which is to support
  229. quick lookup of the current best path (see lattice-faster-online-decoder.h)
  230. The FST you invoke this decoder which is expected to equal
  231. Fst::Fst<fst::StdArc>, a.k.a. StdFst, or GrammarFst. If you invoke it with
  232. FST == StdFst and it notices that the actual FST type is
  233. fst::VectorFst<fst::StdArc> or fst::ConstFst<fst::StdArc>, the decoder object
  234. will internally cast itself to one that is templated on those more specific
  235. types; this is an optimization for speed.
  236. */
  237. template <typename FST, typename Token = decoder::StdToken>
  238. class LatticeFasterDecoderTpl {
  239. public:
  240. using Arc = typename FST::Arc;
  241. using Label = typename Arc::Label;
  242. using StateId = typename Arc::StateId;
  243. using Weight = typename Arc::Weight;
  244. using ForwardLinkT = decoder::ForwardLink<Token>;
  245. // Instantiate this class once for each thing you have to decode.
  246. // This version of the constructor does not take ownership of
  247. // 'fst'.
  248. LatticeFasterDecoderTpl(const FST &fst,
  249. const LatticeFasterDecoderConfig &config);
  250. // This version of the constructor takes ownership of the fst, and will delete
  251. // it when this object is destroyed.
  252. LatticeFasterDecoderTpl(const LatticeFasterDecoderConfig &config,
  253. FST *fst);
  254. //LatticeFasterDecoderTpl() { }
  255. void SetOptions(const LatticeFasterDecoderConfig &config) {
  256. config_ = config;
  257. }
  258. const LatticeFasterDecoderConfig &GetOptions() const {
  259. return config_;
  260. }
  261. ~LatticeFasterDecoderTpl();
  262. /// Decodes until there are no more frames left in the "decodable" object..
  263. /// note, this may block waiting for input if the "decodable" object blocks.
  264. /// Returns true if any kind of traceback is available (not necessarily from a
  265. /// final state).
  266. bool Decode(DecodableInterface *decodable);
  267. /// says whether a final-state was active on the last frame. If it was not, the
  268. /// lattice (or traceback) will end with states that are not final-states.
  269. bool ReachedFinal() const {
  270. return FinalRelativeCost() != std::numeric_limits<BaseFloat>::infinity();
  271. }
  272. /// Outputs an FST corresponding to the single best path through the lattice.
  273. /// Returns true if result is nonempty (using the return status is deprecated,
  274. /// it will become void). If "use_final_probs" is true AND we reached the
  275. /// final-state of the graph then it will include those as final-probs, else
  276. /// it will treat all final-probs as one. Note: this just calls GetRawLattice()
  277. /// and figures out the shortest path.
  278. bool GetBestPath(Lattice *ofst,
  279. bool use_final_probs = true) const;
  280. /// Outputs an FST corresponding to the raw, state-level
  281. /// tracebacks. Returns true if result is nonempty.
  282. /// If "use_final_probs" is true AND we reached the final-state
  283. /// of the graph then it will include those as final-probs, else
  284. /// it will treat all final-probs as one.
  285. /// The raw lattice will be topologically sorted.
  286. ///
  287. /// See also GetRawLatticePruned in lattice-faster-online-decoder.h,
  288. /// which also supports a pruning beam, in case for some reason
  289. /// you want it pruned tighter than the regular lattice beam.
  290. /// We could put that here in future needed.
  291. bool GetRawLattice(Lattice *ofst, bool use_final_probs = true) const;
  292. /// [Deprecated, users should now use GetRawLattice and determinize it
  293. /// themselves, e.g. using DeterminizeLatticePhonePrunedWrapper].
  294. /// Outputs an FST corresponding to the lattice-determinized
  295. /// lattice (one path per word sequence). Returns true if result is nonempty.
  296. /// If "use_final_probs" is true AND we reached the final-state of the graph
  297. /// then it will include those as final-probs, else it will treat all
  298. /// final-probs as one.
  299. bool GetLattice(CompactLattice *ofst,
  300. bool use_final_probs = true) const;
  301. /// InitDecoding initializes the decoding, and should only be used if you
  302. /// intend to call AdvanceDecoding(). If you call Decode(), you don't need to
  303. /// call this. You can also call InitDecoding if you have already decoded an
  304. /// utterance and want to start with a new utterance.
  305. void InitDecoding();
  306. /// This will decode until there are no more frames ready in the decodable
  307. /// object. You can keep calling it each time more frames become available.
  308. /// If max_num_frames is specified, it specifies the maximum number of frames
  309. /// the function will decode before returning.
  310. void AdvanceDecoding(DecodableInterface *decodable,
  311. int32 max_num_frames = -1);
  312. /// This function may be optionally called after AdvanceDecoding(), when you
  313. /// do not plan to decode any further. It does an extra pruning step that
  314. /// will help to prune the lattices output by GetLattice and (particularly)
  315. /// GetRawLattice more completely, particularly toward the end of the
  316. /// utterance. If you call this, you cannot call AdvanceDecoding again (it
  317. /// will fail), and you cannot call GetLattice() and related functions with
  318. /// use_final_probs = false. Used to be called PruneActiveTokensFinal().
  319. void FinalizeDecoding();
  320. /// FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives
  321. /// more information. It returns the difference between the best (final-cost
  322. /// plus cost) of any token on the final frame, and the best cost of any token
  323. /// on the final frame. If it is infinity it means no final-states were
  324. /// present on the final frame. It will usually be nonnegative. If it not
  325. /// too positive (e.g. < 5 is my first guess, but this is not tested) you can
  326. /// take it as a good indication that we reached the final-state with
  327. /// reasonable likelihood.
  328. BaseFloat FinalRelativeCost() const;
  329. // Returns the number of frames decoded so far. The value returned changes
  330. // whenever we call ProcessEmitting().
  331. inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; }
  332. std::string GetTokResult(Token *tok);
  333. void SetBiasLm(std::shared_ptr<funasr::BiasLm> &bias_lm) {
  334. bias_lm_ = bias_lm;
  335. }
  336. void ClearBiasLm() {
  337. bias_lm_.reset();
  338. }
  339. protected:
  340. // we make things protected instead of private, as code in
  341. // LatticeFasterOnlineDecoderTpl, which inherits from this, also uses the
  342. // internals.
  343. // Deletes the elements of the singly linked list tok->links.
  344. void DeleteForwardLinks(Token *tok);
  345. // head of per-frame list of Tokens (list is in topological order),
  346. // and something saying whether we ever pruned it using PruneForwardLinks.
  347. struct TokenList {
  348. Token *toks;
  349. bool must_prune_forward_links;
  350. bool must_prune_tokens;
  351. TokenList(): toks(NULL), must_prune_forward_links(true),
  352. must_prune_tokens(true) { }
  353. };
  354. using Elem = typename HashList<StateId, Token*>::Elem;
  355. // Equivalent to:
  356. // struct Elem {
  357. // StateId key;
  358. // Token *val;
  359. // Elem *tail;
  360. // };
  361. void PossiblyResizeHash(size_t num_toks);
  362. // FindOrAddToken either locates a token in hash of toks_, or if necessary
  363. // inserts a new, empty token (i.e. with no forward links) for the current
  364. // frame. [note: it's inserted if necessary into hash toks_ and also into the
  365. // singly linked list of tokens active on this frame (whose head is at
  366. // active_toks_[frame]). The frame_plus_one argument is the acoustic frame
  367. // index plus one, which is used to index into the active_toks_ array.
  368. // Returns the Token pointer. Sets "changed" (if non-NULL) to true if the
  369. // token was newly created or the cost changed.
  370. // If Token == StdToken, the 'backpointer' argument has no purpose (and will
  371. // hopefully be optimized out).
  372. inline Elem *FindOrAddToken(StateId state, int32 frame_plus_one,
  373. BaseFloat tot_cost, Token *backpointer,
  374. bool *changed, StateId bias_lm_state = 0);
  375. // prunes outgoing links for all tokens in active_toks_[frame]
  376. // it's called by PruneActiveTokens
  377. // all links, that have link_extra_cost > lattice_beam are pruned
  378. // delta is the amount by which the extra_costs must change
  379. // before we set *extra_costs_changed = true.
  380. // If delta is larger, we'll tend to go back less far
  381. // toward the beginning of the file.
  382. // extra_costs_changed is set to true if extra_cost was changed for any token
  383. // links_pruned is set to true if any link in any token was pruned
  384. void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed,
  385. bool *links_pruned,
  386. BaseFloat delta);
  387. // This function computes the final-costs for tokens active on the final
  388. // frame. It outputs to final-costs, if non-NULL, a map from the Token*
  389. // pointer to the final-prob of the corresponding state, for all Tokens
  390. // that correspond to states that have final-probs. This map will be
  391. // empty if there were no final-probs. It outputs to
  392. // final_relative_cost, if non-NULL, the difference between the best
  393. // forward-cost including the final-prob cost, and the best forward-cost
  394. // without including the final-prob cost (this will usually be positive), or
  395. // infinity if there were no final-probs. [c.f. FinalRelativeCost(), which
  396. // outputs this quanitity]. It outputs to final_best_cost, if
  397. // non-NULL, the lowest for any token t active on the final frame, of
  398. // forward-cost[t] + final-cost[t], where final-cost[t] is the final-cost in
  399. // the graph of the state corresponding to token t, or the best of
  400. // forward-cost[t] if there were no final-probs active on the final frame.
  401. // You cannot call this after FinalizeDecoding() has been called; in that
  402. // case you should get the answer from class-member variables.
  403. void ComputeFinalCosts(unordered_map<Token*, BaseFloat> *final_costs,
  404. BaseFloat *final_relative_cost,
  405. BaseFloat *final_best_cost) const;
  406. // PruneForwardLinksFinal is a version of PruneForwardLinks that we call
  407. // on the final frame. If there are final tokens active, it uses
  408. // the final-probs for pruning, otherwise it treats all tokens as final.
  409. void PruneForwardLinksFinal();
  410. // Prune away any tokens on this frame that have no forward links.
  411. // [we don't do this in PruneForwardLinks because it would give us
  412. // a problem with dangling pointers].
  413. // It's called by PruneActiveTokens if any forward links have been pruned
  414. void PruneTokensForFrame(int32 frame_plus_one);
  415. // Go backwards through still-alive tokens, pruning them if the
  416. // forward+backward cost is more than lat_beam away from the best path. It's
  417. // possible to prove that this is "correct" in the sense that we won't lose
  418. // anything outside of lat_beam, regardless of what happens in the future.
  419. // delta controls when it considers a cost to have changed enough to continue
  420. // going backward and propagating the change. larger delta -> will recurse
  421. // less far.
  422. void PruneActiveTokens(BaseFloat delta);
  423. /// Gets the weight cutoff. Also counts the active tokens.
  424. BaseFloat GetCutoff(Elem *list_head, size_t *tok_count,
  425. BaseFloat *adaptive_beam, Elem **best_elem);
  426. /// Processes emitting arcs for one frame. Propagates from prev_toks_ to
  427. /// cur_toks_. Returns the cost cutoff for subsequent ProcessNonemitting() to
  428. /// use.
  429. BaseFloat ProcessEmitting(DecodableInterface *decodable);
  430. /// Processes nonemitting (epsilon) arcs for one frame. Called after
  431. /// ProcessEmitting() on each frame. The cost cutoff is computed by the
  432. /// preceding ProcessEmitting().
  433. void ProcessNonemitting(BaseFloat cost_cutoff);
  434. // HashList defined in ../util/hash-list.h. It actually allows us to maintain
  435. // more than one list (e.g. for current and previous frames), but only one of
  436. // them at a time can be indexed by StateId. It is indexed by frame-index
  437. // plus one, where the frame-index is zero-based, as used in decodable object.
  438. // That is, the emitting probs of frame t are accounted for in tokens at
  439. // toks_[t+1]. The zeroth frame is for nonemitting transition at the start of
  440. // the graph.
  441. HashList<StateId, Token*> toks_;
  442. std::vector<TokenList> active_toks_; // Lists of tokens, indexed by
  443. // frame (members of TokenList are toks, must_prune_forward_links,
  444. // must_prune_tokens).
  445. std::vector<const Elem* > queue_; // temp variable used in ProcessNonemitting,
  446. std::vector<BaseFloat> tmp_array_; // used in GetCutoff.
  447. // fst_ is a pointer to the FST we are decoding from.
  448. const FST *fst_;
  449. // delete_fst_ is true if the pointer fst_ needs to be deleted when this
  450. // object is destroyed.
  451. bool delete_fst_;
  452. std::vector<BaseFloat> cost_offsets_; // This contains, for each
  453. // frame, an offset that was added to the acoustic log-likelihoods on that
  454. // frame in order to keep everything in a nice dynamic range i.e. close to
  455. // zero, to reduce roundoff errors.
  456. LatticeFasterDecoderConfig config_;
  457. int32 num_toks_; // current total #toks allocated...
  458. bool warned_;
  459. /// decoding_finalized_ is true if someone called FinalizeDecoding(). [note,
  460. /// calling this is optional]. If true, it's forbidden to decode more. Also,
  461. /// if this is set, then the output of ComputeFinalCosts() is in the next
  462. /// three variables. The reason we need to do this is that after
  463. /// FinalizeDecoding() calls PruneTokensForFrame() for the final frame, some
  464. /// of the tokens on the last frame are freed, so we free the list from toks_
  465. /// to avoid having dangling pointers hanging around.
  466. bool decoding_finalized_;
  467. /// For the meaning of the next 3 variables, see the comment for
  468. /// decoding_finalized_ above., and ComputeFinalCosts().
  469. unordered_map<Token*, BaseFloat> final_costs_;
  470. BaseFloat final_relative_cost_;
  471. BaseFloat final_best_cost_;
  472. // Memory pools for storing tokens and forward links.
  473. // We use it to decrease the work put on allocator and to move some of data
  474. // together. Too small block sizes will result in more work to allocator but
  475. // bigger ones increase the memory usage.
  476. fst::MemoryPool<Token> token_pool_;
  477. fst::MemoryPool<ForwardLinkT> forward_link_pool_;
  478. // There are various cleanup tasks... the toks_ structure contains
  479. // singly linked lists of Token pointers, where Elem is the list type.
  480. // It also indexes them in a hash, indexed by state (this hash is only
  481. // maintained for the most recent frame). toks_.Clear()
  482. // deletes them from the hash and returns the list of Elems. The
  483. // function DeleteElems calls toks_.Delete(elem) for each elem in
  484. // the list, which returns ownership of the Elem to the toks_ structure
  485. // for reuse, but does not delete the Token pointer. The Token pointers
  486. // are reference-counted and are ultimately deleted in PruneTokensForFrame,
  487. // but are also linked together on each frame by their own linked-list,
  488. // using the "next" pointer. We delete them manually.
  489. void DeleteElems(Elem *list);
  490. // This function takes a singly linked list of tokens for a single frame, and
  491. // outputs a list of them in topological order (it will crash if no such order
  492. // can be found, which will typically be due to decoding graphs with epsilon
  493. // cycles, which are not allowed). Note: the output list may contain NULLs,
  494. // which the caller should pass over; it just happens to be more efficient for
  495. // the algorithm to output a list that contains NULLs.
  496. static void TopSortTokens(Token *tok_list,
  497. std::vector<Token*> *topsorted_list);
  498. void ClearActiveTokens();
  499. KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterDecoderTpl);
  500. std::shared_ptr<funasr::BiasLm> bias_lm_ = nullptr;
  501. };
  502. typedef LatticeFasterDecoderTpl<fst::StdFst, decoder::StdToken> LatticeFasterDecoder;
  503. } // end namespace kaldi.
  504. #endif