lattice-incremental-decoder.h 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734
  1. // decoder/lattice-incremental-decoder.h
  2. // Copyright 2019 Zhehuai Chen, Daniel Povey
  3. // See ../../COPYING for clarification regarding multiple authors
  4. //
  5. // Licensed under the Apache License, Version 2.0 (the "License");
  6. // you may not use this file except in compliance with the License.
  7. // You may obtain a copy of the License at
  8. //
  9. // http://www.apache.org/licenses/LICENSE-2.0
  10. //
  11. // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  12. // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
  13. // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
  14. // MERCHANTABLITY OR NON-INFRINGEMENT.
  15. // See the Apache 2 License for the specific language governing permissions and
  16. // limitations under the License.
  17. #ifndef KALDI_DECODER_LATTICE_INCREMENTAL_DECODER_H_
  18. #define KALDI_DECODER_LATTICE_INCREMENTAL_DECODER_H_
  19. #include "util/stl-utils.h"
  20. #include "util/hash-list.h"
  21. #include "fst/fstlib.h"
  22. #include "itf/decodable-itf.h"
  23. #include "fstext/fstext-lib.h"
  24. #include "lat/determinize-lattice-pruned.h"
  25. #include "lat/kaldi-lattice.h"
  26. #include "decoder/grammar-fst.h"
  27. #include "decoder/lattice-faster-decoder.h"
  28. namespace kaldi {
  29. /**
  30. The normal decoder, lattice-faster-decoder.h, sometimes has an issue when
  31. doing real-time applications with long utterances, that each time you get the
  32. lattice the lattice determinization can take a considerable amount of time;
  33. this introduces latency. This version of the decoder spreads the work of
  34. lattice determinization out throughout the decoding process.
  35. NOTE:
  36. Please see https://www.danielpovey.com/files/ *TBD* .pdf for a technical
  37. explanation of what is going on here.
  38. GLOSSARY OF TERMS:
  39. chunk: We do the determinization on chunks of frames; these
  40. may coincide with the chunks on which the user calls
  41. AdvanceDecoding(). The basic idea is to extract chunks
  42. of the raw lattice and determinize them individually, but
  43. it gets much more complicated than that. The chunks
  44. should normally be at least as long as a word (let's say,
  45. at least 20 frames), or the overhead of this algorithm
  46. might become excessive and affect RTF.
  47. raw lattice chunk: A chunk of raw (i.e. undeterminized) lattice
  48. that we will determinize. In the paper this corresponds
  49. to the FST B that is described in Section 5.2.
  50. token_label, state_label / token-label, state-label:
  51. In the paper these are both referred to as `state labels` (these are
  52. special, large integer id's that refer to states in the undeterminized
  53. lattice and in the the determinized lattice); but we use two separate
  54. terms here, for more clarity, when referring to the undeterminized
  55. vs. determinized lattice.
  56. token_label conceptually refers to states in the
  57. raw lattice, but we don't materialize the entire
  58. raw lattice as a physical FST and and these tokens
  59. are actually tokens (template type Token) held by
  60. the decoder
  61. state_label when used in this code refers specifically
  62. to labels that identify states in the determinized
  63. lattice (i.e. state indexes in lat_).
  64. token-final state
  65. A state in a raw lattice or in a determinized chunk that has an arc
  66. entering it that has a `token-label` on it (as defined above).
  67. These states will have nonzero final-probs.
  68. redeterminized-non-splice-state, aka ns_redet:
  69. A redeterminized state which is not also a splice state;
  70. refer to the paper for explanation. In the already-determinized
  71. part this means a redeterminized state which is not final.
  72. canonical appended lattice: This is the appended compact lattice
  73. that we conceptually have (i.e. what we described in the paper).
  74. The difference from the "actual appended lattice" stored
  75. in LatticeIncrementalDeterminizer::clat_ is that the
  76. actual appended lattice has all its final-arcs replaced with
  77. final-probs, and we keep the real final-arcs "on the side" in a
  78. separate data structure. The final-probs in clat_ aren't
  79. necessarily related to the costs on the final-arcs; instead
  80. they can have arbitrary values passed in by the user (e.g.
  81. if we want to include final-probs). This means that the
  82. clat_ can be returned without modification to the user who wants
  83. a partially determinized result.
  84. final-arc: An arc in the canonical appended CompactLattice which
  85. goes to a final-state. These arcs will have `state-labels` as
  86. their labels.
  87. */
  88. struct LatticeIncrementalDecoderConfig {
  89. // All the configuration values until det_opts are the same as in
  90. // LatticeFasterDecoder. For clarity we repeat them rather than inheriting.
  91. BaseFloat beam;
  92. int32 max_active;
  93. int32 min_active;
  94. BaseFloat lattice_beam;
  95. int32 prune_interval;
  96. BaseFloat beam_delta; // has nothing to do with beam_ratio
  97. BaseFloat hash_ratio;
  98. BaseFloat prune_scale; // Note: we don't make this configurable on the command line,
  99. // it's not a very important parameter. It affects the
  100. // algorithm that prunes the tokens as we go.
  101. // Most of the options inside det_opts are not actually queried by the
  102. // LatticeIncrementalDecoder class itself, but by the code that calls it, for
  103. // example in the function DecodeUtteranceLatticeIncremental.
  104. fst::DeterminizeLatticePhonePrunedOptions det_opts;
  105. // The configuration values from this point on are specific to the
  106. // incremental determinization. See where they are registered for
  107. // explanation.
  108. // Caution: these are only inspected in UpdateLatticeDeterminization().
  109. // If you call
  110. int32 determinize_max_delay;
  111. int32 determinize_min_chunk_size;
  112. int32 determinize_max_active;
  113. LatticeIncrementalDecoderConfig()
  114. : beam(16.0),
  115. max_active(std::numeric_limits<int32>::max()),
  116. min_active(200),
  117. lattice_beam(10.0),
  118. prune_interval(25),
  119. beam_delta(0.5),
  120. hash_ratio(2.0),
  121. prune_scale(0.01),
  122. determinize_max_delay(60),
  123. determinize_min_chunk_size(20),
  124. determinize_max_active(200) {
  125. det_opts.minimize = false;
  126. }
  127. void Register(OptionsItf *opts) {
  128. det_opts.Register(opts);
  129. opts->Register("beam", &beam, "Decoding beam. Larger->slower, more accurate.");
  130. opts->Register("max-active", &max_active,
  131. "Decoder max active states. Larger->slower; "
  132. "more accurate");
  133. opts->Register("min-active", &min_active, "Decoder minimum #active states.");
  134. opts->Register("lattice-beam", &lattice_beam,
  135. "Lattice generation beam. Larger->slower, "
  136. "and deeper lattices");
  137. opts->Register("prune-interval", &prune_interval,
  138. "Interval (in frames) at "
  139. "which to prune tokens");
  140. opts->Register("beam-delta", &beam_delta,
  141. "Increment used in decoding-- this "
  142. "parameter is obscure and relates to a speedup in the way the "
  143. "max-active constraint is applied. Larger is more accurate.");
  144. opts->Register("hash-ratio", &hash_ratio,
  145. "Setting used in decoder to "
  146. "control hash behavior");
  147. opts->Register("determinize-max-delay", &determinize_max_delay,
  148. "Maximum frames of delay between decoding a frame and "
  149. "determinizing it");
  150. opts->Register("determinize-min-chunk-size", &determinize_min_chunk_size,
  151. "Minimum chunk size used in determinization");
  152. opts->Register("determinize-max-active", &determinize_max_active,
  153. "Maximum number of active tokens to update determinization");
  154. }
  155. void Check() const {
  156. if (!(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 &&
  157. min_active <= max_active && prune_interval > 0 &&
  158. beam_delta > 0.0 && hash_ratio >= 1.0 &&
  159. prune_scale > 0.0 && prune_scale < 1.0 &&
  160. determinize_max_delay > determinize_min_chunk_size &&
  161. determinize_min_chunk_size > 0 &&
  162. determinize_max_active >= 0))
  163. KALDI_ERR << "Invalid options given to decoder";
  164. /* Minimization of the chunks is not compatible withour algorithm (or at
  165. least, would require additional complexity to implement.) */
  166. if (det_opts.minimize || !det_opts.word_determinize)
  167. KALDI_ERR << "Invalid determinization options given to decoder.";
  168. }
  169. };
  170. /**
  171. This class is used inside LatticeIncrementalDecoderTpl; it handles
  172. some of the details of incremental determinization.
  173. https://www.danielpovey.com/files/ *TBD*.pdf for the paper.
  174. */
  175. class LatticeIncrementalDeterminizer {
  176. public:
  177. using Label = typename LatticeArc::Label; /* Actualy the same labels appear
  178. in both lattice and compact
  179. lattice, so we don't use the
  180. specific type all the time but
  181. just say 'Label' */
  182. LatticeIncrementalDeterminizer(
  183. const TransitionInformation &trans_model,
  184. const LatticeIncrementalDecoderConfig &config):
  185. trans_model_(trans_model), config_(config) { }
  186. // Resets the lattice determinization data for new utterance
  187. void Init();
  188. // Returns the current determinized lattice.
  189. const CompactLattice &GetDeterminizedLattice() const { return clat_; }
  190. /**
  191. Starts the process of creating a raw lattice chunk. (Search the glossary
  192. for "raw lattice chunk"). This just sets up the initial states and
  193. redeterminized-states in the chunk. Relates to sec. 5.2 in the paper,
  194. specifically the initial-state i and the redeterminized-states.
  195. After calling this, the caller would add the remaining arcs and states
  196. to `olat` and then call AcceptRawLatticeChunk() with the result.
  197. @param [out] olat The lattice to be (partially) created
  198. @param [out] token_label2state This function outputs to here
  199. a map from `token-label` to the state we created for
  200. it in *olat. See glossary for `token-label`.
  201. The keys actually correspond to the .nextstate fields
  202. in the arcs in final_arcs_; values are states in `olat`.
  203. See the last bullet point before Sec. 5.3 in the paper.
  204. */
  205. void InitializeRawLatticeChunk(
  206. Lattice *olat,
  207. unordered_map<Label, LatticeArc::StateId> *token_label2state);
  208. /**
  209. This function accepts the raw FST (state-level lattice) corresponding to a
  210. single chunk of the lattice, determinizes it and appends it to this->clat_.
  211. Unless this was the
  212. Note: final-probs in `raw_fst` are treated specially: they are used to
  213. guide the pruned determinization, but when you call GetLattice() it will be
  214. -- except for pruning effects-- as if all nonzero final-probs in `raw_fst`
  215. were: One() if final_costs == NULL; else the value present in `final_costs`.
  216. @param [in] raw_fst (Consumed destructively). The input
  217. raw (state-level) lattice. Would correspond to the
  218. FST A in the paper if first_frame == 0, and B
  219. otherwise.
  220. @return returns false if determinization finished earlier than the beam
  221. or the determinized lattice was empty; true otherwise.
  222. NOTE: if this is not the final chunk, you will probably want to call
  223. SetFinalCosts() directly after calling this.
  224. */
  225. bool AcceptRawLatticeChunk(Lattice *raw_fst);
  226. /*
  227. Sets final-probs in `clat_`. Must only be called if the final chunk
  228. has not been processed. (The final chunk is whenever GetLattice() is
  229. called with finalize == true).
  230. The reason this is a separate function from AcceptRawLatticeChunk() is that
  231. there may be situations where a user wants to get the latice with
  232. final-probs in it, after previously getting it without final-probs; or
  233. vice versa. By final-probs, we mean the Final() probabilities in the
  234. HCLG (decoding graph; this->fst_).
  235. @param [in] token_label2final_cost A map from the token-label
  236. corresponding to Tokens active on the final frame of the
  237. lattice in the object, to the final-cost we want to use for
  238. those tokens. If NULL, it means all Tokens should be treated
  239. as final with probability One(). If non-NULL, and a particular
  240. token-label is not a key of this map, it means that Token
  241. corresponded to a state that was not final in HCLG; and
  242. such tokens will be treated as non-final. However,
  243. if this would result in no states in the lattice being final,
  244. we will treat all Tokens as final with probability One(),
  245. a warning will be printed (this should not happen.)
  246. */
  247. void SetFinalCosts(const unordered_map<Label, BaseFloat> *token_label2final_cost = NULL);
  248. const CompactLattice &GetLattice() { return clat_; }
  249. // kStateLabelOffset is what we add to state-ids in clat_ to produce labels
  250. // to identify them in the raw lattice chunk
  251. // kTokenLabelOffset is where we start allocating labels corresponding to Tokens
  252. // (these correspond with raw lattice states);
  253. enum { kStateLabelOffset = (int)1e8, kTokenLabelOffset = (int)2e8, kMaxTokenLabel = (int)3e8 };
  254. private:
  255. // [called from AcceptRawLatticeChunk()]
  256. // Gets the final costs from token-final states in the raw lattice (see
  257. // glossary for definition). These final costs will be subtracted after
  258. // determinization; in the normal case they are `temporaries` used to guide
  259. // pruning. NOTE: the index of the array is not the FST state that is final,
  260. // but the label on arcs entering it (these will be `token-labels`). Each
  261. // token-final state will have the same label on all arcs entering it.
  262. //
  263. // `old_final_costs` is assumed to be empty at entry.
  264. void GetRawLatticeFinalCosts(const Lattice &raw_fst,
  265. std::unordered_map<Label, BaseFloat> *old_final_costs);
  266. // Sets up non_final_redet_states_. See documentation for that variable.
  267. void GetNonFinalRedetStates();
  268. /** [called from AcceptRawLatticeChunk()] Processes arcs that leave the
  269. start-state of `chunk_clat` (if this is not the first chunk); does nothing
  270. if this is the first chunk. This includes using the `state-labels` to
  271. work out which states in clat_ these states correspond to, and writing
  272. that mapping to `state_map`.
  273. Also modifies forward_costs_, because it has to do a kind of reweighting
  274. of the clat states that are the values it puts in `state_map`, to take
  275. account of the probabilities on the arcs from the start state of
  276. chunk_clat to the states corresponding to those redeterminized-states
  277. (i.e. the states in clat corresponding to the values it puts in
  278. `*state_map`). It also modifies arcs_in_, mostly because there
  279. are rare cases when we end up `merging` sets of those redeterminized-states,
  280. because the determinization process mapped them to a single state,
  281. and that means we need to reroute the arcs into members of that
  282. set into one single member (which will appear as a value in
  283. `*state_map`).
  284. @param [in] chunk_clat The determinized chunk of lattice we are
  285. processing
  286. @param [out] state_map Mapping from states in chunk_clat to
  287. the state in clat_ they correspond to.
  288. @return Returns true if this is the first chunk.
  289. */
  290. bool ProcessArcsFromChunkStartState(
  291. const CompactLattice &chunk_clat,
  292. std::unordered_map<CompactLattice::StateId, CompactLattice::StateId> *state_map);
  293. /**
  294. This function, called from AcceptRawLatticeChunk(), transfers arcs from
  295. `chunk_clat` to clat_. For those arcs that have `token-labels` on them,
  296. they don't get written to clat_ but instead are stored in the arcs_ array.
  297. @param [in] chunk_clat The determinized lattice for the chunk
  298. we are processing; this is the source of the arcs
  299. we are moving.
  300. @param [in] is_first_chunk True if this is the first chunk in the
  301. utterance; it's needed because if it is, we
  302. will also transfer arcs from the start state of
  303. chunk_clat.
  304. @param [in] state_map Map from state-ids in chunk_clat to state-ids
  305. in clat_.
  306. @param [in] chunk_state_to_token Map from `token-final states`
  307. (see glossary) in chunk_clat, to the token-label
  308. on arcs entering those states.
  309. @param [in] old_final_costs Map from token-label to the
  310. final-costs that were on the corresponding
  311. token-final states in the undeterminized lattice;
  312. these final-costs need to be removed when
  313. we record the weights in final_arcs_, because
  314. they were just temporary.
  315. */
  316. void TransferArcsToClat(
  317. const CompactLattice &chunk_clat,
  318. bool is_first_chunk,
  319. const std::unordered_map<CompactLattice::StateId, CompactLattice::StateId> &state_map,
  320. const std::unordered_map<CompactLattice::StateId, Label> &chunk_state_to_token,
  321. const std::unordered_map<Label, BaseFloat> &old_final_costs);
  322. /**
  323. Adds one arc to `clat_`. It's like clat_.AddArc(state, arc), except
  324. it also modifies arcs_in_ and forward_costs_.
  325. */
  326. void AddArcToClat(CompactLattice::StateId state,
  327. const CompactLatticeArc &arc);
  328. CompactLattice::StateId AddStateToClat();
  329. // Identifies token-final states in `chunk_clat`; see glossary above for
  330. // definition of `token-final`. This function outputs a map from such states
  331. // in chunk_clat, to the `token-label` on arcs entering them. (It is not
  332. // possible that the same state would have multiple arcs entering it with
  333. // different token-labels, or some arcs entering with one token-label and some
  334. // another, or be both initial and have such arcs; this is true due to how we
  335. // construct the raw lattice.)
  336. void IdentifyTokenFinalStates(
  337. const CompactLattice &chunk_clat,
  338. std::unordered_map<CompactLattice::StateId, CompactLatticeArc::Label> *token_map) const;
  339. // trans_model_ is needed by DeterminizeLatticePhonePrunedWrapper() which this
  340. // class calls.
  341. const TransitionInformation &trans_model_;
  342. // config_ is needed by DeterminizeLatticePhonePrunedWrapper() which this
  343. // class calls.
  344. const LatticeIncrementalDecoderConfig &config_;
  345. // Contains the set of redeterminized-states which are not final in the
  346. // canonical appended lattice. Since the final ones don't physically appear
  347. // in clat_, this means the set of redeterminized-states which are physically
  348. // in clat_. In code terms, this means set of .first elements in final_arcs,
  349. // plus whatever other states in clat_ are reachable from such states.
  350. std::unordered_set<CompactLattice::StateId> non_final_redet_states_;
  351. // clat_ is the appended lattice (containing all chunks processed so
  352. // far), except its `final-arcs` (i.e. arcs which in the canonical
  353. // lattice would go to final-states) are not present (they are stored
  354. // separately in final_arcs_) and states which in the canonical lattice
  355. // should have final-arcs leaving them will instead have a final-prob.
  356. CompactLattice clat_;
  357. // arcs_in_ is indexed by (state-id in clat_), and is a list of
  358. // arcs that come into this state, in the form (prev-state,
  359. // arc-index). CAUTION: not all these input-arc records will always
  360. // be valid (some may be out-of-date, and may refer to an out-of-range
  361. // arc or an arc that does not point to this state). But all
  362. // input arcs will always be listed.
  363. std::vector<std::vector<std::pair<CompactLattice::StateId, int32> > > arcs_in_;
  364. // final_arcs_ contains arcs which would appear in the canonical appended
  365. // lattice but for implementation reasons are not physically present in clat_.
  366. // These are arcs to final states in the canonical appended lattice. The
  367. // .first elements are the source states in clat_ (these will all be elements
  368. // of non_final_redet_states_); the .nextstate elements of the arcs does not
  369. // contain a physical state, but contain state-labels allocated by
  370. // AllocateNewStateLabel().
  371. std::vector<CompactLatticeArc> final_arcs_;
  372. // forward_costs_, indexed by the state-id in clat_, stores the alpha
  373. // (forward) costs, i.e. the minimum cost from the start state to each state
  374. // in clat_. This is relevant for pruned determinization. The BaseFloat can
  375. // be thought of as the sum of a Value1() + Value2() in a LatticeWeight.
  376. std::vector<BaseFloat> forward_costs_;
  377. // temporary used in a function, kept here to avoid excessive reallocation.
  378. std::unordered_set<int32> temp_;
  379. KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeIncrementalDeterminizer);
  380. };
  381. /** This is an extention to the "normal" lattice-generating decoder.
  382. See \ref lattices_generation \ref decoders_faster and \ref decoders_simple
  383. for more information.
  384. The main difference is the incremental determinization which will be
  385. discussed in the function GetLattice(). This means that the work of determinizatin
  386. isn't done all at once at the end of the file, but incrementally while decoding.
  387. See the comment at the top of this file for more explanation.
  388. The decoder is templated on the FST type and the token type. The token type
  389. will normally be StdToken, but also may be BackpointerToken which is to support
  390. quick lookup of the current best path (see lattice-faster-online-decoder.h)
  391. The FST you invoke this decoder with is expected to be of type
  392. Fst::Fst<fst::StdArc>, a.k.a. StdFst, or GrammarFst. If you invoke it with
  393. FST == StdFst and it notices that the actual FST type is
  394. fst::VectorFst<fst::StdArc> or fst::ConstFst<fst::StdArc>, the decoder object
  395. will internally cast itself to one that is templated on those more specific
  396. types; this is an optimization for speed.
  397. */
  398. template <typename FST, typename Token = decoder::StdToken>
  399. class LatticeIncrementalDecoderTpl {
  400. public:
  401. using Arc = typename FST::Arc;
  402. using Label = typename Arc::Label;
  403. using StateId = typename Arc::StateId;
  404. using Weight = typename Arc::Weight;
  405. using ForwardLinkT = decoder::ForwardLink<Token>;
  406. // Instantiate this class once for each thing you have to decode.
  407. // This version of the constructor does not take ownership of
  408. // 'fst'.
  409. LatticeIncrementalDecoderTpl(const FST &fst, const TransitionInformation &trans_model,
  410. const LatticeIncrementalDecoderConfig &config);
  411. // This version of the constructor takes ownership of the fst, and will delete
  412. // it when this object is destroyed.
  413. LatticeIncrementalDecoderTpl(const LatticeIncrementalDecoderConfig &config,
  414. FST *fst, const TransitionInformation &trans_model);
  415. void SetOptions(const LatticeIncrementalDecoderConfig &config) { config_ = config; }
  416. const LatticeIncrementalDecoderConfig &GetOptions() const { return config_; }
  417. ~LatticeIncrementalDecoderTpl();
  418. /**
  419. CAUTION: it's unlikely that you will ever want to call this function. In a
  420. scenario where you have the entire file and just want to decode it, there
  421. is no point using this decoder.
  422. An example of how to do decoding together with incremental
  423. determinization. It decodes until there are no more frames left in the
  424. "decodable" object.
  425. In this example, config_.determinize_max_delay, config_.determinize_min_chunk_size
  426. and config_.determinize_max_active are used to determine the time to
  427. call GetLattice().
  428. Users will probably want to use appropriate combinations of
  429. AdvanceDecoding() and GetLattice() to build their application; this just
  430. gives you some idea how.
  431. The function returns true if any kind of traceback is available (not
  432. necessarily from a final state).
  433. */
  434. bool Decode(DecodableInterface *decodable);
  435. /// says whether a final-state was active on the last frame. If it was not,
  436. /// the lattice (or traceback) will end with states that are not final-states.
  437. bool ReachedFinal() const {
  438. return FinalRelativeCost() != std::numeric_limits<BaseFloat>::infinity();
  439. }
  440. /**
  441. This decoder has no GetBestPath() function.
  442. If you need that functionality you should probably use lattice-incremental-online-decoder.h,
  443. which makes it very efficient to obtain the best path. */
  444. /**
  445. This GetLattice() function returns the lattice containing
  446. `num_frames_to_decode` frames; this will be all frames decoded so
  447. far, if you let num_frames_to_decode == NumFramesDecoded(),
  448. but it will generally be better to make it a few frames less than
  449. that to avoid the lattice having too many active states at
  450. the end.
  451. @param [in] num_frames_to_include The number of frames that you want
  452. to be included in the lattice. Must be >=
  453. NumFramesInLattice() and <= NumFramesDecoded().
  454. @param [in] use_final_probs True if you want the final-probs
  455. of HCLG to be included in the output lattice. Must not
  456. be set to true if num_frames_to_include !=
  457. NumFramesDecoded(). Must be set to true if you have
  458. previously called FinalizeDecoding().
  459. (If no state was final on frame `num_frames_to_include`, the
  460. final-probs won't be included regardless of
  461. `use_final_probs`; you can test whether this
  462. was the case by calling ReachedFinal().
  463. @return clat The CompactLattice representing what has been decoded
  464. up until `num_frames_to_include` (e.g., LatticeStateTimes()
  465. on this lattice would return `num_frames_to_include`).
  466. See also UpdateLatticeDeterminizaton(). Caution: this const ref
  467. is only valid until the next time you call AdvanceDecoding() or
  468. GetLattice().
  469. CAUTION: the lattice may contain disconnnected states; you should
  470. call Connect() on the output before writing it out.
  471. */
  472. const CompactLattice &GetLattice(int32 num_frames_to_include,
  473. bool use_final_probs = false);
  474. /*
  475. Returns the number of frames in the currently-determinized part of the
  476. lattice which will be a number in [0, NumFramesDecoded()]. It will
  477. be the largest number that GetLattice() was called with, but note
  478. that GetLattice() may be called from UpdateLatticeDeterminization().
  479. Made available in case the user wants to give that same number to
  480. GetLattice().
  481. */
  482. int NumFramesInLattice() const { return num_frames_in_lattice_; }
  483. /**
  484. InitDecoding initializes the decoding, and should only be used if you
  485. intend to call AdvanceDecoding(). If you call Decode(), you don't need to
  486. call this. You can also call InitDecoding if you have already decoded an
  487. utterance and want to start with a new utterance.
  488. */
  489. void InitDecoding();
  490. /**
  491. This will decode until there are no more frames ready in the decodable
  492. object. You can keep calling it each time more frames become available
  493. (this is the normal pattern in a real-time/online decoding scenario).
  494. If max_num_frames is specified, it specifies the maximum number of frames
  495. the function will decode before returning.
  496. */
  497. void AdvanceDecoding(DecodableInterface *decodable, int32 max_num_frames = -1);
  498. /** FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives
  499. more information. It returns the difference between the best (final-cost
  500. plus cost) of any token on the final frame, and the best cost of any token
  501. on the final frame. If it is infinity it means no final-states were
  502. present on the final frame. It will usually be nonnegative. If it not
  503. too positive (e.g. < 5 is my first guess, but this is not tested) you can
  504. take it as a good indication that we reached the final-state with
  505. reasonable likelihood. */
  506. BaseFloat FinalRelativeCost() const;
  507. /** Returns the number of frames decoded so far. */
  508. inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; }
  509. /**
  510. Finalizes the decoding, doing an extra pruning step on the last frame
  511. that uses the final-probs. May be called only once.
  512. */
  513. void FinalizeDecoding();
  514. protected:
  515. /* Some protected things are needed in LatticeIncrementalOnlineDecoderTpl. */
  516. /** NOTE: for parts the internal implementation that are shared with LatticeFasterDecoer,
  517. we have removed the comments.*/
  518. inline static void DeleteForwardLinks(Token *tok);
  519. struct TokenList {
  520. Token *toks;
  521. bool must_prune_forward_links;
  522. bool must_prune_tokens;
  523. int32 num_toks; /* Note: you can only trust `num_toks` if must_prune_tokens
  524. * == false, because it is only set in
  525. * PruneTokensForFrame(). */
  526. TokenList()
  527. : toks(NULL), must_prune_forward_links(true), must_prune_tokens(true),
  528. num_toks(-1) {}
  529. };
  530. using Elem = typename HashList<StateId, Token *>::Elem;
  531. void PossiblyResizeHash(size_t num_toks);
  532. inline Token *FindOrAddToken(StateId state, int32 frame_plus_one,
  533. BaseFloat tot_cost, Token *backpointer, bool *changed);
  534. void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed,
  535. bool *links_pruned, BaseFloat delta);
  536. void ComputeFinalCosts(unordered_map<Token *, BaseFloat> *final_costs,
  537. BaseFloat *final_relative_cost,
  538. BaseFloat *final_best_cost) const;
  539. void PruneForwardLinksFinal();
  540. void PruneTokensForFrame(int32 frame_plus_one);
  541. void PruneActiveTokens(BaseFloat delta);
  542. BaseFloat GetCutoff(Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam,
  543. Elem **best_elem);
  544. BaseFloat ProcessEmitting(DecodableInterface *decodable);
  545. void ProcessNonemitting(BaseFloat cost_cutoff);
  546. HashList<StateId, Token *> toks_;
  547. std::vector<TokenList> active_toks_; // indexed by frame.
  548. std::vector<StateId> queue_; // temp variable used in ProcessNonemitting,
  549. std::vector<BaseFloat> tmp_array_; // used in GetCutoff.
  550. const FST *fst_;
  551. bool delete_fst_;
  552. std::vector<BaseFloat> cost_offsets_;
  553. int32 num_toks_;
  554. bool warned_;
  555. bool decoding_finalized_;
  556. unordered_map<Token *, BaseFloat> final_costs_;
  557. BaseFloat final_relative_cost_;
  558. BaseFloat final_best_cost_;
  559. /***********************
  560. Variables below this point relate to the incremental
  561. determinization.
  562. *********************/
  563. LatticeIncrementalDecoderConfig config_;
  564. /** Much of the the incremental determinization algorithm is encapsulated in
  565. the determinize_ object. */
  566. LatticeIncrementalDeterminizer determinizer_;
  567. /* Just a temporary used in a function; stored here to avoid reallocation. */
  568. unordered_map<Token*, StateId> temp_token_map_;
  569. /** num_frames_in_lattice_ is the highest `num_frames_to_include_` argument
  570. for any prior call to GetLattice(). */
  571. int32 num_frames_in_lattice_;
  572. // A map from Token to its token_label. Will contain an entry for
  573. // each Token in active_toks_[num_frames_in_lattice_].
  574. unordered_map<Token*, Label> token2label_map_;
  575. // A temporary used in a function, kept here to avoid reallocation.
  576. unordered_map<Token*, Label> token2label_map_temp_;
  577. // we allocate a unique id for each Token
  578. Label next_token_label_;
  579. inline Label AllocateNewTokenLabel() { return next_token_label_++; }
  580. // There are various cleanup tasks... the the toks_ structure contains
  581. // singly linked lists of Token pointers, where Elem is the list type.
  582. // It also indexes them in a hash, indexed by state (this hash is only
  583. // maintained for the most recent frame). toks_.Clear()
  584. // deletes them from the hash and returns the list of Elems. The
  585. // function DeleteElems calls toks_.Delete(elem) for each elem in
  586. // the list, which returns ownership of the Elem to the toks_ structure
  587. // for reuse, but does not delete the Token pointer. The Token pointers
  588. // are reference-counted and are ultimately deleted in PruneTokensForFrame,
  589. // but are also linked together on each frame by their own linked-list,
  590. // using the "next" pointer. We delete them manually.
  591. void DeleteElems(Elem *list);
  592. void ClearActiveTokens();
  593. // Returns the number of active tokens on frame `frame`. Can be used as part
  594. // of a heuristic to decide which frame to determinize until, if you are not
  595. // at the end of an utterance.
  596. int32 GetNumToksForFrame(int32 frame);
  597. /**
  598. UpdateLatticeDeterminization() ensures the work of determinization is kept
  599. up to date so that when you do need the lattice you can get it fast. It
  600. uses the configuration values `determinize_max_delay`, `determinize_min_chunk_size`
  601. and `determinize_max_active`, to decide whether and when to call
  602. GetLattice(). You can safely call this as often as you want (e.g. after
  603. each time you call AdvanceDecoding(); it won't do subtantially more work if
  604. it is called frequently.
  605. */
  606. void UpdateLatticeDeterminization();
  607. KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeIncrementalDecoderTpl);
  608. };
  609. typedef LatticeIncrementalDecoderTpl<fst::StdFst, decoder::StdToken>
  610. LatticeIncrementalDecoder;
  611. } // end namespace kaldi.
  612. #endif