| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734 |
- // decoder/lattice-incremental-decoder.h
- // Copyright 2019 Zhehuai Chen, Daniel Povey
- // See ../../COPYING for clarification regarding multiple authors
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
- // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
- // MERCHANTABLITY OR NON-INFRINGEMENT.
- // See the Apache 2 License for the specific language governing permissions and
- // limitations under the License.
- #ifndef KALDI_DECODER_LATTICE_INCREMENTAL_DECODER_H_
- #define KALDI_DECODER_LATTICE_INCREMENTAL_DECODER_H_
- #include "util/stl-utils.h"
- #include "util/hash-list.h"
- #include "fst/fstlib.h"
- #include "itf/decodable-itf.h"
- #include "fstext/fstext-lib.h"
- #include "lat/determinize-lattice-pruned.h"
- #include "lat/kaldi-lattice.h"
- #include "decoder/grammar-fst.h"
- #include "decoder/lattice-faster-decoder.h"
- namespace kaldi {
- /**
- The normal decoder, lattice-faster-decoder.h, sometimes has an issue when
- doing real-time applications with long utterances, that each time you get the
- lattice the lattice determinization can take a considerable amount of time;
- this introduces latency. This version of the decoder spreads the work of
- lattice determinization out throughout the decoding process.
- NOTE:
- Please see https://www.danielpovey.com/files/ *TBD* .pdf for a technical
- explanation of what is going on here.
- GLOSSARY OF TERMS:
- chunk: We do the determinization on chunks of frames; these
- may coincide with the chunks on which the user calls
- AdvanceDecoding(). The basic idea is to extract chunks
- of the raw lattice and determinize them individually, but
- it gets much more complicated than that. The chunks
- should normally be at least as long as a word (let's say,
- at least 20 frames), or the overhead of this algorithm
- might become excessive and affect RTF.
- raw lattice chunk: A chunk of raw (i.e. undeterminized) lattice
- that we will determinize. In the paper this corresponds
- to the FST B that is described in Section 5.2.
- token_label, state_label / token-label, state-label:
- In the paper these are both referred to as `state labels` (these are
- special, large integer id's that refer to states in the undeterminized
- lattice and in the the determinized lattice); but we use two separate
- terms here, for more clarity, when referring to the undeterminized
- vs. determinized lattice.
- token_label conceptually refers to states in the
- raw lattice, but we don't materialize the entire
- raw lattice as a physical FST and and these tokens
- are actually tokens (template type Token) held by
- the decoder
- state_label when used in this code refers specifically
- to labels that identify states in the determinized
- lattice (i.e. state indexes in lat_).
- token-final state
- A state in a raw lattice or in a determinized chunk that has an arc
- entering it that has a `token-label` on it (as defined above).
- These states will have nonzero final-probs.
- redeterminized-non-splice-state, aka ns_redet:
- A redeterminized state which is not also a splice state;
- refer to the paper for explanation. In the already-determinized
- part this means a redeterminized state which is not final.
- canonical appended lattice: This is the appended compact lattice
- that we conceptually have (i.e. what we described in the paper).
- The difference from the "actual appended lattice" stored
- in LatticeIncrementalDeterminizer::clat_ is that the
- actual appended lattice has all its final-arcs replaced with
- final-probs, and we keep the real final-arcs "on the side" in a
- separate data structure. The final-probs in clat_ aren't
- necessarily related to the costs on the final-arcs; instead
- they can have arbitrary values passed in by the user (e.g.
- if we want to include final-probs). This means that the
- clat_ can be returned without modification to the user who wants
- a partially determinized result.
- final-arc: An arc in the canonical appended CompactLattice which
- goes to a final-state. These arcs will have `state-labels` as
- their labels.
- */
- struct LatticeIncrementalDecoderConfig {
- // All the configuration values until det_opts are the same as in
- // LatticeFasterDecoder. For clarity we repeat them rather than inheriting.
- BaseFloat beam;
- int32 max_active;
- int32 min_active;
- BaseFloat lattice_beam;
- int32 prune_interval;
- BaseFloat beam_delta; // has nothing to do with beam_ratio
- BaseFloat hash_ratio;
- BaseFloat prune_scale; // Note: we don't make this configurable on the command line,
- // it's not a very important parameter. It affects the
- // algorithm that prunes the tokens as we go.
- // Most of the options inside det_opts are not actually queried by the
- // LatticeIncrementalDecoder class itself, but by the code that calls it, for
- // example in the function DecodeUtteranceLatticeIncremental.
- fst::DeterminizeLatticePhonePrunedOptions det_opts;
- // The configuration values from this point on are specific to the
- // incremental determinization. See where they are registered for
- // explanation.
- // Caution: these are only inspected in UpdateLatticeDeterminization().
- // If you call
- int32 determinize_max_delay;
- int32 determinize_min_chunk_size;
- int32 determinize_max_active;
- LatticeIncrementalDecoderConfig()
- : beam(16.0),
- max_active(std::numeric_limits<int32>::max()),
- min_active(200),
- lattice_beam(10.0),
- prune_interval(25),
- beam_delta(0.5),
- hash_ratio(2.0),
- prune_scale(0.01),
- determinize_max_delay(60),
- determinize_min_chunk_size(20),
- determinize_max_active(200) {
- det_opts.minimize = false;
- }
- void Register(OptionsItf *opts) {
- det_opts.Register(opts);
- opts->Register("beam", &beam, "Decoding beam. Larger->slower, more accurate.");
- opts->Register("max-active", &max_active,
- "Decoder max active states. Larger->slower; "
- "more accurate");
- opts->Register("min-active", &min_active, "Decoder minimum #active states.");
- opts->Register("lattice-beam", &lattice_beam,
- "Lattice generation beam. Larger->slower, "
- "and deeper lattices");
- opts->Register("prune-interval", &prune_interval,
- "Interval (in frames) at "
- "which to prune tokens");
- opts->Register("beam-delta", &beam_delta,
- "Increment used in decoding-- this "
- "parameter is obscure and relates to a speedup in the way the "
- "max-active constraint is applied. Larger is more accurate.");
- opts->Register("hash-ratio", &hash_ratio,
- "Setting used in decoder to "
- "control hash behavior");
- opts->Register("determinize-max-delay", &determinize_max_delay,
- "Maximum frames of delay between decoding a frame and "
- "determinizing it");
- opts->Register("determinize-min-chunk-size", &determinize_min_chunk_size,
- "Minimum chunk size used in determinization");
- opts->Register("determinize-max-active", &determinize_max_active,
- "Maximum number of active tokens to update determinization");
- }
- void Check() const {
- if (!(beam > 0.0 && max_active > 1 && lattice_beam > 0.0 &&
- min_active <= max_active && prune_interval > 0 &&
- beam_delta > 0.0 && hash_ratio >= 1.0 &&
- prune_scale > 0.0 && prune_scale < 1.0 &&
- determinize_max_delay > determinize_min_chunk_size &&
- determinize_min_chunk_size > 0 &&
- determinize_max_active >= 0))
- KALDI_ERR << "Invalid options given to decoder";
- /* Minimization of the chunks is not compatible withour algorithm (or at
- least, would require additional complexity to implement.) */
- if (det_opts.minimize || !det_opts.word_determinize)
- KALDI_ERR << "Invalid determinization options given to decoder.";
- }
- };
- /**
- This class is used inside LatticeIncrementalDecoderTpl; it handles
- some of the details of incremental determinization.
- https://www.danielpovey.com/files/ *TBD*.pdf for the paper.
- */
- class LatticeIncrementalDeterminizer {
- public:
- using Label = typename LatticeArc::Label; /* Actualy the same labels appear
- in both lattice and compact
- lattice, so we don't use the
- specific type all the time but
- just say 'Label' */
- LatticeIncrementalDeterminizer(
- const TransitionInformation &trans_model,
- const LatticeIncrementalDecoderConfig &config):
- trans_model_(trans_model), config_(config) { }
- // Resets the lattice determinization data for new utterance
- void Init();
- // Returns the current determinized lattice.
- const CompactLattice &GetDeterminizedLattice() const { return clat_; }
- /**
- Starts the process of creating a raw lattice chunk. (Search the glossary
- for "raw lattice chunk"). This just sets up the initial states and
- redeterminized-states in the chunk. Relates to sec. 5.2 in the paper,
- specifically the initial-state i and the redeterminized-states.
- After calling this, the caller would add the remaining arcs and states
- to `olat` and then call AcceptRawLatticeChunk() with the result.
- @param [out] olat The lattice to be (partially) created
- @param [out] token_label2state This function outputs to here
- a map from `token-label` to the state we created for
- it in *olat. See glossary for `token-label`.
- The keys actually correspond to the .nextstate fields
- in the arcs in final_arcs_; values are states in `olat`.
- See the last bullet point before Sec. 5.3 in the paper.
- */
- void InitializeRawLatticeChunk(
- Lattice *olat,
- unordered_map<Label, LatticeArc::StateId> *token_label2state);
- /**
- This function accepts the raw FST (state-level lattice) corresponding to a
- single chunk of the lattice, determinizes it and appends it to this->clat_.
- Unless this was the
- Note: final-probs in `raw_fst` are treated specially: they are used to
- guide the pruned determinization, but when you call GetLattice() it will be
- -- except for pruning effects-- as if all nonzero final-probs in `raw_fst`
- were: One() if final_costs == NULL; else the value present in `final_costs`.
- @param [in] raw_fst (Consumed destructively). The input
- raw (state-level) lattice. Would correspond to the
- FST A in the paper if first_frame == 0, and B
- otherwise.
- @return returns false if determinization finished earlier than the beam
- or the determinized lattice was empty; true otherwise.
- NOTE: if this is not the final chunk, you will probably want to call
- SetFinalCosts() directly after calling this.
- */
- bool AcceptRawLatticeChunk(Lattice *raw_fst);
- /*
- Sets final-probs in `clat_`. Must only be called if the final chunk
- has not been processed. (The final chunk is whenever GetLattice() is
- called with finalize == true).
- The reason this is a separate function from AcceptRawLatticeChunk() is that
- there may be situations where a user wants to get the latice with
- final-probs in it, after previously getting it without final-probs; or
- vice versa. By final-probs, we mean the Final() probabilities in the
- HCLG (decoding graph; this->fst_).
- @param [in] token_label2final_cost A map from the token-label
- corresponding to Tokens active on the final frame of the
- lattice in the object, to the final-cost we want to use for
- those tokens. If NULL, it means all Tokens should be treated
- as final with probability One(). If non-NULL, and a particular
- token-label is not a key of this map, it means that Token
- corresponded to a state that was not final in HCLG; and
- such tokens will be treated as non-final. However,
- if this would result in no states in the lattice being final,
- we will treat all Tokens as final with probability One(),
- a warning will be printed (this should not happen.)
- */
- void SetFinalCosts(const unordered_map<Label, BaseFloat> *token_label2final_cost = NULL);
- const CompactLattice &GetLattice() { return clat_; }
- // kStateLabelOffset is what we add to state-ids in clat_ to produce labels
- // to identify them in the raw lattice chunk
- // kTokenLabelOffset is where we start allocating labels corresponding to Tokens
- // (these correspond with raw lattice states);
- enum { kStateLabelOffset = (int)1e8, kTokenLabelOffset = (int)2e8, kMaxTokenLabel = (int)3e8 };
- private:
- // [called from AcceptRawLatticeChunk()]
- // Gets the final costs from token-final states in the raw lattice (see
- // glossary for definition). These final costs will be subtracted after
- // determinization; in the normal case they are `temporaries` used to guide
- // pruning. NOTE: the index of the array is not the FST state that is final,
- // but the label on arcs entering it (these will be `token-labels`). Each
- // token-final state will have the same label on all arcs entering it.
- //
- // `old_final_costs` is assumed to be empty at entry.
- void GetRawLatticeFinalCosts(const Lattice &raw_fst,
- std::unordered_map<Label, BaseFloat> *old_final_costs);
- // Sets up non_final_redet_states_. See documentation for that variable.
- void GetNonFinalRedetStates();
- /** [called from AcceptRawLatticeChunk()] Processes arcs that leave the
- start-state of `chunk_clat` (if this is not the first chunk); does nothing
- if this is the first chunk. This includes using the `state-labels` to
- work out which states in clat_ these states correspond to, and writing
- that mapping to `state_map`.
- Also modifies forward_costs_, because it has to do a kind of reweighting
- of the clat states that are the values it puts in `state_map`, to take
- account of the probabilities on the arcs from the start state of
- chunk_clat to the states corresponding to those redeterminized-states
- (i.e. the states in clat corresponding to the values it puts in
- `*state_map`). It also modifies arcs_in_, mostly because there
- are rare cases when we end up `merging` sets of those redeterminized-states,
- because the determinization process mapped them to a single state,
- and that means we need to reroute the arcs into members of that
- set into one single member (which will appear as a value in
- `*state_map`).
- @param [in] chunk_clat The determinized chunk of lattice we are
- processing
- @param [out] state_map Mapping from states in chunk_clat to
- the state in clat_ they correspond to.
- @return Returns true if this is the first chunk.
- */
- bool ProcessArcsFromChunkStartState(
- const CompactLattice &chunk_clat,
- std::unordered_map<CompactLattice::StateId, CompactLattice::StateId> *state_map);
- /**
- This function, called from AcceptRawLatticeChunk(), transfers arcs from
- `chunk_clat` to clat_. For those arcs that have `token-labels` on them,
- they don't get written to clat_ but instead are stored in the arcs_ array.
- @param [in] chunk_clat The determinized lattice for the chunk
- we are processing; this is the source of the arcs
- we are moving.
- @param [in] is_first_chunk True if this is the first chunk in the
- utterance; it's needed because if it is, we
- will also transfer arcs from the start state of
- chunk_clat.
- @param [in] state_map Map from state-ids in chunk_clat to state-ids
- in clat_.
- @param [in] chunk_state_to_token Map from `token-final states`
- (see glossary) in chunk_clat, to the token-label
- on arcs entering those states.
- @param [in] old_final_costs Map from token-label to the
- final-costs that were on the corresponding
- token-final states in the undeterminized lattice;
- these final-costs need to be removed when
- we record the weights in final_arcs_, because
- they were just temporary.
- */
- void TransferArcsToClat(
- const CompactLattice &chunk_clat,
- bool is_first_chunk,
- const std::unordered_map<CompactLattice::StateId, CompactLattice::StateId> &state_map,
- const std::unordered_map<CompactLattice::StateId, Label> &chunk_state_to_token,
- const std::unordered_map<Label, BaseFloat> &old_final_costs);
- /**
- Adds one arc to `clat_`. It's like clat_.AddArc(state, arc), except
- it also modifies arcs_in_ and forward_costs_.
- */
- void AddArcToClat(CompactLattice::StateId state,
- const CompactLatticeArc &arc);
- CompactLattice::StateId AddStateToClat();
- // Identifies token-final states in `chunk_clat`; see glossary above for
- // definition of `token-final`. This function outputs a map from such states
- // in chunk_clat, to the `token-label` on arcs entering them. (It is not
- // possible that the same state would have multiple arcs entering it with
- // different token-labels, or some arcs entering with one token-label and some
- // another, or be both initial and have such arcs; this is true due to how we
- // construct the raw lattice.)
- void IdentifyTokenFinalStates(
- const CompactLattice &chunk_clat,
- std::unordered_map<CompactLattice::StateId, CompactLatticeArc::Label> *token_map) const;
- // trans_model_ is needed by DeterminizeLatticePhonePrunedWrapper() which this
- // class calls.
- const TransitionInformation &trans_model_;
- // config_ is needed by DeterminizeLatticePhonePrunedWrapper() which this
- // class calls.
- const LatticeIncrementalDecoderConfig &config_;
- // Contains the set of redeterminized-states which are not final in the
- // canonical appended lattice. Since the final ones don't physically appear
- // in clat_, this means the set of redeterminized-states which are physically
- // in clat_. In code terms, this means set of .first elements in final_arcs,
- // plus whatever other states in clat_ are reachable from such states.
- std::unordered_set<CompactLattice::StateId> non_final_redet_states_;
- // clat_ is the appended lattice (containing all chunks processed so
- // far), except its `final-arcs` (i.e. arcs which in the canonical
- // lattice would go to final-states) are not present (they are stored
- // separately in final_arcs_) and states which in the canonical lattice
- // should have final-arcs leaving them will instead have a final-prob.
- CompactLattice clat_;
- // arcs_in_ is indexed by (state-id in clat_), and is a list of
- // arcs that come into this state, in the form (prev-state,
- // arc-index). CAUTION: not all these input-arc records will always
- // be valid (some may be out-of-date, and may refer to an out-of-range
- // arc or an arc that does not point to this state). But all
- // input arcs will always be listed.
- std::vector<std::vector<std::pair<CompactLattice::StateId, int32> > > arcs_in_;
- // final_arcs_ contains arcs which would appear in the canonical appended
- // lattice but for implementation reasons are not physically present in clat_.
- // These are arcs to final states in the canonical appended lattice. The
- // .first elements are the source states in clat_ (these will all be elements
- // of non_final_redet_states_); the .nextstate elements of the arcs does not
- // contain a physical state, but contain state-labels allocated by
- // AllocateNewStateLabel().
- std::vector<CompactLatticeArc> final_arcs_;
- // forward_costs_, indexed by the state-id in clat_, stores the alpha
- // (forward) costs, i.e. the minimum cost from the start state to each state
- // in clat_. This is relevant for pruned determinization. The BaseFloat can
- // be thought of as the sum of a Value1() + Value2() in a LatticeWeight.
- std::vector<BaseFloat> forward_costs_;
- // temporary used in a function, kept here to avoid excessive reallocation.
- std::unordered_set<int32> temp_;
- KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeIncrementalDeterminizer);
- };
- /** This is an extention to the "normal" lattice-generating decoder.
- See \ref lattices_generation \ref decoders_faster and \ref decoders_simple
- for more information.
- The main difference is the incremental determinization which will be
- discussed in the function GetLattice(). This means that the work of determinizatin
- isn't done all at once at the end of the file, but incrementally while decoding.
- See the comment at the top of this file for more explanation.
- The decoder is templated on the FST type and the token type. The token type
- will normally be StdToken, but also may be BackpointerToken which is to support
- quick lookup of the current best path (see lattice-faster-online-decoder.h)
- The FST you invoke this decoder with is expected to be of type
- Fst::Fst<fst::StdArc>, a.k.a. StdFst, or GrammarFst. If you invoke it with
- FST == StdFst and it notices that the actual FST type is
- fst::VectorFst<fst::StdArc> or fst::ConstFst<fst::StdArc>, the decoder object
- will internally cast itself to one that is templated on those more specific
- types; this is an optimization for speed.
- */
- template <typename FST, typename Token = decoder::StdToken>
- class LatticeIncrementalDecoderTpl {
- public:
- using Arc = typename FST::Arc;
- using Label = typename Arc::Label;
- using StateId = typename Arc::StateId;
- using Weight = typename Arc::Weight;
- using ForwardLinkT = decoder::ForwardLink<Token>;
- // Instantiate this class once for each thing you have to decode.
- // This version of the constructor does not take ownership of
- // 'fst'.
- LatticeIncrementalDecoderTpl(const FST &fst, const TransitionInformation &trans_model,
- const LatticeIncrementalDecoderConfig &config);
- // This version of the constructor takes ownership of the fst, and will delete
- // it when this object is destroyed.
- LatticeIncrementalDecoderTpl(const LatticeIncrementalDecoderConfig &config,
- FST *fst, const TransitionInformation &trans_model);
- void SetOptions(const LatticeIncrementalDecoderConfig &config) { config_ = config; }
- const LatticeIncrementalDecoderConfig &GetOptions() const { return config_; }
- ~LatticeIncrementalDecoderTpl();
- /**
- CAUTION: it's unlikely that you will ever want to call this function. In a
- scenario where you have the entire file and just want to decode it, there
- is no point using this decoder.
- An example of how to do decoding together with incremental
- determinization. It decodes until there are no more frames left in the
- "decodable" object.
- In this example, config_.determinize_max_delay, config_.determinize_min_chunk_size
- and config_.determinize_max_active are used to determine the time to
- call GetLattice().
- Users will probably want to use appropriate combinations of
- AdvanceDecoding() and GetLattice() to build their application; this just
- gives you some idea how.
- The function returns true if any kind of traceback is available (not
- necessarily from a final state).
- */
- bool Decode(DecodableInterface *decodable);
- /// says whether a final-state was active on the last frame. If it was not,
- /// the lattice (or traceback) will end with states that are not final-states.
- bool ReachedFinal() const {
- return FinalRelativeCost() != std::numeric_limits<BaseFloat>::infinity();
- }
- /**
- This decoder has no GetBestPath() function.
- If you need that functionality you should probably use lattice-incremental-online-decoder.h,
- which makes it very efficient to obtain the best path. */
- /**
- This GetLattice() function returns the lattice containing
- `num_frames_to_decode` frames; this will be all frames decoded so
- far, if you let num_frames_to_decode == NumFramesDecoded(),
- but it will generally be better to make it a few frames less than
- that to avoid the lattice having too many active states at
- the end.
- @param [in] num_frames_to_include The number of frames that you want
- to be included in the lattice. Must be >=
- NumFramesInLattice() and <= NumFramesDecoded().
- @param [in] use_final_probs True if you want the final-probs
- of HCLG to be included in the output lattice. Must not
- be set to true if num_frames_to_include !=
- NumFramesDecoded(). Must be set to true if you have
- previously called FinalizeDecoding().
- (If no state was final on frame `num_frames_to_include`, the
- final-probs won't be included regardless of
- `use_final_probs`; you can test whether this
- was the case by calling ReachedFinal().
- @return clat The CompactLattice representing what has been decoded
- up until `num_frames_to_include` (e.g., LatticeStateTimes()
- on this lattice would return `num_frames_to_include`).
- See also UpdateLatticeDeterminizaton(). Caution: this const ref
- is only valid until the next time you call AdvanceDecoding() or
- GetLattice().
- CAUTION: the lattice may contain disconnnected states; you should
- call Connect() on the output before writing it out.
- */
- const CompactLattice &GetLattice(int32 num_frames_to_include,
- bool use_final_probs = false);
- /*
- Returns the number of frames in the currently-determinized part of the
- lattice which will be a number in [0, NumFramesDecoded()]. It will
- be the largest number that GetLattice() was called with, but note
- that GetLattice() may be called from UpdateLatticeDeterminization().
- Made available in case the user wants to give that same number to
- GetLattice().
- */
- int NumFramesInLattice() const { return num_frames_in_lattice_; }
- /**
- InitDecoding initializes the decoding, and should only be used if you
- intend to call AdvanceDecoding(). If you call Decode(), you don't need to
- call this. You can also call InitDecoding if you have already decoded an
- utterance and want to start with a new utterance.
- */
- void InitDecoding();
- /**
- This will decode until there are no more frames ready in the decodable
- object. You can keep calling it each time more frames become available
- (this is the normal pattern in a real-time/online decoding scenario).
- If max_num_frames is specified, it specifies the maximum number of frames
- the function will decode before returning.
- */
- void AdvanceDecoding(DecodableInterface *decodable, int32 max_num_frames = -1);
- /** FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives
- more information. It returns the difference between the best (final-cost
- plus cost) of any token on the final frame, and the best cost of any token
- on the final frame. If it is infinity it means no final-states were
- present on the final frame. It will usually be nonnegative. If it not
- too positive (e.g. < 5 is my first guess, but this is not tested) you can
- take it as a good indication that we reached the final-state with
- reasonable likelihood. */
- BaseFloat FinalRelativeCost() const;
- /** Returns the number of frames decoded so far. */
- inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; }
- /**
- Finalizes the decoding, doing an extra pruning step on the last frame
- that uses the final-probs. May be called only once.
- */
- void FinalizeDecoding();
- protected:
- /* Some protected things are needed in LatticeIncrementalOnlineDecoderTpl. */
- /** NOTE: for parts the internal implementation that are shared with LatticeFasterDecoer,
- we have removed the comments.*/
- inline static void DeleteForwardLinks(Token *tok);
- struct TokenList {
- Token *toks;
- bool must_prune_forward_links;
- bool must_prune_tokens;
- int32 num_toks; /* Note: you can only trust `num_toks` if must_prune_tokens
- * == false, because it is only set in
- * PruneTokensForFrame(). */
- TokenList()
- : toks(NULL), must_prune_forward_links(true), must_prune_tokens(true),
- num_toks(-1) {}
- };
- using Elem = typename HashList<StateId, Token *>::Elem;
- void PossiblyResizeHash(size_t num_toks);
- inline Token *FindOrAddToken(StateId state, int32 frame_plus_one,
- BaseFloat tot_cost, Token *backpointer, bool *changed);
- void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed,
- bool *links_pruned, BaseFloat delta);
- void ComputeFinalCosts(unordered_map<Token *, BaseFloat> *final_costs,
- BaseFloat *final_relative_cost,
- BaseFloat *final_best_cost) const;
- void PruneForwardLinksFinal();
- void PruneTokensForFrame(int32 frame_plus_one);
- void PruneActiveTokens(BaseFloat delta);
- BaseFloat GetCutoff(Elem *list_head, size_t *tok_count, BaseFloat *adaptive_beam,
- Elem **best_elem);
- BaseFloat ProcessEmitting(DecodableInterface *decodable);
- void ProcessNonemitting(BaseFloat cost_cutoff);
- HashList<StateId, Token *> toks_;
- std::vector<TokenList> active_toks_; // indexed by frame.
- std::vector<StateId> queue_; // temp variable used in ProcessNonemitting,
- std::vector<BaseFloat> tmp_array_; // used in GetCutoff.
- const FST *fst_;
- bool delete_fst_;
- std::vector<BaseFloat> cost_offsets_;
- int32 num_toks_;
- bool warned_;
- bool decoding_finalized_;
- unordered_map<Token *, BaseFloat> final_costs_;
- BaseFloat final_relative_cost_;
- BaseFloat final_best_cost_;
- /***********************
- Variables below this point relate to the incremental
- determinization.
- *********************/
- LatticeIncrementalDecoderConfig config_;
- /** Much of the the incremental determinization algorithm is encapsulated in
- the determinize_ object. */
- LatticeIncrementalDeterminizer determinizer_;
- /* Just a temporary used in a function; stored here to avoid reallocation. */
- unordered_map<Token*, StateId> temp_token_map_;
- /** num_frames_in_lattice_ is the highest `num_frames_to_include_` argument
- for any prior call to GetLattice(). */
- int32 num_frames_in_lattice_;
- // A map from Token to its token_label. Will contain an entry for
- // each Token in active_toks_[num_frames_in_lattice_].
- unordered_map<Token*, Label> token2label_map_;
- // A temporary used in a function, kept here to avoid reallocation.
- unordered_map<Token*, Label> token2label_map_temp_;
- // we allocate a unique id for each Token
- Label next_token_label_;
- inline Label AllocateNewTokenLabel() { return next_token_label_++; }
- // There are various cleanup tasks... the the toks_ structure contains
- // singly linked lists of Token pointers, where Elem is the list type.
- // It also indexes them in a hash, indexed by state (this hash is only
- // maintained for the most recent frame). toks_.Clear()
- // deletes them from the hash and returns the list of Elems. The
- // function DeleteElems calls toks_.Delete(elem) for each elem in
- // the list, which returns ownership of the Elem to the toks_ structure
- // for reuse, but does not delete the Token pointer. The Token pointers
- // are reference-counted and are ultimately deleted in PruneTokensForFrame,
- // but are also linked together on each frame by their own linked-list,
- // using the "next" pointer. We delete them manually.
- void DeleteElems(Elem *list);
- void ClearActiveTokens();
- // Returns the number of active tokens on frame `frame`. Can be used as part
- // of a heuristic to decide which frame to determinize until, if you are not
- // at the end of an utterance.
- int32 GetNumToksForFrame(int32 frame);
- /**
- UpdateLatticeDeterminization() ensures the work of determinization is kept
- up to date so that when you do need the lattice you can get it fast. It
- uses the configuration values `determinize_max_delay`, `determinize_min_chunk_size`
- and `determinize_max_active`, to decide whether and when to call
- GetLattice(). You can safely call this as often as you want (e.g. after
- each time you call AdvanceDecoding(); it won't do subtantially more work if
- it is called frequently.
- */
- void UpdateLatticeDeterminization();
- KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeIncrementalDecoderTpl);
- };
- typedef LatticeIncrementalDecoderTpl<fst::StdFst, decoder::StdToken>
- LatticeIncrementalDecoder;
- } // end namespace kaldi.
- #endif
|