| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253 |
- // decoder/decodable-matrix.h
- // Copyright 2009-2011 Microsoft Corporation
- // 2013 Johns Hopkins University (author: Daniel Povey)
- // See ../../COPYING for clarification regarding multiple authors
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
- // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
- // MERCHANTABLITY OR NON-INFRINGEMENT.
- // See the Apache 2 License for the specific language governing permissions and
- // limitations under the License.
- #ifndef KALDI_DECODER_DECODABLE_MATRIX_H_
- #define KALDI_DECODER_DECODABLE_MATRIX_H_
#include <cstddef>
#include <vector>

#include "base/kaldi-common.h"
#include "itf/decodable-itf.h"
#include "itf/transition-information.h"
#include "matrix/kaldi-matrix.h"
- namespace kaldi {
- class DecodableMatrixScaledMapped: public DecodableInterface {
- public:
- // This constructor creates an object that will not delete "likes" when done.
- DecodableMatrixScaledMapped(const TransitionInformation &tm,
- const Matrix<BaseFloat> &likes,
- BaseFloat scale): trans_model_(tm), likes_(&likes),
- tid_to_pdf_(trans_model_.TransitionIdToPdfArray()),
- scale_(scale), delete_likes_(false) {
- if (likes.NumCols() != tm.NumPdfs())
- KALDI_ERR << "DecodableMatrixScaledMapped: mismatch, matrix has "
- << likes.NumCols() << " cols but transition-model has "
- << tm.NumPdfs() << " pdf-ids.";
- }
- // This constructor creates an object that will delete "likes"
- // when done.
- DecodableMatrixScaledMapped(const TransitionInformation &tm,
- BaseFloat scale,
- const Matrix<BaseFloat> *likes):
- trans_model_(tm), likes_(likes),
- tid_to_pdf_(trans_model_.TransitionIdToPdfArray()),
- scale_(scale), delete_likes_(true) {
- if (likes->NumCols() != tm.NumPdfs())
- KALDI_ERR << "DecodableMatrixScaledMapped: mismatch, matrix has "
- << likes->NumCols() << " cols but transition-model has "
- << tm.NumPdfs() << " pdf-ids.";
- }
- virtual int32 NumFramesReady() const { return likes_->NumRows(); }
- virtual bool IsLastFrame(int32 frame) const {
- KALDI_ASSERT(frame < NumFramesReady());
- return (frame == NumFramesReady() - 1);
- }
- // Note, frames are numbered from zero.
- virtual BaseFloat LogLikelihood(int32 frame, int32 tid) {
- KALDI_PARANOID_ASSERT(tid >= 1 && tid < tid_to_pdf_.size());
- return scale_ * (*likes_)(frame, tid_to_pdf_[tid]);
- }
- // Indices are one-based! This is for compatibility with OpenFst.
- virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
- virtual ~DecodableMatrixScaledMapped() {
- if (delete_likes_) delete likes_;
- }
- private:
- const TransitionInformation &trans_model_; // for tid to pdf mapping
- const Matrix<BaseFloat> *likes_;
- const std::vector<int32> &tid_to_pdf_;
- BaseFloat scale_;
- bool delete_likes_;
- KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableMatrixScaledMapped);
- };
- /**
- This is like DecodableMatrixScaledMapped, but it doesn't support an acoustic
- scale, and it does support a frame offset, whereby you can state that the
- first row of 'likes' is actually the n'th row of the matrix of available
- log-likelihoods. It's useful if the neural net output comes in chunks for
- different frame ranges.
- Note: DecodableMatrixMappedOffset solves the same problem in a slightly
- different way, where you use the same decodable object. This one, unlike
- DecodableMatrixMappedOffset, is compatible with when the loglikes are in a
- SubMatrix.
- */
- class DecodableMatrixMapped: public DecodableInterface {
- public:
- // This constructor creates an object that will not delete "likes" when done.
- // the frame_offset is the frame the row 0 of 'likes' corresponds to, would be
- // greater than one if this is not the first chunk of likelihoods.
- DecodableMatrixMapped(const TransitionInformation &tm,
- const MatrixBase<BaseFloat> &likes,
- int32 frame_offset = 0);
- // This constructor creates an object that will delete "likes"
- // when done.
- DecodableMatrixMapped(const TransitionInformation &tm,
- const Matrix<BaseFloat> *likes,
- int32 frame_offset = 0);
- virtual int32 NumFramesReady() const;
- virtual bool IsLastFrame(int32 frame) const;
- virtual BaseFloat LogLikelihood(int32 frame, int32 tid);
- // Note: these indices are 1-based.
- virtual int32 NumIndices() const;
- virtual ~DecodableMatrixMapped();
- private:
- const TransitionInformation &trans_model_; // for tid to pdf mapping
- const std::vector<int32>& tid_to_pdf_;
- const MatrixBase<BaseFloat> *likes_;
- const Matrix<BaseFloat> *likes_to_delete_;
- int32 frame_offset_;
- // raw_data_ and stride_ are a kind of fast look-aside for 'likes_', to be
- // used when KALDI_PARANOID is false.
- const BaseFloat *raw_data_;
- int32 stride_;
- KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableMatrixMapped);
- };
- /**
- This decodable class returns log-likes stored in a matrix; it supports
- repeatedly writing to the matrix and setting a time-offset representing the
- frame-index of the first row of the matrix. It's intended for use in
- multi-threaded decoding; mutex and semaphores are not included. External
- code will call SetLoglikes() each time more log-likelihods are available.
- If you try to access a log-likelihood that's no longer available because
- the frame index is less than the current offset, it is of course an error.
- See also DecodableMatrixMapped, which supports the same type of thing but
- with a different interface where you are expected to re-construct the
- object each time you want to decode.
- */
- class DecodableMatrixMappedOffset: public DecodableInterface {
- public:
- DecodableMatrixMappedOffset(const TransitionInformation &tm):
- trans_model_(tm), tid_to_pdf_(trans_model_.TransitionIdToPdfArray()),
- frame_offset_(0), input_is_finished_(false) { }
- // this is not part of the generic Decodable interface.
- int32 FirstAvailableFrame() const { return frame_offset_; }
- // Logically, this function appends 'loglikes' (interpreted as newly available
- // frames) to the log-likelihoods stored in the class.
- //
- // This function is destructive of the input "loglikes" because it may
- // under some circumstances do a shallow copy using Swap(). This function
- // appends loglikes to any existing likelihoods you've previously supplied.
- void AcceptLoglikes(Matrix<BaseFloat> *loglikes,
- int32 frames_to_discard);
- void InputIsFinished() { input_is_finished_ = true; }
- virtual int32 NumFramesReady() const {
- return loglikes_.NumRows() + frame_offset_;
- }
- virtual bool IsLastFrame(int32 frame) const {
- KALDI_ASSERT(frame < NumFramesReady());
- return (frame == NumFramesReady() - 1 && input_is_finished_);
- }
- virtual BaseFloat LogLikelihood(int32 frame, int32 tid) {
- KALDI_PARANOID_ASSERT(tid >= 1 && tid < tid_to_pdf_.size());
- int32 pdf_id = tid_to_pdf_[tid];
- #ifdef KALDI_PARANOID
- return loglikes_(frame - frame_offset_, pdf_id);
- #else
- // This does no checking, so will be faster.
- return raw_data_[frame * stride_ + pdf_id];
- #endif
- }
- virtual int32 NumIndices() const { return trans_model_.NumTransitionIds(); }
- // nothing special to do in destructor.
- virtual ~DecodableMatrixMappedOffset() { }
- private:
- const TransitionInformation &trans_model_; // for tid to pdf mapping
- const std::vector<int32>& tid_to_pdf_;
- Matrix<BaseFloat> loglikes_;
- int32 frame_offset_;
- bool input_is_finished_;
- // 'raw_data_' and 'stride_' are intended as a fast look-aside which is an
- // alternative to accessing data_. raw_data_ is a faked version of
- // data_->Data() as if it started from frame zero rather than frame_offset_.
- // This simplifies the code of LogLikelihood(), in cases where KALDI_PARANOID
- // is not defined.
- BaseFloat *raw_data_;
- int32 stride_;
- KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableMatrixMappedOffset);
- };
- class DecodableMatrixScaled: public DecodableInterface {
- public:
- DecodableMatrixScaled(const Matrix<BaseFloat> &likes,
- BaseFloat scale):
- likes_(likes), scale_(scale) { }
- virtual int32 NumFramesReady() const { return likes_.NumRows(); }
- virtual bool IsLastFrame(int32 frame) const {
- KALDI_ASSERT(frame < NumFramesReady());
- return (frame == NumFramesReady() - 1);
- }
- // Note, frames are numbered from zero.
- virtual BaseFloat LogLikelihood(int32 frame, int32 index) {
- if (index > likes_.NumCols() || index <= 0 ||
- frame < 0 || frame >= likes_.NumRows())
- KALDI_ERR << "Invalid (frame, index - 1) = ("
- << frame << ", " << index - 1 << ") for matrix of size "
- << likes_.NumRows() << " x " << likes_.NumCols();
- return scale_ * likes_(frame, index - 1);
- }
- // Indices are one-based! This is for compatibility with OpenFst.
- virtual int32 NumIndices() const { return likes_.NumCols(); }
- private:
- const Matrix<BaseFloat> &likes_;
- BaseFloat scale_;
- KALDI_DISALLOW_COPY_AND_ASSIGN(DecodableMatrixScaled);
- };
- } // namespace kaldi
- #endif // KALDI_DECODER_DECODABLE_MATRIX_H_
|