雾聪 пре 2 година
родитељ
комит
653088af82

+ 1 - 1
funasr/runtime/onnxruntime/bin/funasr-onnx-2pass-rtf.cpp

@@ -202,7 +202,7 @@ int main(int argc, char** argv)
     TCLAP::ValueArg<std::string>    punc_quant("", PUNC_QUANT, "false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "true", "string");
     TCLAP::ValueArg<std::string>    punc_quant("", PUNC_QUANT, "false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "true", "string");
     TCLAP::ValueArg<std::string>    asr_mode("", ASR_MODE, "offline, online, 2pass", false, "2pass", "string");
     TCLAP::ValueArg<std::string>    asr_mode("", ASR_MODE, "offline, online, 2pass", false, "2pass", "string");
     TCLAP::ValueArg<std::int32_t>   onnx_thread("", "onnx-inter-thread", "onnxruntime SetIntraOpNumThreads", false, 1, "int32_t");
     TCLAP::ValueArg<std::int32_t>   onnx_thread("", "onnx-inter-thread", "onnxruntime SetIntraOpNumThreads", false, 1, "int32_t");
-    TCLAP::ValueArg<std::int32_t>   thread_num_("", THREAD_NUM, "multi-thread num for rtf", true, 0, "int32_t");
+    TCLAP::ValueArg<std::int32_t>   thread_num_("", THREAD_NUM, "multi-thread num for rtf", false, 1, "int32_t");
 
 
     TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
     TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
 
 

+ 18 - 18
funasr/runtime/onnxruntime/src/paraformer-online.cpp

@@ -101,27 +101,27 @@ void ParaformerOnline::ExtractFeats(float sample_rate, vector<std::vector<float>
         waves.insert(waves.begin(), reserve_waveforms_.begin(), reserve_waveforms_.end());
         waves.insert(waves.begin(), reserve_waveforms_.begin(), reserve_waveforms_.end());
         }
         }
         if (lfr_splice_cache_.empty()) {
         if (lfr_splice_cache_.empty()) {
-        for (int i = 0; i < (lfr_m - 1) / 2; i++) {
-            lfr_splice_cache_.emplace_back(wav_feats[0]);
-        }
+            for (int i = 0; i < (lfr_m - 1) / 2; i++) {
+                lfr_splice_cache_.emplace_back(wav_feats[0]);
+            }
         }
         }
         if (wav_feats.size() + lfr_splice_cache_.size() >= lfr_m) {
         if (wav_feats.size() + lfr_splice_cache_.size() >= lfr_m) {
-        wav_feats.insert(wav_feats.begin(), lfr_splice_cache_.begin(), lfr_splice_cache_.end());
-        int frame_from_waves = (waves.size() - frame_sample_length_) / frame_shift_sample_length_ + 1;
-        int minus_frame = reserve_waveforms_.empty() ? (lfr_m - 1) / 2 : 0;
-        int lfr_splice_frame_idxs = OnlineLfrCmvn(wav_feats, input_finished);
-        int reserve_frame_idx = std::abs(lfr_splice_frame_idxs - minus_frame);
-        reserve_waveforms_.clear();
-        reserve_waveforms_.insert(reserve_waveforms_.begin(),
-                                    waves.begin() + reserve_frame_idx * frame_shift_sample_length_,
-                                    waves.begin() + frame_from_waves * frame_shift_sample_length_);
-        int sample_length = (frame_from_waves - 1) * frame_shift_sample_length_ + frame_sample_length_;
-        waves.erase(waves.begin() + sample_length, waves.end());
+            wav_feats.insert(wav_feats.begin(), lfr_splice_cache_.begin(), lfr_splice_cache_.end());
+            int frame_from_waves = (waves.size() - frame_sample_length_) / frame_shift_sample_length_ + 1;
+            int minus_frame = reserve_waveforms_.empty() ? (lfr_m - 1) / 2 : 0;
+            int lfr_splice_frame_idxs = OnlineLfrCmvn(wav_feats, input_finished);
+            int reserve_frame_idx = std::abs(lfr_splice_frame_idxs - minus_frame);
+            reserve_waveforms_.clear();
+            reserve_waveforms_.insert(reserve_waveforms_.begin(),
+                                        waves.begin() + reserve_frame_idx * frame_shift_sample_length_,
+                                        waves.begin() + frame_from_waves * frame_shift_sample_length_);
+            int sample_length = (frame_from_waves - 1) * frame_shift_sample_length_ + frame_sample_length_;
+            waves.erase(waves.begin() + sample_length, waves.end());
         } else {
         } else {
-        reserve_waveforms_.clear();
-        reserve_waveforms_.insert(reserve_waveforms_.begin(),
-                                    waves.begin() + frame_sample_length_ - frame_shift_sample_length_, waves.end());
-        lfr_splice_cache_.insert(lfr_splice_cache_.end(), wav_feats.begin(), wav_feats.end());
+            reserve_waveforms_.clear();
+            reserve_waveforms_.insert(reserve_waveforms_.begin(),
+                                        waves.begin() + frame_sample_length_ - frame_shift_sample_length_, waves.end());
+            lfr_splice_cache_.insert(lfr_splice_cache_.end(), wav_feats.begin(), wav_feats.end());
         }
         }
     } else {
     } else {
         if (input_finished) {
         if (input_finished) {