2 سال پیش · fcc497dd73
--- a/funasr/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp
+++ b/funasr/runtime/onnxruntime/bin/funasr-onnx-2pass.cpp
@@ -0,0 +1,193 @@
 
				+/**
			
 
				+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
			
 
				+ * MIT License  (https://opensource.org/licenses/MIT)
			
 
				+*/
			
 
				+
			
 
				+#ifndef _WIN32
			
 
				+#include <sys/time.h>
			
 
				+#else
			
 
				+#include <win_func.h>
			
 
				+#endif
			
 
				+
			
 
				+#include <iostream>
			
 
				+#include <fstream>
			
 
				+#include <sstream>
			
 
				+#include <map>
			
 
				+#include <glog/logging.h>
			
 
				+#include "funasrruntime.h"
			
 
				+#include "tclap/CmdLine.h"
			
 
				+#include "com-define.h"
			
 
				+#include "audio.h"
			
 
				+
			
 
				+using namespace std;
			
 
				+
			
 
				/**
				 * Report whether `filename` ends with the extension `target`.
				 *
				 * The extension is everything after the last '.' in `filename`; the
				 * comparison is exact (case-sensitive) and `target` is given without the
				 * leading dot, e.g. is_target_file("a.wav", "wav").
				 *
				 * @param filename  path or file name to inspect
				 * @param target    extension to look for, without the dot
				 * @return true when the text after the last '.' equals `target`;
				 *         false when `filename` contains no '.' at all
				 */
				bool is_target_file(const std::string& filename, const std::string& target) {
				    const std::size_t dot = filename.rfind('.');
				    if (dot == std::string::npos) {
				        return false;
				    }
				    // Compare the suffix in place; no temporary substring is needed.
				    return filename.compare(dot + 1, std::string::npos, target) == 0;
				}
			
 
				+
			
 
				+void GetValue(TCLAP::ValueArg<std::string>& value_arg, string key, std::map<std::string, std::string>& model_path)
			
 
				+{
			
 
				+    if (value_arg.isSet()){
			
 
				+        model_path.insert({key, value_arg.getValue()});
			
 
				+        LOG(INFO)<< key << " : " << value_arg.getValue();
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+int main(int argc, char** argv)
			
 
				+{
			
 
				+    google::InitGoogleLogging(argv[0]);
			
 
				+    FLAGS_logtostderr = true;
			
 
				+
			
 
				+    TCLAP::CmdLine cmd("funasr-onnx-2pass", ' ', "1.0");
			
 
				+    TCLAP::ValueArg<std::string>    offline_model_dir("", OFFLINE_MODEL_DIR, "the asr offline model path, which contains model.onnx, config.yaml, am.mvn", true, "", "string");
			
 
				+    TCLAP::ValueArg<std::string>    online_model_dir("", ONLINE_MODEL_DIR, "the asr online model path, which contains encoder.onnx, decoder.onnx, config.yaml, am.mvn", true, "", "string");
			
 
				+    TCLAP::ValueArg<std::string>    quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string");
			
 
				+    TCLAP::ValueArg<std::string>    vad_dir("", VAD_DIR, "the vad online model path, which contains model.onnx, vad.yaml, vad.mvn", false, "", "string");
			
 
				+    TCLAP::ValueArg<std::string>    vad_quant("", VAD_QUANT, "false (Default), load the model of model.onnx in vad_dir. If set true, load the model of model_quant.onnx in vad_dir", false, "false", "string");
			
 
				+    TCLAP::ValueArg<std::string>    punc_dir("", PUNC_DIR, "the punc online model path, which contains model.onnx, punc.yaml", false, "", "string");
			
 
				+    TCLAP::ValueArg<std::string>    punc_quant("", PUNC_QUANT, "false (Default), load the model of model.onnx in punc_dir. If set true, load the model of model_quant.onnx in punc_dir", false, "false", "string");
			
 
				+
			
 
				+    TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
			
 
				+
			
 
				+    cmd.add(offline_model_dir);
			
 
				+    cmd.add(online_model_dir);
			
 
				+    cmd.add(quantize);
			
 
				+    cmd.add(vad_dir);
			
 
				+    cmd.add(vad_quant);
			
 
				+    cmd.add(punc_dir);
			
 
				+    cmd.add(punc_quant);
			
 
				+    cmd.add(wav_path);
			
 
				+    cmd.parse(argc, argv);
			
 
				+
			
 
				+    std::map<std::string, std::string> model_path;
			
 
				+    GetValue(offline_model_dir, OFFLINE_MODEL_DIR, model_path);
			
 
				+    GetValue(online_model_dir, ONLINE_MODEL_DIR, model_path);
			
 
				+    GetValue(quantize, QUANTIZE, model_path);
			
 
				+    GetValue(vad_dir, VAD_DIR, model_path);
			
 
				+    GetValue(vad_quant, VAD_QUANT, model_path);
			
 
				+    GetValue(punc_dir, PUNC_DIR, model_path);
			
 
				+    GetValue(punc_quant, PUNC_QUANT, model_path);
			
 
				+    GetValue(wav_path, WAV_PATH, model_path);
			
 
				+
			
 
				+    struct timeval start, end;
			
 
				+    gettimeofday(&start, NULL);
			
 
				+    int thread_num = 1;
			
 
				+    FUNASR_HANDLE tpass_hanlde=FunTpassInit(model_path, thread_num);
			
 
				+
			
 
				+    if (!tpass_hanlde)
			
 
				+    {
			
 
				+        LOG(ERROR) << "FunTpassInit init failed";
			
 
				+        exit(-1);
			
 
				+    }
			
 
				+
			
 
				+    gettimeofday(&end, NULL);
			
 
				+    long seconds = (end.tv_sec - start.tv_sec);
			
 
				+    long modle_init_micros = ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
			
 
				+    LOG(INFO) << "Model initialization takes " << (double)modle_init_micros / 1000000 << " s";
			
 
				+
			
 
				+    // read wav_path
			
 
				+    vector<string> wav_list;
			
 
				+    vector<string> wav_ids;
			
 
				+    string default_id = "wav_default_id";
			
 
				+    string wav_path_ = model_path.at(WAV_PATH);
			
 
				+
			
 
				+    if(is_target_file(wav_path_, "scp")){
			
 
				+        ifstream in(wav_path_);
			
 
				+        if (!in.is_open()) {
			
 
				+            LOG(ERROR) << "Failed to open file: " << model_path.at(WAV_SCP) ;
			
 
				+            return 0;
			
 
				+        }
			
 
				+        string line;
			
 
				+        while(getline(in, line))
			
 
				+        {
			
 
				+            istringstream iss(line);
			
 
				+            string column1, column2;
			
 
				+            iss >> column1 >> column2;
			
 
				+            wav_list.emplace_back(column2);
			
 
				+            wav_ids.emplace_back(column1);
			
 
				+        }
			
 
				+        in.close();
			
 
				+    }else{
			
 
				+        wav_list.emplace_back(wav_path_);
			
 
				+        wav_ids.emplace_back(default_id);
			
 
				+    }
			
 
				+
			
 
				+    // init online features
			
 
				+    FunTpassOnlineInit(tpass_hanlde);
			
 
				+    float snippet_time = 0.0f;
			
 
				+    long taking_micros = 0;
			
 
				+    for (int i = 0; i < wav_list.size(); i++) {
			
 
				+        auto& wav_file = wav_list[i];
			
 
				+        auto& wav_id = wav_ids[i];
			
 
				+
			
 
				+        int32_t sampling_rate_ = -1;
			
 
				+        funasr::Audio audio(1);
			
 
				+		if(is_target_file(wav_file.c_str(), "wav")){
			
 
				+			if(!audio.LoadWav2Char(wav_file.c_str(), &sampling_rate_)){
			
 
				+				LOG(ERROR)<<"Failed to load "<< wav_file;
			
 
				+                exit(-1);
			
 
				+            }
			
 
				+		}else if(is_target_file(wav_file.c_str(), "pcm")){
			
 
				+			if (!audio.LoadPcmwav2Char(wav_file.c_str(), &sampling_rate_)){
			
 
				+				LOG(ERROR)<<"Failed to load "<< wav_file;
			
 
				+                exit(-1);
			
 
				+            }
			
 
				+		}else{
			
 
				+			if (!audio.FfmpegLoad(wav_file.c_str(), true)){
			
 
				+				LOG(ERROR)<<"Failed to load "<< wav_file;
			
 
				+                exit(-1);
			
 
				+            }
			
 
				+		}
			
 
				+        char* speech_buff = audio.GetSpeechChar();
			
 
				+        int buff_len = audio.GetSpeechLen()*2;
			
 
				+
			
 
				+        int step = 1600*2;
			
 
				+        bool is_final = false;
			
 
				+
			
 
				+        string online_res="";
			
 
				+        string tpass_res="";
			
 
				+        for (int sample_offset = 0; sample_offset < buff_len; sample_offset += std::min(step, buff_len - sample_offset)) {
			
 
				+            if (sample_offset + step >= buff_len - 1) {
			
 
				+                    step = buff_len - sample_offset;
			
 
				+                    is_final = true;
			
 
				+                } else {
			
 
				+                    is_final = false;
			
 
				+            }
			
 
				+            gettimeofday(&start, NULL);
			
 
				+            FUNASR_RESULT result = FunTpassInferBuffer(tpass_hanlde, speech_buff+sample_offset, step, RASR_NONE, NULL, is_final, 16000);
			
 
				+            gettimeofday(&end, NULL);
			
 
				+            seconds = (end.tv_sec - start.tv_sec);
			
 
				+            taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
			
 
				+
			
 
				+            if (result)
			
 
				+            {
			
 
				+                string online_msg = FunASRGetResult(result, 0);
			
 
				+                online_res += online_msg;
			
 
				+                if(online_msg != ""){
			
 
				+                    LOG(INFO)<< wav_id <<" : "<<online_msg;
			
 
				+                }
			
 
				+                string tpass_msg = FunASRGetTpassResult(result, 0);
			
 
				+                tpass_res += tpass_msg;
			
 
				+                if(tpass_msg != ""){
			
 
				+                    LOG(INFO)<< wav_id <<" 2pass results : "<<tpass_msg;
			
 
				+                }
			
 
				+                snippet_time += FunASRGetRetSnippetTime(result);
			
 
				+                FunASRFreeResult(result);
			
 
				+            }
			
 
				+        }
			
 
				+        LOG(INFO)<<"Final online results " << wav_id <<" : "<<online_res;
			
 
				+        LOG(INFO)<<"Final 2pass  results " << wav_id <<" : "<<tpass_res;
			
 
				+    }
			
 
				+ 
			
 
				+    LOG(INFO) << "Audio length: " << (double)snippet_time << " s";
			
 
				+    LOG(INFO) << "Model inference takes: " << (double)taking_micros / 1000000 <<" s";
			
 
				+    LOG(INFO) << "Model inference RTF: " << (double)taking_micros/ (snippet_time*1000000);
			
 
				+    FunTpassUninit(tpass_hanlde);
			
 
				+    return 0;
			
 
				+}
			
 
				+
			
--- a/funasr/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp
+++ b/funasr/runtime/onnxruntime/bin/funasr-onnx-online-asr.cpp
@@ -63,7 +63,7 @@ int main(int argc, char *argv[])
 
				     struct timeval start, end;
			
 
				     gettimeofday(&start, NULL);
			
 
				     int thread_num = 1;
			
 
				-    FUNASR_HANDLE asr_handle=FunASRInit(model_path, thread_num, 1);
			
 
				+    FUNASR_HANDLE asr_handle=FunASRInit(model_path, thread_num, ASR_ONLINE);
			
 
				 
			
 
				     if (!asr_handle)
			
 
				     {
			
--- a/funasr/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp
+++ b/funasr/runtime/onnxruntime/bin/funasr-onnx-online-rtf.cpp
@@ -209,8 +209,7 @@ int main(int argc, char *argv[])
 
				 
			
 
				     struct timeval start, end;
			
 
				     gettimeofday(&start, NULL);
			
 
				-    int online_mode = 1;
			
 
				-    FUNASR_HANDLE asr_handle=FunASRInit(model_path, 1, online_mode);
			
 
				+    FUNASR_HANDLE asr_handle=FunASRInit(model_path, 1, ASR_ONLINE);
			
 
				 
			
 
				     if (!asr_handle)
			
 
				     {
			
--- a/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
+++ b/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
@@ -159,7 +159,7 @@ int main(int argc, char *argv[])
 
				         char* speech_buff = audio.GetSpeechChar();
			
 
				         int buff_len = audio.GetSpeechLen()*2;
			
 
				 
			
 
				-        int step = 3200;
			
 
				+        int step = 1600*2;
			
 
				         bool is_final = false;
			
 
				 
			
 
				         for (int sample_offset = 0; sample_offset < buff_len; sample_offset += std::min(step, buff_len - sample_offset)) {
			
--- a/funasr/runtime/onnxruntime/include/audio.h
+++ b/funasr/runtime/onnxruntime/include/audio.h
@@ -5,6 +5,7 @@
 
				 #include <stdint.h>
			
 
				 #include "vad-model.h"
			
 
				 #include "offline-stream.h"
			
 
				+#include "com-define.h"
			
 
				 
			
 
				 #ifndef WAV_HEADER_SIZE
			
 
				 #define WAV_HEADER_SIZE 44
			
@@ -17,11 +18,13 @@ class AudioFrame {
 
				   private:
			
 
				     int start;
			
 
				     int end;
			
 
				-    int len;
			
 
				+
			
 
				 
			
 
				   public:
			
 
				     AudioFrame();
			
 
				     AudioFrame(int len);
			
 
				+    AudioFrame(const AudioFrame &other);
			
 
				+    AudioFrame(int start, int end, bool is_final);
			
 
				 
			
 
				     ~AudioFrame();
			
 
				     int SetStart(int val);
			
@@ -29,6 +32,10 @@ class AudioFrame {
 
				     int GetStart();
			
 
				     int GetLen();
			
 
				     int Disp();
			
 
				+    // 2pass
			
 
				+    bool is_final = false;
			
 
				+    float* data = nullptr;
			
 
				+    int len;
			
 
				 };
			
 
				 
			
 
				 class Audio {
			
@@ -38,10 +45,11 @@ class Audio {
 
				     char* speech_char=nullptr;
			
 
				     int speech_len;
			
 
				     int speech_align_len;
			
 
				-    int offset;
			
 
				     float align_size;
			
 
				     int data_type;
			
 
				     queue<AudioFrame *> frame_queue;
			
 
				+    queue<AudioFrame *> asr_online_queue;
			
 
				+    queue<AudioFrame *> asr_offline_queue;
			
 
				 
			
 
				   public:
			
 
				     Audio(int data_type);
			
@@ -58,15 +66,33 @@ class Audio {
 
				     bool LoadOthers2Char(const char* filename);
			
 
				     bool FfmpegLoad(const char *filename, bool copy2char=false);
			
 
				     bool FfmpegLoad(const char* buf, int n_file_len);
			
 
				-    int FetchChunck(float *&dout, int len);
			
 
				+    int FetchChunck(AudioFrame *&frame);
			
 
				+    int FetchTpass(AudioFrame *&frame);
			
 
				     int Fetch(float *&dout, int &len, int &flag);
			
 
				     void Padding();
			
 
				     void Split(OfflineStream* offline_streamj);
			
 
				     void Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, bool input_finished=true);
			
 
				+    void Split(VadModel* vad_obj, bool input_finished=true);
			
 
				     float GetTimeLen();
			
 
				     int GetQueueSize() { return (int)frame_queue.size(); }
			
 
				     char* GetSpeechChar(){return speech_char;}
			
 
				     int GetSpeechLen(){return speech_len;}
			
 
				+
			
 
				+    // 2pass
			
 
				+    vector<float> all_samples;
			
 
				+    int offset = 0;
			
 
				+    int speech_start=-1, speech_end=0;
			
 
				+    int speech_offline_start=-1;
			
 
				+
			
 
				+    int seg_sample = MODEL_SAMPLE_RATE/1000;
			
 
				+    bool LoadPcmwavOnline(const char* buf, int n_file_len, int32_t* sampling_rate);
			
 
				+    void ResetIndex(){
			
 
				+      speech_start=-1;
			
 
				+      speech_end=0;
			
 
				+      speech_offline_start=-1;
			
 
				+      offset = 0;
			
 
				+      all_samples.clear();
			
 
				+    }
			
 
				 };
			
 
				 
			
 
				 } // namespace funasr
			
--- a/funasr/runtime/onnxruntime/include/com-define.h
+++ b/funasr/runtime/onnxruntime/include/com-define.h
@@ -13,6 +13,8 @@ namespace funasr {
 
				 
			
 
				 // parser option
			
 
				 #define MODEL_DIR "model-dir"
			
 
				+#define OFFLINE_MODEL_DIR "offline-model-dir"
			
 
				+#define ONLINE_MODEL_DIR "online-model-dir"
			
 
				 #define VAD_DIR "vad-dir"
			
 
				 #define PUNC_DIR "punc-dir"
			
 
				 #define QUANTIZE "quantize"
			
@@ -77,6 +79,10 @@ namespace funasr {
 
				 #define PARA_LFR_N 6
			
 
				 #endif
			
 
				 
			
 
				+#ifndef ONLINE_STEP
			
 
				+#define ONLINE_STEP 9600
			
 
				+#endif
			
 
				+
			
 
				 // punc
			
 
				 #define UNK_CHAR "<unk>"
			
 
				 #define TOKEN_LEN     20
			
--- a/funasr/runtime/onnxruntime/include/funasrruntime.h
+++ b/funasr/runtime/onnxruntime/include/funasrruntime.h
@@ -46,6 +46,12 @@ typedef enum {
 
				 	FUNASR_MODEL_PARAFORMER = 3,
			
 
				 }FUNASR_MODEL_TYPE;
			
 
				 
			
 
				// Recognizer flavour requested from FunASRInit / CreateModel.
				typedef enum {
					ASR_OFFLINE  = 0,	// default of FunASRInit
					ASR_ONLINE   = 1,	// replaces the literal 1 formerly passed by the online tools
					ASR_TWO_PASS = 2,
				} ASR_TYPE;
			
 
				+
			
 
				 typedef enum {
			
 
				 	PUNC_OFFLINE=0,
			
 
				 	PUNC_ONLINE=1,
			
@@ -54,7 +60,7 @@ typedef enum {
 
				 typedef void (* QM_CALLBACK)(int cur_step, int n_total); // n_total: total steps; cur_step: Current Step.
			
 
				 
			
 
				 // ASR
			
 
				-_FUNASRAPI FUNASR_HANDLE  	FunASRInit(std::map<std::string, std::string>& model_path, int thread_num, int mode=0);
			
 
				+_FUNASRAPI FUNASR_HANDLE  	FunASRInit(std::map<std::string, std::string>& model_path, int thread_num, ASR_TYPE type=ASR_OFFLINE);
			
 
				 _FUNASRAPI FUNASR_HANDLE  	FunASROnlineInit(FUNASR_HANDLE asr_handle);
			
 
				 // buffer
			
 
				 _FUNASRAPI FUNASR_RESULT	FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool input_finished=true, int sampling_rate=16000, std::string wav_format="pcm");
			
@@ -62,6 +68,7 @@ _FUNASRAPI FUNASR_RESULT	FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_
 
				 _FUNASRAPI FUNASR_RESULT	FunASRInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000);
			
 
				 
			
 
				 _FUNASRAPI const char*	FunASRGetResult(FUNASR_RESULT result,int n_index);
			
 
				+_FUNASRAPI const char*	FunASRGetTpassResult(FUNASR_RESULT result,int n_index);
			
 
				 _FUNASRAPI const int	FunASRGetRetNumber(FUNASR_RESULT result);
			
 
				 _FUNASRAPI void			FunASRFreeResult(FUNASR_RESULT result);
			
 
				 _FUNASRAPI void			FunASRUninit(FUNASR_HANDLE handle);
			
@@ -95,6 +102,13 @@ _FUNASRAPI FUNASR_RESULT	FunOfflineInferBuffer(FUNASR_HANDLE handle, const char*
 
				 _FUNASRAPI FUNASR_RESULT	FunOfflineInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate=16000);
			
 
				 _FUNASRAPI void				FunOfflineUninit(FUNASR_HANDLE handle);
			
 
				 
			
 
				+//2passStream
			
 
				+_FUNASRAPI FUNASR_HANDLE  	FunTpassInit(std::map<std::string, std::string>& model_path, int thread_num);
			
 
				+_FUNASRAPI void  	        FunTpassOnlineInit(FUNASR_HANDLE tpass_handle);
			
 
				+// buffer
			
 
				+_FUNASRAPI FUNASR_RESULT	FunTpassInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool input_finished=true, int sampling_rate=16000, std::string wav_format="pcm");
			
 
				+_FUNASRAPI void				FunTpassUninit(FUNASR_HANDLE handle);
			
 
				+
			
 
				 #ifdef __cplusplus 
			
 
				 
			
 
				 }
			
--- a/funasr/runtime/onnxruntime/include/model.h
+++ b/funasr/runtime/onnxruntime/include/model.h
@@ -4,6 +4,7 @@
 
				 
			
 
				 #include <string>
			
 
				 #include <map>
			
 
				+#include "funasrruntime.h"
			
 
				 namespace funasr {
			
 
				 class Model {
			
 
				   public:
			
@@ -11,11 +12,12 @@ class Model {
 
				     virtual void Reset() = 0;
			
 
				     virtual void InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){};
			
 
				     virtual void InitAsr(const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){};
			
 
				+    virtual void InitAsr(const std::string &am_model, const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){};
			
 
				     virtual std::string Forward(float *din, int len, bool input_finished){return "";};
			
 
				     virtual std::string Rescoring() = 0;
			
 
				 };
			
 
				 
			
 
				-Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_num=1, int mode=0);
			
 
				+Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_num=1, ASR_TYPE type=ASR_OFFLINE);
			
 
				 Model *CreateModel(void* asr_handle);
			
 
				 
			
 
				 } // namespace funasr
			
--- a/funasr/runtime/onnxruntime/include/offline-stream.h
+++ b/funasr/runtime/onnxruntime/include/offline-stream.h
@@ -14,9 +14,9 @@ class OfflineStream {
 
				     OfflineStream(std::map<std::string, std::string>& model_path, int thread_num);
			
 
				     ~OfflineStream(){};
			
 
				 
			
 
				-    std::unique_ptr<VadModel> vad_handle;
			
 
				-    std::unique_ptr<Model> asr_handle;
			
 
				-    std::unique_ptr<PuncModel> punc_handle;
			
 
				+    std::unique_ptr<VadModel> vad_handle= nullptr;
			
 
				+    std::unique_ptr<Model> asr_handle= nullptr;
			
 
				+    std::unique_ptr<PuncModel> punc_handle= nullptr;
			
 
				     bool UseVad(){return use_vad;};
			
 
				     bool UsePunc(){return use_punc;}; 
			
 
				     
			
--- a/funasr/runtime/onnxruntime/include/tpass-stream.h
+++ b/funasr/runtime/onnxruntime/include/tpass-stream.h
@@ -0,0 +1,34 @@
 
				+#ifndef TPASS_STREAM_H
			
 
				+#define TPASS_STREAM_H
			
 
				+
			
 
				+#include <memory>
			
 
				+#include <string>
			
 
				+#include <map>
			
 
				+#include "model.h"
			
 
				+#include "punc-model.h"
			
 
				+#include "vad-model.h"
			
 
				+
			
 
				+namespace funasr {
			
 
				+class TpassStream {
			
 
				+  public:
			
 
				+    TpassStream(std::map<std::string, std::string>& model_path, int thread_num);
			
 
				+    ~TpassStream(){};
			
 
				+
			
 
				+    // std::unique_ptr<VadModel> vad_handle = nullptr;
			
 
				+    std::unique_ptr<VadModel> vad_handle = nullptr;
			
 
				+    std::unique_ptr<VadModel> vad_online_handle = nullptr;
			
 
				+    std::unique_ptr<Model> asr_handle = nullptr;
			
 
				+    std::unique_ptr<Model> asr_online_handle = nullptr;
			
 
				+    std::unique_ptr<PuncModel> punc_online_handle = nullptr;
			
 
				+    bool UseVad(){return use_vad;};
			
 
				+    bool UsePunc(){return use_punc;}; 
			
 
				+    
			
 
				+  private:
			
 
				+    bool use_vad=false;
			
 
				+    bool use_punc=false;
			
 
				+};
			
 
				+
			
 
				+TpassStream *CreateTpassStream(std::map<std::string, std::string>& model_path, int thread_num=1);
			
 
				+void CreateTpassOnlineStream(void* tpass_stream);
			
 
				+} // namespace funasr
			
 
				+#endif
			
--- a/funasr/runtime/onnxruntime/src/audio.cpp
+++ b/funasr/runtime/onnxruntime/src/audio.cpp
@@ -132,40 +132,54 @@ class AudioWindow {
 
				     };
			
 
				 };
			
 
				 
			
 
				-AudioFrame::AudioFrame(){};
			
 
				+AudioFrame::AudioFrame(){}
			
 
				 AudioFrame::AudioFrame(int len) : len(len)
			
 
				 {
			
 
				     start = 0;
			
 
				-};
			
 
				-AudioFrame::~AudioFrame(){};
			
 
				+}
			
 
				+AudioFrame::AudioFrame(const AudioFrame &other)
			
 
				+{
			
 
				+    start = other.start;
			
 
				+    end = other.end;
			
 
				+    len = other.len;
			
 
				+    is_final = other.is_final;
			
 
				+}
			
 
				+AudioFrame::AudioFrame(int start, int end, bool is_final):start(start),end(end),is_final(is_final){
			
 
				+    len = end - start;
			
 
				+}
			
 
				+AudioFrame::~AudioFrame(){
			
 
				+    if(data != NULL){
			
 
				+        free(data);
			
 
				+    }
			
 
				+}
			
 
				 int AudioFrame::SetStart(int val)
			
 
				 {
			
 
				     start = val < 0 ? 0 : val;
			
 
				     return start;
			
 
				-};
			
 
				+}
			
 
				 
			
 
				 int AudioFrame::SetEnd(int val)
			
 
				 {
			
 
				     end = val;
			
 
				     len = end - start;
			
 
				     return end;
			
 
				-};
			
 
				+}
			
 
				 
			
 
				 int AudioFrame::GetStart()
			
 
				 {
			
 
				     return start;
			
 
				-};
			
 
				+}
			
 
				 
			
 
				 int AudioFrame::GetLen()
			
 
				 {
			
 
				     return len;
			
 
				-};
			
 
				+}
			
 
				 
			
 
				 int AudioFrame::Disp()
			
 
				 {
			
 
				     LOG(ERROR) << "Not imp!!!!";
			
 
				     return 0;
			
 
				-};
			
 
				+}
			
 
				 
			
 
				 Audio::Audio(int data_type) : data_type(data_type)
			
 
				 {
			
@@ -771,6 +785,55 @@ bool Audio::LoadPcmwav(const char* buf, int n_buf_len, int32_t* sampling_rate)
 
				         return false;
			
 
				 }
			
 
				 
			
 
				+bool Audio::LoadPcmwavOnline(const char* buf, int n_buf_len, int32_t* sampling_rate)
			
 
				+{
			
 
				+    if (speech_data != NULL) {
			
 
				+        free(speech_data);
			
 
				+    }
			
 
				+    if (speech_buff != NULL) {
			
 
				+        free(speech_buff);
			
 
				+    }
			
 
				+    if (speech_char != NULL) {
			
 
				+        free(speech_char);
			
 
				+    }
			
 
				+
			
 
				+    speech_len = n_buf_len / 2;
			
 
				+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
			
 
				+    if (speech_buff)
			
 
				+    {
			
 
				+        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
			
 
				+        memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
			
 
				+
			
 
				+        speech_data = (float*)malloc(sizeof(float) * speech_len);
			
 
				+        memset(speech_data, 0, sizeof(float) * speech_len);
			
 
				+
			
 
				+        float scale = 1;
			
 
				+        if (data_type == 1) {
			
 
				+            scale = 32768;
			
 
				+        }
			
 
				+
			
 
				+        for (int32_t i = 0; i != speech_len; ++i) {
			
 
				+            speech_data[i] = (float)speech_buff[i] / scale;
			
 
				+        }
			
 
				+        
			
 
				+        //resample
			
 
				+        if(*sampling_rate != MODEL_SAMPLE_RATE){
			
 
				+            WavResample(*sampling_rate, speech_data, speech_len);
			
 
				+        }
			
 
				+
			
 
				+        for (int32_t i = 0; i != speech_len; ++i) {
			
 
				+            all_samples.emplace_back(speech_data[i]);
			
 
				+        }
			
 
				+
			
 
				+        AudioFrame* frame = new AudioFrame(speech_len);
			
 
				+        frame_queue.push(frame);
			
 
				+        return true;
			
 
				+
			
 
				+    }
			
 
				+    else
			
 
				+        return false;
			
 
				+}
			
 
				+
			
 
				 bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate)
			
 
				 {
			
 
				     if (speech_data != NULL) {
			
@@ -879,24 +942,25 @@ bool Audio::LoadOthers2Char(const char* filename)
 
				     return true;
			
 
				 }
			
 
				 
			
 
				-int Audio::FetchChunck(float *&dout, int len)
			
 
				+int Audio::FetchTpass(AudioFrame *&frame)
			
 
				 {
			
 
				-    if (offset >= speech_align_len) {
			
 
				-        dout = NULL;
			
 
				-        return S_ERR;
			
 
				-    } else if (offset == speech_align_len - len) {
			
 
				-        dout = speech_data + offset;
			
 
				-        offset = speech_align_len;
			
 
				-        // 临时解决 
			
 
				-        AudioFrame *frame = frame_queue.front();
			
 
				-        frame_queue.pop();
			
 
				-        delete frame;
			
 
				+    if (asr_offline_queue.size() > 0) {
			
 
				+        frame = asr_offline_queue.front();
			
 
				+        asr_offline_queue.pop();
			
 
				+        return 1;
			
 
				+    } else {
			
 
				+        return 0;
			
 
				+    }
			
 
				+}
			
 
				 
			
 
				-        return S_END;
			
 
				+int Audio::FetchChunck(AudioFrame *&frame)
			
 
				+{
			
 
				+    if (asr_online_queue.size() > 0) {
			
 
				+        frame = asr_online_queue.front();
			
 
				+        asr_online_queue.pop();
			
 
				+        return 1;
			
 
				     } else {
			
 
				-        dout = speech_data + offset;
			
 
				-        offset += len;
			
 
				-        return S_MIDDLE;
			
 
				+        return 0;
			
 
				     }
			
 
				 }
			
 
				 
			
@@ -965,7 +1029,6 @@ void Audio::Split(OfflineStream* offline_stream)
 
				 
			
 
				     std::vector<float> pcm_data(speech_data, speech_data+sp_len);
			
 
				     vector<std::vector<int>> vad_segments = (offline_stream->vad_handle)->Infer(pcm_data);
			
 
				-    int seg_sample = MODEL_SAMPLE_RATE/1000;
			
 
				     for(vector<int> segment:vad_segments)
			
 
				     {
			
 
				         frame = new AudioFrame();
			
@@ -978,7 +1041,6 @@ void Audio::Split(OfflineStream* offline_stream)
 
				     }
			
 
				 }
			
 
				 
			
 
				-
			
 
				 void Audio::Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, bool input_finished)
			
 
				 {
			
 
				     AudioFrame *frame;
			
@@ -993,4 +1055,171 @@ void Audio::Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, boo
 
				     vad_segments = vad_obj->Infer(pcm_data, input_finished);
			
 
				 }
			
 
				 
			
 
				+// 2pass
			
 
				+void Audio::Split(VadModel* vad_obj, bool input_finished)
			
 
				+{
			
 
				+    AudioFrame *frame;
			
 
				+
			
 
				+    frame = frame_queue.front();
			
 
				+    frame_queue.pop();
			
 
				+    int sp_len = frame->GetLen();
			
 
				+    delete frame;
			
 
				+    frame = NULL;
			
 
				+
			
 
				+    std::vector<float> pcm_data(speech_data, speech_data+sp_len);
			
 
				+    vector<std::vector<int>> vad_segments = vad_obj->Infer(pcm_data, input_finished);
			
 
				+
			
 
				+    // print vad_segments
			
 
				+    // string seg_out="[";
			
 
				+    // for (int i = 0; i < vad_segments.size(); i++) {
			
 
				+    //     vector<int> inner_vec = vad_segments[i];
			
 
				+    //     if(inner_vec.size() == 0){
			
 
				+    //         continue;
			
 
				+    //     }
			
 
				+    //     seg_out += "[";
			
 
				+    //     for (int j = 0; j < inner_vec.size(); j++) {
			
 
				+    //         seg_out += to_string(inner_vec[j]);
			
 
				+    //         if (j != inner_vec.size() - 1) {
			
 
				+    //             seg_out += ",";
			
 
				+    //         }
			
 
				+    //     }
			
 
				+    //     seg_out += "]";
			
 
				+    //     if (i != vad_segments.size() - 1) {
			
 
				+    //         seg_out += ",";
			
 
				+    //     }
			
 
				+    // }
			
 
				+    // seg_out += "]";
			
 
				+    // LOG(INFO)<<seg_out;
			
 
				+
			
 
				+    speech_end += sp_len/seg_sample;
			
 
				+    if(vad_segments.size() == 0){
			
 
				+        if(speech_start != -1){
			
 
				+            int start = speech_start*seg_sample;
			
 
				+            int end = speech_end*seg_sample;
			
 
				+            int buff_len = end-start;
			
 
				+            int step = ONLINE_STEP;
			
 
				+
			
 
				+            if(buff_len >= step){
			
 
				+                frame = new AudioFrame(step);
			
 
				+                frame->data = (float*)malloc(sizeof(float) * step);
			
 
				+                memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
			
 
				+                asr_online_queue.push(frame);
			
 
				+                frame = NULL;
			
 
				+                speech_start += step/seg_sample;
			
 
				+            }
			
 
				+        }
			
 
				+    }else{
			
 
				+        for(auto vad_segment: vad_segments){
			
 
				+            int speech_start_i=-1, speech_end_i=-1;
			
 
				+            if(vad_segment[0] != -1){
			
 
				+                speech_start_i = vad_segment[0];
			
 
				+            }
			
 
				+            if(vad_segment[1] != -1){
			
 
				+                speech_end_i = vad_segment[1];
			
 
				+            }
			
 
				+
			
 
				+            // [1, 100]
			
 
				+            if(speech_start_i != -1 && speech_end_i != -1){
			
 
				+                int start = speech_start_i*seg_sample;
			
 
				+                int end = speech_end_i*seg_sample;
			
 
				+
			
 
				+                frame = new AudioFrame(end-start);
			
 
				+                frame->is_final = true;
			
 
				+                frame->data = (float*)malloc(sizeof(float) * (end-start));
			
 
				+                memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
			
 
				+                asr_online_queue.push(frame);
			
 
				+                frame = NULL;
			
 
				+
			
 
				+                frame = new AudioFrame(end-start);
			
 
				+                frame->is_final = true;
			
 
				+                frame->data = (float*)malloc(sizeof(float) * (end-start));
			
 
				+                memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
			
 
				+                asr_offline_queue.push(frame);
			
 
				+                frame = NULL;
			
 
				+
			
 
				+                speech_start = -1;
			
 
				+                speech_offline_start = -1;
			
 
				+            // [70, -1]
			
 
				+            }else if(speech_start_i != -1){
			
 
				+                speech_start = speech_start_i;
			
 
				+                speech_offline_start = speech_start_i;
			
 
				+                
			
 
				+                int start = speech_start*seg_sample;
			
 
				+                int end = speech_end*seg_sample;
			
 
				+                int buff_len = end-start;
			
 
				+                int step = ONLINE_STEP;
			
 
				+
			
 
				+                if(buff_len >= step){
			
 
				+                    frame = new AudioFrame(step);
			
 
				+                    frame->data = (float*)malloc(sizeof(float) * step);
			
 
				+                    memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
			
 
				+                    asr_online_queue.push(frame);
			
 
				+                    frame = NULL;
			
 
				+                    speech_start += step/seg_sample;
			
 
				+                }
			
 
				+
			
 
				+            }else if(speech_end_i != -1){ // [-1,100]
			
 
				+                if(speech_start == -1 or speech_offline_start == -1){
			
 
				+                    LOG(ERROR) <<"Vad start is null while vad end is available." ;
			
 
				+                    exit(-1);
			
 
				+                }
			
 
				+
			
 
				+                int start = speech_start*seg_sample;
			
 
				+                int offline_start = speech_offline_start*seg_sample;
			
 
				+                int end = speech_end_i*seg_sample;
			
 
				+                int buff_len = end-start;
			
 
				+                int step = ONLINE_STEP;
			
 
				+
			
 
				+                frame = new AudioFrame(end-offline_start);
			
 
				+                frame->is_final = true;
			
 
				+                frame->data = (float*)malloc(sizeof(float) * (end-offline_start));
			
 
				+                memcpy(frame->data, all_samples.data()+offline_start-offset, (end-offline_start)*sizeof(float));
			
 
				+                asr_offline_queue.push(frame);
			
 
				+                frame = NULL;
			
 
				+
			
 
				+                if(buff_len > 0){
			
 
				+                    for (int sample_offset = 0; sample_offset < buff_len; sample_offset += std::min(step, buff_len - sample_offset)) {
			
 
				+                        bool is_final = false;
			
 
				+                        if (sample_offset + step >= buff_len - 1) {
			
 
				+                            step = buff_len - sample_offset;
			
 
				+                            is_final = true;
			
 
				+                        }
			
 
				+                        frame = new AudioFrame(step);
			
 
				+                        frame->is_final = is_final;
			
 
				+                        frame->data = (float*)malloc(sizeof(float) * step);
			
 
				+                        memcpy(frame->data, all_samples.data()+start-offset+sample_offset, step*sizeof(float));
			
 
				+                        asr_online_queue.push(frame);
			
 
				+                        frame = NULL;
			
 
				+                    }
			
 
				+                }else{
			
 
				+                    frame = new AudioFrame(0);
			
 
				+                    frame->is_final = true;
			
 
				+                    asr_online_queue.push(frame);
			
 
				+                    frame = NULL;
			
 
				+                }
			
 
				+                speech_start = -1;
			
 
				+                speech_offline_start = -1;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    // erase all_samples
			
 
				+    int vector_cache = MODEL_SAMPLE_RATE*2;
			
 
				+    if(speech_offline_start == -1){
			
 
				+        if(all_samples.size() > vector_cache){
			
 
				+            int erase_num = all_samples.size() - vector_cache;
			
 
				+            all_samples.erase(all_samples.begin(), all_samples.begin()+erase_num);
			
 
				+            offset += erase_num;
			
 
				+        }
			
 
				+    }else{
			
 
				+        int offline_start = speech_offline_start*seg_sample;
			
 
				+         if(offline_start-offset > vector_cache){
			
 
				+            int erase_num = offline_start-offset - vector_cache;
			
 
				+            all_samples.erase(all_samples.begin(), all_samples.begin()+erase_num);
			
 
				+            offset += erase_num;
			
 
				+        }       
			
 
				+    }
			
 
				+    
			
 
				+}
			
 
				+
			
 
				 } // namespace funasr
			
--- a/funasr/runtime/onnxruntime/src/commonfunc.h
+++ b/funasr/runtime/onnxruntime/src/commonfunc.h
@@ -5,7 +5,8 @@ namespace funasr {
 
				 typedef struct
			
 
				 {
			
 
				     std::string msg;
			
 
				-    float  snippet_time;
			
 
				+    std::string tpass_msg;
			
 
				+    float snippet_time;
			
 
				 }FUNASR_RECOG_RESULT;
			
 
				 
			
 
				 typedef struct
			
--- a/funasr/runtime/onnxruntime/src/fsmn-vad-online.cpp
+++ b/funasr/runtime/onnxruntime/src/fsmn-vad-online.cpp
@@ -175,6 +175,9 @@ void FsmnVadOnline::InitOnline(std::shared_ptr<Ort::Session> &vad_session,
 
				     vad_silence_duration_ = vad_silence_duration;
			
 
				     vad_max_len_ = vad_max_len;
			
 
				     vad_speech_noise_thres_ = vad_speech_noise_thres;
			
 
				+
			
 
				+    // 2pass
			
 
				+    audio_handle = make_unique<Audio>(1);
			
 
				 }
			
 
				 
			
 
				 FsmnVadOnline::~FsmnVadOnline() {
			
--- a/funasr/runtime/onnxruntime/src/fsmn-vad-online.h
+++ b/funasr/runtime/onnxruntime/src/fsmn-vad-online.h
@@ -21,6 +21,8 @@ public:
 
				     std::vector<std::vector<int>> Infer(std::vector<float> &waves, bool input_finished);
			
 
				     void ExtractFeats(float sample_rate, vector<vector<float>> &vad_feats, vector<float> &waves, bool input_finished);
			
 
				     void Reset();
			
 
				+    // 2pass
			
 
				+    std::unique_ptr<Audio> audio_handle = nullptr;
			
 
				 
			
 
				 private:
			
 
				     E2EVadModel vad_scorer = E2EVadModel();
			
--- a/funasr/runtime/onnxruntime/src/funasrruntime.cpp
+++ b/funasr/runtime/onnxruntime/src/funasrruntime.cpp
@@ -5,9 +5,9 @@ extern "C" {
 
				 #endif
			
 
				 
			
 
				 	// APIs for Init
			
 
				-	_FUNASRAPI FUNASR_HANDLE  FunASRInit(std::map<std::string, std::string>& model_path, int thread_num, int mode)
			
 
				+	_FUNASRAPI FUNASR_HANDLE  FunASRInit(std::map<std::string, std::string>& model_path, int thread_num, ASR_TYPE type)
			
 
				 	{
			
 
				-		funasr::Model* mm = funasr::CreateModel(model_path, thread_num, mode);
			
 
				+		funasr::Model* mm = funasr::CreateModel(model_path, thread_num, type);
			
 
				 		return mm;
			
 
				 	}
			
 
				 
			
@@ -41,6 +41,17 @@ extern "C" {
 
				 		return mm;
			
 
				 	}
			
 
				 
			
 
				+	// Builds a 2pass stream and hands it back as an opaque handle.
				+	//   model_path : key/value map of model locations and options, forwarded unchanged.
				+	//   thread_num : forwarded unchanged to funasr::CreateTpassStream.
				+	// The handle is destroyed with FunTpassUninit.
				+	_FUNASRAPI FUNASR_HANDLE  FunTpassInit(std::map<std::string, std::string>& model_path, int thread_num)
				+	{
				+		return funasr::CreateTpassStream(model_path, thread_num);
				+	}
			
 
				+
			
 
				+	// Creates the online (streaming) handles on an existing 2pass stream.
				+	//   tpass_handle : handle returned by FunTpassInit; a null handle is ignored.
				+	_FUNASRAPI void FunTpassOnlineInit(FUNASR_HANDLE tpass_handle)
				+	{
				+		// funasr::CreateTpassOnlineStream dereferences the handle without
				+		// checking it, so reject null here like the other handle-taking APIs.
				+		if (!tpass_handle)
				+			return;
				+
				+		funasr::CreateTpassOnlineStream(tpass_handle);
				+	}
			
 
				+
			
 
				 	// APIs for ASR Infer
			
 
				 	_FUNASRAPI FUNASR_RESULT FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool input_finished, int sampling_rate, std::string wav_format)
			
 
				 	{
			
@@ -297,6 +308,71 @@ extern "C" {
 
				 		return p_result;
			
 
				 	}
			
 
				 
			
 
				+	// APIs for 2pass-stream Infer
			
 
				+	// Runs one buffer of PCM audio through the 2pass pipeline.
				+	//   handle         : 2pass stream; its online VAD, online ASR and ASR handles must all be set.
				+	//   sz_buf, n_len  : raw audio bytes and their length, passed to Audio::LoadPcmwavOnline.
				+	//   mode, fn_callback : accepted for signature parity; not read by this function.
				+	//   input_finished : forwarded to Audio::Split; when true the audio index is reset afterwards.
				+	//   sampling_rate  : passed by address to Audio::LoadPcmwavOnline.
				+	//   wav_format     : only "pcm"/"PCM" is accepted; any other value logs an error and exits the process.
				+	// Returns a newly allocated FUNASR_RECOG_RESULT (msg = text from the online model,
				+	// tpass_msg = text from asr_handle), or nullptr when a required handle is missing
				+	// or the audio cannot be loaded.
				+	_FUNASRAPI FUNASR_RESULT FunTpassInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool input_finished, int sampling_rate, std::string wav_format)
				+	{
				+		funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
				+		if (!tpass_stream)
				+			return nullptr;
				+		
				+		funasr::VadModel* vad_online_handle = (tpass_stream->vad_online_handle).get();
				+		if (!vad_online_handle)
				+			return nullptr;
				+
				+		// audio_handle is a nullable unique_ptr; fail cleanly instead of
				+		// dereferencing a null Audio below.
				+		funasr::Audio* audio = ((funasr::FsmnVadOnline*)vad_online_handle)->audio_handle.get();
				+		if (!audio)
				+			return nullptr;
				+
				+		funasr::Model* asr_online_handle = (tpass_stream->asr_online_handle).get();
				+		if (!asr_online_handle)
				+			return nullptr;
				+		
				+		funasr::Model* asr_handle = (tpass_stream->asr_handle).get();
				+		if (!asr_handle)
				+			return nullptr;
				+
				+		if(wav_format == "pcm" || wav_format == "PCM"){
				+			if (!audio->LoadPcmwavOnline(sz_buf, n_len, &sampling_rate))
				+				return nullptr;
				+		}else{
				+			// if (!audio->FfmpegLoad(sz_buf, n_len))
				+			// 	return nullptr;
				+			LOG(ERROR) <<"Wrong wav_format: " << wav_format ;
				+			exit(-1);
				+		}
				+
				+		funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
				+		p_result->snippet_time = audio->GetTimeLen();
				+		// Nothing buffered yet: return the empty result without running VAD/ASR.
				+		if(p_result->snippet_time == 0){
				+			return p_result;
				+		}
				+
				+		audio->Split(vad_online_handle, input_finished);
				+
				+		// First pass: drain the chunks queued for the online model.
				+		funasr::AudioFrame* frame = NULL;
				+		while(audio->FetchChunck(frame) > 0){
				+			// Check the frame before touching it (the original checked only after use).
				+			if(frame == NULL){
				+				break;
				+			}
				+			string msg = asr_online_handle->Forward(frame->data, frame->len, frame->is_final);
				+			p_result->msg += msg;
				+			delete frame;
				+			frame = NULL;
				+		}
				+
				+		// Second pass: drain the segments queued for asr_handle.
				+		while(audio->FetchTpass(frame) > 0){
				+			if(frame == NULL){
				+				break;
				+			}
				+			string msg = asr_handle->Forward(frame->data, frame->len, frame->is_final);
				+			p_result->tpass_msg += msg;
				+			delete frame;
				+			frame = NULL;
				+		}
				+
				+		if(input_finished){
				+			audio->ResetIndex();
				+		}
				+
				+		return p_result;
				+	}
			
 
				+
			
 
				 	_FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result)
			
 
				 	{
			
 
				 		if (!result)
			
@@ -332,6 +408,15 @@ extern "C" {
 
				 		return p_result->msg.c_str();
			
 
				 	}
			
 
				 
			
 
				+	// Returns the tpass_msg text of a recognition result as a C string.
				+	//   result  : result object; nullptr is returned when it is null.
				+	//   n_index : not read by this function.
				+	// The returned pointer refers to the string stored inside the result.
				+	_FUNASRAPI const char* FunASRGetTpassResult(FUNASR_RESULT result,int n_index)
				+	{
				+		funasr::FUNASR_RECOG_RESULT* recog = (funasr::FUNASR_RECOG_RESULT*)result;
				+		if (recog)
				+			return recog->tpass_msg.c_str();
				+
				+		return nullptr;
				+	}
			
 
				+
			
 
				 	_FUNASRAPI const char* CTTransformerGetResult(FUNASR_RESULT result,int n_index)
			
 
				 	{
			
 
				 		funasr::FUNASR_PUNC_RESULT * p_result = (funasr::FUNASR_PUNC_RESULT*)result;
			
@@ -420,6 +505,16 @@ extern "C" {
 
				 		delete offline_stream;
			
 
				 	}
			
 
				 
			
 
				+	// Destroys a 2pass stream handle. A null handle is a no-op.
				+	_FUNASRAPI void FunTpassUninit(FUNASR_HANDLE handle)
				+	{
				+		// delete on a null pointer does nothing, so no explicit guard is required.
				+		delete (funasr::TpassStream*)handle;
				+	}
			
 
				+
			
 
				 #ifdef __cplusplus 
			
 
				 
			
 
				 }
			
--- a/funasr/runtime/onnxruntime/src/model.cpp
+++ b/funasr/runtime/onnxruntime/src/model.cpp
@@ -1,9 +1,10 @@
 
				 #include "precomp.h"
			
 
				 
			
 
				 namespace funasr {
			
 
				-Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_num, int mode)
			
 
				+Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_num, ASR_TYPE type)
			
 
				 {
			
 
				-    if(mode == 0){
			
 
				+    // offline
			
 
				+    if(type == ASR_OFFLINE){
			
 
				         string am_model_path;
			
 
				         string am_cmvn_path;
			
 
				         string am_config_path;
			
@@ -19,7 +20,7 @@ Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_nu
 
				         mm = new Paraformer();
			
 
				         mm->InitAsr(am_model_path, am_cmvn_path, am_config_path, thread_num);
			
 
				         return mm;
			
 
				-    }else if(mode == 1){
			
 
				+    }else if(type == ASR_ONLINE){
			
 
				         // online
			
 
				         string en_model_path;
			
 
				         string de_model_path;
			
@@ -39,6 +40,9 @@ Model *CreateModel(std::map<std::string, std::string>& model_path, int thread_nu
 
				         mm = new Paraformer();
			
 
				         mm->InitAsr(en_model_path, de_model_path, am_cmvn_path, am_config_path, thread_num);
			
 
				         return mm;
			
 
				+    }else{
			
 
				+        LOG(ERROR)<<"Wrong ASR_TYPE : " << type;
			
 
				+        exit(-1);
			
 
				     }
			
 
				 }
			
 
				 
			
--- a/funasr/runtime/onnxruntime/src/paraformer.cpp
+++ b/funasr/runtime/onnxruntime/src/paraformer.cpp
@@ -33,7 +33,7 @@ void Paraformer::InitAsr(const std::string &am_model, const std::string &am_cmvn
 
				     session_options_.DisableCpuMemArena();
			
 
				 
			
 
				     try {
			
 
				-        m_session = std::make_unique<Ort::Session>(env_, am_model.c_str(), session_options_);
			
 
				+        m_session_ = std::make_unique<Ort::Session>(env_, am_model.c_str(), session_options_);
			
 
				         LOG(INFO) << "Successfully load model from " << am_model;
			
 
				     } catch (std::exception const &e) {
			
 
				         LOG(ERROR) << "Error when load am onnx model: " << e.what();
			
@@ -41,14 +41,14 @@ void Paraformer::InitAsr(const std::string &am_model, const std::string &am_cmvn
 
				     }
			
 
				 
			
 
				     string strName;
			
 
				-    GetInputName(m_session.get(), strName);
			
 
				+    GetInputName(m_session_.get(), strName);
			
 
				     m_strInputNames.push_back(strName.c_str());
			
 
				-    GetInputName(m_session.get(), strName,1);
			
 
				+    GetInputName(m_session_.get(), strName,1);
			
 
				     m_strInputNames.push_back(strName);
			
 
				     
			
 
				-    GetOutputName(m_session.get(), strName);
			
 
				+    GetOutputName(m_session_.get(), strName);
			
 
				     m_strOutputNames.push_back(strName);
			
 
				-    GetOutputName(m_session.get(), strName,1);
			
 
				+    GetOutputName(m_session_.get(), strName,1);
			
 
				     m_strOutputNames.push_back(strName);
			
 
				 
			
 
				     for (auto& item : m_strInputNames)
			
@@ -136,6 +136,37 @@ void Paraformer::InitAsr(const std::string &en_model, const std::string &de_mode
 
				     LoadCmvn(am_cmvn.c_str());
			
 
				 }
			
 
				 
			
 
				+// 2pass
			
 
				+// Initializes both models for 2pass use: the online encoder/decoder via the
				+// online InitAsr overload, then the offline session stored in m_session_.
				+//   am_model            : path of the offline ONNX model.
				+//   en_model, de_model  : paths of the online encoder and decoder models.
				+//   am_cmvn, am_config  : forwarded to the online overload.
				+//   thread_num          : forwarded to the online overload.
				+// Terminates the process with a non-zero status if the offline model cannot be loaded.
				+void Paraformer::InitAsr(const std::string &am_model, const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){
				+    // online
				+    InitAsr(en_model, de_model, am_cmvn, am_config, thread_num);
				+
				+    // offline
				+    try {
				+        m_session_ = std::make_unique<Ort::Session>(env_, am_model.c_str(), session_options_);
				+        LOG(INFO) << "Successfully load model from " << am_model;
				+    } catch (std::exception const &e) {
				+        LOG(ERROR) << "Error when load am onnx model: " << e.what();
				+        // A load failure is fatal; exit(0) would report success to the parent process.
				+        exit(-1);
				+    }
				+
				+    // Collect the first two input and output names of the offline session.
				+    string strName;
				+    GetInputName(m_session_.get(), strName);
				+    m_strInputNames.push_back(strName);
				+    GetInputName(m_session_.get(), strName,1);
				+    m_strInputNames.push_back(strName);
				+    
				+    GetOutputName(m_session_.get(), strName);
				+    m_strOutputNames.push_back(strName);
				+    GetOutputName(m_session_.get(), strName,1);
				+    m_strOutputNames.push_back(strName);
				+
				+    // The raw-pointer views are built only after all names are stored, so the
				+    // c_str() pointers are not invalidated by later push_backs in this function.
				+    for (auto& item : m_strInputNames)
				+        m_szInputNames.push_back(item.c_str());
				+    for (auto& item : m_strOutputNames)
				+        m_szOutputNames.push_back(item.c_str());
				+}
			
 
				+
			
 
				 void Paraformer::LoadOnlineConfigFromYaml(const char* filename){
			
 
				 
			
 
				     YAML::Node config;
			
@@ -332,7 +363,7 @@ string Paraformer::Forward(float* din, int len, bool input_finished)
 
				 
			
 
				     string result;
			
 
				     try {
			
 
				-        auto outputTensor = m_session->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), input_onnx.size(), m_szOutputNames.data(), m_szOutputNames.size());
			
 
				+        auto outputTensor = m_session_->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), input_onnx.size(), m_szOutputNames.data(), m_szOutputNames.size());
			
 
				         std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
			
 
				 
			
 
				         int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>());
			
--- a/funasr/runtime/onnxruntime/src/paraformer.h
+++ b/funasr/runtime/onnxruntime/src/paraformer.h
@@ -30,6 +30,8 @@ namespace funasr {
 
				         void InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num);
			
 
				         // online
			
 
				         void InitAsr(const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num);
			
 
				+        // 2pass
			
 
				+        void InitAsr(const std::string &am_model, const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num);
			
 
				         void Reset();
			
 
				         vector<float> FbankKaldi(float sample_rate, const float* waves, int len);
			
 
				         string Forward(float* din, int len, bool input_finished=true);
			
@@ -42,7 +44,8 @@ namespace funasr {
 
				         int lfr_m = PARA_LFR_M;
			
 
				         int lfr_n = PARA_LFR_N;
			
 
				 
			
 
				-        std::shared_ptr<Ort::Session> m_session = nullptr;
			
 
				+        // paraformer-offline
			
 
				+        std::shared_ptr<Ort::Session> m_session_ = nullptr;
			
 
				         Ort::Env env_;
			
 
				         Ort::SessionOptions session_options_;
			
 
				 
			
@@ -50,7 +53,7 @@ namespace funasr {
 
				         vector<const char*> m_szInputNames;
			
 
				         vector<const char*> m_szOutputNames;
			
 
				 
			
 
				-        //paraformer-online
			
 
				+        // paraformer-online
			
 
				         std::shared_ptr<Ort::Session> encoder_session_ = nullptr;
			
 
				         std::shared_ptr<Ort::Session> decoder_session_ = nullptr;
			
 
				         vector<string> en_strInputNames, en_strOutputNames;
			
--- a/funasr/runtime/onnxruntime/src/precomp.h
+++ b/funasr/runtime/onnxruntime/src/precomp.h
@@ -33,19 +33,19 @@ using namespace std;
 
				 #include "model.h"
			
 
				 #include "vad-model.h"
			
 
				 #include "punc-model.h"
			
 
				-#include "offline-stream.h"
			
 
				 #include "tokenizer.h"
			
 
				 #include "ct-transformer.h"
			
 
				 #include "ct-transformer-online.h"
			
 
				 #include "e2e-vad.h"
			
 
				 #include "fsmn-vad.h"
			
 
				-#include "fsmn-vad-online.h"
			
 
				 #include "vocab.h"
			
 
				 #include "audio.h"
			
 
				+#include "fsmn-vad-online.h"
			
 
				 #include "tensor.h"
			
 
				 #include "util.h"
			
 
				 #include "resample.h"
			
 
				 #include "paraformer.h"
			
 
				 #include "paraformer-online.h"
			
 
				 #include "offline-stream.h"
			
 
				+#include "tpass-stream.h"
			
 
				 #include "funasrruntime.h"
			
--- a/funasr/runtime/onnxruntime/src/tpass-stream.cpp
+++ b/funasr/runtime/onnxruntime/src/tpass-stream.cpp
@@ -0,0 +1,103 @@
 
				+#include "precomp.h"
			
 
				+#include <unistd.h>
			
 
				+
			
 
				+namespace funasr {
			
 
				+// Loads the models of a 2pass stream from the directories named in model_path.
				+//   - VAD and PUNC are optional: each is loaded only when its directory key is
				+//     present and all of its files exist; otherwise an INFO line is logged.
				+//   - The AM model is mandatory: both the offline and the online model
				+//     directories must be present, otherwise the process exits.
				+//   thread_num is forwarded to every Init* call.
				+TpassStream::TpassStream(std::map<std::string, std::string>& model_path, int thread_num)
				+{
				+    const auto not_found = model_path.end();
				+    // True when `key` is present and its value is exactly "true".
				+    auto flag_enabled = [&](const std::string& key) {
				+        const auto it = model_path.find(key);
				+        return it != not_found && it->second == "true";
				+    };
				+
				+    // VAD model
				+    const auto vad_it = model_path.find(VAD_DIR);
				+    if(vad_it != not_found){
				+        std::string& vad_dir = vad_it->second;
				+
				+        string vad_model_path = PathAppend(vad_dir, MODEL_NAME);
				+        if(flag_enabled(VAD_QUANT)){
				+            vad_model_path = PathAppend(vad_dir, QUANT_MODEL_NAME);
				+        }
				+        string vad_cmvn_path = PathAppend(vad_dir, VAD_CMVN_NAME);
				+        string vad_config_path = PathAppend(vad_dir, VAD_CONFIG_NAME);
				+
				+        const bool vad_files_present = access(vad_model_path.c_str(), F_OK) == 0 &&
				+                                       access(vad_cmvn_path.c_str(), F_OK) == 0 &&
				+                                       access(vad_config_path.c_str(), F_OK) == 0;
				+        if(vad_files_present){
				+            vad_handle = make_unique<FsmnVad>();
				+            vad_handle->InitVad(vad_model_path, vad_cmvn_path, vad_config_path, thread_num);
				+            use_vad = true;
				+        }else{
				+            LOG(INFO) << "VAD model file is not exist, skip load vad model.";
				+        }
				+    }
				+
				+    // AM model (2pass needs both directories)
				+    const auto offline_it = model_path.find(OFFLINE_MODEL_DIR);
				+    const auto online_it = model_path.find(ONLINE_MODEL_DIR);
				+    if(offline_it == not_found || online_it == not_found){
				+        LOG(ERROR) <<"Can not find offline-model-dir or online-model-dir";
				+        exit(-1);
				+    }
				+    {
				+        std::string& offline_dir = offline_it->second;
				+        std::string& online_dir = online_it->second;
				+
				+        string am_model_path = PathAppend(offline_dir, MODEL_NAME);
				+        string en_model_path = PathAppend(online_dir, ENCODER_NAME);
				+        string de_model_path = PathAppend(online_dir, DECODER_NAME);
				+        if(flag_enabled(QUANTIZE)){
				+            am_model_path = PathAppend(offline_dir, QUANT_MODEL_NAME);
				+            en_model_path = PathAppend(online_dir, QUANT_ENCODER_NAME);
				+            de_model_path = PathAppend(online_dir, QUANT_DECODER_NAME);
				+        }
				+        // cmvn and config are read from the online model directory.
				+        string am_cmvn_path = PathAppend(online_dir, AM_CMVN_NAME);
				+        string am_config_path = PathAppend(online_dir, AM_CONFIG_NAME);
				+
				+        asr_handle = make_unique<Paraformer>();
				+        asr_handle->InitAsr(am_model_path, en_model_path, de_model_path, am_cmvn_path, am_config_path, thread_num);
				+    }
				+
				+    // PUNC model
				+    const auto punc_it = model_path.find(PUNC_DIR);
				+    if(punc_it != not_found){
				+        std::string& punc_dir = punc_it->second;
				+
				+        string punc_model_path = PathAppend(punc_dir, MODEL_NAME);
				+        if(flag_enabled(PUNC_QUANT)){
				+            punc_model_path = PathAppend(punc_dir, QUANT_MODEL_NAME);
				+        }
				+        string punc_config_path = PathAppend(punc_dir, PUNC_CONFIG_NAME);
				+
				+        const bool punc_files_present = access(punc_model_path.c_str(), F_OK) == 0 &&
				+                                        access(punc_config_path.c_str(), F_OK) == 0;
				+        if(punc_files_present){
				+            punc_online_handle = make_unique<CTTransformerOnline>();
				+            punc_online_handle->InitPunc(punc_model_path, punc_config_path, thread_num);
				+            use_punc = true;
				+        }else{
				+            LOG(INFO) << "PUNC model file is not exist, skip load punc model.";
				+        }
				+    }
				+}
			
 
				+
			
 
				+// Heap-allocates a TpassStream built from model_path and thread_num.
				+// The caller receives the raw pointer and is responsible for deleting it.
				+TpassStream *CreateTpassStream(std::map<std::string, std::string>& model_path, int thread_num)
				+{
				+    return new TpassStream(model_path, thread_num);
				+}
			
 
				+
			
 
				+// Creates the online handles of a TpassStream from its already-loaded models.
				+//   tpass_stream : pointer to a funasr::TpassStream (dereferenced without a null check).
				+// Logs an error and exits the process when the stream has no asr_handle.
				+void CreateTpassOnlineStream(void* tpass_stream)
				+{
				+    funasr::TpassStream* stream = (funasr::TpassStream*)tpass_stream;
				+
				+    // VAD is optional: wrap it only when it was loaded.
				+    if(stream->vad_handle){
				+        stream->vad_online_handle = make_unique<FsmnVadOnline>((FsmnVad*)(stream->vad_handle).get());
				+    }
				+
				+    // The ASR model is mandatory.
				+    if(!stream->asr_handle){
				+        LOG(ERROR)<<"asr_handle is null";
				+        exit(-1);
				+    }
				+    stream->asr_online_handle = make_unique<ParaformerOnline>((Paraformer*)(stream->asr_handle).get());
				+}
			
 
				+
			
 
				+} // namespace funasr