2 лет назад · f973420064
--- a/funasr/runtime/onnxruntime/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/CMakeLists.txt
@@ -2,24 +2,27 @@ cmake_minimum_required(VERSION 3.10)
 
															 project(FunASRonnx)
														
 
															-set(CMAKE_CXX_STANDARD 11)
														
 
															+# set(CMAKE_CXX_STANDARD 11)
														
 
															+set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
														
 
															 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
														
 
															-# for onnxruntime
														
 
															+include(TestBigEndian)
														
 
															+test_big_endian(BIG_ENDIAN)
														
 
															+if(BIG_ENDIAN)
														
 
															+    message("Big endian system")
														
 
															+else()
														
 
															+    message("Little endian system")
														
 
															+endif()
														
 
															+# for onnxruntime
														
 
															 IF(WIN32)
														
 
															-
														
 
															-
														
 
															 	if(CMAKE_CL_64)
														
 
															 		link_directories(${ONNXRUNTIME_DIR}\\lib)
														
 
															 	else()
														
 
															 		add_definitions(-D_WIN_X86)
														
 
															 	endif()
														
 
															 ELSE()
														
 
															-
														
 
															-
														
 
															-link_directories(${ONNXRUNTIME_DIR}/lib)
														
 
															-
														
 
															+    link_directories(${ONNXRUNTIME_DIR}/lib)
														
 
															 endif()
														
 
															 add_subdirectory("./third_party/yaml-cpp")
														
--- a/funasr/runtime/onnxruntime/include/Audio.h
+++ b/funasr/runtime/onnxruntime/include/Audio.h
@@ -6,6 +6,13 @@
 
															 #include <queue>
														
 
															 #include <stdint.h>
														
 
															+#ifndef model_sample_rate
														
 
															+#define model_sample_rate 16000
														
 
															+#endif
														
 
															+#ifndef WAV_HEADER_SIZE
														
 
															+#define WAV_HEADER_SIZE 44
														
 
															+#endif
														
 
															+
														
 
															 using namespace std;
														
 
															 class AudioFrame {
														
@@ -32,7 +39,6 @@ class Audio {
 
															     int16_t *speech_buff;
														
 
															     int speech_len;
														
 
															     int speech_align_len;
														
 
															-    int16_t sample_rate;
														
 
															     int offset;
														
 
															     float align_size;
														
 
															     int data_type;
														
@@ -43,10 +49,11 @@ class Audio {
 
															     Audio(int data_type, int size);
														
 
															     ~Audio();
														
 
															     void disp();
														
 
															-    bool loadwav(const char* filename);
														
 
															-    bool loadwav(const char* buf, int nLen);
														
 
															-    bool loadpcmwav(const char* buf, int nFileLen);
														
 
															-    bool loadpcmwav(const char* filename);
														
 
															+    bool loadwav(const char* filename, int32_t* sampling_rate);
														
 
															+    void wavResample(int32_t sampling_rate, const float *waveform, int32_t n);
														
 
															+    bool loadwav(const char* buf, int nLen, int32_t* sampling_rate);
														
 
															+    bool loadpcmwav(const char* buf, int nFileLen, int32_t* sampling_rate);
														
 
															+    bool loadpcmwav(const char* filename, int32_t* sampling_rate);
														
 
															     int fetch_chunck(float *&dout, int len);
														
 
															     int fetch(float *&dout, int &len, int &flag);
														
 
															     void padding();
														
--- a/funasr/runtime/onnxruntime/include/libfunasrapi.h
+++ b/funasr/runtime/onnxruntime/include/libfunasrapi.h
@@ -55,9 +55,9 @@ _FUNASRAPI FUNASR_HANDLE  FunASRInit(const char* szModelDir, int nThread, bool q
 
															 // if not give a fnCallback ,it should be NULL 
														
 
															 _FUNASRAPI FUNASR_RESULT	FunASRRecogBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
														
 
															-_FUNASRAPI FUNASR_RESULT	FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
														
 
															+_FUNASRAPI FUNASR_RESULT	FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
														
 
															-_FUNASRAPI FUNASR_RESULT	FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
														
 
															+_FUNASRAPI FUNASR_RESULT	FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
														
 
															 _FUNASRAPI FUNASR_RESULT	FunASRRecogFile(FUNASR_HANDLE handle, const char* szWavfile, FUNASR_MODE Mode, QM_CALLBACK fnCallback);
														
--- a/funasr/runtime/onnxruntime/src/Audio.cpp
+++ b/funasr/runtime/onnxruntime/src/Audio.cpp
@@ -3,11 +3,96 @@
 
															 #include <stdio.h>
														
 
															 #include <stdlib.h>
														
 
															 #include <string.h>
														
 
															+#include <fstream>
														
 
															+#include <assert.h>
														
 
															 #include "Audio.h"
														
 
															+#include "precomp.h"
														
 
															 using namespace std;
														
 
															+// see http://soundfile.sapp.org/doc/WaveFormat/
														
 
															+// Note: We assume little endian here
														
 
															+struct WaveHeader {
														
 
															+  bool Validate() const {
														
 
															+    //                 F F I R
														
 
															+    if (chunk_id != 0x46464952) {
														
 
															+      printf("Expected chunk_id RIFF. Given: 0x%08x\n", chunk_id);
														
 
															+      return false;
														
 
															+    }
														
 
															+    //               E V A W
														
 
															+    if (format != 0x45564157) {
														
 
															+      printf("Expected format WAVE. Given: 0x%08x\n", format);
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+    if (subchunk1_id != 0x20746d66) {
														
 
															+      printf("Expected subchunk1_id 0x20746d66. Given: 0x%08x\n",
														
 
															+                       subchunk1_id);
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+    if (subchunk1_size != 16) {  // 16 for PCM
														
 
															+      printf("Expected subchunk1_size 16. Given: %d\n",
														
 
															+                       subchunk1_size);
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+    if (audio_format != 1) {  // 1 for PCM
														
 
															+      printf("Expected audio_format 1. Given: %d\n", audio_format);
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+    if (num_channels != 1) {  // we support only single channel for now
														
 
															+      printf("Expected single channel. Given: %d\n", num_channels);
														
 
															+      return false;
														
 
															+    }
														
 
															+    if (byte_rate != (sample_rate * num_channels * bits_per_sample / 8)) {
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+    if (block_align != (num_channels * bits_per_sample / 8)) {
														
 
															+      return false;
														
 
															+    }
														
 
															+
														
 
															+    if (bits_per_sample != 16) {  // we support only 16 bits per sample
														
 
															+      printf("Expected bits_per_sample 16. Given: %d\n",
														
 
															+                       bits_per_sample);
														
 
															+      return false;
														
 
															+    }
														
 
															+    return true;
														
 
															+  }
														
 
															+
														
 
															+  // See https://en.wikipedia.org/wiki/WAV#Metadata and
														
 
															+  // https://www.robotplanet.dk/audio/wav_meta_data/riff_mci.pdf
														
 
															+  void SeekToDataChunk(std::istream &is) {
														
 
															+    //                              a t a d
														
 
															+    while (is && subchunk2_id != 0x61746164) {
														
 
															+      // const char *p = reinterpret_cast<const char *>(&subchunk2_id);
														
 
															+      // printf("Skip chunk (%x): %c%c%c%c of size: %d\n", subchunk2_id, p[0],
														
 
															+      //        p[1], p[2], p[3], subchunk2_size);
														
 
															+      is.seekg(subchunk2_size, std::istream::cur);
														
 
															+      is.read(reinterpret_cast<char *>(&subchunk2_id), sizeof(int32_t));
														
 
															+      is.read(reinterpret_cast<char *>(&subchunk2_size), sizeof(int32_t));
														
 
															+    }
														
 
															+  }
														
 
															+
														
 
															+  int32_t chunk_id;
														
 
															+  int32_t chunk_size;
														
 
															+  int32_t format;
														
 
															+  int32_t subchunk1_id;
														
 
															+  int32_t subchunk1_size;
														
 
															+  int16_t audio_format;
														
 
															+  int16_t num_channels;
														
 
															+  int32_t sample_rate;
														
 
															+  int32_t byte_rate;
														
 
															+  int16_t block_align;
														
 
															+  int16_t bits_per_sample;
														
 
															+  int32_t subchunk2_id;    // a tag of this chunk
														
 
															+  int32_t subchunk2_size;  // size of subchunk2
														
 
															+};
														
 
															+static_assert(sizeof(WaveHeader) == WAV_HEADER_SIZE, "");
														
 
															+
														
 
															 class AudioWindow {
														
 
															   private:
														
 
															     int *window;
														
@@ -56,7 +141,7 @@ int AudioFrame::set_end(int val, int max_len)
 
															     float frame_length = 400;
														
 
															     float frame_shift = 160;
														
 
															     float num_new_samples =
														
 
															-        ceil((num_samples - 400) / frame_shift) * frame_shift + frame_length;
														
 
															+        ceil((num_samples - frame_length) / frame_shift) * frame_shift + frame_length;
														
 
															     end = start + num_new_samples;
														
 
															     len = (int)num_new_samples;
														
@@ -111,120 +196,150 @@ Audio::~Audio()
 
															 void Audio::disp()
														
 
															 {
														
 
															-    printf("Audio time is %f s. len is %d\n", (float)speech_len / 16000,
														
 
															+    printf("Audio time is %f s. len is %d\n", (float)speech_len / model_sample_rate,
														
 
															            speech_len);
														
 
															 }
														
 
															 float Audio::get_time_len()
														
 
															 {
														
 
															-    return (float)speech_len / 16000;
														
 
															-       //speech_len);
														
 
															+    return (float)speech_len / model_sample_rate;
														
 
															 }
														
 
															-bool Audio::loadwav(const char *filename)
														
 
															+void Audio::wavResample(int32_t sampling_rate, const float *waveform,
														
 
															+                          int32_t n)
														
 
															 {
														
 
															+    printf(
														
 
															+          "Creating a resampler:\n"
														
 
															+          "   in_sample_rate: %d\n"
														
 
															+          "   output_sample_rate: %d\n",
														
 
															+          sampling_rate, static_cast<int32_t>(model_sample_rate));
														
 
															+    float min_freq =
														
 
															+        std::min<int32_t>(sampling_rate, model_sample_rate);
														
 
															+    float lowpass_cutoff = 0.99 * 0.5 * min_freq;
														
 
															+
														
 
															+    int32_t lowpass_filter_width = 6;
														
 
															+    //FIXME
														
 
															+    //auto resampler = new LinearResample(
														
 
															+    //      sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
														
 
															+    auto resampler = std::make_unique<LinearResample>(
														
 
															+          sampling_rate, model_sample_rate, lowpass_cutoff, lowpass_filter_width);
														
 
															+    std::vector<float> samples;
														
 
															+    resampler->Resample(waveform, n, true, &samples);
														
 
															+    //reset speech_data
														
 
															+    speech_len = samples.size();
														
 
															+    if (speech_data != NULL) {
														
 
															+        free(speech_data);
														
 
															+    }
														
 
															+    speech_data = (float*)malloc(sizeof(float) * speech_len);
														
 
															+    memset(speech_data, 0, sizeof(float) * speech_len);
														
 
															+    copy(samples.begin(), samples.end(), speech_data);
														
 
															+}
														
 
															+bool Audio::loadwav(const char *filename, int32_t* sampling_rate)
														
 
															+{
														
 
															+    WaveHeader header;
														
 
															     if (speech_data != NULL) {
														
 
															         free(speech_data);
														
 
															     }
														
 
															     if (speech_buff != NULL) {
														
 
															         free(speech_buff);
														
 
															     }
														
 
															-
														
 
															+    
														
 
															     offset = 0;
														
 
															-
														
 
															-    FILE *fp;
														
 
															-    fp = fopen(filename, "rb");
														
 
															-    if (fp == nullptr)
														
 
															+    std::ifstream is(filename, std::ifstream::binary);
														
 
															+    is.read(reinterpret_cast<char *>(&header), sizeof(header));
														
 
															+    if(!is){
														
 
															+        fprintf(stderr, "Failed to read %s\n", filename);
														
 
															         return false;
														
 
															-    fseek(fp, 0, SEEK_END);  /*定位到文件末尾*/
														
 
															-    uint32_t nFileLen = ftell(fp);  /*得到文件大小*/
														
 
															-    fseek(fp, 44, SEEK_SET);  /*跳过wav文件头*/
														
 
															-
														
 
															-    speech_len = (nFileLen - 44) / 2;
														
 
															-    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
														
 
															-    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
														
 
															+    }
														
 
															+    
														
 
															+    *sampling_rate = header.sample_rate;
														
 
															+    // header.subchunk2_size contains the number of bytes in the data.
														
 
															+    // As we assume each sample contains two bytes, so it is divided by 2 here
														
 
															+    speech_len = header.subchunk2_size / 2;
														
 
															+    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_len);
														
 
															     if (speech_buff)
														
 
															     {
														
 
															-        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
														
 
															-        int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
														
 
															-        fclose(fp);
														
 
															+        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
														
 
															+        is.read(reinterpret_cast<char *>(speech_buff), header.subchunk2_size);
														
 
															+        if (!is) {
														
 
															+            fprintf(stderr, "Failed to read %s\n", filename);
														
 
															+            return false;
														
 
															+        }
														
 
															+        speech_data = (float*)malloc(sizeof(float) * speech_len);
														
 
															+        memset(speech_data, 0, sizeof(float) * speech_len);
														
 
															-        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
														
 
															-        memset(speech_data, 0, sizeof(float) * speech_align_len);
														
 
															-        int i;
														
 
															         float scale = 1;
														
 
															-
														
 
															         if (data_type == 1) {
														
 
															             scale = 32768;
														
 
															         }
														
 
															-
														
 
															-        for (i = 0; i < speech_len; i++) {
														
 
															+        for (int32_t i = 0; i != speech_len; ++i) {
														
 
															             speech_data[i] = (float)speech_buff[i] / scale;
														
 
															         }
														
 
															+        //resample
														
 
															+        if(*sampling_rate != model_sample_rate){
														
 
															+            wavResample(*sampling_rate, speech_data, speech_len);
														
 
															+        }
														
 
															+
														
 
															         AudioFrame* frame = new AudioFrame(speech_len);
														
 
															         frame_queue.push(frame);
														
 
															-
														
 
															         return true;
														
 
															     }
														
 
															     else
														
 
															         return false;
														
 
															 }
														
 
															-
														
 
															-bool Audio::loadwav(const char* buf, int nFileLen)
														
 
															+bool Audio::loadwav(const char* buf, int nFileLen, int32_t* sampling_rate)
														
 
															 {
														
 
															-
														
 
															-    
														
 
															-
														
 
															+    WaveHeader header;
														
 
															     if (speech_data != NULL) {
														
 
															         free(speech_data);
														
 
															     }
														
 
															     if (speech_buff != NULL) {
														
 
															         free(speech_buff);
														
 
															     }
														
 
															-
														
 
															     offset = 0;
														
 
															-    size_t nOffset = 0;
														
 
															+    std::memcpy(&header, buf, sizeof(header));
														
 
															-#define WAV_HEADER_SIZE 44
														
 
															-
														
 
															-    speech_len = (nFileLen - WAV_HEADER_SIZE) / 2;
														
 
															-    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
														
 
															-    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
														
 
															+    *sampling_rate = header.sample_rate;
														
 
															+    speech_len = header.subchunk2_size / 2;
														
 
															+    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_len);
														
 
															     if (speech_buff)
														
 
															     {
														
 
															-        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
														
 
															+        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
														
 
															         memcpy((void*)speech_buff, (const void*)(buf + WAV_HEADER_SIZE), speech_len * sizeof(int16_t));
														
 
															+        speech_data = (float*)malloc(sizeof(float) * speech_len);
														
 
															+        memset(speech_data, 0, sizeof(float) * speech_len);
														
 
															-        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
														
 
															-        memset(speech_data, 0, sizeof(float) * speech_align_len);
														
 
															-        int i;
														
 
															         float scale = 1;
														
 
															-
														
 
															         if (data_type == 1) {
														
 
															             scale = 32768;
														
 
															         }
														
 
															-        for (i = 0; i < speech_len; i++) {
														
 
															+        for (int32_t i = 0; i != speech_len; ++i) {
														
 
															             speech_data[i] = (float)speech_buff[i] / scale;
														
 
															         }
														
 
															+        
														
 
															+        //resample
														
 
															+        if(*sampling_rate != model_sample_rate){
														
 
															+            wavResample(*sampling_rate, speech_data, speech_len);
														
 
															+        }
														
 
															+        AudioFrame* frame = new AudioFrame(speech_len);
														
 
															+        frame_queue.push(frame);
														
 
															         return true;
														
 
															     }
														
 
															     else
														
 
															         return false;
														
 
															-
														
 
															 }
														
 
															-
														
 
															-bool Audio::loadpcmwav(const char* buf, int nBufLen)
														
 
															+bool Audio::loadpcmwav(const char* buf, int nBufLen, int32_t* sampling_rate)
														
 
															 {
														
 
															     if (speech_data != NULL) {
														
 
															         free(speech_data);
														
@@ -234,33 +349,29 @@ bool Audio::loadpcmwav(const char* buf, int nBufLen)
 
															     }
														
 
															     offset = 0;
														
 
															-    size_t nOffset = 0;
														
 
															-
														
 
															-
														
 
															-
														
 
															     speech_len = nBufLen / 2;
														
 
															-    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
														
 
															-    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
														
 
															+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
														
 
															     if (speech_buff)
														
 
															     {
														
 
															-        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
														
 
															+        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
														
 
															         memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
														
 
															+        speech_data = (float*)malloc(sizeof(float) * speech_len);
														
 
															+        memset(speech_data, 0, sizeof(float) * speech_len);
														
 
															-        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
														
 
															-        memset(speech_data, 0, sizeof(float) * speech_align_len);
														
 
															-
														
 
															-     
														
 
															-        int i;
														
 
															         float scale = 1;
														
 
															-
														
 
															         if (data_type == 1) {
														
 
															             scale = 32768;
														
 
															         }
														
 
															-        for (i = 0; i < speech_len; i++) {
														
 
															+        for (int32_t i = 0; i != speech_len; ++i) {
														
 
															             speech_data[i] = (float)speech_buff[i] / scale;
														
 
															         }
														
 
															+        
														
 
															+        //resample
														
 
															+        if(*sampling_rate != model_sample_rate){
														
 
															+            wavResample(*sampling_rate, speech_data, speech_len);
														
 
															+        }
														
 
															         AudioFrame* frame = new AudioFrame(speech_len);
														
 
															         frame_queue.push(frame);
														
@@ -269,13 +380,10 @@ bool Audio::loadpcmwav(const char* buf, int nBufLen)
 
															     }
														
 
															     else
														
 
															         return false;
														
 
															-
														
 
															-    
														
 
															 }
														
 
															-bool Audio::loadpcmwav(const char* filename)
														
 
															+bool Audio::loadpcmwav(const char* filename, int32_t* sampling_rate)
														
 
															 {
														
 
															-
														
 
															     if (speech_data != NULL) {
														
 
															         free(speech_data);
														
 
															     }
														
@@ -293,34 +401,31 @@ bool Audio::loadpcmwav(const char* filename)
 
															     fseek(fp, 0, SEEK_SET);
														
 
															     speech_len = (nFileLen) / 2;
														
 
															-    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
														
 
															-    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
														
 
															+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
														
 
															     if (speech_buff)
														
 
															     {
														
 
															-        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
														
 
															+        memset(speech_buff, 0, sizeof(int16_t) * speech_len);
														
 
															         int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
														
 
															         fclose(fp);
														
 
															-        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
														
 
															-        memset(speech_data, 0, sizeof(float) * speech_align_len);
														
 
															-
														
 
															+        speech_data = (float*)malloc(sizeof(float) * speech_len);
														
 
															+        memset(speech_data, 0, sizeof(float) * speech_len);
														
 
															-
														
 
															-        int i;
														
 
															         float scale = 1;
														
 
															-
														
 
															         if (data_type == 1) {
														
 
															             scale = 32768;
														
 
															         }
														
 
															-
														
 
															-        for (i = 0; i < speech_len; i++) {
														
 
															+        for (int32_t i = 0; i != speech_len; ++i) {
														
 
															             speech_data[i] = (float)speech_buff[i] / scale;
														
 
															         }
														
 
															+        //resample
														
 
															+        if(*sampling_rate != model_sample_rate){
														
 
															+            wavResample(*sampling_rate, speech_data, speech_len);
														
 
															+        }
														
 
															         AudioFrame* frame = new AudioFrame(speech_len);
														
 
															         frame_queue.push(frame);
														
 
															-
														
 
															         return true;
														
 
															     }
														
@@ -329,7 +434,6 @@ bool Audio::loadpcmwav(const char* filename)
 
															 }
														
 
															-
														
 
															 int Audio::fetch_chunck(float *&dout, int len)
														
 
															 {
														
 
															     if (offset >= speech_align_len) {
														
--- a/funasr/runtime/onnxruntime/src/CMakeLists.txt
+++ b/funasr/runtime/onnxruntime/src/CMakeLists.txt
@@ -1,5 +1,6 @@
 
															 file(GLOB files1 "*.cpp")
														
 
															+file(GLOB files2 "*.cc")
														
 
															 file(GLOB files4 "paraformer/*.cpp")
														
 
															 set(files ${files1} ${files2} ${files3} ${files4})
														
--- a/funasr/runtime/onnxruntime/src/Vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/Vocab.cpp
@@ -13,21 +13,6 @@ Vocab::Vocab(const char *filename)
 
															 {
														
 
															     ifstream in(filename);
														
 
															     loadVocabFromYaml(filename);
														
 
															-
														
 
															-    /*
														
 
															-    string line;
														
 
															-    if (in) // 有该文件
														
 
															-    {
														
 
															-        while (getline(in, line)) // line中不包括每行的换行符
														
 
															-        {
														
 
															-            vocab.push_back(line);
														
 
															-        }
														
 
															-    }
														
 
															-    else{
														
 
															-        printf("Cannot load vocab from: %s, there must be file vocab.txt", filename);
														
 
															-        exit(-1);
														
 
															-    }
														
 
															-    */
														
 
															 }
														
 
															 Vocab::~Vocab()
														
 
															 {
														
--- a/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
+++ b/funasr/runtime/onnxruntime/src/libfunasrapi.cpp
@@ -17,8 +17,9 @@ extern "C" {
 
															 		if (!pRecogObj)
														
 
															 			return nullptr;
														
 
															+		int32_t sampling_rate = -1;
														
 
															 		Audio audio(1);
														
 
															-		if (!audio.loadwav(szBuf, nLen))
														
 
															+		if (!audio.loadwav(szBuf, nLen, &sampling_rate))
														
 
															 			return nullptr;
														
 
															 		//audio.split();
														
@@ -41,14 +42,14 @@ extern "C" {
 
															 		return pResult;
														
 
															 	}
														
 
															-	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
														
 
															+	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMBuffer(FUNASR_HANDLE handle, const char* szBuf, int nLen, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
														
 
															 	{
														
 
															 		Model* pRecogObj = (Model*)handle;
														
 
															 		if (!pRecogObj)
														
 
															 			return nullptr;
														
 
															 		Audio audio(1);
														
 
															-		if (!audio.loadpcmwav(szBuf, nLen))
														
 
															+		if (!audio.loadpcmwav(szBuf, nLen, &sampling_rate))
														
 
															 			return nullptr;
														
 
															 		//audio.split();
														
@@ -71,14 +72,14 @@ extern "C" {
 
															 		return pResult;
														
 
															 	}
														
 
															-	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
														
 
															+	_FUNASRAPI FUNASR_RESULT FunASRRecogPCMFile(FUNASR_HANDLE handle, const char* szFileName, int sampling_rate, FUNASR_MODE Mode, QM_CALLBACK fnCallback)
														
 
															 	{
														
 
															 		Model* pRecogObj = (Model*)handle;
														
 
															 		if (!pRecogObj)
														
 
															 			return nullptr;
														
 
															 		Audio audio(1);
														
 
															-		if (!audio.loadpcmwav(szFileName))
														
 
															+		if (!audio.loadpcmwav(szFileName, &sampling_rate))
														
 
															 			return nullptr;
														
 
															 		//audio.split();
														
@@ -106,9 +107,10 @@ extern "C" {
 
															 		Model* pRecogObj = (Model*)handle;
														
 
															 		if (!pRecogObj)
														
 
															 			return nullptr;
														
 
															-
														
 
															+		
														
 
															+		int32_t sampling_rate = -1;
														
 
															 		Audio audio(1);
														
 
															-		if(!audio.loadwav(szWavfile))
														
 
															+		if(!audio.loadwav(szWavfile, &sampling_rate))
														
 
															 			return nullptr;
														
 
															 		//audio.split();
														
--- a/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
+++ b/funasr/runtime/onnxruntime/src/paraformer_onnx.cpp
@@ -70,7 +70,6 @@ ModelImp::~ModelImp()
 
															 void ModelImp::reset()
														
 
															 {
														
 
															-    printf("Not Imp!!!!!!\n");
														
 
															 }
														
 
															 void ModelImp::apply_lfr(Tensor<float>*& din)
														
--- a/funasr/runtime/onnxruntime/src/precomp.h
+++ b/funasr/runtime/onnxruntime/src/precomp.h
@@ -44,6 +44,7 @@ using namespace std;
 
															 #include "FeatureQueue.h"
														
 
															 #include "SpeechWrap.h"
														
 
															 #include <Audio.h>
														
 
															+#include "resample.h"
														
 
															 #include "Model.h"
														
 
															 #include "paraformer_onnx.h"
														
 
															 #include "libfunasrapi.h"
														
--- a/funasr/runtime/onnxruntime/src/resample.cc
+++ b/funasr/runtime/onnxruntime/src/resample.cc
@@ -0,0 +1,305 @@
 
															+/**
														
 
															+ * Copyright     2013  Pegah Ghahremani
														
 
															+ *               2014  IMSL, PKU-HKUST (author: Wei Shi)
														
 
															+ *               2014  Yanqing Sun, Junjie Wang
														
 
															+ *               2014  Johns Hopkins University (author: Daniel Povey)
														
 
															+ * Copyright     2023  Xiaomi Corporation (authors: Fangjun Kuang)
														
 
															+ *
														
 
															+ * See LICENSE for clarification regarding multiple authors
														
 
															+ *
														
 
															+ * Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+ * you may not use this file except in compliance with the License.
														
 
															+ * You may obtain a copy of the License at
														
 
															+ *
														
 
															+ *     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+// this file is copied and modified from
														
 
															+// kaldi/src/feat/resample.cc
														
 
															+
														
 
															+#include "resample.h"
														
 
															+
														
 
															+#include <assert.h>
														
 
															+#include <math.h>
														
 
															+#include <stdio.h>
														
 
															+
														
 
															+#include <cstdlib>
														
 
															+#include <type_traits>
														
 
															+
														
 
															+#ifndef M_2PI
														
 
															+#define M_2PI 6.283185307179586476925286766559005
														
 
															+#endif
														
 
															+
														
 
															+#ifndef M_PI
														
 
															+#define M_PI 3.1415926535897932384626433832795
														
 
															+#endif
														
 
															+
														
 
															+template <class I>
														
 
															+I Gcd(I m, I n) {
														
 
															+  // this function is copied from kaldi/src/base/kaldi-math.h
														
 
															+  if (m == 0 || n == 0) {
														
 
															+    if (m == 0 && n == 0) {  // gcd not defined, as all integers are divisors.
														
 
															+      fprintf(stderr, "Undefined GCD since m = 0, n = 0.\n");
														
 
															+      exit(-1);
														
 
															+    }
														
 
															+    return (m == 0 ? (n > 0 ? n : -n) : (m > 0 ? m : -m));
														
 
															+    // return absolute value of whichever is nonzero
														
 
															+  }
														
 
															+  // Compile-time check that I is an integral type (the % operator used
														
 
															+  // below is only defined for integers).
														
 
															+  static_assert(std::is_integral<I>::value, "");
														
 
															+  while (1) {
														
 
															+    m %= n;
														
 
															+    if (m == 0) return (n > 0 ? n : -n);
														
 
															+    n %= m;
														
 
															+    if (n == 0) return (m > 0 ? m : -m);
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+/// Returns the least common multiple of two integers.  Will
														
 
															+/// fail an assert (when asserts are enabled) unless both inputs are positive.
														
 
															+template <class I>
														
 
															+I Lcm(I m, I n) {
														
 
															+  // This function is copied from kaldi/src/base/kaldi-math.h
														
 
															+  assert(m > 0 && n > 0);
														
 
															+  I gcd = Gcd(m, n);
														
 
															+  return gcd * (m / gcd) * (n / gcd);
														
 
															+}
														
 
															+
														
 
															+static float DotProduct(const float *a, const float *b, int32_t n) {
														
 
															+  float sum = 0;
														
 
															+  for (int32_t i = 0; i != n; ++i) {
														
 
															+    sum += a[i] * b[i];
														
 
															+  }
														
 
															+  return sum;
														
 
															+}
														
 
															+
														
 
															+LinearResample::LinearResample(int32_t samp_rate_in_hz,
														
 
															+                               int32_t samp_rate_out_hz, float filter_cutoff_hz,
														
 
															+                               int32_t num_zeros)
														
 
															+    : samp_rate_in_(samp_rate_in_hz),
														
 
															+      samp_rate_out_(samp_rate_out_hz),
														
 
															+      filter_cutoff_(filter_cutoff_hz),
														
 
															+      num_zeros_(num_zeros) {
														
 
															+  assert(samp_rate_in_hz > 0.0 && samp_rate_out_hz > 0.0 &&
														
 
															+         filter_cutoff_hz > 0.0 && filter_cutoff_hz * 2 <= samp_rate_in_hz &&
														
 
															+         filter_cutoff_hz * 2 <= samp_rate_out_hz && num_zeros > 0);
														
 
															+
														
 
															+  // base_freq is the frequency of the repeating unit, which is the gcd
														
 
															+  // of the input and output sample rates.
														
 
															+  int32_t base_freq = Gcd(samp_rate_in_, samp_rate_out_);
														
 
															+  input_samples_in_unit_ = samp_rate_in_ / base_freq;
														
 
															+  output_samples_in_unit_ = samp_rate_out_ / base_freq;
														
 
															+
														
 
															+  SetIndexesAndWeights();
														
 
															+  Reset();
														
 
															+}
														
 
															+
														
 
															+void LinearResample::SetIndexesAndWeights() {
														
 
															+  first_index_.resize(output_samples_in_unit_);
														
 
															+  weights_.resize(output_samples_in_unit_);
														
 
															+
														
 
															+  double window_width = num_zeros_ / (2.0 * filter_cutoff_);
														
 
															+
														
 
															+  for (int32_t i = 0; i < output_samples_in_unit_; i++) {
														
 
															+    double output_t = i / static_cast<double>(samp_rate_out_);
														
 
															+    double min_t = output_t - window_width, max_t = output_t + window_width;
														
 
															+    // we do ceil on the min and floor on the max, because if we did it
														
 
															+    // the other way around we would unnecessarily include indexes just
														
 
															+    // outside the window, with zero coefficients.  It's possible
														
 
															+    // if the arguments to the ceil and floor expressions are integers
														
 
															+    // (e.g. if filter_cutoff_ has an exact ratio with the sample rates),
														
 
															+    // that we unnecessarily include something with a zero coefficient,
														
 
															+    // but this is only a slight efficiency issue.
														
 
															+    int32_t min_input_index = ceil(min_t * samp_rate_in_),
														
 
															+            max_input_index = floor(max_t * samp_rate_in_),
														
 
															+            num_indices = max_input_index - min_input_index + 1;
														
 
															+    first_index_[i] = min_input_index;
														
 
															+    weights_[i].resize(num_indices);
														
 
															+    for (int32_t j = 0; j < num_indices; j++) {
														
 
															+      int32_t input_index = min_input_index + j;
														
 
															+      double input_t = input_index / static_cast<double>(samp_rate_in_),
														
 
															+             delta_t = input_t - output_t;
														
 
															+      // sign of delta_t doesn't matter.
														
 
															+      weights_[i][j] = FilterFunc(delta_t) / samp_rate_in_;
														
 
															+    }
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+/** Here, t is a time in seconds representing an offset from
														
 
															+    the center of the windowed filter function, and FilterFunc(t)
														
 
															+    returns the windowed filter function, described
														
 
															+    in the header as h(t) = f(t)g(t), evaluated at t.
														
 
															+*/
														
 
															+float LinearResample::FilterFunc(float t) const {
														
 
															+  float window,  // raised-cosine (Hanning) window of width
														
 
															+                 // num_zeros_ / (2 * filter_cutoff_) on each side
														
 
															+      filter;    // sinc filter function
														
 
															+  if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
														
 
															+    window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
														
 
															+  else
														
 
															+    window = 0.0;  // outside support of window function
														
 
															+  if (t != 0)
														
 
															+    filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
														
 
															+  else
														
 
															+    filter = 2 * filter_cutoff_;  // limit of the function at t = 0
														
 
															+  return filter * window;
														
 
															+}
														
 
															+
														
 
															+void LinearResample::Reset() {
														
 
															+  input_sample_offset_ = 0;
														
 
															+  output_sample_offset_ = 0;
														
 
															+  input_remainder_.resize(0);
														
 
															+}
														
 
															+
														
 
															+void LinearResample::Resample(const float *input, int32_t input_dim, bool flush,
														
 
															+                              std::vector<float> *output) {
														
 
															+  int64_t tot_input_samp = input_sample_offset_ + input_dim,
														
 
															+          tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
														
 
															+
														
 
															+  assert(tot_output_samp >= output_sample_offset_);
														
 
															+
														
 
															+  output->resize(tot_output_samp - output_sample_offset_);
														
 
															+
														
 
															+  // samp_out is the index into the total output signal, not just the part
														
 
															+  // of it we are producing here.
														
 
															+  for (int64_t samp_out = output_sample_offset_; samp_out < tot_output_samp;
														
 
															+       samp_out++) {
														
 
															+    int64_t first_samp_in;
														
 
															+    int32_t samp_out_wrapped;
														
 
															+    GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped);
														
 
															+    const std::vector<float> &weights = weights_[samp_out_wrapped];
														
 
															+    // first_input_index is the first index into "input" that we have a weight
														
 
															+    // for.
														
 
															+    int32_t first_input_index =
														
 
															+        static_cast<int32_t>(first_samp_in - input_sample_offset_);
														
 
															+    float this_output;
														
 
															+    if (first_input_index >= 0 &&
														
 
															+        first_input_index + static_cast<int32_t>(weights.size()) <= input_dim) {
														
 
															+      this_output =
														
 
															+          DotProduct(input + first_input_index, weights.data(), weights.size());
														
 
															+    } else {  // Handle edge cases.
														
 
															+      this_output = 0.0;
														
 
															+      for (int32_t i = 0; i < static_cast<int32_t>(weights.size()); i++) {
														
 
															+        float weight = weights[i];
														
 
															+        int32_t input_index = first_input_index + i;
														
 
															+        if (input_index < 0 &&
														
 
															+            static_cast<int32_t>(input_remainder_.size()) + input_index >= 0) {
														
 
															+          this_output +=
														
 
															+              weight * input_remainder_[input_remainder_.size() + input_index];
														
 
															+        } else if (input_index >= 0 && input_index < input_dim) {
														
 
															+          this_output += weight * input[input_index];
														
 
															+        } else if (input_index >= input_dim) {
														
 
															+          // We're past the end of the input and are adding zero; should only
														
 
															+          // happen if the user specified flush == true, or else we would not
														
 
															+          // be trying to output this sample.
														
 
															+          assert(flush);
														
 
															+        }
														
 
															+      }
														
 
															+    }
														
 
															+    int32_t output_index =
														
 
															+        static_cast<int32_t>(samp_out - output_sample_offset_);
														
 
															+    (*output)[output_index] = this_output;
														
 
															+  }
														
 
															+
														
 
															+  if (flush) {
														
 
															+    Reset();  // Reset the internal state.
														
 
															+  } else {
														
 
															+    SetRemainder(input, input_dim);
														
 
															+    input_sample_offset_ = tot_input_samp;
														
 
															+    output_sample_offset_ = tot_output_samp;
														
 
															+  }
														
 
															+}
														
 
															+
														
 
															+int64_t LinearResample::GetNumOutputSamples(int64_t input_num_samp,
														
 
															+                                            bool flush) const {
														
 
															+  // For exact computation, we measure time in "ticks" of 1.0 / tick_freq,
														
 
															+  // where tick_freq is the least common multiple of samp_rate_in_ and
														
 
															+  // samp_rate_out_.
														
 
															+  int32_t tick_freq = Lcm(samp_rate_in_, samp_rate_out_);
														
 
															+  int32_t ticks_per_input_period = tick_freq / samp_rate_in_;
														
 
															+
														
 
															+  // work out the number of ticks in the time interval
														
 
															+  // [ 0, input_num_samp/samp_rate_in_ ).
														
 
															+  int64_t interval_length_in_ticks = input_num_samp * ticks_per_input_period;
														
 
															+  if (!flush) {
														
 
															+    float window_width = num_zeros_ / (2.0 * filter_cutoff_);
														
 
															+    // To count the window-width in ticks we take the floor.  This
														
 
															+    // is because we're looking for the largest integer num-out-samp
														
 
															+    // that fits in the interval, which is open on the right, a reduction
														
 
															+    // in interval length of less than a tick will never make a difference.
														
 
															+    // For example, the largest integer in the interval [ 0, 2 ) and the
														
 
															+    // largest integer in the interval [ 0, 2 - 0.9 ) are the same (both one).
														
 
															+    // So when we're subtracting the window-width we can ignore the fractional
														
 
															+    // part.
														
 
															+    int32_t window_width_ticks = floor(window_width * tick_freq);
														
 
															+    // The time-period of the output that we can sample gets reduced
														
 
															+    // by the window-width (which is actually the distance from the
														
 
															+    // center to the edge of the windowing function) if we're not
														
 
															+    // "flushing the output".
														
 
															+    interval_length_in_ticks -= window_width_ticks;
														
 
															+  }
														
 
															+  if (interval_length_in_ticks <= 0) return 0;
														
 
															+
														
 
															+  int32_t ticks_per_output_period = tick_freq / samp_rate_out_;
														
 
															+  // Get the last output-sample in the closed interval, i.e. replacing [ ) with
														
 
															+  // [ ].  Note: integer division rounds down.  See
														
 
															+  // http://en.wikipedia.org/wiki/Interval_(mathematics) for an explanation of
														
 
															+  // the notation.
														
 
															+  int64_t last_output_samp = interval_length_in_ticks / ticks_per_output_period;
														
 
															+  // We need the last output-sample in the open interval, so if it takes us to
														
 
															+  // the end of the interval exactly, subtract one.
														
 
															+  if (last_output_samp * ticks_per_output_period == interval_length_in_ticks)
														
 
															+    last_output_samp--;
														
 
															+
														
 
															+  // First output-sample index is zero, so the number of output samples
														
 
															+  // is the last output-sample plus one.
														
 
															+  int64_t num_output_samp = last_output_samp + 1;
														
 
															+  return num_output_samp;
														
 
															+}
														
 
															+
														
 
															+// inline
														
 
															+void LinearResample::GetIndexes(int64_t samp_out, int64_t *first_samp_in,
														
 
															+                                int32_t *samp_out_wrapped) const {
														
 
															+  // A unit is the smallest nonzero amount of time that is an exact
														
 
															+  // multiple of the input and output sample periods.  The unit index
														
 
															+  // is the answer to "which numbered unit we are in".
														
 
															+  int64_t unit_index = samp_out / output_samples_in_unit_;
														
 
															+  // samp_out_wrapped is equal to samp_out % output_samples_in_unit_
														
 
															+  *samp_out_wrapped =
														
 
															+      static_cast<int32_t>(samp_out - unit_index * output_samples_in_unit_);
														
 
															+  *first_samp_in =
														
 
															+      first_index_[*samp_out_wrapped] + unit_index * input_samples_in_unit_;
														
 
															+}
														
 
															+
														
 
															+void LinearResample::SetRemainder(const float *input, int32_t input_dim) {
														
 
															+  std::vector<float> old_remainder(input_remainder_);
														
 
															+  // max_remainder_needed is the width of the filter from side to side,
														
 
															+  // measured in input samples.  you might think it should be half that,
														
 
															+  // but you have to consider that you might be wanting to output samples
														
 
															+  // that are "in the past" relative to the beginning of the latest
														
 
															+  // input... anyway, storing more remainder than needed is not harmful.
														
 
															+  int32_t max_remainder_needed =
														
 
															+      ceil(samp_rate_in_ * num_zeros_ / filter_cutoff_);
														
 
															+  input_remainder_.resize(max_remainder_needed);
														
 
															+  for (int32_t index = -static_cast<int32_t>(input_remainder_.size());
														
 
															+       index < 0; index++) {
														
 
															+    // we interpret "index" as an offset from the end of "input" and
														
 
															+    // from the end of input_remainder_.
														
 
															+    int32_t input_index = index + input_dim;
														
 
															+    if (input_index >= 0) {
														
 
															+      input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
														
 
															+          input[input_index];
														
 
															+    } else if (input_index + static_cast<int32_t>(old_remainder.size()) >= 0) {
														
 
															+      input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
														
 
															+          old_remainder[input_index +
														
 
															+                        static_cast<int32_t>(old_remainder.size())];
														
 
															+      // else leave it at zero.
														
 
															+    }
														
 
															+  }
														
 
															+}
														
--- a/funasr/runtime/onnxruntime/src/resample.h
+++ b/funasr/runtime/onnxruntime/src/resample.h
@@ -0,0 +1,137 @@
 
															+/**
														
 
															+ * Copyright     2013  Pegah Ghahremani
														
 
															+ *               2014  IMSL, PKU-HKUST (author: Wei Shi)
														
 
															+ *               2014  Yanqing Sun, Junjie Wang
														
 
															+ *               2014  Johns Hopkins University (author: Daniel Povey)
														
 
															+ * Copyright     2023  Xiaomi Corporation (authors: Fangjun Kuang)
														
 
															+ *
														
 
															+ * See LICENSE for clarification regarding multiple authors
														
 
															+ *
														
 
															+ * Licensed under the Apache License, Version 2.0 (the "License");
														
 
															+ * you may not use this file except in compliance with the License.
														
 
															+ * You may obtain a copy of the License at
														
 
															+ *
														
 
															+ *     http://www.apache.org/licenses/LICENSE-2.0
														
 
															+ *
														
 
															+ * Unless required by applicable law or agreed to in writing, software
														
 
															+ * distributed under the License is distributed on an "AS IS" BASIS,
														
 
															+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
														
 
															+ * See the License for the specific language governing permissions and
														
 
															+ * limitations under the License.
														
 
															+ */
														
 
															+// this file is copied and modified from
														
 
															+// kaldi/src/feat/resample.h
														
 
															+
														
 
															+#include <cstdint>
														
 
															+#include <vector>
														
 
															+
														
 
															+
														
 
															+/*
														
 
															+   We require that the input and output sampling rate be specified as
														
 
															+   integers, as this is an easy way to specify that their ratio be rational.
														
 
															+*/
														
 
															+
														
 
															+class LinearResample {
														
 
															+ public:
														
 
															+  /// Constructor.  We make the input and output sample rates integers, because
														
 
															+  /// we are going to need to find a common divisor.  This should just remind
														
 
															+  /// you that they need to be integers.  The filter cutoff needs to be less
														
 
															+  /// than samp_rate_in_hz/2 and less than samp_rate_out_hz/2.  num_zeros
														
 
															+  /// controls the sharpness of the filter, more == sharper but less efficient.
														
 
															+  /// We suggest around 4 to 10 for normal use.
														
 
															+  LinearResample(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz,
														
 
															+                 float filter_cutoff_hz, int32_t num_zeros);
														
 
															+
														
 
															+  /// Calling the function Reset() resets the state of the object prior to
														
 
															+  /// processing a new signal; it is only necessary if you have called
														
 
															+  /// Resample(x, x_size, false, y) for some signal, leading to a remainder of
														
 
															+  /// the signal being retained, but then abandon processing the signal before
														
 
															+  /// calling Resample(x, x_size, true, y) for the last piece.  Calling it
														
 
															+  /// unnecessarily between signals will not do any harm.
														
 
															+  void Reset();
														
 
															+
														
 
															+  /// This function does the resampling.  If you call it with flush == true and
														
 
															+  /// you have never called it with flush == false, it just resamples the input
														
 
															+  /// signal (it resizes the output to a suitable number of samples).
														
 
															+  ///
														
 
															+  /// You can also use this function to process a signal a piece at a time.
														
 
															+  /// Suppose you break it into piece1, piece2, ... pieceN.  You can call
														
 
															+  /// \code{.cc}
														
 
															+  /// Resample(piece1, piece1_size, false, &output1);
														
 
															+  /// Resample(piece2, piece2_size, false, &output2);
														
 
															+  /// Resample(piece3, piece3_size, true, &output3);
														
 
															+  /// \endcode
														
 
															+  /// If you call it with flush == false, it won't output the last few samples
														
 
															+  /// but will remember them, so that if you later give it a second piece of
														
 
															+  /// the input signal it can process it correctly.
														
 
															+  /// If your most recent call to the object was with flush == false, it will
														
 
															+  /// have internal state; you can remove this by calling Reset().
														
 
															+  /// Empty input is acceptable.
														
 
															+  void Resample(const float *input, int32_t input_dim, bool flush,
														
 
															+                std::vector<float> *output);
														
 
															+
														
 
															+  /// Return the input and output sampling rates (for checks, for example)
														
 
															+  int32_t GetInputSamplingRate() const { return samp_rate_in_; }
														
 
															+  int32_t GetOutputSamplingRate() const { return samp_rate_out_; }
														
 
															+
														
 
															+ private:
														
 
															+  void SetIndexesAndWeights();
														
 
															+
														
 
															+  float FilterFunc(float) const;
														
 
															+
														
 
															+  /// This function outputs the number of output samples we will output
														
 
															+  /// for a signal with "input_num_samp" input samples.  If flush == true,
														
 
															+  /// we return the largest n such that
														
 
															+  /// (n/samp_rate_out_) is in the interval [ 0, input_num_samp/samp_rate_in_ ),
														
 
															+  /// and note that the interval is half-open.  If flush == false,
														
 
															+  /// define window_width as num_zeros / (2.0 * filter_cutoff_);
														
 
															+  /// we return the largest n such that (n/samp_rate_out_) is in the interval
														
 
															+  /// [ 0, input_num_samp/samp_rate_in_ - window_width ).
														
 
															+  int64_t GetNumOutputSamples(int64_t input_num_samp, bool flush) const;
														
 
															+
														
 
															+  /// Given an output-sample index, this function outputs to *first_samp_in the
														
 
															+  /// first input-sample index that we have a weight on (may be negative),
														
 
															+  /// and to *samp_out_wrapped the index into weights_ where we can get the
														
 
															+  /// corresponding weights on the input.
														
 
															+  inline void GetIndexes(int64_t samp_out, int64_t *first_samp_in,
														
 
															+                         int32_t *samp_out_wrapped) const;
														
 
															+
														
 
															+  void SetRemainder(const float *input, int32_t input_dim);
														
 
															+
														
 
															+ private:
														
 
															+  // The following variables are provided by the user.
														
 
															+  int32_t samp_rate_in_;
														
 
															+  int32_t samp_rate_out_;
														
 
															+  float filter_cutoff_;
														
 
															+  int32_t num_zeros_;
														
 
															+
														
 
															+  int32_t input_samples_in_unit_;  ///< The number of input samples in the
														
 
															+                                   ///< smallest repeating unit: input_samples_in_unit_ =
														
 
															+                                   ///< samp_rate_in_hz / Gcd(samp_rate_in_hz,
														
 
															+                                   ///< samp_rate_out_hz)
														
 
															+
														
 
															+  int32_t output_samples_in_unit_;  ///< The number of output samples in the
														
 
															+                                    ///< smallest repeating unit: output_samples_in_unit_
														
 
															+                                    ///< = samp_rate_out_hz /
														
 
															+                                    ///< Gcd(samp_rate_in_hz, samp_rate_out_hz)
														
 
															+
														
 
															+  /// The first input-sample index that we sum over, for this output-sample
														
 
															+  /// index.  May be negative; any truncation at the beginning is handled
														
 
															+  /// separately.  This is just for the first few output samples, but we can
														
 
															+  /// extrapolate the correct input-sample index for arbitrary output samples.
														
 
															+  std::vector<int32_t> first_index_;
														
 
															+
														
 
															+  /// Weights on the input samples, for this output-sample index.
														
 
															+  std::vector<std::vector<float>> weights_;
														
 
															+
														
 
															+  // the following variables keep track of where we are in a particular signal,
														
 
															+  // if it is being provided over multiple calls to Resample().
														
 
															+
														
 
															+  int64_t input_sample_offset_;   ///< The number of input samples we have
														
 
															+                                  ///< already received for this signal
														
 
															+                                  ///< (including anything in input_remainder_)
														
 
															+  int64_t output_sample_offset_;  ///< The number of samples we have already
														
 
															+                                  ///< output for this signal.
														
 
															+  std::vector<float> input_remainder_;  ///< A small trailing part of the
														
 
															+                                        ///< previously seen input signal.
														
 
															+};