// funasrruntime.cpp
  1. #include "precomp.h"
  2. #include <vector>
  3. // APIs for Init
  4. _FUNASRAPI FUNASR_HANDLE FunASRInit(std::map<std::string, std::string>& model_path, int thread_num, ASR_TYPE type)
  5. {
  6. funasr::Model* mm = funasr::CreateModel(model_path, thread_num, type);
  7. return mm;
  8. }
  9. _FUNASRAPI FUNASR_HANDLE FunASROnlineInit(FUNASR_HANDLE asr_hanlde, std::vector<int> chunk_size)
  10. {
  11. funasr::Model* mm = funasr::CreateModel(asr_hanlde, chunk_size);
  12. return mm;
  13. }
  14. _FUNASRAPI FUNASR_HANDLE FsmnVadInit(std::map<std::string, std::string>& model_path, int thread_num)
  15. {
  16. funasr::VadModel* mm = funasr::CreateVadModel(model_path, thread_num);
  17. return mm;
  18. }
  19. _FUNASRAPI FUNASR_HANDLE FsmnVadOnlineInit(FUNASR_HANDLE fsmnvad_handle)
  20. {
  21. funasr::VadModel* mm = funasr::CreateVadModel(fsmnvad_handle);
  22. return mm;
  23. }
  24. _FUNASRAPI FUNASR_HANDLE CTTransformerInit(std::map<std::string, std::string>& model_path, int thread_num, PUNC_TYPE type)
  25. {
  26. funasr::PuncModel* mm = funasr::CreatePuncModel(model_path, thread_num, type);
  27. return mm;
  28. }
  29. _FUNASRAPI FUNASR_HANDLE FunOfflineInit(std::map<std::string, std::string>& model_path, int thread_num)
  30. {
  31. funasr::OfflineStream* mm = funasr::CreateOfflineStream(model_path, thread_num);
  32. return mm;
  33. }
  34. _FUNASRAPI FUNASR_HANDLE FunTpassInit(std::map<std::string, std::string>& model_path, int thread_num)
  35. {
  36. funasr::TpassStream* mm = funasr::CreateTpassStream(model_path, thread_num);
  37. return mm;
  38. }
  39. _FUNASRAPI FUNASR_HANDLE FunTpassOnlineInit(FUNASR_HANDLE tpass_handle, std::vector<int> chunk_size)
  40. {
  41. return funasr::CreateTpassOnlineStream(tpass_handle, chunk_size);
  42. }
  43. // APIs for ASR Infer
  44. _FUNASRAPI FUNASR_RESULT FunASRInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, FUNASR_MODE mode, QM_CALLBACK fn_callback, bool input_finished, int sampling_rate, std::string wav_format)
  45. {
  46. funasr::Model* recog_obj = (funasr::Model*)handle;
  47. if (!recog_obj)
  48. return nullptr;
  49. funasr::Audio audio(recog_obj->GetAsrSampleRate(),1);
  50. if(wav_format == "pcm" || wav_format == "PCM"){
  51. if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
  52. return nullptr;
  53. }else{
  54. if (!audio.FfmpegLoad(sz_buf, n_len))
  55. return nullptr;
  56. }
  57. float* buff;
  58. int len;
  59. int flag = 0;
  60. funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
  61. p_result->snippet_time = audio.GetTimeLen();
  62. if(p_result->snippet_time == 0){
  63. return p_result;
  64. }
  65. int n_step = 0;
  66. int n_total = audio.GetQueueSize();
  67. while (audio.Fetch(buff, len, flag) > 0) {
  68. string msg = recog_obj->Forward(buff, len, input_finished);
  69. p_result->msg += msg;
  70. n_step++;
  71. if (fn_callback)
  72. fn_callback(n_step, n_total);
  73. }
  74. return p_result;
  75. }
  76. _FUNASRAPI FUNASR_RESULT FunASRInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback, int sampling_rate)
  77. {
  78. funasr::Model* recog_obj = (funasr::Model*)handle;
  79. if (!recog_obj)
  80. return nullptr;
  81. funasr::Audio audio(recog_obj->GetAsrSampleRate(),1);
  82. if(funasr::is_target_file(sz_filename, "wav")){
  83. int32_t sampling_rate_ = -1;
  84. if(!audio.LoadWav(sz_filename, &sampling_rate_))
  85. return nullptr;
  86. }else if(funasr::is_target_file(sz_filename, "pcm")){
  87. if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
  88. return nullptr;
  89. }else{
  90. if (!audio.FfmpegLoad(sz_filename))
  91. return nullptr;
  92. }
  93. float* buff;
  94. int len;
  95. int flag = 0;
  96. int n_step = 0;
  97. int n_total = audio.GetQueueSize();
  98. funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
  99. p_result->snippet_time = audio.GetTimeLen();
  100. if(p_result->snippet_time == 0){
  101. return p_result;
  102. }
  103. while (audio.Fetch(buff, len, flag) > 0) {
  104. string msg = recog_obj->Forward(buff, len, true);
  105. p_result->msg += msg;
  106. n_step++;
  107. if (fn_callback)
  108. fn_callback(n_step, n_total);
  109. }
  110. return p_result;
  111. }
  112. // APIs for VAD Infer
  113. _FUNASRAPI FUNASR_RESULT FsmnVadInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len, QM_CALLBACK fn_callback, bool input_finished, int sampling_rate, std::string wav_format)
  114. {
  115. funasr::VadModel* vad_obj = (funasr::VadModel*)handle;
  116. if (!vad_obj)
  117. return nullptr;
  118. funasr::Audio audio(vad_obj->GetVadSampleRate(),1);
  119. if(wav_format == "pcm" || wav_format == "PCM"){
  120. if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
  121. return nullptr;
  122. }else{
  123. if (!audio.FfmpegLoad(sz_buf, n_len))
  124. return nullptr;
  125. }
  126. funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
  127. p_result->snippet_time = audio.GetTimeLen();
  128. if(p_result->snippet_time == 0){
  129. p_result->segments = new vector<std::vector<int>>();
  130. return p_result;
  131. }
  132. vector<std::vector<int>> vad_segments;
  133. audio.Split(vad_obj, vad_segments, input_finished);
  134. p_result->segments = new vector<std::vector<int>>(vad_segments);
  135. return p_result;
  136. }
  137. _FUNASRAPI FUNASR_RESULT FsmnVadInfer(FUNASR_HANDLE handle, const char* sz_filename, QM_CALLBACK fn_callback, int sampling_rate)
  138. {
  139. funasr::VadModel* vad_obj = (funasr::VadModel*)handle;
  140. if (!vad_obj)
  141. return nullptr;
  142. funasr::Audio audio(vad_obj->GetVadSampleRate(),1);
  143. if(funasr::is_target_file(sz_filename, "wav")){
  144. int32_t sampling_rate_ = -1;
  145. if(!audio.LoadWav(sz_filename, &sampling_rate_))
  146. return nullptr;
  147. }else if(funasr::is_target_file(sz_filename, "pcm")){
  148. if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
  149. return nullptr;
  150. }else{
  151. if (!audio.FfmpegLoad(sz_filename))
  152. return nullptr;
  153. }
  154. funasr::FUNASR_VAD_RESULT* p_result = new funasr::FUNASR_VAD_RESULT;
  155. p_result->snippet_time = audio.GetTimeLen();
  156. if(p_result->snippet_time == 0){
  157. p_result->segments = new vector<std::vector<int>>();
  158. return p_result;
  159. }
  160. vector<std::vector<int>> vad_segments;
  161. audio.Split(vad_obj, vad_segments, true);
  162. p_result->segments = new vector<std::vector<int>>(vad_segments);
  163. return p_result;
  164. }
  165. // APIs for PUNC Infer
  166. _FUNASRAPI FUNASR_RESULT CTTransformerInfer(FUNASR_HANDLE handle, const char* sz_sentence, FUNASR_MODE mode, QM_CALLBACK fn_callback, PUNC_TYPE type, FUNASR_RESULT pre_result)
  167. {
  168. funasr::PuncModel* punc_obj = (funasr::PuncModel*)handle;
  169. if (!punc_obj)
  170. return nullptr;
  171. FUNASR_RESULT p_result = nullptr;
  172. if (type==PUNC_OFFLINE){
  173. p_result = (FUNASR_RESULT)new funasr::FUNASR_PUNC_RESULT;
  174. ((funasr::FUNASR_PUNC_RESULT*)p_result)->msg = punc_obj->AddPunc(sz_sentence);
  175. }else if(type==PUNC_ONLINE){
  176. if (!pre_result)
  177. p_result = (FUNASR_RESULT)new funasr::FUNASR_PUNC_RESULT;
  178. else
  179. p_result = pre_result;
  180. ((funasr::FUNASR_PUNC_RESULT*)p_result)->msg = punc_obj->AddPunc(sz_sentence, ((funasr::FUNASR_PUNC_RESULT*)p_result)->arr_cache);
  181. }else{
  182. LOG(ERROR) << "Wrong PUNC_TYPE";
  183. exit(-1);
  184. }
  185. return p_result;
  186. }
  187. // APIs for Offline-stream Infer
  188. _FUNASRAPI FUNASR_RESULT FunOfflineInferBuffer(FUNASR_HANDLE handle, const char* sz_buf, int n_len,
  189. FUNASR_MODE mode, QM_CALLBACK fn_callback, const std::vector<std::vector<float>> &hw_emb,
  190. int sampling_rate, std::string wav_format, bool itn, FUNASR_DEC_HANDLE dec_handle)
  191. {
  192. funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
  193. if (!offline_stream)
  194. return nullptr;
  195. funasr::Audio audio(offline_stream->asr_handle->GetAsrSampleRate(),1);
  196. try{
  197. if(wav_format == "pcm" || wav_format == "PCM"){
  198. if (!audio.LoadPcmwav(sz_buf, n_len, &sampling_rate))
  199. return nullptr;
  200. }else{
  201. if (!audio.FfmpegLoad(sz_buf, n_len))
  202. return nullptr;
  203. }
  204. }catch (std::exception const &e)
  205. {
  206. LOG(ERROR)<<e.what();
  207. return nullptr;
  208. }
  209. funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
  210. p_result->snippet_time = audio.GetTimeLen();
  211. if(p_result->snippet_time == 0){
  212. return p_result;
  213. }
  214. if(offline_stream->UseVad()){
  215. audio.CutSplit(offline_stream);
  216. }
  217. float* buff;
  218. int len;
  219. int flag = 0;
  220. int n_step = 0;
  221. int n_total = audio.GetQueueSize();
  222. float start_time = 0.0;
  223. std::string cur_stamp = "[";
  224. std::string lang = (offline_stream->asr_handle)->GetLang();
  225. while (audio.Fetch(buff, len, flag, start_time) > 0) {
  226. // dec reset
  227. funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)dec_handle;
  228. if (wfst_decoder){
  229. wfst_decoder->StartUtterance();
  230. }
  231. string msg = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle);
  232. std::vector<std::string> msg_vec = funasr::split(msg, '|');
  233. if(msg_vec.size()==0){
  234. continue;
  235. }
  236. if(lang == "en-bpe" && p_result->msg != ""){
  237. p_result->msg += " ";
  238. }
  239. p_result->msg += msg_vec[0];
  240. //timestamp
  241. if(msg_vec.size() > 1){
  242. std::vector<std::string> msg_stamp = funasr::split(msg_vec[1], ',');
  243. for(int i=0; i<msg_stamp.size()-1; i+=2){
  244. float begin = std::stof(msg_stamp[i])+start_time;
  245. float end = std::stof(msg_stamp[i+1])+start_time;
  246. cur_stamp += "["+std::to_string((int)(1000*begin))+","+std::to_string((int)(1000*end))+"],";
  247. }
  248. }
  249. n_step++;
  250. if (fn_callback)
  251. fn_callback(n_step, n_total);
  252. }
  253. if(cur_stamp != "["){
  254. cur_stamp.erase(cur_stamp.length() - 1);
  255. p_result->stamp += cur_stamp + "]";
  256. }
  257. if(offline_stream->UsePunc()){
  258. string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str(), lang);
  259. p_result->msg = punc_res;
  260. }
  261. #if !defined(__APPLE__)
  262. if(offline_stream->UseITN() && itn){
  263. string msg_itn = offline_stream->itn_handle->Normalize(p_result->msg);
  264. if(!(p_result->stamp).empty()){
  265. std::string new_stamp = funasr::TimestampSmooth(p_result->msg, msg_itn, p_result->stamp);
  266. if(!new_stamp.empty()){
  267. p_result->stamp = new_stamp;
  268. }
  269. }
  270. p_result->msg = msg_itn;
  271. }
  272. #endif
  273. if (!(p_result->stamp).empty()){
  274. p_result->stamp_sents = funasr::TimestampSentence(p_result->msg, p_result->stamp);
  275. }
  276. return p_result;
  277. }
  278. _FUNASRAPI FUNASR_RESULT FunOfflineInfer(FUNASR_HANDLE handle, const char* sz_filename, FUNASR_MODE mode, QM_CALLBACK fn_callback,
  279. const std::vector<std::vector<float>> &hw_emb, int sampling_rate, bool itn, FUNASR_DEC_HANDLE dec_handle)
  280. {
  281. funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
  282. if (!offline_stream)
  283. return nullptr;
  284. funasr::Audio audio((offline_stream->asr_handle)->GetAsrSampleRate(),1);
  285. try{
  286. if(funasr::is_target_file(sz_filename, "wav")){
  287. int32_t sampling_rate_ = -1;
  288. if(!audio.LoadWav(sz_filename, &sampling_rate_))
  289. return nullptr;
  290. }else if(funasr::is_target_file(sz_filename, "pcm")){
  291. if (!audio.LoadPcmwav(sz_filename, &sampling_rate))
  292. return nullptr;
  293. }else{
  294. if (!audio.FfmpegLoad(sz_filename))
  295. return nullptr;
  296. }
  297. }catch (std::exception const &e)
  298. {
  299. LOG(ERROR)<<e.what();
  300. return nullptr;
  301. }
  302. funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
  303. p_result->snippet_time = audio.GetTimeLen();
  304. if(p_result->snippet_time == 0){
  305. return p_result;
  306. }
  307. if(offline_stream->UseVad()){
  308. audio.Split(offline_stream);
  309. }
  310. float* buff;
  311. int len;
  312. int flag = 0;
  313. int n_step = 0;
  314. int n_total = audio.GetQueueSize();
  315. float start_time = 0.0;
  316. std::string cur_stamp = "[";
  317. std::string lang = (offline_stream->asr_handle)->GetLang();
  318. while (audio.Fetch(buff, len, flag, start_time) > 0) {
  319. // dec reset
  320. funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)dec_handle;
  321. if (wfst_decoder){
  322. wfst_decoder->StartUtterance();
  323. }
  324. string msg = (offline_stream->asr_handle)->Forward(buff, len, true, hw_emb, dec_handle);
  325. std::vector<std::string> msg_vec = funasr::split(msg, '|');
  326. if(msg_vec.size()==0){
  327. continue;
  328. }
  329. if(lang == "en-bpe" && p_result->msg != ""){
  330. p_result->msg += " ";
  331. }
  332. p_result->msg += msg_vec[0];
  333. //timestamp
  334. if(msg_vec.size() > 1){
  335. std::vector<std::string> msg_stamp = funasr::split(msg_vec[1], ',');
  336. for(int i=0; i<msg_stamp.size()-1; i+=2){
  337. float begin = std::stof(msg_stamp[i])+start_time;
  338. float end = std::stof(msg_stamp[i+1])+start_time;
  339. cur_stamp += "["+std::to_string((int)(1000*begin))+","+std::to_string((int)(1000*end))+"],";
  340. }
  341. }
  342. n_step++;
  343. if (fn_callback)
  344. fn_callback(n_step, n_total);
  345. }
  346. if(cur_stamp != "["){
  347. cur_stamp.erase(cur_stamp.length() - 1);
  348. p_result->stamp += cur_stamp + "]";
  349. }
  350. if(offline_stream->UsePunc()){
  351. string punc_res = (offline_stream->punc_handle)->AddPunc((p_result->msg).c_str(), lang);
  352. p_result->msg = punc_res;
  353. }
  354. #if !defined(__APPLE__)
  355. if(offline_stream->UseITN() && itn){
  356. string msg_itn = offline_stream->itn_handle->Normalize(p_result->msg);
  357. if(!(p_result->stamp).empty()){
  358. std::string new_stamp = funasr::TimestampSmooth(p_result->msg, msg_itn, p_result->stamp);
  359. if(!new_stamp.empty()){
  360. p_result->stamp = new_stamp;
  361. }
  362. }
  363. p_result->msg = msg_itn;
  364. }
  365. #endif
  366. if (!(p_result->stamp).empty()){
  367. p_result->stamp_sents = funasr::TimestampSentence(p_result->msg, p_result->stamp);
  368. }
  369. return p_result;
  370. }
  371. #if !defined(__APPLE__)
  372. _FUNASRAPI const std::vector<std::vector<float>> CompileHotwordEmbedding(FUNASR_HANDLE handle, std::string &hotwords, ASR_TYPE mode)
  373. {
  374. if (mode == ASR_OFFLINE){
  375. funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
  376. std::vector<std::vector<float>> emb;
  377. if (!offline_stream)
  378. return emb;
  379. return (offline_stream->asr_handle)->CompileHotwordEmbedding(hotwords);
  380. }
  381. else if (mode == ASR_TWO_PASS){
  382. funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
  383. std::vector<std::vector<float>> emb;
  384. if (!tpass_stream)
  385. return emb;
  386. return (tpass_stream->asr_handle)->CompileHotwordEmbedding(hotwords);
  387. }
  388. else{
  389. LOG(ERROR) << "Not implement: Online model does not support Hotword yet!";
  390. std::vector<std::vector<float>> emb;
  391. return emb;
  392. }
  393. }
  394. #endif
  395. // APIs for 2pass-stream Infer
  396. _FUNASRAPI FUNASR_RESULT FunTpassInferBuffer(FUNASR_HANDLE handle, FUNASR_HANDLE online_handle, const char* sz_buf,
  397. int n_len, std::vector<std::vector<std::string>> &punc_cache, bool input_finished,
  398. int sampling_rate, std::string wav_format, ASR_TYPE mode,
  399. const std::vector<std::vector<float>> &hw_emb, bool itn, FUNASR_DEC_HANDLE dec_handle)
  400. {
  401. funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
  402. funasr::TpassOnlineStream* tpass_online_stream = (funasr::TpassOnlineStream*)online_handle;
  403. if (!tpass_stream || !tpass_online_stream)
  404. return nullptr;
  405. funasr::VadModel* vad_online_handle = (tpass_online_stream->vad_online_handle).get();
  406. if (!vad_online_handle)
  407. return nullptr;
  408. funasr::Audio* audio = ((funasr::FsmnVadOnline*)vad_online_handle)->audio_handle.get();
  409. funasr::Model* asr_online_handle = (tpass_online_stream->asr_online_handle).get();
  410. if (!asr_online_handle)
  411. return nullptr;
  412. int chunk_len = ((funasr::ParaformerOnline*)asr_online_handle)->chunk_len;
  413. funasr::Model* asr_handle = (tpass_stream->asr_handle).get();
  414. if (!asr_handle)
  415. return nullptr;
  416. funasr::PuncModel* punc_online_handle = (tpass_stream->punc_online_handle).get();
  417. if (!punc_online_handle)
  418. return nullptr;
  419. if(wav_format == "pcm" || wav_format == "PCM"){
  420. if (!audio->LoadPcmwavOnline(sz_buf, n_len, &sampling_rate))
  421. return nullptr;
  422. }else{
  423. // if (!audio->FfmpegLoad(sz_buf, n_len))
  424. // return nullptr;
  425. LOG(ERROR) <<"Wrong wav_format: " << wav_format ;
  426. return nullptr;
  427. }
  428. funasr::FUNASR_RECOG_RESULT* p_result = new funasr::FUNASR_RECOG_RESULT;
  429. p_result->snippet_time = audio->GetTimeLen();
  430. audio->Split(vad_online_handle, chunk_len, input_finished, mode);
  431. funasr::AudioFrame* frame = nullptr;
  432. while(audio->FetchChunck(frame) > 0){
  433. string msg = ((funasr::ParaformerOnline*)asr_online_handle)->Forward(frame->data, frame->len, frame->is_final);
  434. if(mode == ASR_ONLINE){
  435. ((funasr::ParaformerOnline*)asr_online_handle)->online_res += msg;
  436. if(frame->is_final){
  437. string online_msg = ((funasr::ParaformerOnline*)asr_online_handle)->online_res;
  438. string msg_punc = punc_online_handle->AddPunc(online_msg.c_str(), punc_cache[0]);
  439. p_result->tpass_msg = msg_punc;
  440. #if !defined(__APPLE__)
  441. // ITN
  442. if(tpass_stream->UseITN() && itn){
  443. string msg_itn = tpass_stream->itn_handle->Normalize(msg_punc);
  444. p_result->tpass_msg = msg_itn;
  445. }
  446. #endif
  447. ((funasr::ParaformerOnline*)asr_online_handle)->online_res = "";
  448. p_result->msg += msg;
  449. }else{
  450. p_result->msg += msg;
  451. }
  452. }else if(mode == ASR_TWO_PASS){
  453. p_result->msg += msg;
  454. }
  455. if(frame != nullptr){
  456. delete frame;
  457. frame = nullptr;
  458. }
  459. }
  460. // timestamp
  461. std::string cur_stamp = "[";
  462. while(audio->FetchTpass(frame) > 0){
  463. // dec reset
  464. funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)dec_handle;
  465. if (wfst_decoder){
  466. wfst_decoder->StartUtterance();
  467. }
  468. string msg = ((funasr::Paraformer*)asr_handle)->Forward(frame->data, frame->len, frame->is_final, hw_emb, dec_handle);
  469. std::vector<std::string> msg_vec = funasr::split(msg, '|'); // split with timestamp
  470. if(msg_vec.size()==0){
  471. continue;
  472. }
  473. msg = msg_vec[0];
  474. //timestamp
  475. if(msg_vec.size() > 1){
  476. std::vector<std::string> msg_stamp = funasr::split(msg_vec[1], ',');
  477. for(int i=0; i<msg_stamp.size()-1; i+=2){
  478. float begin = std::stof(msg_stamp[i]) + float(frame->global_start)/1000.0;
  479. float end = std::stof(msg_stamp[i+1]) + float(frame->global_start)/1000.0;
  480. cur_stamp += "["+std::to_string((int)(1000*begin))+","+std::to_string((int)(1000*end))+"],";
  481. }
  482. }
  483. if(cur_stamp != "["){
  484. cur_stamp.erase(cur_stamp.length() - 1);
  485. p_result->stamp += cur_stamp + "]";
  486. }
  487. string msg_punc = punc_online_handle->AddPunc(msg.c_str(), punc_cache[1]);
  488. if(input_finished){
  489. msg_punc += "。";
  490. }
  491. p_result->tpass_msg = msg_punc;
  492. #if !defined(__APPLE__)
  493. if(tpass_stream->UseITN() && itn){
  494. string msg_itn = tpass_stream->itn_handle->Normalize(msg_punc);
  495. // TimestampSmooth
  496. if(!(p_result->stamp).empty()){
  497. std::string new_stamp = funasr::TimestampSmooth(p_result->tpass_msg, msg_itn, p_result->stamp);
  498. if(!new_stamp.empty()){
  499. p_result->stamp = new_stamp;
  500. }
  501. }
  502. p_result->tpass_msg = msg_itn;
  503. }
  504. #endif
  505. if (!(p_result->stamp).empty()){
  506. p_result->stamp_sents = funasr::TimestampSentence(p_result->tpass_msg, p_result->stamp);
  507. }
  508. if(frame != nullptr){
  509. delete frame;
  510. frame = nullptr;
  511. }
  512. }
  513. if(input_finished){
  514. audio->ResetIndex();
  515. }
  516. return p_result;
  517. }
  518. _FUNASRAPI const int FunASRGetRetNumber(FUNASR_RESULT result)
  519. {
  520. if (!result)
  521. return 0;
  522. return 1;
  523. }
  524. // APIs for GetRetSnippetTime
  525. _FUNASRAPI const float FunASRGetRetSnippetTime(FUNASR_RESULT result)
  526. {
  527. if (!result)
  528. return 0.0f;
  529. return ((funasr::FUNASR_RECOG_RESULT*)result)->snippet_time;
  530. }
  531. _FUNASRAPI const float FsmnVadGetRetSnippetTime(FUNASR_RESULT result)
  532. {
  533. if (!result)
  534. return 0.0f;
  535. return ((funasr::FUNASR_VAD_RESULT*)result)->snippet_time;
  536. }
  537. // APIs for GetResult
  538. _FUNASRAPI const char* FunASRGetResult(FUNASR_RESULT result,int n_index)
  539. {
  540. funasr::FUNASR_RECOG_RESULT * p_result = (funasr::FUNASR_RECOG_RESULT*)result;
  541. if(!p_result)
  542. return nullptr;
  543. return p_result->msg.c_str();
  544. }
  545. _FUNASRAPI const char* FunASRGetStamp(FUNASR_RESULT result)
  546. {
  547. funasr::FUNASR_RECOG_RESULT * p_result = (funasr::FUNASR_RECOG_RESULT*)result;
  548. if(!p_result)
  549. return nullptr;
  550. return p_result->stamp.c_str();
  551. }
  552. _FUNASRAPI const char* FunASRGetStampSents(FUNASR_RESULT result)
  553. {
  554. funasr::FUNASR_RECOG_RESULT * p_result = (funasr::FUNASR_RECOG_RESULT*)result;
  555. if(!p_result)
  556. return nullptr;
  557. return p_result->stamp_sents.c_str();
  558. }
  559. _FUNASRAPI const char* FunASRGetTpassResult(FUNASR_RESULT result,int n_index)
  560. {
  561. funasr::FUNASR_RECOG_RESULT * p_result = (funasr::FUNASR_RECOG_RESULT*)result;
  562. if(!p_result)
  563. return nullptr;
  564. return p_result->tpass_msg.c_str();
  565. }
  566. _FUNASRAPI const char* CTTransformerGetResult(FUNASR_RESULT result,int n_index)
  567. {
  568. funasr::FUNASR_PUNC_RESULT * p_result = (funasr::FUNASR_PUNC_RESULT*)result;
  569. if(!p_result)
  570. return nullptr;
  571. return p_result->msg.c_str();
  572. }
  573. _FUNASRAPI vector<std::vector<int>>* FsmnVadGetResult(FUNASR_RESULT result,int n_index)
  574. {
  575. funasr::FUNASR_VAD_RESULT * p_result = (funasr::FUNASR_VAD_RESULT*)result;
  576. if(!p_result)
  577. return nullptr;
  578. return p_result->segments;
  579. }
  580. // APIs for FreeResult
  581. _FUNASRAPI void FunASRFreeResult(FUNASR_RESULT result)
  582. {
  583. if (result)
  584. {
  585. delete (funasr::FUNASR_RECOG_RESULT*)result;
  586. }
  587. }
  588. _FUNASRAPI void CTTransformerFreeResult(FUNASR_RESULT result)
  589. {
  590. if (result)
  591. {
  592. delete (funasr::FUNASR_PUNC_RESULT*)result;
  593. }
  594. }
  595. _FUNASRAPI void FsmnVadFreeResult(FUNASR_RESULT result)
  596. {
  597. funasr::FUNASR_VAD_RESULT * p_result = (funasr::FUNASR_VAD_RESULT*)result;
  598. if (p_result)
  599. {
  600. if(p_result->segments){
  601. delete p_result->segments;
  602. }
  603. delete p_result;
  604. }
  605. }
  606. // APIs for decoder status reset
  607. _FUNASRAPI void FunASRReset(FUNASR_HANDLE handle, FUNASR_DEC_HANDLE dec_handle)
  608. {
  609. funasr::Model* recog_obj = (funasr::Model*)handle;
  610. recog_obj->StartUtterance();
  611. funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)dec_handle;
  612. if (wfst_decoder)
  613. wfst_decoder->StartUtterance();
  614. }
  615. _FUNASRAPI void FunOfflineReset(FUNASR_HANDLE handle, FUNASR_DEC_HANDLE dec_handle)
  616. {
  617. funasr::OfflineStream* recog_obj = (funasr::OfflineStream*)handle;
  618. recog_obj->asr_handle->StartUtterance();
  619. funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)dec_handle;
  620. if (wfst_decoder)
  621. wfst_decoder->StartUtterance();
  622. }
  623. // APIs for Uninit
  624. _FUNASRAPI void FunASRUninit(FUNASR_HANDLE handle)
  625. {
  626. funasr::Model* recog_obj = (funasr::Model*)handle;
  627. if (!recog_obj)
  628. return;
  629. delete recog_obj;
  630. }
  631. _FUNASRAPI void FsmnVadUninit(FUNASR_HANDLE handle)
  632. {
  633. funasr::VadModel* recog_obj = (funasr::VadModel*)handle;
  634. if (!recog_obj)
  635. return;
  636. delete recog_obj;
  637. }
  638. _FUNASRAPI void CTTransformerUninit(FUNASR_HANDLE handle)
  639. {
  640. funasr::PuncModel* punc_obj = (funasr::PuncModel*)handle;
  641. if (!punc_obj)
  642. return;
  643. delete punc_obj;
  644. }
  645. _FUNASRAPI void FunOfflineUninit(FUNASR_HANDLE handle)
  646. {
  647. funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
  648. if (!offline_stream)
  649. return;
  650. delete offline_stream;
  651. }
  652. _FUNASRAPI void FunTpassUninit(FUNASR_HANDLE handle)
  653. {
  654. funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
  655. if (!tpass_stream)
  656. return;
  657. delete tpass_stream;
  658. }
  659. _FUNASRAPI void FunTpassOnlineUninit(FUNASR_HANDLE handle)
  660. {
  661. funasr::TpassOnlineStream* tpass_online_stream = (funasr::TpassOnlineStream*)handle;
  662. if (!tpass_online_stream)
  663. return;
  664. delete tpass_online_stream;
  665. }
  666. _FUNASRAPI FUNASR_DEC_HANDLE FunASRWfstDecoderInit(FUNASR_HANDLE handle, int asr_type, float glob_beam, float lat_beam, float am_scale)
  667. {
  668. funasr::WfstDecoder* mm = nullptr;
  669. if (asr_type == ASR_OFFLINE) {
  670. funasr::OfflineStream* offline_stream = (funasr::OfflineStream*)handle;
  671. funasr::Paraformer* paraformer = (funasr::Paraformer*)offline_stream->asr_handle.get();
  672. if (paraformer->lm_)
  673. mm = new funasr::WfstDecoder(paraformer->lm_.get(),
  674. paraformer->GetPhoneSet(), paraformer->GetLmVocab(), glob_beam, lat_beam, am_scale);
  675. } else if (asr_type == ASR_TWO_PASS){
  676. funasr::TpassStream* tpass_stream = (funasr::TpassStream*)handle;
  677. funasr::Paraformer* paraformer = (funasr::Paraformer*)tpass_stream->asr_handle.get();
  678. if (paraformer->lm_)
  679. mm = new funasr::WfstDecoder(paraformer->lm_.get(),
  680. paraformer->GetPhoneSet(), paraformer->GetLmVocab(), glob_beam, lat_beam, am_scale);
  681. }
  682. return mm;
  683. }
  684. _FUNASRAPI void FunASRWfstDecoderUninit(FUNASR_DEC_HANDLE handle)
  685. {
  686. funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)handle;
  687. if (!wfst_decoder)
  688. return;
  689. delete wfst_decoder;
  690. }
  691. _FUNASRAPI void FunWfstDecoderLoadHwsRes(FUNASR_DEC_HANDLE handle, int inc_bias, unordered_map<string, int> &hws_map)
  692. {
  693. funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)handle;
  694. if (!wfst_decoder)
  695. return;
  696. wfst_decoder->LoadHwsRes(inc_bias, hws_map);
  697. }
  698. _FUNASRAPI void FunWfstDecoderUnloadHwsRes(FUNASR_DEC_HANDLE handle)
  699. {
  700. funasr::WfstDecoder* wfst_decoder = (funasr::WfstDecoder*)handle;
  701. if (!wfst_decoder)
  702. return;
  703. wfst_decoder->UnloadHwsRes();
  704. }