websocket-server.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419
  1. /**
  2. * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  3. * Reserved. MIT License (https://opensource.org/licenses/MIT)
  4. */
  5. /* 2022-2023 by zhaomingwork */
  6. // websocket server for asr engine
  7. // take some ideas from https://github.com/k2-fsa/sherpa-onnx
  8. // online-websocket-server-impl.cc, thanks. The websocket server has two thread
  9. // pools: one for handling network data and one for the ASR decoder.
  10. // Currently only the offline engine is supported.
  11. #include "websocket-server.h"
  12. #include <thread>
  13. #include <utility>
  14. #include <vector>
  15. extern std::unordered_map<std::string, int> hws_map_;
  16. extern int fst_inc_wts_;
  17. extern float global_beam_, lattice_beam_, am_scale_;
  18. context_ptr WebSocketServer::on_tls_init(tls_mode mode,
  19. websocketpp::connection_hdl hdl,
  20. std::string& s_certfile,
  21. std::string& s_keyfile) {
  22. namespace asio = websocketpp::lib::asio;
  23. LOG(INFO) << "on_tls_init called with hdl: " << hdl.lock().get();
  24. LOG(INFO) << "using TLS mode: "
  25. << (mode == MOZILLA_MODERN ? "Mozilla Modern"
  26. : "Mozilla Intermediate");
  27. context_ptr ctx = websocketpp::lib::make_shared<asio::ssl::context>(
  28. asio::ssl::context::sslv23);
  29. try {
  30. if (mode == MOZILLA_MODERN) {
  31. // Modern disables TLSv1
  32. ctx->set_options(
  33. asio::ssl::context::default_workarounds |
  34. asio::ssl::context::no_sslv2 | asio::ssl::context::no_sslv3 |
  35. asio::ssl::context::no_tlsv1 | asio::ssl::context::single_dh_use);
  36. } else {
  37. ctx->set_options(asio::ssl::context::default_workarounds |
  38. asio::ssl::context::no_sslv2 |
  39. asio::ssl::context::no_sslv3 |
  40. asio::ssl::context::single_dh_use);
  41. }
  42. ctx->use_certificate_chain_file(s_certfile);
  43. ctx->use_private_key_file(s_keyfile, asio::ssl::context::pem);
  44. } catch (std::exception& e) {
  45. LOG(INFO) << "Exception: " << e.what();
  46. }
  47. return ctx;
  48. }
  49. // feed buffer to asr engine for decoder
  50. void WebSocketServer::do_decoder(const std::vector<char>& buffer,
  51. websocketpp::connection_hdl& hdl,
  52. nlohmann::json& msg,
  53. websocketpp::lib::mutex& thread_lock,
  54. std::vector<std::vector<float>> &hotwords_embedding,
  55. std::string wav_name,
  56. bool itn,
  57. int audio_fs,
  58. std::string wav_format,
  59. FUNASR_DEC_HANDLE& decoder_handle) {
  60. try {
  61. int num_samples = buffer.size(); // the size of the buf
  62. if (!buffer.empty() && hotwords_embedding.size() > 0) {
  63. std::string asr_result="";
  64. std::string stamp_res="";
  65. std::string stamp_sents="";
  66. try{
  67. FUNASR_RESULT Result = FunOfflineInferBuffer(
  68. asr_handle, buffer.data(), buffer.size(), RASR_NONE, nullptr,
  69. hotwords_embedding, audio_fs, wav_format, itn, decoder_handle);
  70. if (Result != nullptr){
  71. asr_result = FunASRGetResult(Result, 0); // get decode result
  72. stamp_res = FunASRGetStamp(Result);
  73. stamp_sents = FunASRGetStampSents(Result);
  74. FunASRFreeResult(Result);
  75. } else{
  76. LOG(ERROR) << "FUNASR_RESULT is nullptr.";
  77. }
  78. }catch (std::exception const& e) {
  79. LOG(ERROR) << e.what();
  80. }
  81. websocketpp::lib::error_code ec;
  82. nlohmann::json jsonresult; // result json
  83. jsonresult["text"] = asr_result; // put result in 'text'
  84. jsonresult["mode"] = "offline";
  85. jsonresult["is_final"] = false;
  86. if(stamp_res != ""){
  87. jsonresult["timestamp"] = stamp_res;
  88. }
  89. if(stamp_sents != ""){
  90. try{
  91. nlohmann::json json_stamp = nlohmann::json::parse(stamp_sents);
  92. jsonresult["stamp_sents"] = json_stamp;
  93. }catch (std::exception const &e)
  94. {
  95. LOG(ERROR)<<e.what();
  96. jsonresult["stamp_sents"] = "";
  97. }
  98. }
  99. jsonresult["wav_name"] = wav_name;
  100. // send the json to client
  101. if (is_ssl) {
  102. wss_server_->send(hdl, jsonresult.dump(),
  103. websocketpp::frame::opcode::text, ec);
  104. } else {
  105. server_->send(hdl, jsonresult.dump(), websocketpp::frame::opcode::text,
  106. ec);
  107. }
  108. LOG(INFO) << "buffer.size=" << buffer.size() << ",result json=" << jsonresult.dump();
  109. }else{
  110. LOG(INFO) << "Sent empty msg";
  111. websocketpp::lib::error_code ec;
  112. nlohmann::json jsonresult; // result json
  113. jsonresult["text"] = ""; // put result in 'text'
  114. jsonresult["mode"] = "offline";
  115. jsonresult["is_final"] = false;
  116. jsonresult["wav_name"] = wav_name;
  117. // send the json to client
  118. if (is_ssl) {
  119. wss_server_->send(hdl, jsonresult.dump(),
  120. websocketpp::frame::opcode::text, ec);
  121. } else {
  122. server_->send(hdl, jsonresult.dump(), websocketpp::frame::opcode::text,
  123. ec);
  124. }
  125. }
  126. } catch (std::exception const& e) {
  127. std::cerr << "Error: " << e.what() << std::endl;
  128. }
  129. scoped_lock guard(thread_lock);
  130. msg["access_num"]=(int)msg["access_num"]-1;
  131. }
  132. void WebSocketServer::on_open(websocketpp::connection_hdl hdl) {
  133. scoped_lock guard(m_lock); // for threads safty
  134. std::shared_ptr<FUNASR_MESSAGE> data_msg =
  135. std::make_shared<FUNASR_MESSAGE>(); // put a new data vector for new
  136. // connection
  137. data_msg->samples = std::make_shared<std::vector<char>>();
  138. data_msg->thread_lock = std::make_shared<websocketpp::lib::mutex>();
  139. data_msg->msg = nlohmann::json::parse("{}");
  140. data_msg->msg["wav_format"] = "pcm";
  141. data_msg->msg["wav_name"] = "wav-default-id";
  142. data_msg->msg["itn"] = true;
  143. data_msg->msg["audio_fs"] = 16000; // default is 16k
  144. data_msg->msg["access_num"] = 0; // the number of access for this object, when it is 0, we can free it saftly
  145. data_msg->msg["is_eof"]=false;
  146. FUNASR_DEC_HANDLE decoder_handle =
  147. FunASRWfstDecoderInit(asr_handle, ASR_OFFLINE, global_beam_, lattice_beam_, am_scale_);
  148. data_msg->decoder_handle = decoder_handle;
  149. data_map.emplace(hdl, data_msg);
  150. LOG(INFO) << "on_open, active connections: " << data_map.size();
  151. }
  152. void WebSocketServer::on_close(websocketpp::connection_hdl hdl) {
  153. scoped_lock guard(m_lock);
  154. std::shared_ptr<FUNASR_MESSAGE> data_msg = nullptr;
  155. auto it_data = data_map.find(hdl);
  156. if (it_data != data_map.end()) {
  157. data_msg = it_data->second;
  158. } else {
  159. return;
  160. }
  161. unique_lock guard_decoder(*(data_msg->thread_lock));
  162. data_msg->msg["is_eof"]=true;
  163. guard_decoder.unlock();
  164. LOG(INFO) << "on_close, active connections: " << data_map.size();
  165. }
  166. void remove_hdl(
  167. websocketpp::connection_hdl hdl,
  168. std::map<websocketpp::connection_hdl, std::shared_ptr<FUNASR_MESSAGE>,
  169. std::owner_less<websocketpp::connection_hdl>>& data_map) {
  170. std::shared_ptr<FUNASR_MESSAGE> data_msg = nullptr;
  171. auto it_data = data_map.find(hdl);
  172. if (it_data != data_map.end()) {
  173. data_msg = it_data->second;
  174. } else {
  175. return;
  176. }
  177. unique_lock guard_decoder(*(data_msg->thread_lock));
  178. if (data_msg->msg["access_num"]==0 && data_msg->msg["is_eof"]==true) {
  179. FunWfstDecoderUnloadHwsRes(data_msg->decoder_handle);
  180. FunASRWfstDecoderUninit(data_msg->decoder_handle);
  181. data_msg->decoder_handle = nullptr;
  182. data_map.erase(hdl);
  183. LOG(INFO) << "remove one connection";
  184. }
  185. guard_decoder.unlock();
  186. }
  187. void WebSocketServer::check_and_clean_connection() {
  188. while(true){
  189. std::this_thread::sleep_for(std::chrono::milliseconds(5000));
  190. std::vector<websocketpp::connection_hdl> to_remove; // remove list
  191. auto iter = data_map.begin();
  192. while (iter != data_map.end()) { // loop to find closed connection
  193. websocketpp::connection_hdl hdl = iter->first;
  194. try{
  195. if (is_ssl) {
  196. wss_server::connection_ptr con = wss_server_->get_con_from_hdl(hdl);
  197. if (con->get_state() != 1) { // session::state::open ==1
  198. to_remove.push_back(hdl);
  199. }
  200. } else {
  201. server::connection_ptr con = server_->get_con_from_hdl(hdl);
  202. if (con->get_state() != 1) { // session::state::open ==1
  203. to_remove.push_back(hdl);
  204. }
  205. }
  206. }
  207. catch (std::exception const &e)
  208. {
  209. // if connection is close, we set is_eof = true
  210. std::shared_ptr<FUNASR_MESSAGE> data_msg = nullptr;
  211. auto it_data = data_map.find(hdl);
  212. if (it_data != data_map.end()) {
  213. data_msg = it_data->second;
  214. } else {
  215. continue;
  216. }
  217. unique_lock guard_decoder(*(data_msg->thread_lock));
  218. data_msg->msg["is_eof"]=true;
  219. guard_decoder.unlock();
  220. to_remove.push_back(hdl);
  221. LOG(INFO)<<"connection is closed.";
  222. }
  223. iter++;
  224. }
  225. for (auto hdl : to_remove) {
  226. {
  227. unique_lock lock(m_lock);
  228. remove_hdl(hdl, data_map);
  229. }
  230. }
  231. }
  232. }
  233. void WebSocketServer::on_message(websocketpp::connection_hdl hdl,
  234. message_ptr msg) {
  235. unique_lock lock(m_lock);
  236. // find the sample data vector according to one connection
  237. std::shared_ptr<FUNASR_MESSAGE> msg_data = nullptr;
  238. auto it_data = data_map.find(hdl);
  239. if (it_data != data_map.end()) {
  240. msg_data = it_data->second;
  241. if(msg_data->msg["is_eof"]){
  242. lock.unlock();
  243. return;
  244. }
  245. } else{
  246. lock.unlock();
  247. return;
  248. }
  249. std::shared_ptr<std::vector<char>> sample_data_p = msg_data->samples;
  250. std::shared_ptr<websocketpp::lib::mutex> thread_lock_p = msg_data->thread_lock;
  251. lock.unlock();
  252. if (sample_data_p == nullptr) {
  253. LOG(INFO) << "error when fetch sample data vector";
  254. return;
  255. }
  256. const std::string& payload = msg->get_payload(); // get msg type
  257. unique_lock guard_decoder(*(thread_lock_p)); // mutex for one connection
  258. switch (msg->get_opcode()) {
  259. case websocketpp::frame::opcode::text: {
  260. nlohmann::json jsonresult;
  261. try{
  262. jsonresult = nlohmann::json::parse(payload);
  263. }catch (std::exception const &e)
  264. {
  265. LOG(ERROR)<<e.what();
  266. msg_data->msg["is_eof"]=true;
  267. guard_decoder.unlock();
  268. return;
  269. }
  270. if (jsonresult["wav_name"] != nullptr) {
  271. msg_data->msg["wav_name"] = jsonresult["wav_name"];
  272. }
  273. if (jsonresult["wav_format"] != nullptr) {
  274. msg_data->msg["wav_format"] = jsonresult["wav_format"];
  275. }
  276. // hotwords: fst/nn
  277. if(msg_data->hotwords_embedding == nullptr){
  278. std::unordered_map<std::string, int> merged_hws_map;
  279. std::string nn_hotwords = "";
  280. if (jsonresult["hotwords"] != nullptr) {
  281. std::string json_string = jsonresult["hotwords"];
  282. if (!json_string.empty()){
  283. nlohmann::json json_fst_hws;
  284. try{
  285. json_fst_hws = nlohmann::json::parse(json_string);
  286. if(json_fst_hws.type() == nlohmann::json::value_t::object){
  287. // fst
  288. try{
  289. std::unordered_map<std::string, int> client_hws_map = json_fst_hws;
  290. merged_hws_map.insert(client_hws_map.begin(), client_hws_map.end());
  291. } catch (const std::exception& e) {
  292. LOG(INFO) << e.what();
  293. }
  294. }
  295. } catch (std::exception const &e)
  296. {
  297. LOG(ERROR)<<e.what();
  298. // nn
  299. std::string client_nn_hws = jsonresult["hotwords"];
  300. nn_hotwords += " " + client_nn_hws;
  301. // LOG(INFO) << "nn hotwords: " << client_nn_hws;
  302. }
  303. }
  304. }
  305. merged_hws_map.insert(hws_map_.begin(), hws_map_.end());
  306. // fst
  307. LOG(INFO) << "hotwords: ";
  308. for (const auto& pair : merged_hws_map) {
  309. nn_hotwords += " " + pair.first;
  310. LOG(INFO) << pair.first << " : " << pair.second;
  311. }
  312. FunWfstDecoderLoadHwsRes(msg_data->decoder_handle, fst_inc_wts_, merged_hws_map);
  313. // nn
  314. std::vector<std::vector<float>> new_hotwords_embedding= CompileHotwordEmbedding(asr_handle, nn_hotwords);
  315. msg_data->hotwords_embedding =
  316. std::make_shared<std::vector<std::vector<float>>>(new_hotwords_embedding);
  317. }
  318. if (jsonresult.contains("audio_fs")) {
  319. msg_data->msg["audio_fs"] = jsonresult["audio_fs"];
  320. }
  321. if (jsonresult.contains("itn")) {
  322. msg_data->msg["itn"] = jsonresult["itn"];
  323. }
  324. if ((jsonresult["is_speaking"] == false ||
  325. jsonresult["is_finished"] == true) &&
  326. msg_data->msg["is_eof"] != true &&
  327. msg_data->hotwords_embedding != nullptr) {
  328. LOG(INFO) << "client done";
  329. // for offline, send all receive data to decoder engine
  330. std::vector<std::vector<float>> hotwords_embedding_(*(msg_data->hotwords_embedding));
  331. asio::post(io_decoder_,
  332. std::bind(&WebSocketServer::do_decoder, this,
  333. std::move(*(sample_data_p.get())),
  334. std::move(hdl),
  335. std::ref(msg_data->msg),
  336. std::ref(*thread_lock_p),
  337. std::move(hotwords_embedding_),
  338. msg_data->msg["wav_name"],
  339. msg_data->msg["itn"],
  340. msg_data->msg["audio_fs"],
  341. msg_data->msg["wav_format"],
  342. std::ref(msg_data->decoder_handle)));
  343. msg_data->msg["access_num"]=(int)(msg_data->msg["access_num"])+1;
  344. }
  345. break;
  346. }
  347. case websocketpp::frame::opcode::binary: {
  348. // recived binary data
  349. const auto* pcm_data = static_cast<const char*>(payload.data());
  350. int32_t num_samples = payload.size();
  351. if (isonline) {
  352. // TODO
  353. } else {
  354. // for offline, we add receive data to end of the sample data vector
  355. sample_data_p->insert(sample_data_p->end(), pcm_data,
  356. pcm_data + num_samples);
  357. }
  358. break;
  359. }
  360. default:
  361. break;
  362. }
  363. guard_decoder.unlock();
  364. }
  365. // init asr model
  366. void WebSocketServer::initAsr(std::map<std::string, std::string>& model_path,
  367. int thread_num) {
  368. try {
  369. // init model with api
  370. asr_handle = FunOfflineInit(model_path, thread_num);
  371. LOG(INFO) << "model successfully inited";
  372. LOG(INFO) << "initAsr run check_and_clean_connection";
  373. std::thread clean_thread(&WebSocketServer::check_and_clean_connection,this);
  374. clean_thread.detach();
  375. LOG(INFO) << "initAsr run check_and_clean_connection finished";
  376. } catch (const std::exception& e) {
  377. LOG(INFO) << e.what();
  378. }
  379. }