websocket-server-2pass.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. /**
  2. * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  3. * Reserved. MIT License (https://opensource.org/licenses/MIT)
  4. */
  5. /* 2022-2023 by zhaomingwork */
// WebSocket server for the ASR engine.
// Takes some ideas from https://github.com/k2-fsa/sherpa-onnx
// online-websocket-server-impl.cc, thanks. The WebSocket server has two thread
// pools: one for handling network data and one for ASR decoding.
// Currently only the offline engine is supported.
  11. #include "websocket-server-2pass.h"
  12. #include <thread>
  13. #include <utility>
  14. #include <vector>
  15. context_ptr WebSocketServer::on_tls_init(tls_mode mode,
  16. websocketpp::connection_hdl hdl,
  17. std::string& s_certfile,
  18. std::string& s_keyfile) {
  19. namespace asio = websocketpp::lib::asio;
  20. LOG(INFO) << "on_tls_init called with hdl: " << hdl.lock().get();
  21. LOG(INFO) << "using TLS mode: "
  22. << (mode == MOZILLA_MODERN ? "Mozilla Modern"
  23. : "Mozilla Intermediate");
  24. context_ptr ctx = websocketpp::lib::make_shared<asio::ssl::context>(
  25. asio::ssl::context::sslv23);
  26. try {
  27. if (mode == MOZILLA_MODERN) {
  28. // Modern disables TLSv1
  29. ctx->set_options(
  30. asio::ssl::context::default_workarounds |
  31. asio::ssl::context::no_sslv2 | asio::ssl::context::no_sslv3 |
  32. asio::ssl::context::no_tlsv1 | asio::ssl::context::single_dh_use);
  33. } else {
  34. ctx->set_options(asio::ssl::context::default_workarounds |
  35. asio::ssl::context::no_sslv2 |
  36. asio::ssl::context::no_sslv3 |
  37. asio::ssl::context::single_dh_use);
  38. }
  39. ctx->use_certificate_chain_file(s_certfile);
  40. ctx->use_private_key_file(s_keyfile, asio::ssl::context::pem);
  41. } catch (std::exception& e) {
  42. LOG(INFO) << "Exception: " << e.what();
  43. }
  44. return ctx;
  45. }
  46. nlohmann::json handle_result(FUNASR_RESULT result) {
  47. websocketpp::lib::error_code ec;
  48. nlohmann::json jsonresult;
  49. jsonresult["text"]="";
  50. std::string tmp_online_msg = FunASRGetResult(result, 0);
  51. if (tmp_online_msg != "") {
  52. LOG(INFO) << "online_res :" << tmp_online_msg;
  53. jsonresult["text"] = tmp_online_msg;
  54. jsonresult["mode"] = "2pass-online";
  55. }
  56. std::string tmp_tpass_msg = FunASRGetTpassResult(result, 0);
  57. if (tmp_tpass_msg != "") {
  58. LOG(INFO) << "offline results : " << tmp_tpass_msg;
  59. jsonresult["text"] = tmp_tpass_msg;
  60. jsonresult["mode"] = "2pass-offline";
  61. }
  62. return jsonresult;
  63. }
  64. // feed buffer to asr engine for decoder
  65. void WebSocketServer::do_decoder(
  66. std::vector<char>& buffer, websocketpp::connection_hdl& hdl,
  67. nlohmann::json& msg, std::vector<std::vector<std::string>>& punc_cache,
  68. websocketpp::lib::mutex& thread_lock, bool& is_final,
  69. std::string wav_name, FUNASR_HANDLE& tpass_online_handle) {
  70. // lock for each connection
  71. scoped_lock guard(thread_lock);
  72. FUNASR_RESULT Result = nullptr;
  73. int asr_mode_ = 2;
  74. if (msg.contains("mode")) {
  75. std::string modeltype = msg["mode"];
  76. if (modeltype == "offline") {
  77. asr_mode_ = 0;
  78. } else if (modeltype == "online") {
  79. asr_mode_ = 1;
  80. } else if (modeltype == "2pass") {
  81. asr_mode_ = 2;
  82. }
  83. } else {
  84. // default value
  85. msg["mode"] = "2pass";
  86. asr_mode_ = 2;
  87. }
  88. try {
  89. // loop to send chunk_size 800*2 data to asr engine. TODO: chunk_size need get from client
  90. while (buffer.size() >= 800 * 2) {
  91. std::vector<char> subvector = {buffer.begin(),
  92. buffer.begin() + 800 * 2};
  93. buffer.erase(buffer.begin(), buffer.begin() + 800 * 2);
  94. try{
  95. Result =
  96. FunTpassInferBuffer(tpass_handle, tpass_online_handle,
  97. subvector.data(), subvector.size(), punc_cache,
  98. false, msg["audio_fs"], msg["wav_format"], (ASR_TYPE)asr_mode_);
  99. }catch (std::exception const &e)
  100. {
  101. LOG(ERROR)<<e.what();
  102. }
  103. if (Result) {
  104. websocketpp::lib::error_code ec;
  105. nlohmann::json jsonresult =
  106. handle_result(Result);
  107. jsonresult["wav_name"] = wav_name;
  108. jsonresult["is_final"] = false;
  109. if(jsonresult["text"] != "") {
  110. if (is_ssl) {
  111. wss_server_->send(hdl, jsonresult.dump(),
  112. websocketpp::frame::opcode::text, ec);
  113. } else {
  114. server_->send(hdl, jsonresult.dump(),
  115. websocketpp::frame::opcode::text, ec);
  116. }
  117. }
  118. FunASRFreeResult(Result);
  119. }
  120. }
  121. if(is_final){
  122. try{
  123. Result = FunTpassInferBuffer(tpass_handle, tpass_online_handle,
  124. buffer.data(), buffer.size(), punc_cache,
  125. is_final, msg["audio_fs"], msg["wav_format"], (ASR_TYPE)asr_mode_);
  126. }catch (std::exception const &e)
  127. {
  128. LOG(ERROR)<<e.what();
  129. }
  130. for(auto &vec:punc_cache){
  131. vec.clear();
  132. }
  133. if (Result) {
  134. websocketpp::lib::error_code ec;
  135. nlohmann::json jsonresult =
  136. handle_result(Result);
  137. jsonresult["wav_name"] = wav_name;
  138. jsonresult["is_final"] = true;
  139. if (is_ssl) {
  140. wss_server_->send(hdl, jsonresult.dump(),
  141. websocketpp::frame::opcode::text, ec);
  142. } else {
  143. server_->send(hdl, jsonresult.dump(),
  144. websocketpp::frame::opcode::text, ec);
  145. }
  146. FunASRFreeResult(Result);
  147. }
  148. }
  149. } catch (std::exception const& e) {
  150. std::cerr << "Error: " << e.what() << std::endl;
  151. }
  152. }
  153. void WebSocketServer::on_open(websocketpp::connection_hdl hdl) {
  154. scoped_lock guard(m_lock); // for threads safty
  155. check_and_clean_connection(); // remove closed connection
  156. std::shared_ptr<FUNASR_MESSAGE> data_msg =
  157. std::make_shared<FUNASR_MESSAGE>(); // put a new data vector for new
  158. // connection
  159. data_msg->samples = std::make_shared<std::vector<char>>();
  160. data_msg->thread_lock = new websocketpp::lib::mutex();
  161. data_msg->msg = nlohmann::json::parse("{}");
  162. data_msg->msg["wav_format"] = "pcm";
  163. data_msg->msg["audio_fs"] = 16000;
  164. data_msg->punc_cache =
  165. std::make_shared<std::vector<std::vector<std::string>>>(2);
  166. // std::vector<int> chunk_size = {5, 10, 5}; //TODO, need get from client
  167. // FUNASR_HANDLE tpass_online_handle =
  168. // FunTpassOnlineInit(tpass_handle, chunk_size);
  169. // data_msg->tpass_online_handle = tpass_online_handle;
  170. data_map.emplace(hdl, data_msg);
  171. LOG(INFO) << "on_open, active connections: " << data_map.size();
  172. }
  173. void WebSocketServer::on_close(websocketpp::connection_hdl hdl) {
  174. scoped_lock guard(m_lock);
  175. std::shared_ptr<FUNASR_MESSAGE> data_msg = nullptr;
  176. auto it_data = data_map.find(hdl);
  177. if (it_data != data_map.end()) {
  178. data_msg = it_data->second;
  179. }
  180. else
  181. {
  182. return;
  183. }
  184. scoped_lock guard_decoder(*(data_msg->thread_lock)); //wait for do_decoder finished and avoid access freed tpass_online_handle
  185. LOG(INFO) << "----------------FunTpassOnlineUninit----------------------";
  186. FunTpassOnlineUninit(data_msg->tpass_online_handle);
  187. data_map.erase(hdl); // remove data vector when connection is closed
  188. LOG(INFO) << "on_close, active connections: "<< data_map.size();
  189. }
  190. // remove closed connection
  191. void WebSocketServer::check_and_clean_connection() {
  192. std::vector<websocketpp::connection_hdl> to_remove; // remove list
  193. auto iter = data_map.begin();
  194. while (iter != data_map.end()) { // loop to find closed connection
  195. websocketpp::connection_hdl hdl = iter->first;
  196. if (is_ssl) {
  197. wss_server::connection_ptr con = wss_server_->get_con_from_hdl(hdl);
  198. if (con->get_state() != 1) { // session::state::open ==1
  199. to_remove.push_back(hdl);
  200. }
  201. } else {
  202. server::connection_ptr con = server_->get_con_from_hdl(hdl);
  203. if (con->get_state() != 1) { // session::state::open ==1
  204. to_remove.push_back(hdl);
  205. }
  206. }
  207. iter++;
  208. }
  209. for (auto hdl : to_remove) {
  210. data_map.erase(hdl);
  211. LOG(INFO) << "remove one connection ";
  212. }
  213. }
  214. void WebSocketServer::on_message(websocketpp::connection_hdl hdl,
  215. message_ptr msg) {
  216. unique_lock lock(m_lock);
  217. // find the sample data vector according to one connection
  218. std::shared_ptr<FUNASR_MESSAGE> msg_data = nullptr;
  219. auto it_data = data_map.find(hdl);
  220. if (it_data != data_map.end()) {
  221. msg_data = it_data->second;
  222. }
  223. std::shared_ptr<std::vector<char>> sample_data_p = msg_data->samples;
  224. std::shared_ptr<std::vector<std::vector<std::string>>> punc_cache_p =
  225. msg_data->punc_cache;
  226. websocketpp::lib::mutex* thread_lock_p = msg_data->thread_lock;
  227. lock.unlock();
  228. if (sample_data_p == nullptr) {
  229. LOG(INFO) << "error when fetch sample data vector";
  230. return;
  231. }
  232. const std::string& payload = msg->get_payload(); // get msg type
  233. switch (msg->get_opcode()) {
  234. case websocketpp::frame::opcode::text: {
  235. nlohmann::json jsonresult = nlohmann::json::parse(payload);
  236. if (jsonresult.contains("wav_name")) {
  237. msg_data->msg["wav_name"] = jsonresult["wav_name"];
  238. }
  239. if (jsonresult.contains("mode")) {
  240. msg_data->msg["mode"] = jsonresult["mode"];
  241. }
  242. if (jsonresult.contains("wav_format")) {
  243. msg_data->msg["wav_format"] = jsonresult["wav_format"];
  244. }
  245. if (jsonresult.contains("audio_fs")) {
  246. msg_data->msg["audio_fs"] = jsonresult["audio_fs"];
  247. }
  248. if (jsonresult.contains("chunk_size")){
  249. if(msg_data->tpass_online_handle == NULL){
  250. std::vector<int> chunk_size_vec = jsonresult["chunk_size"].get<std::vector<int>>();
  251. LOG(INFO) << "----------------FunTpassOnlineInit----------------------";
  252. FUNASR_HANDLE tpass_online_handle =
  253. FunTpassOnlineInit(tpass_handle, chunk_size_vec);
  254. msg_data->tpass_online_handle = tpass_online_handle;
  255. }
  256. }
  257. LOG(INFO) << "jsonresult=" << jsonresult << ", msg_data->msg="
  258. << msg_data->msg;
  259. if (jsonresult["is_speaking"] == false ||
  260. jsonresult["is_finished"] == true) {
  261. LOG(INFO) << "client done";
  262. // if it is in final message, post the sample_data to decode
  263. asio::post(
  264. io_decoder_,
  265. std::bind(&WebSocketServer::do_decoder, this,
  266. std::move(*(sample_data_p.get())), std::move(hdl),
  267. std::ref(msg_data->msg), std::ref(*(punc_cache_p.get())),
  268. std::ref(*thread_lock_p), std::move(true),
  269. msg_data->msg["wav_name"],
  270. std::ref(msg_data->tpass_online_handle)));
  271. }
  272. break;
  273. }
  274. case websocketpp::frame::opcode::binary: {
  275. // recived binary data
  276. const auto* pcm_data = static_cast<const char*>(payload.data());
  277. int32_t num_samples = payload.size();
  278. if (isonline) {
  279. sample_data_p->insert(sample_data_p->end(), pcm_data,
  280. pcm_data + num_samples);
  281. int setpsize = 800 * 2; // TODO, need get from client
  282. // if sample_data size > setpsize, we post data to decode
  283. if (sample_data_p->size() > setpsize) {
  284. int chunksize = floor(sample_data_p->size() / setpsize);
  285. // make sure the subvector size is an integer multiple of setpsize
  286. std::vector<char> subvector = {
  287. sample_data_p->begin(),
  288. sample_data_p->begin() + chunksize * setpsize};
  289. // keep remain in sample_data
  290. sample_data_p->erase(sample_data_p->begin(),
  291. sample_data_p->begin() + chunksize * setpsize);
  292. // post to decode
  293. asio::post(io_decoder_,
  294. std::bind(&WebSocketServer::do_decoder, this,
  295. std::move(subvector), std::move(hdl),
  296. std::ref(msg_data->msg),
  297. std::ref(*(punc_cache_p.get())),
  298. std::ref(*thread_lock_p), std::move(false),
  299. msg_data->msg["wav_name"],
  300. std::ref(msg_data->tpass_online_handle)));
  301. }
  302. } else {
  303. sample_data_p->insert(sample_data_p->end(), pcm_data,
  304. pcm_data + num_samples);
  305. }
  306. break;
  307. }
  308. default:
  309. break;
  310. }
  311. }
  312. // init asr model
  313. void WebSocketServer::initAsr(std::map<std::string, std::string>& model_path,
  314. int thread_num) {
  315. try {
  316. tpass_handle = FunTpassInit(model_path, thread_num);
  317. if (!tpass_handle) {
  318. LOG(ERROR) << "FunTpassInit init failed";
  319. exit(-1);
  320. }
  321. } catch (const std::exception& e) {
  322. LOG(INFO) << e.what();
  323. }
  324. }