funasr-wss-client.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447
  1. /**
  2. * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  3. * Reserved. MIT License (https://opensource.org/licenses/MIT)
  4. */
  5. /* 2022-2023 by zhaomingwork */
  6. // client for websocket, support multiple threads
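  // ./funasr-wss-client --server-ip <string>
  //                     --port <string>
  //                     --wav-path <string>
  //                     [--audio-fs <int>]
  //                     [--thread-num <int>]
  //                     [--is-ssl <int>]
  //                     [--use-itn <int>] [--]
  //                     [--version] [-h]
  // (a hotword-file option is also accepted; its flag name is given by the
  //  HOTWORD macro)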
  13. // example:
  14. // ./funasr-wss-client --server-ip 127.0.0.1 --port 10095 --wav-path test.wav --thread-num 1 --is-ssl 1
  15. #define ASIO_STANDALONE 1
  16. #include <websocketpp/client.hpp>
  17. #include <websocketpp/common/thread.hpp>
  18. #include <websocketpp/config/asio_client.hpp>
  19. #include <fstream>
  20. #include <atomic>
  21. #include <thread>
  22. #include <glog/logging.h>
  23. #include "util.h"
  24. #include "audio.h"
  25. #include "nlohmann/json.hpp"
  26. #include "tclap/CmdLine.h"
  /**
   * Portable helper that pauses the calling thread for 200 milliseconds.
   *
   * It is used as the polling interval while waiting for the connection to
   * open. The pause is implemented with the standard library so that it lasts
   * the same time on every platform: the former Sleep(200)/usleep(200) pair
   * waited 200 ms on Windows but only 200 microseconds elsewhere, and it was
   * guarded by the non-standard WIN32 macro.
   */
  void WaitABit() {
    std::this_thread::sleep_for(std::chrono::milliseconds(200));
  }
  37. std::atomic<int> wav_index(0);
  38. typedef websocketpp::config::asio_client::message_type::ptr message_ptr;
  39. typedef websocketpp::lib::shared_ptr<websocketpp::lib::asio::ssl::context> context_ptr;
  40. using websocketpp::lib::bind;
  41. using websocketpp::lib::placeholders::_1;
  42. using websocketpp::lib::placeholders::_2;
  43. context_ptr OnTlsInit(websocketpp::connection_hdl) {
  44. context_ptr ctx = websocketpp::lib::make_shared<asio::ssl::context>(
  45. asio::ssl::context::sslv23);
  46. try {
  47. ctx->set_options(
  48. asio::ssl::context::default_workarounds | asio::ssl::context::no_sslv2 |
  49. asio::ssl::context::no_sslv3 | asio::ssl::context::single_dh_use);
  50. } catch (std::exception& e) {
  51. LOG(ERROR) << e.what();
  52. }
  53. return ctx;
  54. }
  55. // template for tls or not config
  56. template <typename T>
  57. class WebsocketClient {
  58. public:
  59. // typedef websocketpp::client<T> client;
  60. // typedef websocketpp::client<websocketpp::config::asio_tls_client>
  61. // wss_client;
  62. typedef websocketpp::lib::lock_guard<websocketpp::lib::mutex> scoped_lock;
  63. WebsocketClient(int is_ssl) : m_open(false), m_done(false) {
  64. // set up access channels to only log interesting things
  65. m_client.clear_access_channels(websocketpp::log::alevel::all);
  66. m_client.set_access_channels(websocketpp::log::alevel::connect);
  67. m_client.set_access_channels(websocketpp::log::alevel::disconnect);
  68. m_client.set_access_channels(websocketpp::log::alevel::app);
  69. // Initialize the Asio transport policy
  70. m_client.init_asio();
  71. // Bind the handlers we are using
  72. using websocketpp::lib::bind;
  73. using websocketpp::lib::placeholders::_1;
  74. m_client.set_open_handler(bind(&WebsocketClient::on_open, this, _1));
  75. m_client.set_close_handler(bind(&WebsocketClient::on_close, this, _1));
  76. m_client.set_message_handler(
  77. [this](websocketpp::connection_hdl hdl, message_ptr msg) {
  78. on_message(hdl, msg);
  79. });
  80. m_client.set_fail_handler(bind(&WebsocketClient::on_fail, this, _1));
  81. m_client.clear_access_channels(websocketpp::log::alevel::all);
  82. }
  83. void on_message(websocketpp::connection_hdl hdl, message_ptr msg) {
  84. const std::string& payload = msg->get_payload();
  85. switch (msg->get_opcode()) {
  86. case websocketpp::frame::opcode::text:
  87. total_recv=total_recv+1;
  88. LOG(INFO)<< "Thread: " << this_thread::get_id() <<", on_message = " << payload;
  89. LOG(INFO)<< "Thread: " << this_thread::get_id() << ", total_recv=" << total_recv << " total_send=" <<total_send;
  90. if(total_recv==total_send)
  91. {
  92. LOG(INFO)<< "Thread: " << this_thread::get_id() << ", close client";
  93. websocketpp::lib::error_code ec;
  94. m_client.close(m_hdl, websocketpp::close::status::going_away, "", ec);
  95. if (ec){
  96. LOG(ERROR)<< "Error closing connection " << ec.message();
  97. }
  98. }
  99. }
  100. }
  101. // This method will block until the connection is complete
  102. void run(const std::string& uri, const std::vector<string>& wav_list, const std::vector<string>& wav_ids,
  103. int audio_fs, const std::unordered_map<std::string, int>& hws_map, int use_itn=1) {
  104. // Create a new connection to the given URI
  105. websocketpp::lib::error_code ec;
  106. typename websocketpp::client<T>::connection_ptr con =
  107. m_client.get_connection(uri, ec);
  108. if (ec) {
  109. m_client.get_alog().write(websocketpp::log::alevel::app,
  110. "Get Connection Error: " + ec.message());
  111. return;
  112. }
  113. // Grab a handle for this connection so we can talk to it in a thread
  114. // safe manor after the event loop starts.
  115. m_hdl = con->get_handle();
  116. // Queue the connection. No DNS queries or network connections will be
  117. // made until the io_service event loop is run.
  118. m_client.connect(con);
  119. // Create a thread to run the ASIO io_service event loop
  120. websocketpp::lib::thread asio_thread(&websocketpp::client<T>::run,
  121. &m_client);
  122. bool send_hotword = true;
  123. while(true){
  124. int i = wav_index.fetch_add(1);
  125. if (i >= wav_list.size()) {
  126. break;
  127. }
  128. total_send += 1;
  129. send_wav_data(wav_list[i], wav_ids[i], audio_fs, hws_map, send_hotword, use_itn);
  130. if(send_hotword){
  131. send_hotword = false;
  132. }
  133. }
  134. WaitABit();
  135. asio_thread.join();
  136. }
  137. // The open handler will signal that we are ready to start sending data
  138. void on_open(websocketpp::connection_hdl) {
  139. m_client.get_alog().write(websocketpp::log::alevel::app,
  140. "Connection opened, starting data!");
  141. scoped_lock guard(m_lock);
  142. m_open = true;
  143. }
  144. // The close handler will signal that we should stop sending data
  145. void on_close(websocketpp::connection_hdl) {
  146. m_client.get_alog().write(websocketpp::log::alevel::app,
  147. "Connection closed, stopping data!");
  148. scoped_lock guard(m_lock);
  149. m_done = true;
  150. }
  151. // The fail handler will signal that we should stop sending data
  152. void on_fail(websocketpp::connection_hdl) {
  153. m_client.get_alog().write(websocketpp::log::alevel::app,
  154. "Connection failed, stopping data!");
  155. scoped_lock guard(m_lock);
  156. m_done = true;
  157. }
  158. // send wav to server
  159. void send_wav_data(string wav_path, string wav_id, int audio_fs,
  160. const std::unordered_map<std::string, int>& hws_map,
  161. bool send_hotword, bool use_itn) {
  162. uint64_t count = 0;
  163. std::stringstream val;
  164. funasr::Audio audio(1);
  165. int32_t sampling_rate = audio_fs;
  166. std::string wav_format = "pcm";
  167. if(funasr::IsTargetFile(wav_path.c_str(), "wav")){
  168. if(!audio.LoadWav(wav_path.c_str(), &sampling_rate, false))
  169. return ;
  170. }else if(funasr::IsTargetFile(wav_path.c_str(), "pcm")){
  171. if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate, false))
  172. return ;
  173. }else{
  174. wav_format = "others";
  175. if (!audio.LoadOthers2Char(wav_path.c_str()))
  176. return ;
  177. }
  178. float* buff;
  179. int len;
  180. int flag = 0;
  181. bool wait = false;
  182. while (1) {
  183. {
  184. scoped_lock guard(m_lock);
  185. // If the connection has been closed, stop generating data
  186. if (m_done) {
  187. break;
  188. }
  189. // If the connection hasn't been opened yet wait a bit and retry
  190. if (!m_open) {
  191. wait = true;
  192. } else {
  193. break;
  194. }
  195. }
  196. if (wait) {
  197. // LOG(INFO) << "wait.." << m_open;
  198. WaitABit();
  199. continue;
  200. }
  201. }
  202. websocketpp::lib::error_code ec;
  203. nlohmann::json jsonbegin;
  204. nlohmann::json chunk_size = nlohmann::json::array();
  205. chunk_size.push_back(5);
  206. chunk_size.push_back(10);
  207. chunk_size.push_back(5);
  208. jsonbegin["chunk_size"] = chunk_size;
  209. jsonbegin["chunk_interval"] = 10;
  210. jsonbegin["wav_name"] = wav_id;
  211. jsonbegin["wav_format"] = wav_format;
  212. jsonbegin["audio_fs"] = sampling_rate;
  213. jsonbegin["itn"] = true;
  214. if(use_itn == 0){
  215. jsonbegin["itn"] = false;
  216. }
  217. jsonbegin["is_speaking"] = true;
  218. if(send_hotword){
  219. if(!hws_map.empty()){
  220. LOG(INFO) << "hotwords: ";
  221. for (const auto& pair : hws_map) {
  222. LOG(INFO) << pair.first << " : " << pair.second;
  223. }
  224. nlohmann::json json_map(hws_map);
  225. std::string json_map_str = json_map.dump();
  226. jsonbegin["hotwords"] = json_map_str;
  227. }
  228. }
  229. m_client.send(m_hdl, jsonbegin.dump(), websocketpp::frame::opcode::text,
  230. ec);
  231. // fetch wav data use asr engine api
  232. if(wav_format == "pcm"){
  233. while (audio.Fetch(buff, len, flag) > 0) {
  234. short* iArray = new short[len];
  235. for (size_t i = 0; i < len; ++i) {
  236. iArray[i] = (short)(buff[i]*32768);
  237. }
  238. // send data to server
  239. int offset = 0;
  240. int block_size = 102400;
  241. while(offset < len){
  242. int send_block = 0;
  243. if (offset + block_size <= len){
  244. send_block = block_size;
  245. }else{
  246. send_block = len - offset;
  247. }
  248. m_client.send(m_hdl, iArray+offset, send_block * sizeof(short),
  249. websocketpp::frame::opcode::binary, ec);
  250. offset += send_block;
  251. }
  252. LOG(INFO)<< "Thread: " << this_thread::get_id() << ", sended data len=" << len * sizeof(short);
  253. // The most likely error that we will get is that the connection is
  254. // not in the right state. Usually this means we tried to send a
  255. // message to a connection that was closed or in the process of
  256. // closing. While many errors here can be easily recovered from,
  257. // in this simple example, we'll stop the data loop.
  258. if (ec) {
  259. m_client.get_alog().write(websocketpp::log::alevel::app,
  260. "Send Error: " + ec.message());
  261. break;
  262. }
  263. delete[] iArray;
  264. // WaitABit();
  265. }
  266. }else{
  267. int offset = 0;
  268. int block_size = 204800;
  269. len = audio.GetSpeechLen();
  270. char* others_buff = audio.GetSpeechChar();
  271. while(offset < len){
  272. int send_block = 0;
  273. if (offset + block_size <= len){
  274. send_block = block_size;
  275. }else{
  276. send_block = len - offset;
  277. }
  278. m_client.send(m_hdl, others_buff+offset, send_block,
  279. websocketpp::frame::opcode::binary, ec);
  280. offset += send_block;
  281. }
  282. LOG(INFO)<< "Thread: " << this_thread::get_id() << ", sended data len=" << len;
  283. // The most likely error that we will get is that the connection is
  284. // not in the right state. Usually this means we tried to send a
  285. // message to a connection that was closed or in the process of
  286. // closing. While many errors here can be easily recovered from,
  287. // in this simple example, we'll stop the data loop.
  288. if (ec) {
  289. m_client.get_alog().write(websocketpp::log::alevel::app,
  290. "Send Error: " + ec.message());
  291. }
  292. }
  293. nlohmann::json jsonresult;
  294. jsonresult["is_speaking"] = false;
  295. m_client.send(m_hdl, jsonresult.dump(), websocketpp::frame::opcode::text,
  296. ec);
  297. std::this_thread::sleep_for(std::chrono::milliseconds(20));
  298. }
  299. websocketpp::client<T> m_client;
  300. private:
  301. websocketpp::connection_hdl m_hdl;
  302. websocketpp::lib::mutex m_lock;
  303. bool m_open;
  304. bool m_done;
  305. int total_send=0;
  306. int total_recv=0;
  307. };
  308. int main(int argc, char* argv[]) {
  309. #ifdef _WIN32
  310. SetConsoleOutputCP(65001);
  311. #endif
  312. google::InitGoogleLogging(argv[0]);
  313. FLAGS_logtostderr = true;
  314. TCLAP::CmdLine cmd("funasr-wss-client", ' ', "1.0");
  315. TCLAP::ValueArg<std::string> server_ip_("", "server-ip", "server-ip", true,
  316. "127.0.0.1", "string");
  317. TCLAP::ValueArg<std::string> port_("", "port", "port", true, "10095", "string");
  318. TCLAP::ValueArg<std::string> wav_path_("", "wav-path",
  319. "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)",
  320. true, "", "string");
  321. TCLAP::ValueArg<std::int32_t> audio_fs_("", "audio-fs", "the sample rate of audio", false, 16000, "int32_t");
  322. TCLAP::ValueArg<int> thread_num_("", "thread-num", "thread-num",
  323. false, 1, "int");
  324. TCLAP::ValueArg<int> is_ssl_(
  325. "", "is-ssl", "is-ssl is 1 means use wss connection, or use ws connection",
  326. false, 1, "int");
  327. TCLAP::ValueArg<int> use_itn_(
  328. "", "use-itn",
  329. "use-itn is 1 means use itn, 0 means not use itn", false, 1, "int");
  330. TCLAP::ValueArg<std::string> hotword_("", HOTWORD,
  331. "the hotword file, one hotword perline, Format: Hotword Weight (could be: 阿里巴巴 20)", false, "", "string");
  332. cmd.add(server_ip_);
  333. cmd.add(port_);
  334. cmd.add(wav_path_);
  335. cmd.add(audio_fs_);
  336. cmd.add(thread_num_);
  337. cmd.add(is_ssl_);
  338. cmd.add(use_itn_);
  339. cmd.add(hotword_);
  340. cmd.parse(argc, argv);
  341. std::string server_ip = server_ip_.getValue();
  342. std::string port = port_.getValue();
  343. std::string wav_path = wav_path_.getValue();
  344. int threads_num = thread_num_.getValue();
  345. int is_ssl = is_ssl_.getValue();
  346. int use_itn = use_itn_.getValue();
  347. std::vector<websocketpp::lib::thread> client_threads;
  348. std::string uri = "";
  349. if (is_ssl == 1) {
  350. uri = "wss://" + server_ip + ":" + port;
  351. } else {
  352. uri = "ws://" + server_ip + ":" + port;
  353. }
  354. // hotwords
  355. std::string hotword_path = hotword_.getValue();
  356. unordered_map<string, int> hws_map;
  357. if(!hotword_path.empty()){
  358. LOG(INFO) << "hotword path: " << hotword_path;
  359. funasr::ExtractHws(hotword_path, hws_map);
  360. }
  361. // read wav_path
  362. std::vector<string> wav_list;
  363. std::vector<string> wav_ids;
  364. string default_id = "wav_default_id";
  365. if(funasr::IsTargetFile(wav_path, "scp")){
  366. ifstream in(wav_path);
  367. if (!in.is_open()) {
  368. printf("Failed to open scp file");
  369. return 0;
  370. }
  371. string line;
  372. while(getline(in, line))
  373. {
  374. istringstream iss(line);
  375. string column1, column2;
  376. iss >> column1 >> column2;
  377. wav_list.emplace_back(column2);
  378. wav_ids.emplace_back(column1);
  379. }
  380. in.close();
  381. }else{
  382. wav_list.emplace_back(wav_path);
  383. wav_ids.emplace_back(default_id);
  384. }
  385. int audio_fs = audio_fs_.getValue();
  386. for (size_t i = 0; i < threads_num; i++) {
  387. client_threads.emplace_back([uri, wav_list, wav_ids, audio_fs, is_ssl, hws_map, use_itn]() {
  388. if (is_ssl == 1) {
  389. WebsocketClient<websocketpp::config::asio_tls_client> c(is_ssl);
  390. c.m_client.set_tls_init_handler(bind(&OnTlsInit, ::_1));
  391. c.run(uri, wav_list, wav_ids, audio_fs, hws_map, use_itn);
  392. } else {
  393. WebsocketClient<websocketpp::config::asio_client> c(is_ssl);
  394. c.run(uri, wav_list, wav_ids, audio_fs, hws_map, use_itn);
  395. }
  396. });
  397. }
  398. for (auto& t : client_threads) {
  399. t.join();
  400. }
  401. }