// funasr-wss-client-2pass.cpp
  1. /**
  2. * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights
  3. * Reserved. MIT License (https://opensource.org/licenses/MIT)
  4. */
  5. /* 2022-2023 by zhaomingwork */
  6. // client for websocket, support multiple threads
  7. // ./funasr-wss-client --server-ip <string>
  8. // --port <string>
  9. // --wav-path <string>
  10. // [--thread-num <int>]
  11. // [--is-ssl <int>] [--]
  12. // [--version] [-h]
  13. // example:
  14. // ./funasr-wss-client --server-ip 127.0.0.1 --port 10095 --wav-path test.wav
  15. // --thread-num 1 --is-ssl 1
  16. #define ASIO_STANDALONE 1
  #include <glog/logging.h>
  #include <algorithm>
  #include <atomic>
  #include <chrono>
  #include <cstddef>
  #include <cstdint>
  #include <exception>
  #include <fstream>
  #include <iostream>
  #include <sstream>
  #include <stdexcept>
  #include <string>
  #include <thread>
  #include <vector>
  #include <websocketpp/client.hpp>
  #include <websocketpp/common/thread.hpp>
  #include <websocketpp/config/asio_client.hpp>
  #include "audio.h"
  #include "nlohmann/json.hpp"
  #include "tclap/CmdLine.h"
  /**
   * Portable helper that pauses the calling thread for a short, fixed time.
   *
   * A single std::chrono duration is used on every platform. The earlier
   * Sleep(500) / usleep(500) pair disagreed on units (milliseconds on Windows,
   * microseconds elsewhere) and relied on headers this file never included.
   * The POSIX value (500 microseconds) is kept as the common duration.
   */
  void WaitABit() {
    std::this_thread::sleep_for(std::chrono::microseconds(500));
  }
  // File-scope atomic integer counter, starting at zero.
  std::atomic<int> wav_index{0};
  /**
   * Tells whether `filename` ends with the extension `target`.
   *
   * The extension is everything after the last '.' in `filename`; the
   * comparison is exact (case-sensitive) and `target` is given without the dot.
   *
   * @param filename path or file name to inspect
   * @param target   extension to look for, e.g. "wav"
   * @return true when the text after the last '.' equals `target`; false when
   *         it differs or when `filename` contains no '.' at all
   */
  bool IsTargetFile(const std::string& filename, const std::string& target) {
    const std::size_t dot = filename.rfind('.');
    if (dot == std::string::npos) {
      return false;
    }
    // Compare in place instead of materialising the extension as a new string.
    return filename.compare(dot + 1, std::string::npos, target) == 0;
  }
  48. typedef websocketpp::config::asio_client::message_type::ptr message_ptr;
  49. typedef websocketpp::lib::shared_ptr<websocketpp::lib::asio::ssl::context>
  50. context_ptr;
  51. using websocketpp::lib::bind;
  52. using websocketpp::lib::placeholders::_1;
  53. using websocketpp::lib::placeholders::_2;
  54. context_ptr OnTlsInit(websocketpp::connection_hdl) {
  55. context_ptr ctx = websocketpp::lib::make_shared<asio::ssl::context>(
  56. asio::ssl::context::sslv23);
  57. try {
  58. ctx->set_options(
  59. asio::ssl::context::default_workarounds | asio::ssl::context::no_sslv2 |
  60. asio::ssl::context::no_sslv3 | asio::ssl::context::single_dh_use);
  61. } catch (std::exception& e) {
  62. LOG(ERROR) << e.what();
  63. }
  64. return ctx;
  65. }
  66. // template for tls or not config
  67. template <typename T>
  68. class WebsocketClient {
  69. public:
  70. // typedef websocketpp::client<T> client;
  71. // typedef websocketpp::client<websocketpp::config::asio_tls_client>
  72. // wss_client;
  73. typedef websocketpp::lib::lock_guard<websocketpp::lib::mutex> scoped_lock;
  74. WebsocketClient(int is_ssl) : m_open(false), m_done(false) {
  75. // set up access channels to only log interesting things
  76. m_client.clear_access_channels(websocketpp::log::alevel::all);
  77. m_client.set_access_channels(websocketpp::log::alevel::connect);
  78. m_client.set_access_channels(websocketpp::log::alevel::disconnect);
  79. m_client.set_access_channels(websocketpp::log::alevel::app);
  80. // Initialize the Asio transport policy
  81. m_client.init_asio();
  82. // Bind the handlers we are using
  83. using websocketpp::lib::bind;
  84. using websocketpp::lib::placeholders::_1;
  85. m_client.set_open_handler(bind(&WebsocketClient::on_open, this, _1));
  86. m_client.set_close_handler(bind(&WebsocketClient::on_close, this, _1));
  87. m_client.set_message_handler(
  88. [this](websocketpp::connection_hdl hdl, message_ptr msg) {
  89. on_message(hdl, msg);
  90. });
  91. m_client.set_fail_handler(bind(&WebsocketClient::on_fail, this, _1));
  92. m_client.clear_access_channels(websocketpp::log::alevel::all);
  93. }
  94. void on_message(websocketpp::connection_hdl hdl, message_ptr msg) {
  95. const std::string& payload = msg->get_payload();
  96. switch (msg->get_opcode()) {
  97. case websocketpp::frame::opcode::text:
  98. nlohmann::json jsonresult = nlohmann::json::parse(payload);
  99. LOG(INFO) << "Thread: " << this_thread::get_id()
  100. << ",on_message = " << payload;
  101. if (jsonresult["is_final"] == true) {
  102. websocketpp::lib::error_code ec;
  103. m_client.close(hdl, websocketpp::close::status::going_away, "", ec);
  104. if (ec) {
  105. LOG(ERROR) << "Error closing connection " << ec.message();
  106. }
  107. }
  108. }
  109. }
  110. // This method will block until the connection is complete
  111. void run(const std::string& uri, const std::vector<string>& wav_list,
  112. const std::vector<string>& wav_ids, std::string asr_mode,
  113. std::vector<int> chunk_size) {
  114. // Create a new connection to the given URI
  115. websocketpp::lib::error_code ec;
  116. typename websocketpp::client<T>::connection_ptr con =
  117. m_client.get_connection(uri, ec);
  118. if (ec) {
  119. m_client.get_alog().write(websocketpp::log::alevel::app,
  120. "Get Connection Error: " + ec.message());
  121. return;
  122. }
  123. // Grab a handle for this connection so we can talk to it in a thread
  124. // safe manor after the event loop starts.
  125. m_hdl = con->get_handle();
  126. // Queue the connection. No DNS queries or network connections will be
  127. // made until the io_service event loop is run.
  128. m_client.connect(con);
  129. // Create a thread to run the ASIO io_service event loop
  130. websocketpp::lib::thread asio_thread(&websocketpp::client<T>::run,
  131. &m_client);
  132. send_wav_data(wav_list[0], wav_ids[0], asr_mode, chunk_size);
  133. WaitABit();
  134. asio_thread.join();
  135. }
  136. // The open handler will signal that we are ready to start sending data
  137. void on_open(websocketpp::connection_hdl) {
  138. m_client.get_alog().write(websocketpp::log::alevel::app,
  139. "Connection opened, starting data!");
  140. scoped_lock guard(m_lock);
  141. m_open = true;
  142. }
  143. // The close handler will signal that we should stop sending data
  144. void on_close(websocketpp::connection_hdl) {
  145. m_client.get_alog().write(websocketpp::log::alevel::app,
  146. "Connection closed, stopping data!");
  147. scoped_lock guard(m_lock);
  148. m_done = true;
  149. }
  150. // The fail handler will signal that we should stop sending data
  151. void on_fail(websocketpp::connection_hdl) {
  152. m_client.get_alog().write(websocketpp::log::alevel::app,
  153. "Connection failed, stopping data!");
  154. scoped_lock guard(m_lock);
  155. m_done = true;
  156. }
  157. // send wav to server
  158. void send_wav_data(string wav_path, string wav_id, std::string asr_mode,
  159. std::vector<int> chunk_vector) {
  160. uint64_t count = 0;
  161. std::stringstream val;
  162. funasr::Audio audio(1);
  163. int32_t sampling_rate = 16000;
  164. std::string wav_format = "pcm";
  165. if (IsTargetFile(wav_path.c_str(), "wav")) {
  166. int32_t sampling_rate = -1;
  167. if (!audio.LoadWav(wav_path.c_str(), &sampling_rate)) return;
  168. } else if (IsTargetFile(wav_path.c_str(), "pcm")) {
  169. if (!audio.LoadPcmwav(wav_path.c_str(), &sampling_rate)) return;
  170. } else {
  171. wav_format = "others";
  172. if (!audio.LoadOthers2Char(wav_path.c_str())) return;
  173. }
  174. float* buff;
  175. int len;
  176. int flag = 0;
  177. bool wait = false;
  178. while (1) {
  179. {
  180. scoped_lock guard(m_lock);
  181. // If the connection has been closed, stop generating data
  182. if (m_done) {
  183. break;
  184. }
  185. // If the connection hasn't been opened yet wait a bit and retry
  186. if (!m_open) {
  187. wait = true;
  188. } else {
  189. break;
  190. }
  191. }
  192. if (wait) {
  193. // LOG(INFO) << "wait.." << m_open;
  194. WaitABit();
  195. continue;
  196. }
  197. }
  198. websocketpp::lib::error_code ec;
  199. nlohmann::json jsonbegin;
  200. nlohmann::json chunk_size = nlohmann::json::array();
  201. chunk_size.push_back(chunk_vector[0]);
  202. chunk_size.push_back(chunk_vector[1]);
  203. chunk_size.push_back(chunk_vector[2]);
  204. jsonbegin["mode"] = asr_mode;
  205. jsonbegin["chunk_size"] = chunk_size;
  206. jsonbegin["wav_name"] = wav_id;
  207. jsonbegin["wav_format"] = wav_format;
  208. jsonbegin["is_speaking"] = true;
  209. m_client.send(m_hdl, jsonbegin.dump(), websocketpp::frame::opcode::text,
  210. ec);
  211. // fetch wav data use asr engine api
  212. if (wav_format == "pcm") {
  213. while (audio.Fetch(buff, len, flag) > 0) {
  214. short* iArray = new short[len];
  215. for (size_t i = 0; i < len; ++i) {
  216. iArray[i] = (short)(buff[i] * 32768);
  217. }
  218. // send data to server
  219. int offset = 0;
  220. int block_size = 102400;
  221. while (offset < len) {
  222. int send_block = 0;
  223. if (offset + block_size <= len) {
  224. send_block = block_size;
  225. } else {
  226. send_block = len - offset;
  227. }
  228. m_client.send(m_hdl, iArray + offset, send_block * sizeof(short),
  229. websocketpp::frame::opcode::binary, ec);
  230. offset += send_block;
  231. }
  232. LOG(INFO) << "sended data len=" << len * sizeof(short);
  233. // The most likely error that we will get is that the connection is
  234. // not in the right state. Usually this means we tried to send a
  235. // message to a connection that was closed or in the process of
  236. // closing. While many errors here can be easily recovered from,
  237. // in this simple example, we'll stop the data loop.
  238. if (ec) {
  239. m_client.get_alog().write(websocketpp::log::alevel::app,
  240. "Send Error: " + ec.message());
  241. break;
  242. }
  243. delete[] iArray;
  244. }
  245. } else {
  246. int offset = 0;
  247. int block_size = 204800;
  248. len = audio.GetSpeechLen();
  249. char* others_buff = audio.GetSpeechChar();
  250. while (offset < len) {
  251. int send_block = 0;
  252. if (offset + block_size <= len) {
  253. send_block = block_size;
  254. } else {
  255. send_block = len - offset;
  256. }
  257. m_client.send(m_hdl, others_buff + offset, send_block,
  258. websocketpp::frame::opcode::binary, ec);
  259. offset += send_block;
  260. }
  261. LOG(INFO) << "sended data len=" << len;
  262. // The most likely error that we will get is that the connection is
  263. // not in the right state. Usually this means we tried to send a
  264. // message to a connection that was closed or in the process of
  265. // closing. While many errors here can be easily recovered from,
  266. // in this simple example, we'll stop the data loop.
  267. if (ec) {
  268. m_client.get_alog().write(websocketpp::log::alevel::app,
  269. "Send Error: " + ec.message());
  270. }
  271. }
  272. nlohmann::json jsonresult;
  273. jsonresult["is_speaking"] = false;
  274. m_client.send(m_hdl, jsonresult.dump(), websocketpp::frame::opcode::text,
  275. ec);
  276. WaitABit();
  277. }
  278. websocketpp::client<T> m_client;
  279. private:
  280. websocketpp::connection_hdl m_hdl;
  281. websocketpp::lib::mutex m_lock;
  282. bool m_open;
  283. bool m_done;
  284. int total_num = 0;
  285. };
  286. int main(int argc, char* argv[]) {
  287. google::InitGoogleLogging(argv[0]);
  288. FLAGS_logtostderr = true;
  289. TCLAP::CmdLine cmd("funasr-wss-client", ' ', "1.0");
  290. TCLAP::ValueArg<std::string> server_ip_("", "server-ip", "server-ip", true,
  291. "127.0.0.1", "string");
  292. TCLAP::ValueArg<std::string> port_("", "port", "port", true, "10095",
  293. "string");
  294. TCLAP::ValueArg<std::string> wav_path_(
  295. "", "wav-path",
  296. "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: "
  297. "asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)",
  298. true, "", "string");
  299. TCLAP::ValueArg<std::string> asr_mode_("", ASR_MODE, "offline, online, 2pass",
  300. false, "2pass", "string");
  301. TCLAP::ValueArg<std::string> chunk_size_("", "chunk-size",
  302. "chunk_size: 5-10-5 or 5-12-5",
  303. false, "5-10-5", "string");
  304. TCLAP::ValueArg<int> thread_num_("", "thread-num", "thread-num", false, 1,
  305. "int");
  306. TCLAP::ValueArg<int> is_ssl_(
  307. "", "is-ssl",
  308. "is-ssl is 1 means use wss connection, or use ws connection", false, 1,
  309. "int");
  310. cmd.add(server_ip_);
  311. cmd.add(port_);
  312. cmd.add(wav_path_);
  313. cmd.add(asr_mode_);
  314. cmd.add(chunk_size_);
  315. cmd.add(thread_num_);
  316. cmd.add(is_ssl_);
  317. cmd.parse(argc, argv);
  318. std::string server_ip = server_ip_.getValue();
  319. std::string port = port_.getValue();
  320. std::string wav_path = wav_path_.getValue();
  321. std::string asr_mode = asr_mode_.getValue();
  322. std::string chunk_size_str = chunk_size_.getValue();
  323. // get chunk_size
  324. std::vector<int> chunk_size;
  325. std::stringstream ss(chunk_size_str);
  326. std::string item;
  327. while (std::getline(ss, item, '-')) {
  328. try {
  329. chunk_size.push_back(stoi(item));
  330. } catch (const invalid_argument&) {
  331. LOG(ERROR) << "Invalid argument: " << item;
  332. exit(-1);
  333. }
  334. }
  335. int threads_num = thread_num_.getValue();
  336. int is_ssl = is_ssl_.getValue();
  337. std::string uri = "";
  338. if (is_ssl == 1) {
  339. uri = "wss://" + server_ip + ":" + port;
  340. } else {
  341. uri = "ws://" + server_ip + ":" + port;
  342. }
  343. // read wav_path
  344. std::vector<string> wav_list;
  345. std::vector<string> wav_ids;
  346. string default_id = "wav_default_id";
  347. if (IsTargetFile(wav_path, "scp")) {
  348. ifstream in(wav_path);
  349. if (!in.is_open()) {
  350. printf("Failed to open scp file");
  351. return 0;
  352. }
  353. string line;
  354. while (getline(in, line)) {
  355. istringstream iss(line);
  356. string column1, column2;
  357. iss >> column1 >> column2;
  358. wav_list.emplace_back(column2);
  359. wav_ids.emplace_back(column1);
  360. }
  361. in.close();
  362. } else {
  363. wav_list.emplace_back(wav_path);
  364. wav_ids.emplace_back(default_id);
  365. }
  366. for (size_t wav_i = 0; wav_i < wav_list.size(); wav_i = wav_i + threads_num) {
  367. std::vector<websocketpp::lib::thread> client_threads;
  368. for (size_t i = 0; i < threads_num; i++) {
  369. if (wav_i + i >= wav_list.size()) {
  370. break;
  371. }
  372. std::vector<string> tmp_wav_list;
  373. std::vector<string> tmp_wav_ids;
  374. tmp_wav_list.emplace_back(wav_list[wav_i + i]);
  375. tmp_wav_ids.emplace_back(wav_ids[wav_i + i]);
  376. client_threads.emplace_back(
  377. [uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size, is_ssl]() {
  378. if (is_ssl == 1) {
  379. WebsocketClient<websocketpp::config::asio_tls_client> c(is_ssl);
  380. c.m_client.set_tls_init_handler(bind(&OnTlsInit, ::_1));
  381. c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size);
  382. } else {
  383. WebsocketClient<websocketpp::config::asio_client> c(is_ssl);
  384. c.run(uri, tmp_wav_list, tmp_wav_ids, asr_mode, chunk_size);
  385. }
  386. });
  387. }
  388. for (auto& t : client_threads) {
  389. t.join();
  390. }
  391. }
  392. }