| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274 |
- <!DOCTYPE html>
- <html class="writer-html5" lang="en" >
- <head>
- <meta charset="utf-8" />
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <title>FunASR Runtime Roadmap — FunASR documentation</title><link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
- <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
- <!--[if lt IE 9]>
- <script src="../_static/js/html5shiv.min.js"></script>
- <![endif]-->
- <script id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
- <script src="../_static/jquery.js"></script>
- <script src="../_static/underscore.js"></script>
- <script src="../_static/doctools.js"></script>
- <script src="../_static/language_data.js"></script>
- <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
- <script src="../_static/js/theme.js"></script>
- <link rel="index" title="Index" href="../genindex.html" />
- <link rel="search" title="Search" href="../search.html" />
- <link rel="next" title="FunASR Realtime Transcribe Service" href="docs/SDK_tutorial_online.html" />
- <link rel="prev" title="Pretrained Models on Huggingface" href="../model_zoo/huggingface_models.html" />
- </head>
- <body class="wy-body-for-nav">
- <div class="wy-grid-for-nav">
- <nav data-toggle="wy-nav-shift" class="wy-nav-side">
- <div class="wy-side-scroll">
- <div class="wy-side-nav-search" >
-
-
- <a href="../index.html" class="icon icon-home">
- FunASR
- </a>
- <div role="search">
- <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
- <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
- <input type="hidden" name="check_keywords" value="yes" />
- <input type="hidden" name="area" value="default" />
- </form>
- </div>
- </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
- <p class="caption"><span class="caption-text">Installation</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../installation/installation.html">Installation</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../installation/docker.html">Docker</a></li>
- </ul>
- <p class="caption"><span class="caption-text">Quick Start</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../funasr/quick_start.html">Quick Start</a></li>
- </ul>
- <p class="caption"><span class="caption-text">Academic Egs</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../academic_recipe/asr_recipe.html">Speech Recognition</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../academic_recipe/punc_recipe.html">Punctuation Restoration</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../academic_recipe/vad_recipe.html">Voice Activity Detection</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../academic_recipe/sv_recipe.html">Speaker Verification</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../academic_recipe/sd_recipe.html">Speaker Diarization</a></li>
- </ul>
- <p class="caption"><span class="caption-text">ModelScope Egs</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../modelscope_pipeline/quick_start.html">Quick Start</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../egs_modelscope/asr/TEMPLATE/README.html">Speech Recognition</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../egs_modelscope/vad/TEMPLATE/README.html">Voice Activity Detection</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../egs_modelscope/punctuation/TEMPLATE/README.html">Punctuation Restoration</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../egs_modelscope/tp/TEMPLATE/README.html">Timestamp Prediction (FA)</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../modelscope_pipeline/sv_pipeline.html">Speaker Verification</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../modelscope_pipeline/sd_pipeline.html">Speaker Diarization</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../modelscope_pipeline/itn_pipeline.html">Inverse Text Normalization (ITN)</a></li>
- </ul>
- <p class="caption"><span class="caption-text">Model Zoo</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../model_zoo/modelscope_models.html">Pretrained Models Released on ModelScope</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../model_zoo/huggingface_models.html">Pretrained Models on Huggingface</a></li>
- </ul>
- <p class="caption"><span class="caption-text">Runtime and Service</span></p>
- <ul class="current">
- <li class="toctree-l1 current"><a class="current reference internal" href="#">FunASR Runtime Roadmap</a><ul>
- <li class="toctree-l2"><a class="reference internal" href="#file-transcription-service-english-cpu">File Transcription Service, English (CPU)</a><ul>
- <li class="toctree-l3"><a class="reference internal" href="#whats-new">Whats-new</a></li>
- <li class="toctree-l3"><a class="reference internal" href="#technical-principles">Technical Principles</a></li>
- <li class="toctree-l3"><a class="reference internal" href="#deployment-tutorial">Deployment Tutorial</a></li>
- <li class="toctree-l3"><a class="reference internal" href="#advanced-development-guide">Advanced Development Guide</a></li>
- </ul>
- </li>
- <li class="toctree-l2"><a class="reference internal" href="#the-real-time-transcription-service-mandarin-cpu">The real-time transcription service, Mandarin (CPU)</a><ul>
- <li class="toctree-l3"><a class="reference internal" href="#id1">Whats-new</a></li>
- <li class="toctree-l3"><a class="reference internal" href="#convenient-deployment-tutorial">Convenient Deployment Tutorial</a></li>
- <li class="toctree-l3"><a class="reference internal" href="#development-guide">Development Guide</a></li>
- <li class="toctree-l3"><a class="reference internal" href="#technology-principles-revealed">Technology Principles Revealed</a></li>
- </ul>
- </li>
- <li class="toctree-l2"><a class="reference internal" href="#file-transcription-service-mandarin-cpu">File Transcription Service, Mandarin (CPU)</a><ul>
- <li class="toctree-l3"><a class="reference internal" href="#id2">Whats-new</a></li>
- <li class="toctree-l3"><a class="reference internal" href="#id3">Technical Principles</a></li>
- <li class="toctree-l3"><a class="reference internal" href="#id4">Deployment Tutorial</a></li>
- <li class="toctree-l3"><a class="reference internal" href="#id5">Advanced Development Guide</a></li>
- </ul>
- </li>
- </ul>
- </li>
- <li class="toctree-l1"><a class="reference internal" href="docs/SDK_tutorial_online.html">FunASR Realtime Transcribe Service</a></li>
- <li class="toctree-l1"><a class="reference internal" href="docs/SDK_tutorial.html">Highlights</a></li>
- <li class="toctree-l1"><a class="reference internal" href="docs/SDK_tutorial.html#funasr-offline-file-transcription-service">FunASR Offline File Transcription Service</a></li>
- <li class="toctree-l1"><a class="reference internal" href="html5/readme.html">Speech Recognition Service Html5 Client Access Interface</a></li>
- </ul>
- <p class="caption"><span class="caption-text">Benchmark and Leaderboard</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../benchmark/benchmark_pipeline_cer.html">Leaderboard IO</a></li>
- </ul>
- <p class="caption"><span class="caption-text">Funasr Library</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../reference/build_task.html">Build custom tasks</a></li>
- </ul>
- <p class="caption"><span class="caption-text">Papers</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../reference/papers.html">Papers</a></li>
- </ul>
- <p class="caption"><span class="caption-text">Application</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../reference/application.html">Audio Cut</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../reference/application.html#realtime-speech-recognition">Realtime Speech Recognition</a></li>
- <li class="toctree-l1"><a class="reference internal" href="../reference/application.html#audio-chat">Audio Chat</a></li>
- </ul>
- <p class="caption"><span class="caption-text">FAQ</span></p>
- <ul>
- <li class="toctree-l1"><a class="reference internal" href="../reference/FQA.html">FAQ</a></li>
- </ul>
- </div>
- </div>
- </nav>
- <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
- <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
- <a href="../index.html">FunASR</a>
- </nav>
- <div class="wy-nav-content">
- <div class="rst-content">
- <div role="navigation" aria-label="Page navigation">
- <ul class="wy-breadcrumbs">
- <li><a href="../index.html" class="icon icon-home" aria-label="Home"></a></li>
- <li class="breadcrumb-item active">FunASR Runtime Roadmap</li>
- <li class="wy-breadcrumbs-aside">
- <a href="../_sources/runtime/readme.md.txt" rel="nofollow"> View page source</a>
- </li>
- </ul>
- <hr/>
- </div>
- <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
- <div itemprop="articleBody">
-
- <div class="section" id="funasr-runtime-roadmap">
- <h1>FunASR Runtime Roadmap<a class="headerlink" href="#funasr-runtime-roadmap" title="Permalink to this headline"></a></h1>
- <p>中文文档(<a class="reference internal" href="readme_cn.html"><span class="doc">点击此处</span></a>)</p>
- <p>FunASR is a speech recognition framework developed by the Speech Lab of DAMO Academy, which integrates industrial-level models in the fields of speech endpoint detection, speech recognition, punctuation segmentation, and more.
- It has attracted many developers to try it out and contribute to its development. To cover the last mile of industrial deployment and help integrate the models into business applications, we have developed the FunASR runtime-SDK. The SDK supports several service deployments, including:</p>
- <ul class="simple">
- <li><p>File transcription service, Mandarin, CPU version, done</p></li>
- <li><p>The real-time transcription service, Mandarin (CPU), done</p></li>
- <li><p>File transcription service, English, CPU version, done</p></li>
- <li><p>File transcription service, Mandarin, GPU version, in progress</p></li>
- <li><p>and more.</p></li>
- </ul>
- <div class="section" id="file-transcription-service-english-cpu">
- <h2>File Transcription Service, English (CPU)<a class="headerlink" href="#file-transcription-service-english-cpu" title="Permalink to this headline"></a></h2>
- <p>Currently, the FunASR runtime-SDK supports the deployment of file transcription service, English (CPU version), with a complete speech recognition chain that can transcribe tens of hours of audio into punctuated text, and supports recognition for more than a hundred concurrent streams.</p>
- <p>To meet the needs of different users, we have prepared different tutorials with text and images for both novice and advanced developers.</p>
- <div class="section" id="whats-new">
- <h3>Whats-new<a class="headerlink" href="#whats-new" title="Permalink to this headline"></a></h3>
- <ul class="simple">
- <li><p>2023/11/08: Adaptation to runtime structure changes (FunASR/funasr/runtime -> FunASR/runtime), docker image version funasr-runtime-sdk-en-cpu-0.1.1 (27017f70f72a).</p></li>
- <li><p>2023/10/16: English File Transcription Service 1.0 released, docker image version funasr-runtime-sdk-en-cpu-0.1.0 (e0de03eb0163), refer to the detailed documentation (<a class="reference external" href="https://mp.weixin.qq.com/s/DZZUTj-6xwFfi-96ml--4A">here</a>).</p></li>
- </ul>
- </div>
- <div class="section" id="technical-principles">
- <h3>Technical Principles<a class="headerlink" href="#technical-principles" title="Permalink to this headline"></a></h3>
- <p>This document explains the technical principles behind FunASR, covering the underlying technology, recognition accuracy, computational efficiency, and the core advantages of the framework: convenience, high precision, high efficiency, and support for long audio chains. For detailed information, please refer to the <a class="reference external" href="https://mp.weixin.qq.com/s/DZZUTj-6xwFfi-96ml--4A">docs</a>.</p>
- </div>
- <div class="section" id="deployment-tutorial">
- <h3>Deployment Tutorial<a class="headerlink" href="#deployment-tutorial" title="Permalink to this headline"></a></h3>
- <p>The documentation mainly targets novice users who have no need for modifications or customization. It supports downloading model deployments from modelscope and also supports deploying models that users have fine-tuned. For detailed tutorials, please refer to <a class="reference internal" href="docs/SDK_tutorial_en.html"><span class="doc">docs</span></a>.</p>
- </div>
- <div class="section" id="advanced-development-guide">
- <h3>Advanced Development Guide<a class="headerlink" href="#advanced-development-guide" title="Permalink to this headline"></a></h3>
- <p>The documentation mainly targets advanced developers who require modifications and customization of the service. It supports downloading model deployments from modelscope and also supports deploying models that users have fine-tuned. For detailed information, please refer to the <a class="reference internal" href="docs/SDK_advanced_guide_offline_en.html"><span class="doc">docs</span></a>.</p>
- </div>
- </div>
- <div class="section" id="the-real-time-transcription-service-mandarin-cpu">
- <h2>The real-time transcription service, Mandarin (CPU)<a class="headerlink" href="#the-real-time-transcription-service-mandarin-cpu" title="Permalink to this headline"></a></h2>
- <p>The FunASR real-time speech-to-text service software package not only performs real-time speech-to-text conversion, but also corrects the transcription with high precision at the end of each sentence and outputs punctuated text, while supporting a large number of concurrent requests.
- To meet the needs of different users and scenarios, different tutorials are provided:</p>
- <div class="section" id="id1">
- <h3>Whats-new<a class="headerlink" href="#id1" title="Permalink to this headline"></a></h3>
- <ul class="simple">
- <li><p>2023/11/09: Real-time Transcription Service 1.5 released, fixing a bug where no online results were returned, docker image version funasr-runtime-sdk-online-cpu-0.1.5 (b16584b6d38b).</p></li>
- <li><p>2023/11/08: Real-time Transcription Service 1.4 released, supporting server-side loading of hotwords (updated hotword communication protocol), adaptation to runtime structure changes (FunASR/funasr/runtime -> FunASR/runtime), docker image version funasr-runtime-sdk-online-cpu-0.1.4 (691974017c38).</p></li>
- <li><p>2023/09/19: Real-time Transcription Service 1.2 released, supporting hotwords, timestamps, and ITN model in 2pass mode, docker image version funasr-runtime-sdk-online-cpu-0.1.2 (7222c5319bcf).</p></li>
- <li><p>2023/08/11: Real-time Transcription Service 1.1 released, addressing some known bugs (including server crashes), docker image version funasr-runtime-sdk-online-cpu-0.1.1 (bdbdd0b27dee).</p></li>
- <li><p>2023/08/07: Real-time Transcription Service 1.0 released, docker image version funasr-runtime-sdk-online-cpu-0.1.0 (bdbdd0b27dee), refer to the detailed documentation (<a class="reference external" href="https://mp.weixin.qq.com/s/8He081-FM-9IEI4D-lxZ9w">here</a>).</p></li>
- </ul>
- </div>
- <div class="section" id="convenient-deployment-tutorial">
- <h3>Convenient Deployment Tutorial<a class="headerlink" href="#convenient-deployment-tutorial" title="Permalink to this headline"></a></h3>
- <p>This is suitable for scenarios where there is no need to modify the service deployment SDK and the deployed model comes from ModelScope or is fine-tuned by the user. For detailed tutorials, please refer to <a class="reference internal" href="docs/SDK_tutorial_online.html"><span class="doc">docs</span></a>.</p>
- </div>
- <div class="section" id="development-guide">
- <h3>Development Guide<a class="headerlink" href="#development-guide" title="Permalink to this headline"></a></h3>
- <p>This is suitable for scenarios where there is a need to modify the service deployment SDK and the deployed model comes from ModelScope or is fine-tuned by the user. For detailed documentation, please refer to <a class="reference internal" href="docs/SDK_advanced_guide_online.html"><span class="doc">docs</span></a>.</p>
- </div>
- <div class="section" id="technology-principles-revealed">
- <h3>Technology Principles Revealed<a class="headerlink" href="#technology-principles-revealed" title="Permalink to this headline"></a></h3>
- <p>The document introduces the technology principles behind the service, recognition accuracy, computing efficiency, and core advantages: convenience, high precision, high efficiency, and long audio chain. For detailed documentation, please refer to <a class="reference external" href="https://mp.weixin.qq.com/s/8He081-FM-9IEI4D-lxZ9w">docs</a>.</p>
- </div>
- </div>
- <div class="section" id="file-transcription-service-mandarin-cpu">
- <h2>File Transcription Service, Mandarin (CPU)<a class="headerlink" href="#file-transcription-service-mandarin-cpu" title="Permalink to this headline"></a></h2>
- <p>Currently, the FunASR runtime-SDK supports the deployment of file transcription service, Mandarin (CPU version), with a complete speech recognition chain that can transcribe tens of hours of audio into punctuated text, and supports recognition for more than a hundred concurrent streams.</p>
- <p>To meet the needs of different users, we have prepared different tutorials with text and images for both novice and advanced developers.</p>
- <div class="section" id="id2">
- <h3>Whats-new<a class="headerlink" href="#id2" title="Permalink to this headline"></a></h3>
- <ul class="simple">
- <li><p>2023/11/08: File Transcription Service 3.0 released, supporting punctuation large model, Ngram model, fst hotwords (updated hotword communication protocol), server-side loading of hotwords, adaptation to runtime structure changes (FunASR/funasr/runtime -> FunASR/runtime), docker image version funasr-runtime-sdk-cpu-0.3.0 (caa64bddbb43), refer to the detailed documentation (<a class="reference external" href="#">here</a>).</p></li>
- <li><p>2023/09/19: File Transcription Service 2.2 released, supporting ITN model, docker image version funasr-runtime-sdk-cpu-0.2.2 (2c5286be13e9).</p></li>
- <li><p>2023/08/22: File Transcription Service 2.0 released, integrated ffmpeg to support various audio and video inputs, supporting hotword model and timestamp model, docker image version funasr-runtime-sdk-cpu-0.2.0 (1ad3d19e0707), refer to the detailed documentation (<a class="reference external" href="https://mp.weixin.qq.com/s/oJHe0MKDqTeuIFH-F7GHMg">here</a>).</p></li>
- <li><p>2023/07/03: File Transcription Service 1.0 released, docker image version funasr-runtime-sdk-cpu-0.1.0 (1ad3d19e0707), refer to the detailed documentation (<a class="reference external" href="https://mp.weixin.qq.com/s/DHQwbgdBWcda0w_L60iUww">here</a>).</p></li>
- </ul>
- </div>
- <div class="section" id="id3">
- <h3>Technical Principles<a class="headerlink" href="#id3" title="Permalink to this headline"></a></h3>
- <p>This document explains the technical principles behind FunASR, covering the underlying technology, recognition accuracy, computational efficiency, and the core advantages of the framework: convenience, high precision, high efficiency, and support for long audio chains. For detailed information, please refer to the <a class="reference external" href="https://mp.weixin.qq.com/s/DHQwbgdBWcda0w_L60iUww">docs</a>.</p>
- </div>
- <div class="section" id="id4">
- <h3>Deployment Tutorial<a class="headerlink" href="#id4" title="Permalink to this headline"></a></h3>
- <p>The documentation mainly targets novice users who have no need for modifications or customization. It supports downloading model deployments from modelscope and also supports deploying models that users have fine-tuned. For detailed tutorials, please refer to <a class="reference internal" href="docs/SDK_tutorial.html"><span class="doc">docs</span></a>.</p>
- </div>
- <div class="section" id="id5">
- <h3>Advanced Development Guide<a class="headerlink" href="#id5" title="Permalink to this headline"></a></h3>
- <p>The documentation mainly targets advanced developers who require modifications and customization of the service. It supports downloading model deployments from modelscope and also supports deploying models that users have fine-tuned. For detailed information, please refer to the <a class="reference internal" href="docs/SDK_advanced_guide_offline.html"><span class="doc">docs</span></a>.</p>
- </div>
- </div>
- </div>
- </div>
- </div>
- <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
- <a href="../model_zoo/huggingface_models.html" class="btn btn-neutral float-left" title="Pretrained Models on Huggingface" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
- <a href="docs/SDK_tutorial_online.html" class="btn btn-neutral float-right" title="FunASR Realtime Transcribe Service" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
- </div>
- <hr/>
- <div role="contentinfo">
- <p>© Copyright 2022, Speech Lab, Alibaba Group.</p>
- </div>
- Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
- <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
- provided by <a href="https://readthedocs.org">Read the Docs</a>.
-
- </footer>
- </div>
- </div>
- </section>
- </div>
- <script>
- jQuery(function () {
- SphinxRtdTheme.Navigation.enable(true);
- });
- </script>
- </body>
- </html>
|