2 yıl önce · 81e56b2627
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -18,10 +18,10 @@ jobs:
 
				         with:
			
 
				           docs-folder: "docs/"
			
 
				           pre-build-command: "pip install sphinx-markdown-tables nbsphinx jinja2 recommonmark sphinx_rtd_theme"
			
 
				-      - uses: ammaraskar/sphinx-action@master
			
 
				-        with:
			
 
				-          docs-folder: "docs_cn/"
			
 
				-          pre-build-command: "pip install sphinx-markdown-tables nbsphinx jinja2 recommonmark sphinx_rtd_theme"
			
 
				+#      - uses: ammaraskar/sphinx-action@master
			
 
				+#        with:
			
 
				+#          docs-folder: "docs_cn/"
			
 
				+#          pre-build-command: "pip install sphinx-markdown-tables nbsphinx jinja2 recommonmark sphinx_rtd_theme"
			
 
				 
			
 
				       - name: deploy copy
			
 
				         if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev_wjm' || github.ref == 'refs/heads/dev_lyh'
			
@@ -31,9 +31,9 @@ jobs:
 
				           mkdir public/en
			
 
				           touch public/en/.nojekyll
			
 
				           cp -r docs/_build/html/* public/en/
			
 
				-          mkdir public/cn
			
 
				-          touch public/cn/.nojekyll
			
 
				-          cp -r docs_cn/_build/html/* public/cn/
			
 
				+#          mkdir public/cn
			
 
				+#          touch public/cn/.nojekyll
			
 
				+#          cp -r docs_cn/_build/html/* public/cn/
			
 
				           mkdir public/m2met2
			
 
				           touch public/m2met2/.nojekyll
			
 
				           cp -r docs_m2met2/_build/html/* public/m2met2/
			
--- a/docs/benchmark/benchmark_libtorch.md
+++ b/docs/benchmark/benchmark_libtorch.md
@@ -0,0 +1 @@
 
				+../../funasr/runtime/python/benchmark_libtorch.md
			
--- a/docs/benchmark/benchmark_onnx.md
+++ b/docs/benchmark/benchmark_onnx.md
@@ -0,0 +1 @@
 
				+../../funasr/runtime/python/benchmark_onnx.md
			
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -61,6 +61,13 @@ FunASR hopes to build a bridge between academic research and industrial applicat
 
				    ./runtime/grpc_cpp.md
			
 
				    ./runtime/websocket_python.md
			
 
				 
			
 
				+.. toctree::
			
 
				+   :maxdepth: 1
			
 
				+   :caption: Benchmark and Leaderboard
			
 
				+
			
 
				+   ./benchmark/benchmark_onnx.md
			
 
				+   ./benchmark/benchmark_libtorch.md
			
 
				+
			
 
				 .. toctree::
			
 
				    :maxdepth: 1
			
 
				    :caption: Papers
			
--- a/docs_cn/Makefile
+++ b/docs_cn/Makefile
@@ -1,21 +0,0 @@
 
				-# Minimal makefile for Sphinx documentation
			
 
				-#
			
 
				-
			
 
				-# You can set these variables from the command line, and also
			
 
				-# from the environment for the first two.
			
 
				-SPHINXOPTS    =
			
 
				-SPHINXBUILD   = sphinx-build
			
 
				-SPHINXPROJ    = FunASR
			
 
				-SOURCEDIR     = .
			
 
				-BUILDDIR      = _build
			
 
				-
			
 
				-# Put it first so that "make" without argument is like "make help".
			
 
				-help:
			
 
				-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
			
 
				-
			
 
				-.PHONY: help Makefile
			
 
				-
			
 
				-# Catch-all target: route all unknown targets to Sphinx using the new
			
 
				-# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
			
 
				-%: Makefile
			
 
				-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
			
--- a/docs_cn/build_task.md
+++ b/docs_cn/build_task.md
@@ -1,124 +0,0 @@
 
				-# 搭建自定义任务
			
 
				-FunASR类似ESPNet，以`Task`为通用接口，从而实现模型的训练和推理。每一个`Task`是一个类，其需要继承`AbsTask`，其对应的具体代码见`funasr/tasks/abs_task.py`。下面给出其包含的主要函数及功能介绍：
			
 
				-```python
			
 
				-class AbsTask(ABC):
			
 
				-    @classmethod
			
 
				-    def add_task_arguments(cls, parser: argparse.ArgumentParser):
			
 
				-        pass
			
 
				-    
			
 
				-    @classmethod
			
 
				-    def build_preprocess_fn(cls, args, train):
			
 
				-        (...)
			
 
				-    
			
 
				-    @classmethod
			
 
				-    def build_collate_fn(cls, args: argparse.Namespace):
			
 
				-        (...)
			
 
				-
			
 
				-    @classmethod
			
 
				-    def build_model(cls, args):
			
 
				-        (...)
			
 
				-    
			
 
				-    @classmethod
			
 
				-    def main(cls, args):
			
 
				-        (...)
			
 
				-```
			
 
				-- add_task_arguments：添加特定`Task`需要的参数
			
 
				-- build_preprocess_fn：定义如何处理对样本进行预处理
			
 
				-- build_collate_fn：定义如何将多个样本组成一个`batch`
			
 
				-- build_model：定义模型
			
 
				-- main：训练入口，通过`Task.main()`来启动训练
			
 
				-
			
 
				-下面我们将以语音识别任务为例，介绍如何定义一个新的`Task`，具体代码见`funasr/tasks/asr.py`中的`ASRTask`。 定义新的`Task`的过程，其实就是根据任务需求，重定义上述函数的过程。
			
 
				-- add_task_arguments
			
 
				-```python
			
 
				-@classmethod
			
 
				-def add_task_arguments(cls, parser: argparse.ArgumentParser):
			
 
				-    group = parser.add_argument_group(description="Task related")
			
 
				-    group.add_argument(
			
 
				-        "--token_list",
			
 
				-        type=str_or_none,
			
 
				-        default=None,
			
 
				-        help="A text mapping int-id to token",
			
 
				-    )
			
 
				-    (...)
			
 
				-```
			
 
				-对于语音识别任务，需要的特定参数包括`token_list`等。根据不同任务的特定需求，用户可以在此函数中定义相应的参数。
			
 
				-
			
 
				-- build_preprocess_fn
			
 
				-```python
			
 
				-@classmethod
			
 
				-def build_preprocess_fn(cls, args, train):
			
 
				-    if args.use_preprocessor:
			
 
				-        retval = CommonPreprocessor(
			
 
				-                    train=train,
			
 
				-                    token_type=args.token_type,
			
 
				-                    token_list=args.token_list,
			
 
				-                    bpemodel=args.bpemodel,
			
 
				-                    non_linguistic_symbols=args.non_linguistic_symbols,
			
 
				-                    text_cleaner=args.cleaner,
			
 
				-                    ...
			
 
				-                )
			
 
				-    else:
			
 
				-        retval = None
			
 
				-    return retval
			
 
				-```
			
 
				-该函数定义了如何对样本进行预处理。具体地，语音识别任务的输入包括音频和抄本。对于音频，在此实现了(可选)对音频加噪声，加混响等功能；对于抄本，在此实现了(可选)根据bpe处理抄本，将抄本映射成`tokenid`等功能。用户可以自己选择需要对样本进行的预处理操作，实现方法可以参考`CommonPreprocessor`。
			
 
				-
			
 
				-- build_collate_fn
			
 
				-```python
			
 
				-@classmethod
			
 
				-def build_collate_fn(cls, args, train):
			
 
				-    return CommonCollateFn(float_pad_value=0.0, int_pad_value=-1)
			
 
				-```
			
 
				-该函数定义了如何将多个样本组成一个`batch`。对于语音识别任务，在此实现的是将不同的音频和抄本，通过`padding`的方式来得到等长的数据。具体地，我们默认用`0.0`来作为音频的填充值，用`-1`作为抄本的默认填充值。用户可以在此定义不同的组`batch`操作，实现方法可以参考`CommonCollateFn`。
			
 
				-
			
 
				-- build_model
			
 
				-```python
			
 
				-@classmethod
			
 
				-def build_model(cls, args, train):
			
 
				-    with open(args.token_list, encoding="utf-8") as f:
			
 
				-        token_list = [line.rstrip() for line in f]
			
 
				-        vocab_size = len(token_list)
			
 
				-        frontend = frontend_class(**args.frontend_conf)
			
 
				-        specaug = specaug_class(**args.specaug_conf)
			
 
				-        normalize = normalize_class(**args.normalize_conf)
			
 
				-        preencoder = preencoder_class(**args.preencoder_conf)
			
 
				-        encoder = encoder_class(input_size=input_size, **args.encoder_conf)
			
 
				-        postencoder = postencoder_class(input_size=encoder_output_size, **args.postencoder_conf)
			
 
				-        decoder = decoder_class(vocab_size=vocab_size, encoder_output_size=encoder_output_size,  **args.decoder_conf)
			
 
				-        ctc = CTC(odim=vocab_size, encoder_output_size=encoder_output_size, **args.ctc_conf)
			
 
				-        model = model_class(
			
 
				-            vocab_size=vocab_size,
			
 
				-            frontend=frontend,
			
 
				-            specaug=specaug,
			
 
				-            normalize=normalize,
			
 
				-            preencoder=preencoder,
			
 
				-            encoder=encoder,
			
 
				-            postencoder=postencoder,
			
 
				-            decoder=decoder,
			
 
				-            ctc=ctc,
			
 
				-            token_list=token_list,
			
 
				-            **args.model_conf,
			
 
				-        )
			
 
				-    return model
			
 
				-```
			
 
				-该函数定义了具体的模型。对于不同的语音识别模型，往往可以共用同一个语音识别`Task`，额外需要做的是在此函数中定义特定的模型。例如，这里给出的是一个标准的encoder-decoder结构的语音识别模型。具体地，先定义该模型的各个模块，包括encoder，decoder等，然后在将这些模块组合在一起得到一个完整的模型。在FunASR中，模型需要继承`AbsESPnetModel`，其具体代码见`funasr/train/abs_espnet_model.py`，主要需要实现的是`forward`函数。
			
 
				-
			
 
				-下面我们将以`SANMEncoder`为例，介绍如何在定义模型的时候，使用自定义的`encoder`来作为模型的组成部分，其具体的代码见`funasr/models/encoder/sanm_encoder.py`。对于自定义的`encoder`，除了需要继承通用的`encoder`类`AbsEncoder`外，还需要自定义`forward`函数，实现`encoder`的前向计算。在定义完`encoder`后，还需要在`Task`中对其进行注册，下面给出了相应的代码示例：
			
 
				-```python
			
 
				-encoder_choices = ClassChoices(
			
 
				-    "encoder",
			
 
				-    classes=dict(
			
 
				-        conformer=ConformerEncoder,
			
 
				-        transformer=TransformerEncoder,
			
 
				-        rnn=RNNEncoder,
			
 
				-        sanm=SANMEncoder,
			
 
				-        sanm_chunk_opt=SANMEncoderChunkOpt,
			
 
				-        data2vec_encoder=Data2VecEncoder,
			
 
				-        mfcca_enc=MFCCAEncoder,
			
 
				-    ),
			
 
				-    type_check=AbsEncoder,
			
 
				-    default="rnn",
			
 
				-)
			
 
				-```
			
 
				-可以看到，`sanm=SANMEncoder`将新定义的`SANMEncoder`作为了`encoder`的一种可选项，当用户在配置文件中指定`encoder`为`sanm`时，即会相应地将`SANMEncoder`作为模型的`encoder`模块。
			
--- a/docs_cn/conf.py
+++ b/docs_cn/conf.py
@@ -1,67 +0,0 @@
 
				-# Configuration file for the Sphinx documentation builder.
			
 
				-#
			
 
				-# This file only contains a selection of the most common options. For a full
			
 
				-# list see the documentation:
			
 
				-# https://www.sphinx-doc.org/en/master/usage/configuration.html
			
 
				-
			
 
				-# -- Path setup --------------------------------------------------------------
			
 
				-
			
 
				-# If extensions (or modules to document with autodoc) are in another directory,
			
 
				-# add these directories to sys.path here. If the directory is relative to the
			
 
				-# documentation root, use os.path.abspath to make it absolute, like shown here.
			
 
				-#
			
 
				-# import os
			
 
				-# import sys
			
 
				-# sys.path.insert(0, os.path.abspath('.'))
			
 
				-
			
 
				-
			
 
				-# -- Project information -----------------------------------------------------
			
 
				-
			
 
				-project = 'FunASR'
			
 
				-copyright = '2022, Speech Lab, Alibaba Group'
			
 
				-author = 'Speech Lab, Alibaba Grou'
			
 
				-
			
 
				-
			
 
				-# -- General configuration ---------------------------------------------------
			
 
				-
			
 
				-# Add any Sphinx extension module names here, as strings. They can be
			
 
				-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
			
 
				-# ones.
			
 
				-extensions = [
			
 
				-    "nbsphinx",
			
 
				-    "sphinx.ext.autodoc",
			
 
				-    'sphinx.ext.napoleon',
			
 
				-    'sphinx.ext.viewcode',
			
 
				-    "sphinx.ext.mathjax",
			
 
				-    "sphinx.ext.todo",
			
 
				-    # "sphinxarg.ext",
			
 
				-    "sphinx_markdown_tables",
			
 
				-    'recommonmark',
			
 
				-    'sphinx_rtd_theme',
			
 
				-]
			
 
				-
			
 
				-# Add any paths that contain templates here, relative to this directory.
			
 
				-templates_path = ['_templates']
			
 
				-
			
 
				-source_suffix = [".rst", ".md"]
			
 
				-
			
 
				-# List of patterns, relative to source directory, that match files and
			
 
				-# directories to ignore when looking for source files.
			
 
				-# This pattern also affects html_static_path and html_extra_path.
			
 
				-exclude_patterns = []
			
 
				-
			
 
				-# The name of the Pygments (syntax highlighting) style to use.
			
 
				-pygments_style = "sphinx"
			
 
				-
			
 
				-# -- Options for HTML output -------------------------------------------------
			
 
				-
			
 
				-# The theme to use for HTML and HTML Help pages.  See the documentation for
			
 
				-# a list of builtin themes.
			
 
				-#
			
 
				-
			
 
				-html_theme = "sphinx_rtd_theme"
			
 
				-
			
 
				-# Add any paths that contain custom static files (such as style sheets) here,
			
 
				-# relative to this directory. They are copied after the builtin static files,
			
 
				-# so a file named "default.css" will overwrite the builtin "default.css".
			
 
				-html_static_path = ['_static']
			
--- a/docs_cn/get_started.md
+++ b/docs_cn/get_started.md
@@ -1,131 +0,0 @@
 
				-# 快速开始
			
 
				-在此我们将以"使用AISHELL-1数据集，从随机初始化训练一个paraformer模型"为例，介绍如何使用FunASR。根据这个例子，用户可以类似地使用别的数据集（如AISHELL-2数据集等）训练别的模型（如conformer，transformer等）。
			
 
				-
			
 
				-## 整体介绍
			
 
				-
			
 
				-我们提供了`egs/aishell/paraformer/run.sh`来实现使用AISHELL-1数据集训练一个paraformer模型。该脚本包含5个阶段，包括从数据处理到训练解码等整个流程，同时提供了单/多GPU训练和CPU/GPU解码。在详细介绍每个阶段之前，我们先对用户需要手动设置的一些参数进行说明。
			
 
				-- `CUDA_VISIBLE_DEVICES`: 可用的GPU列表
			
 
				-- `gpu_num`: 用于训练的GPU数量
			
 
				-- `gpu_inference`: 是否使用GPU进行解码
			
 
				-- `njob`: 对于CPU解码，表示解码任务的总数；对于GPU解码，表示每张GPU上的解码任务数
			
 
				-- `data_aishell`: AISHELL-1原始数据的路径
			
 
				-- `feats_dir`: 经过处理得到的特征的保存路径
			
 
				-- `nj`: 数据处理时的并行任务数
			
 
				-- `speed_perturb`: 变速设置
			
 
				-- `exp_dir`: 实验结果的保存路径
			
 
				-- `tag`: 实验结果目录的后缀名
			
 
				-
			
 
				-## 阶段 0： 数据准备
			
 
				-本阶段用于处理原始的AISHELL-1数据，并生成相应的`wav.scp`和`text`，保存在`$feats_dir/data/xxx`目录下，这里的`xxx`表示`train`, `dev` 或 `test`（下同）。 这里我们假设用户已经下载好了AISHELL-1数据集。如果没有，用户可以在[这里](https://www.openslr.org/33/) 下载数据，并将`$data_aishell`设置为相应的路径。下面给出生成的`wav.scp`和`text`的示例：
			
 
				-本阶段用于处理原始的AISHELL-1数据，并生成相应的`wav.scp`和`text`，保存在`$feats_dir/data/xxx`目录下，这里的`xxx`表示`train`, `dev` 或 `test`（下同）。 这里我们假设用户已经下载好了AISHELL-1数据集。如果没有，用户可以在[这里](https://www.openslr.org/33/) 下载数据，并将`$data_aishell`设置为相应的路径。下面给出生成的`wav.scp`和`text`的示例：
			
 
				-* `wav.scp`
			
 
				-```
			
 
				-BAC009S0002W0122 /nfs/ASR_DATA/AISHELL-1/data_aishell/wav/train/S0002/BAC009S0002W0122.wav
			
 
				-BAC009S0002W0123 /nfs/ASR_DATA/AISHELL-1/data_aishell/wav/train/S0002/BAC009S0002W0123.wav
			
 
				-BAC009S0002W0124 /nfs/ASR_DATA/AISHELL-1/data_aishell/wav/train/S0002/BAC009S0002W0124.wav
			
 
				-...
			
 
				-```
			
 
				-* `text`
			
 
				-```
			
 
				-BAC009S0002W0122 而 对 楼 市 成 交 抑 制 作 用 最 大 的 限 购
			
 
				-BAC009S0002W0123 也 成 为 地 方 政 府 的 眼 中 钉
			
 
				-BAC009S0002W0124 自 六 月 底 呼 和 浩 特 市 率 先 宣 布 取 消 限 购 后
			
 
				-...
			
 
				-```
			
 
				-可以看到，这两个文件均包括两列，第一列是音频的id，第二列分别是音频路径和音频对应的抄本。
			
 
				-
			
 
				-## 阶段 1：特征提取
			
 
				-本阶段将会基于原始的音频`wav.scp`提取FBank特征。如果指定了参数`speed_perturb`，则会额外对音频进行变速来实现数据增强。用户可以设置`nj`参数来控制特征提取的并行任务数。处理后的特征保存在目录`$feats_dir/dump/xxx/ark`下，相应的`feats.scp`文件路径为`$feats_dir/dump/xxx/feats.scp`。下面给出`feats.scp`的示例：
			
 
				-* `feats.scp`
			
 
				-```
			
 
				-...
			
 
				-BAC009S0002W0122_sp0.9 /nfs/funasr_data/aishell-1/dump/fbank/train/ark/feats.16.ark:592751055
			
 
				-...
			
 
				-```
			
 
				-注意，该文件的样本顺序已经进行了随机打乱。该文件包括两列，第一列是音频的id，第二列是对应的kaldi-ark格式的特征。另外，在此阶段还会生成训练需要用到的`speech_shape`和`text_shape`两个文件，记录了每个样本的特征维度和抄本长度。下面给出这两个文件的示例：
			
 
				-* `speech_shape`
			
 
				-```
			
 
				-...
			
 
				-BAC009S0002W0122_sp0.9 665,80
			
 
				-...
			
 
				-```
			
 
				-* `text_shape`
			
 
				-```
			
 
				-...
			
 
				-BAC009S0002W0122_sp0.9 15
			
 
				-...
			
 
				-```
			
 
				-可以看到，这两个文件均包括两列，第一列是音频的id，第二列是对应的特征的维度和抄本的长度。
			
 
				-
			
 
				-## 阶段 2：字典准备
			
 
				-本阶段用于生成字典，用于训练过程中，字符到整数索引之间的映射。生成的字典文件的路径为`$feats_dir/data/zh_token_list/char/tokens.txt`。下面给出`tokens.txt`的示例：
			
 
				-* `tokens.txt`
			
 
				-```
			
 
				-<blank>
			
 
				-<s>
			
 
				-</s>
			
 
				-一
			
 
				-丁
			
 
				-...
			
 
				-龚
			
 
				-龟
			
 
				-<unk>
			
 
				-```
			
 
				-* `<blank>`: 表示CTC训练中的blank
			
 
				-* `<s>`: 表示句子的起始符
			
 
				-* `</s>`: 表示句子的终止符
			
 
				-* `<unk>`: 表示字典外的字符
			
 
				-
			
 
				-## 阶段 3：训练
			
 
				-本阶段对应模型的训练。在开始训练之前，需要指定实验结果保存目录`exp_dir`，训练可用GPU`CUDA_VISIBLE_DEVICES`和训练的gpu数量`gpu_num`。默认情况下，最好的`$keep_nbest_models`模型结果会被平均从而来获取更好的性能。
			
 
				-
			
 
				-* DDP Training
			
 
				-
			
 
				-我们提供了分布式训练（DDP）功能，具体的细节可以在[这里](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html) 找到。为了开启分布式训练，需要设置`gpu_num`大于1。例如，设置`CUDA_VISIBLE_DEVICES=0,1,5,6,7`，`gpu_num=3`，则编号为0，1和5的GPU会被用于训练。
			
 
				-
			
 
				-* DataLoader
			
 
				-
			
 
				-我们提供了基于[Pytorch Iterable-style DataPipes](https://pytorch.org/data/beta/torchdata.datapipes.iter.html) 实现的大数据DataLoader，用户可以通过设置`dataset_type=large`来启用。 
			
 
				-
			
 
				-* Configuration
			
 
				-
			
 
				-训练相关的参数，包括模型，优化器，数据等，均可以通过`conf`目录下的config文件指定。同时，用户也可以直接在`run.sh`脚本中指定相关参数。请避免在config文件和`run.sh`脚本中设置相同的参数，以免造成歧义。
			
 
				-
			
 
				-* Training Steps
			
 
				-
			
 
				-我们提供了两种方式来控制训练的总步数，对应的参数分别为`max_epoch`和`max_update`。`max_epoch`表示训练的最大epoch数，`max_update`表示训练的最大迭代次数。如果这两个参数同时被指定，则一旦训练步数到达其中任意一个参数，训练结束。
			
 
				-
			
 
				-* Tensorboard
			
 
				-
			
 
				-用户可以通过tensorboard来观察训练过程中的损失，学习率等。可以通过下述指令来实现：
			
 
				-```
			
 
				-tensorboard --logdir ${exp_dir}/exp/${model_dir}/tensorboard/train
			
 
				-```
			
 
				-
			
 
				-## 阶段 4: 解码
			
 
				-本阶段用于解码得到识别结果，同时计算CER来验证训练得到的模型性能。
			
 
				-
			
 
				-* Mode Selection
			
 
				-
			
 
				-由于我们提供了paraformer，uniasr和conformer等模型，因此在解码时，需要指定相应的解码模式。对应的参数为`mode`，相应的可选设置为`asr/paraformer/uniasr`等。
			
 
				-
			
 
				-* Configuration
			
 
				-
			
 
				-我们提供了ctc解码, attention解码和ctc-attention混合解码。这几种解码方式可以通过`conf`下的解码配置文件中的`ctc_weight`参数来指定。具体的，`ctc_weight=1.0`表示CTC解码, `ctc_weight=0.0`表示attention解码, `0.0<ctc_weight<1.0`表示ctc-attention混合解码。
			
 
				-
			
 
				-* CPU/GPU Decoding
			
 
				-
			
 
				-我们提供CPU/GPU解码。对于CPU解码，用户需要设置`gpu_inference=False`，同时设置`njob`来指定并行解码任务数量。对于GPU解码，用户需要设置`gpu_inference=True`，设置`gpuid_list`来指定哪些GPU用于解码，设置`njob`来指定每张GPU上的并行解码任务数量。
			
 
				-
			
 
				-* Performance
			
 
				-
			
 
				-我们采用`CER`来验证模型的性能。解码结果保存在`$exp_dir/exp/$model_dir/$decoding_yaml_name/$average_model_name/$dset`，具体包括`text.cer`和`text.cer.txt`两个文件。`text.cer`中的内容为识别结果和对应抄本之间的比较，`text.cer.txt`记录了最终的`CER`。下面给出`text.cer`的示例:
			
 
				-* `text.cer`
			
 
				-```
			
 
				-...
			
 
				-BAC009S0764W0213(nwords=11,cor=11,ins=0,del=0,sub=0) corr=100.00%,cer=0.00%
			
 
				-ref:    构 建 良 好 的 旅 游 市 场 环 境
			
 
				-res:    构 建 良 好 的 旅 游 市 场 环 境
			
 
				-...
			
 
				-```
			
 
				-
			
--- a/docs_cn/images/DeepScience.png
+++ b/docs_cn/images/DeepScience.png
--- a/docs_cn/images/dingding.jpg
+++ b/docs_cn/images/dingding.jpg
--- a/docs_cn/images/funasr_logo.jpg
+++ b/docs_cn/images/funasr_logo.jpg
--- a/docs_cn/images/wechat.png
+++ b/docs_cn/images/wechat.png
--- a/docs_cn/index.rst
+++ b/docs_cn/index.rst
@@ -1,33 +0,0 @@
 
				-.. Funasr documentation master file, created by
			
 
				-   sphinx-quickstart on Tues Dec 6 19:05:00 2022.
			
 
				-   You can adapt this file completely to your liking, but it should at least
			
 
				-   contain the root `toctree` directive.
			
 
				-
			
 
				-FunASR: A Fundamental End-to-End Speech Recognition Toolkit
			
 
				-============================================================
			
 
				-.. image:: ./images/funasr_logo.jpg
			
 
				-
			
 
				-FunASR致力于在语音识别的学术研究和工业应用之间构建起一座桥梁。通过在 `ModelScope <https://www.modelscope.cn/models?page=1&tasks=auto-speech-recognition>`_ 上发布工业级语音识别模型以及支持相关的训练和微调，研究者和开发者们可以更方便地进行语音识别模型的研究和生产，促进语音识别生态的发展。ASR for Fun!
			
 
				-
			
 
				-.. toctree::
			
 
				-   :maxdepth: 1
			
 
				-   :caption: 教程:
			
 
				-
			
 
				-   ./installation.md
			
 
				-   ./papers.md
			
 
				-   ./get_started.md
			
 
				-   ./build_task.md
			
 
				-
			
 
				-.. toctree::
			
 
				-   :maxdepth: 1
			
 
				-   :caption: ModelScope:
			
 
				-
			
 
				-   ./modelscope_models.md
			
 
				-   ./modelscope_usages.md
			
 
				-
			
 
				-Indices and tables
			
 
				-==================
			
 
				-
			
 
				-* :ref:`genindex`
			
 
				-* :ref:`modindex`
			
 
				-* :ref:`search`
			
--- a/docs_cn/installation.md
+++ b/docs_cn/installation.md
@@ -1,37 +0,0 @@
 
				-# 安装
			
 
				-FunASR的安装十分便捷，下面将给出详细的安装步骤：
			
 
				-
			
 
				-- 安装Conda并创建虚拟环境
			
 
				-``` sh
			
 
				-wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh
			
 
				-sh Miniconda3-latest-Linux-x86_64.sh
			
 
				-source ~/.bashrc
			
 
				-conda create -n funasr python=3.7
			
 
				-conda activate funasr
			
 
				-```
			
 
				-
			
 
				-- 安装Pytorch (版本 >= 1.7.0):
			
 
				-
			
 
				-```sh
			
 
				-pip install torch torchaudio
			
 
				-```
			
 
				-
			
 
				-关于更多的版本, 请参照 [https://pytorch.org/get-started/locally](https://pytorch.org/get-started/locally)
			
 
				-
			
 
				-- 安装 ModelScope
			
 
				-
			
 
				-对于国内用户，可以通过配置下述镜像源来加快下载速度
			
 
				-```sh
			
 
				-pip config set global.index-url https://mirror.sjtu.edu.cn/pypi/web/simple
			
 
				-```
			
 
				-
			
 
				-安装或更新ModelScope
			
 
				-``` sh
			
 
				-pip install "modelscope[audio_asr]" --upgrade -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
			
 
				-```
			
 
				-
			
 
				-- 下载FunASR仓库，并安装剩余所需依赖
			
 
				-``` sh
			
 
				-git clone https://github.com/alibaba/FunASR.git && cd FunASR
			
 
				-pip install --editable ./
			
 
				-```
			
--- a/docs_cn/make.bat
+++ b/docs_cn/make.bat
@@ -1,35 +0,0 @@
 
				-@ECHO OFF

			
 
				-

			
 
				-pushd %~dp0

			
 
				-

			
 
				-REM Command file for Sphinx documentation

			
 
				-

			
 
				-if "%SPHINXBUILD%" == "" (

			
 
				-	set SPHINXBUILD=sphinx-build

			
 
				-)

			
 
				-set SOURCEDIR=source

			
 
				-set BUILDDIR=build

			
 
				-

			
 
				-%SPHINXBUILD% >NUL 2>NUL

			
 
				-if errorlevel 9009 (

			
 
				-	echo.

			
 
				-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx

			
 
				-	echo.installed, then set the SPHINXBUILD environment variable to point

			
 
				-	echo.to the full path of the 'sphinx-build' executable. Alternatively you

			
 
				-	echo.may add the Sphinx directory to PATH.

			
 
				-	echo.

			
 
				-	echo.If you don't have Sphinx installed, grab it from

			
 
				-	echo.https://www.sphinx-doc.org/

			
 
				-	exit /b 1

			
 
				-)

			
 
				-

			
 
				-if "%1" == "" goto help

			
 
				-

			
 
				-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

			
 
				-goto end

			
 
				-

			
 
				-:help

			
 
				-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

			
 
				-

			
 
				-:end

			
 
				-popd

			
--- a/docs_cn/modelscope_models.md
+++ b/docs_cn/modelscope_models.md
@@ -1,34 +0,0 @@
 
				-# ModelScope上的预训练模型
			
 
				-
			
 
				-## 模型许可证
			
 
				--  Apache License 2.0
			
 
				-
			
 
				-## 模型库
			
 
				-这里我们提供了一些基于不同数据集训练得到的几种预训练模型，所有的预训练模型和更多细节可以参见 [ModelScope](https://www.modelscope.cn/models?page=1&tasks=auto-speech-recognition) 。
			
 
				-
			
 
				-| Datasets  | Hours |     Model      | Online/Offline | Language | Framework | Checkpoint |
			
 
				-|:-----:|:-----:|:--------------:|:--------------:| :---: | :---: | --- |
			
 
				-| Alibaba Speech Data | 60000 |   Paraformer   |   Offline   |       CN       | Pytorch |[speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) |
			
 
				-| Alibaba Speech Data | 50000 |   Paraformer   |   Offline   |       CN       | Tensorflow |[speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1/summary) |
			
 
				-| Alibaba Speech Data | 50000 |   Paraformer   |   Offline   |       CN       | Tensorflow |[speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1](https://www.modelscope.cn/models/damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8358-tensorflow1/summary) |
			
 
				-| Alibaba Speech Data | 50000 |   Paraformer   |   Online    |       CN       | Tensorflow |[speech_paraformer_asr_nat-zh-cn-16k-common-vocab3444-tensorflow1-online](http://www.modelscope.cn/models/damo/speech_paraformer_asr_nat-zh-cn-16k-common-vocab3444-tensorflow1-online/summary) |
			
 
				-| Alibaba Speech Data | 50000 |    UniASR     |   Online    |       CN       | Tensorflow |[speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-online](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-online/summary) |
			
 
				-| Alibaba Speech Data | 50000 |    UniASR     |   Offline   |       CN       | Tensorflow |[speech_UniASR-large_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline](https://www.modelscope.cn/models/damo/speech_UniASR-large_asr_2pass-zh-cn-16k-common-vocab8358-tensorflow1-offline/summary) |
			
 
				-| Alibaba Speech Data | 50000 |    UniASR     |   Online    |     CN&EN      | Tensorflow |[speech_UniASR_asr_2pass-cn-en-moe-16k-vocab8358-tensorflow1-online](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-cn-en-moe-16k-vocab8358-tensorflow1-online/summary) |
			
 
				-| Alibaba Speech Data | 50000 |    UniASR     |   Offline   |     CN&EN      | Tensorflow |[speech_UniASR_asr_2pass-cn-en-moe-16k-vocab8358-tensorflow1-offline](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-cn-en-moe-16k-vocab8358-tensorflow1-offline/summary) |
			
 
				-| Alibaba Speech Data | 20000 |    UniASR     |   Online    |   CN-Accent    | Tensorflow |[speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-online](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-online/summary) |
			
 
				-| Alibaba Speech Data | 20000 |    UniASR     |    Offline     |   CN-Accent    | Tensorflow |[speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-offline](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-cn-dialect-16k-vocab8358-tensorflow1-offline/summary) |
			
 
				-| Alibaba Speech Data | 30000 | Paraformer-8K |     Online     |       CN       | Tensorflow |[speech_paraformer_asr_nat-zh-cn-8k-common-vocab3444-tensorflow1-online](https://www.modelscope.cn/models/damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab3444-tensorflow1-online/summary) |
			
 
				-| Alibaba Speech Data |  30000   | Paraformer-8K |    Offline     |       CN       | Tensorflow |[speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1](https://www.modelscope.cn/models/damo/speech_paraformer_asr_nat-zh-cn-8k-common-vocab8358-tensorflow1/summary) |
			
 
				-| Alibaba Speech Data |  30000   | Paraformer-8K |     Online     |       CN       | Pytorch |[speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary) |
			
 
				-| Alibaba Speech Data |  30000   | Paraformer-8K |    Offline     |       CN       | Pytorch |[speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/summary) |
			
 
				-| Alibaba Speech Data |  30000   |   UniASR-8K   |     Online     |       CN       | Tensorflow |[speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-online](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-online/summary) |
			
 
				-| Alibaba Speech Data |  30000   |   UniASR-8K   |    Offline     |       CN       | Tensorflow |[speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-offline](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab8358-tensorflow1-offline/summary) |
			
 
				-| Alibaba Speech Data |  30000   |   UniASR-8K   |     Online     |       CN       | Pytorch |[speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-online/summary) |
			
 
				-| Alibaba Speech Data |  30000   |   UniASR-8K   |    Offline     |       CN       | Pytorch |[speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline](https://www.modelscope.cn/models/damo/speech_UniASR_asr_2pass-zh-cn-8k-common-vocab3445-pytorch-offline/summary) |
			
 
				-| AISHELL-1 |  178  |   Paraformer   | Offline |       CN       | Pytorch | [speech_paraformer_asr_nat-aishell1-pytorch](https://www.modelscope.cn/models/damo/speech_paraformer_asr_nat-aishell1-pytorch/summary) |
			
 
				-| AISHELL-2 | 1000  |   Paraformer   |   Offline   |       CN       | Pytorch | [speech_paraformer_asr_nat-aishell2-pytorch](https://www.modelscope.cn/models/damo/speech_paraformer_asr_nat-aishell2-pytorch/summary) |
			
 
				-| AISHELL-1 |  178  | ParaformerBert |   Offline   |       CN       | Pytorch | [speech_paraformerbert_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch](https://modelscope.cn/models/damo/speech_paraformerbert_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch/summary) |
			
 
				-| AISHELL-2 | 1000  | ParaformerBert |   Offline   |       CN       | Pytorch | [speech_paraformerbert_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch](https://modelscope.cn/models/damo/speech_paraformerbert_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch/summary) |
			
 
				-| AISHELL-1 |  178  |   Conformer   |    Offline     |       CN       | Pytorch | [speech_conformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch](https://modelscope.cn/models/damo/speech_conformer_asr_nat-zh-cn-16k-aishell1-vocab4234-pytorch/summary) |
			
 
				-| AISHELL-2 | 1000  |   Conformer   |    Offline     |       CN       | Pytorch | [speech_conformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch](https://modelscope.cn/models/damo/speech_conformer_asr_nat-zh-cn-16k-aishell2-vocab5212-pytorch/summary) |
			
--- a/docs_cn/modelscope_usages.md
+++ b/docs_cn/modelscope_usages.md
@@ -1,52 +0,0 @@
 
				-# ModelScope 使用说明
			
 
				-ModelScope是阿里巴巴推出的开源模型即服务共享平台，为广大学术界用户和工业界用户提供灵活、便捷的模型应用支持。具体的使用方法和开源模型可以参见[ModelScope](https://www.modelscope.cn/models?page=1&tasks=auto-speech-recognition) 。在语音方向，我们提供了自回归/非自回归语音识别，语音预训练，标点预测等模型，用户可以方便使用。
			
 
				-
			
 
				-## 整体介绍
			
 
				-我们在`egs_modelscope` 目录下提供了不同模型的使用方法，支持直接用我们提供的模型进行推理，同时也支持将我们提供的模型作为预训练好的初始模型进行微调。下面，我们将以`egs_modelscope/asr/paraformer/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch`目录中提供的模型来进行介绍，包括`infer.py`，`finetune.py`和`infer_after_finetune.py`，对应的功能如下：
			
 
				-- `infer.py`: 基于我们提供的模型，对指定的数据集进行推理
			
 
				-- `finetune.py`: 将我们提供的模型作为初始模型进行微调
			
 
				-- `infer_after_finetune.py`: 基于微调得到的模型，对指定的数据集进行推理
			
 
				-
			
 
				-## 模型推理
			
 
				-我们提供了`infer.py`来实现模型推理。基于此文件，用户可以基于我们提供的模型，对指定的数据集进行推理，得到相应的识别结果。如果给定了抄本，则会同时计算`CER`。在开始推理前，用户可以指定如下参数来修改推理配置：
			
 
				-* `data_dir`：数据集目录。目录下应该包括音频列表文件`wav.scp`和抄本文件`text`(可选)，具体格式可以参见[快速开始](./get_started.md)中的说明。如果`text`文件存在，则会相应的计算CER，否则会跳过。
			
 
				-* `output_dir`：推理结果保存目录
			
 
				-* `batch_size`：推理时的batch大小
			
 
				-* `ctc_weight`：部分模型包含CTC模块，可以设置该参数来指定推理时，CTC模块的权重
			
 
				-
			
 
				-除了直接在`infer.py`中设置参数外，用户也可以通过手动修改模型下载目录下的`decoding.yaml`文件中的参数来修改推理配置。
			
 
				-
			
 
				-## 模型微调
			
 
				-我们提供了`finetune.py`来实现模型微调。基于此文件，用户可以基于我们提供的模型作为初始模型，在指定的数据集上进行微调，从而在特定领域取得更好的性能。在微调开始前，用户可以指定如下参数来修改微调配置：
			
 
				-* `data_path`：数据目录。该目录下应该包括存放训练集数据的`train`目录和存放验证集数据的`dev`目录。每个目录中需要包括音频列表文件`wav.scp`和抄本文件`text`
			
 
				-* `output_dir`：微调结果保存目录
			
 
				-* `dataset_type`：对于小数据集，设置为`small`；当数据量大于1000小时时，设置为`large`
			
 
				-* `batch_bins`：batch size，如果dataset_type设置为`small`，batch_bins单位为fbank特征帧数；如果dataset_type设置为`large`，batch_bins单位为毫秒
			
 
				-* `max_epoch`：最大的训练轮数
			
 
				-
			
 
				-以下参数也可以进行设置。但是如果没有特别的需求，可以忽略，直接使用我们给定的默认值：
			
 
				-* `accum_grad`：梯度累积
			
 
				-* `keep_nbest_models`：选择性能最好的`keep_nbest_models`个模型的参数进行平均，得到性能更好的模型
			
 
				-* `optim`：设置优化器
			
 
				-* `lr`：设置学习率
			
 
				-* `scheduler`：设置学习率调整策略
			
 
				-* `scheduler_conf`：学习率调整策略的相关参数
			
 
				-* `specaug`：设置谱增广
			
 
				-* `specaug_conf`：谱增广的相关参数
			
 
				-
			
 
				-除了直接在`finetune.py`中设置参数外，用户也可以通过手动修改模型下载目录下的`finetune.yaml`文件中的参数来修改微调配置。
			
 
				-
			
 
				-## 基于微调后的模型推理
			
 
				-我们提供了`infer_after_finetune.py`来实现基于用户自己微调得到的模型进行推理。基于此文件，用户可以基于微调后的模型，对指定的数据集进行推理，得到相应的识别结果。如果给定了抄本，则会同时计算CER。在开始推理前，用户可以指定如下参数来修改推理配置：
			
 
				-* `data_dir`：数据集目录。目录下应该包括音频列表文件`wav.scp`和抄本文件`text`(可选)。如果`text`文件存在，则会相应的计算CER，否则会跳过。
			
 
				-* `output_dir`：推理结果保存目录
			
 
				-* `batch_size`：推理时的batch大小
			
 
				-* `ctc_weight`：部分模型包含CTC模块，可以设置该参数来指定推理时，CTC模块的权重
			
 
				-* `decoding_model_name`：指定用于推理的模型名
			
 
				-
			
 
				-以下参数也可以进行设置。但是如果没有特别的需求，可以忽略，直接使用我们给定的默认值：
			
 
				-* `modelscope_model_name`：微调时使用的初始模型名
			
 
				-* `required_files`：使用modelscope接口进行推理时需要用到的文件
			
 
				-
			
 
				-## 注意事项
			
 
				-部分模型可能在微调、推理时存在一些特有的参数，这部分参数可以在对应目录的`README.md`文件中找到具体用法。
			
--- a/docs_cn/papers.md
+++ b/docs_cn/papers.md
@@ -1,4 +0,0 @@
 
				-# 论文
			
 
				-
			
 
				-- [Universal ASR: Unifying Streaming and Non-Streaming ASR Using a Single Encoder-Decoder Model](https://arxiv.org/abs/2010.14099), arXiv preprint arXiv:2010.14099, 2020.
			
 
				-- [Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition](https://arxiv.org/abs/2206.08317), INTERSPEECH 2022.
			
--- a/funasr/runtime/python/benchmark_libtorch.md
+++ b/funasr/runtime/python/benchmark_libtorch.md
@@ -1,27 +1,32 @@
 
				-# Benchmark 
			
 
				+# CPU Benchmark (Libtorch)
			
 
				 
			
 
				+## Configuration
			
 
				 ### Data set:
			
 
				 Aishell1 [test set](https://www.openslr.org/33/) , the total audio duration is 36108.919 seconds.
			
 
				 
			
 
				 ### Tools
			
 
				-- Install ModelScope and FunASR
			
 
				-
			
 
				-    ```shell
			
 
				-    pip install "modelscope[audio_asr]" --upgrade -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
			
 
				-    git clone https://github.com/alibaba-damo-academy/FunASR.git && cd FunASR
			
 
				-    pip install --editable ./
			
 
				-    cd funasr/runtime/python/utils
			
 
				-    pip install -r requirements.txt
			
 
				-    ```
			
 
				-
			
 
				-- recipe
			
 
				-
			
 
				-    set the model, data path and output_dir
			
 
				-
			
 
				-    ```shell
			
 
				-    nohup bash test_rtf.sh &> log.txt &
			
 
				-    ```
			
 
				-
			
 
				+#### Install Requirements
			
 
				+Install ModelScope and FunASR
			
 
				+```shell
			
 
				+pip install -U modelscope funasr
			
 
				+# For users in China, you can install from a mirror with the following command:
			
 
				+#pip install -U funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple
			
 
				+```
			
 
				+
			
 
				+Install requirements
			
 
				+```shell
			
 
				+git clone https://github.com/alibaba-damo-academy/FunASR.git && cd FunASR
			
 
				+cd funasr/runtime/python/utils
			
 
				+pip install -r requirements.txt
			
 
				+```
			
 
				+
			
 
				+#### Recipe
			
 
				+
			
 
				+Set the model, data path, and output_dir, then run:
			
 
				+
			
 
				+```shell
			
 
				+nohup bash test_rtf.sh &> log.txt &
			
 
				+```
			
 
				 
			
 
				 
			
 
				 ## [Paraformer-large](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary) 
			
--- a/funasr/runtime/python/benchmark_onnx.md
+++ b/funasr/runtime/python/benchmark_onnx.md
@@ -1,26 +1,32 @@
 
				-# Benchmark 
			
 
				+# CPU Benchmark (ONNX)
			
 
				 
			
 
				+## Configuration
			
 
				 ### Data set:
			
 
				 Aishell1 [test set](https://www.openslr.org/33/) , the total audio duration is 36108.919 seconds.
			
 
				 
			
 
				 ### Tools
			
 
				-- Install ModelScope and FunASR
			
 
				-
			
 
				-    ```shell
			
 
				-    pip install "modelscope[audio_asr]" --upgrade -f https://modelscope.oss-cn-beijing.aliyuncs.com/releases/repo.html
			
 
				-    git clone https://github.com/alibaba-damo-academy/FunASR.git && cd FunASR
			
 
				-    pip install --editable ./
			
 
				-    cd funasr/runtime/python/utils
			
 
				-    pip install -r requirements.txt
			
 
				-    ```
			
 
				-
			
 
				-- recipe
			
 
				-
			
 
				-    set the model, data path and output_dir
			
 
				-
			
 
				-    ```shell
			
 
				-    nohup bash test_rtf.sh &> log.txt &
			
 
				-    ```
			
 
				+#### Install Requirements
			
 
				+Install ModelScope and FunASR
			
 
				+```shell
			
 
				+pip install -U modelscope funasr
			
 
				+# For users in China, you can install from a mirror with the following command:
			
 
				+#pip install -U funasr -i https://mirror.sjtu.edu.cn/pypi/web/simple
			
 
				+```
			
 
				+
			
 
				+Install requirements
			
 
				+```shell
			
 
				+git clone https://github.com/alibaba-damo-academy/FunASR.git && cd FunASR
			
 
				+cd funasr/runtime/python/utils
			
 
				+pip install -r requirements.txt
			
 
				+```
			
 
				+
			
 
				+#### Recipe
			
 
				+
			
 
				+Set the model, data path, and output_dir, then run:
			
 
				+
			
 
				+```shell
			
 
				+nohup bash test_rtf.sh &> log.txt &
			
 
				+```
			
 
				 
			
 
				 
			
 
				 ## [Paraformer-large](https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary)
		`@@ -0,0 +1 @@`
		`+../../funasr/runtime/python/benchmark_libtorch.md`