1 rok temu · 790bf54944
--- a/README.md
+++ b/README.md
@@ -115,7 +115,7 @@ res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
 
				                      hotword='魔搭')
			
 
				 print(res)
			
 
				 ```
			
 
				-Note: `model_hub`: represents the model repository, `ms` stands for selecting ModelScope download, `hf` stands for selecting Huggingface download.
			
 
				+Note: `hub` specifies the model repository: `ms` downloads from ModelScope, and `hf` downloads from Hugging Face.
			
 
				 
			
 
				 ### Speech Recognition (Streaming)
			
 
				 ```python
			
--- a/README_zh.md
+++ b/README_zh.md
@@ -111,7 +111,7 @@ res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
 
				             hotword='魔搭')
			
 
				 print(res)
			
 
				 ```
			
 
				-注：`model_hub`：表示模型仓库，`ms`为选择modelscope下载，`hf`为选择huggingface下载。
			
 
				+注：`hub`：表示模型仓库，`ms`为选择modelscope下载，`hf`为选择huggingface下载。
			
 
				 
			
 
				 ### 实时语音识别
			
 
				 
			
--- a/examples/industrial_data_pretraining/whisper/demo_from_openai.py
+++ b/examples/industrial_data_pretraining/whisper/demo_from_openai.py
@@ -0,0 +1,17 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- encoding: utf-8 -*-
			
 
				+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
			
 
				+#  MIT License  (https://opensource.org/licenses/MIT)
			
 
				+
			
 
				+from funasr import AutoModel
			
 
				+
			
 
				+# model = AutoModel(model="Whisper-small", hub="openai")
			
 
				+# model = AutoModel(model="Whisper-medium", hub="openai")
			
 
				+model = AutoModel(model="Whisper-large-v2", hub="openai")
			
 
				+# model = AutoModel(model="Whisper-large-v3", hub="openai")
			
 
				+
			
 
				+res = model.generate(
			
 
				+	language=None,
			
 
				+	task="transcribe",
			
 
				+	input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
			
 
				+print(res)
			
--- a/examples/industrial_data_pretraining/whisper/infer_from_openai.sh
+++ b/examples/industrial_data_pretraining/whisper/infer_from_openai.sh
@@ -0,0 +1,24 @@
 
				+# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
			
 
				+#  MIT License  (https://opensource.org/licenses/MIT)
			
 
				+
			
 
				+# method 1: inference from the model hub
			
 
				+
			
 
				+# for more input types, please refer to readme.md
			
 
				+input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav"
			
 
				+
			
 
				+output_dir="./outputs/debug"
			
 
				+
			
 
				+#model="Whisper-small"
			
 
				+#model="Whisper-medium"
			
 
				+model="Whisper-large-v2"
			
 
				+#model="Whisper-large-v3"
			
 
				+hub="openai"
			
 
				+
			
 
				+device="cuda:0" # "cuda:0" for gpu0, "cuda:1" for gpu1, "cpu"
			
 
				+
			
 
				+python -m funasr.bin.inference \
			
 
				+++model=${model} \
			
 
				+++hub=${hub} \
			
 
				+++input="${input}" \
			
 
				+++output_dir="${output_dir}" \
			
 
				+++device="${device}" \
			
--- a/funasr/auto/auto_frontend.py
+++ b/funasr/auto/auto_frontend.py
@@ -31,7 +31,7 @@ class AutoFrontend:
 
				     def __init__(self, **kwargs):
			
 
				         assert "model" in kwargs
			
 
				         if "model_conf" not in kwargs:
			
 
				-            logging.info("download models from model hub: {}".format(kwargs.get("model_hub", "ms")))
			
 
				+            logging.info("download models from model hub: {}".format(kwargs.get("hub", "ms")))
			
 
				             kwargs = download_model(**kwargs)
			
 
				         
			
 
				         # build frontend
			
--- a/funasr/auto/auto_model.py
+++ b/funasr/auto/auto_model.py
@@ -143,7 +143,7 @@ class AutoModel:
 
				     def build_model(self, **kwargs):
			
 
				         assert "model" in kwargs
			
 
				         if "model_conf" not in kwargs:
			
 
				-            logging.info("download models from model hub: {}".format(kwargs.get("model_hub", "ms")))
			
 
				+            logging.info("download models from model hub: {}".format(kwargs.get("hub", "ms")))
			
 
				             kwargs = download_model(**kwargs)
			
 
				         
			
 
				         set_all_random_seed(kwargs.get("seed", 0))
			
@@ -180,7 +180,7 @@ class AutoModel:
 
				         
			
 
				         # build model
			
 
				         model_class = tables.model_classes.get(kwargs["model"])
			
 
				-        model = model_class(**kwargs, **kwargs["model_conf"], vocab_size=vocab_size)
			
 
				+        model = model_class(**kwargs, **kwargs.get("model_conf", {}), vocab_size=vocab_size)
			
 
				         model.to(device)
			
 
				         
			
 
				         # init_param
			
--- a/funasr/bin/compute_audio_cmvn.py
+++ b/funasr/bin/compute_audio_cmvn.py
@@ -18,7 +18,7 @@ def main_hydra(kwargs: DictConfig):
 
				 
			
 
				     assert "model" in kwargs
			
 
				     if "model_conf" not in kwargs:
			
 
				-        logging.info("download models from model hub: {}".format(kwargs.get("model_hub", "ms")))
			
 
				+        logging.info("download models from model hub: {}".format(kwargs.get("hub", "ms")))
			
 
				         kwargs = download_model(is_training=kwargs.get("is_training", True), **kwargs)
			
 
				     
			
 
				 
			
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -35,7 +35,7 @@ def main_hydra(kwargs: DictConfig):
 
				 
			
 
				     assert "model" in kwargs
			
 
				     if "model_conf" not in kwargs:
			
 
				-        logging.info("download models from model hub: {}".format(kwargs.get("model_hub", "ms")))
			
 
				+        logging.info("download models from model hub: {}".format(kwargs.get("hub", "ms")))
			
 
				         kwargs = download_model(is_training=kwargs.get("is_training", True), **kwargs)
			
 
				     
			
 
				 
			
--- a/funasr/download/download_from_hub.py
+++ b/funasr/download/download_from_hub.py
@@ -2,13 +2,20 @@ import os
 
				 import json
			
 
				 from omegaconf import OmegaConf
			
 
				 
			
 
				-from funasr.download.name_maps_from_hub import name_maps_ms, name_maps_hf
			
 
				+from funasr.download.name_maps_from_hub import name_maps_ms, name_maps_hf, name_maps_openai
			
 
				 
			
 
				 
			
 
				 def download_model(**kwargs):
			
 
				-    model_hub = kwargs.get("model_hub", "ms")
			
 
				-    if model_hub == "ms":
			
 
				+    hub = kwargs.get("hub", "ms")
			
 
				+    if hub == "ms":
			
 
				         kwargs = download_from_ms(**kwargs)
			
 
				+    elif hub == "hf":
			
 
				+        pass
			
 
				+    elif hub == "openai":
			
 
				+        model_or_path = kwargs.get("model")
			
 
				+        if model_or_path in name_maps_openai:
			
 
				+            model_or_path = name_maps_openai[model_or_path]
			
 
				+        kwargs["model_path"] = model_or_path
			
 
				     
			
 
				     return kwargs
			
 
				 
			
@@ -18,7 +25,13 @@ def download_from_ms(**kwargs):
 
				         model_or_path = name_maps_ms[model_or_path]
			
 
				     model_revision = kwargs.get("model_revision")
			
 
				     if not os.path.exists(model_or_path):
			
 
				-        model_or_path = get_or_download_model_dir(model_or_path, model_revision, is_training=kwargs.get("is_training"), check_latest=kwargs.get("check_latest", True))
			
 
				+        try:
			
 
				+            model_or_path = get_or_download_model_dir(model_or_path, model_revision,
			
 
				+                                                      is_training=kwargs.get("is_training"),
			
 
				+                                                      check_latest=kwargs.get("check_latest", True))
			
 
				+        except Exception as e:
			
 
				+            print(f"Download: {model_or_path} failed!: {e}")
			
 
				+    
			
 
				     kwargs["model_path"] = model_or_path
			
 
				     
			
 
				     if os.path.exists(os.path.join(model_or_path, "configuration.json")):
			
@@ -50,7 +63,9 @@ def download_from_ms(**kwargs):
 
				             kwargs["frontend_conf"]["cmvn_file"] = os.path.join(model_or_path, "am.mvn")
			
 
				         if os.path.exists(os.path.join(model_or_path, "jieba_usr_dict")):
			
 
				             kwargs["jieba_usr_dict"] = os.path.join(model_or_path, "jieba_usr_dict")
			
 
				-    return OmegaConf.to_container(kwargs, resolve=True)
			
 
				+    if isinstance(kwargs, OmegaConf):
			
 
				+        kwargs = OmegaConf.to_container(kwargs, resolve=True)
			
 
				+    return kwargs
			
 
				 
			
 
				 def add_file_root_path(model_or_path: str, file_path_metas: dict, cfg = {}):
			
 
				     
			
--- a/funasr/download/name_maps_from_hub.py
+++ b/funasr/download/name_maps_from_hub.py
@@ -13,4 +13,19 @@ name_maps_ms = {
 
				 
			
 
				 name_maps_hf = {
			
 
				 
			
 
				+}
			
 
				+
			
 
				+name_maps_openai = {
			
 
				+    "Whisper-tiny.en": "tiny.en",
			
 
				+    "Whisper-tiny": "tiny",
			
 
				+    "Whisper-base.en": "base.en",
			
 
				+    "Whisper-base": "base",
			
 
				+    "Whisper-small.en": "small.en",
			
 
				+    "Whisper-small": "small",
			
 
				+    "Whisper-medium.en": "medium.en",
			
 
				+    "Whisper-medium": "medium",
			
 
				+    "Whisper-large-v1": "large-v1",
			
 
				+    "Whisper-large-v2": "large-v2",
			
 
				+    "Whisper-large-v3": "large-v3",
			
 
				+    "Whisper-large": "large",
			
 
				 }
			
--- a/funasr/models/qwen_audio/model.py
+++ b/funasr/models/qwen_audio/model.py
@@ -14,7 +14,7 @@ from funasr.register import tables
 
				 
			
 
				 
			
 
				 
			
 
				-@tables.register("model_classes", "WhisperWarp")
			
 
				+@tables.register("model_classes", "QwenAudioWarp")
			
 
				 class WhisperWarp(nn.Module):
			
 
				     def __init__(self, whisper_dims: dict, **kwargs):
			
 
				         super().__init__()
			
--- a/funasr/models/whisper/model.py
+++ b/funasr/models/whisper/model.py
@@ -13,16 +13,29 @@ from funasr.utils.load_utils import load_audio_text_image_video, extract_fbank
 
				 from funasr.register import tables
			
 
				 
			
 
				 
			
 
				-
			
 
				-@tables.register("model_classes", "WhisperWarp")
			
 
				+@tables.register("model_classes", "Whisper-tiny.en")
			
 
				+@tables.register("model_classes", "Whisper-tiny")
			
 
				+@tables.register("model_classes", "Whisper-base.en")
			
 
				+@tables.register("model_classes", "Whisper-base")
			
 
				+@tables.register("model_classes", "Whisper-small.en")
			
 
				+@tables.register("model_classes", "Whisper-small")
			
 
				+@tables.register("model_classes", "Whisper-medium.en")
			
 
				+@tables.register("model_classes", "Whisper-medium")
			
 
				+@tables.register("model_classes", "Whisper-large-v1")
			
 
				+@tables.register("model_classes", "Whisper-large-v2")
			
 
				+@tables.register("model_classes", "Whisper-large-v3")
			
 
				+@tables.register("model_classes", "Whisper-WhisperWarp")
			
 
				 class WhisperWarp(nn.Module):
			
 
				-    def __init__(self, whisper_dims: dict, **kwargs):
			
 
				+    def __init__(self, *args, **kwargs):
			
 
				         super().__init__()
			
 
				         hub = kwargs.get("hub", "funasr")
			
 
				         if hub == "openai":
			
 
				-            init_param_path = kwargs.get("init_param_path", "large-v3")
			
 
				-            model = whisper.load_model(init_param_path)
			
 
				+            model_or_path = kwargs.get("model_path", "Whisper-large-v3")
			
 
				+            if model_or_path.startswith("Whisper-"):
			
 
				+                model_or_path = model_or_path.replace("Whisper-", "")
			
 
				+            model = whisper.load_model(model_or_path)
			
 
				         else:
			
 
				+            whisper_dims = kwargs.get("whisper_dims", {})
			
 
				             dims = whisper.model.ModelDimensions(**whisper_dims)
			
 
				             model = whisper.model.Whisper(dims=dims)
			
 
				         
			
--- a/funasr/version.txt
+++ b/funasr/version.txt
@@ -1 +1 @@
 
				-1.0.11
			
 
				+1.0.12
@@ -1 +1 @@
 				-1.0.11
 				+1.0.12