|
@@ -38,9 +38,7 @@ from funasr.text.build_tokenizer import build_tokenizer
|
|
|
from funasr.text.token_id_converter import TokenIDConverter
|
|
from funasr.text.token_id_converter import TokenIDConverter
|
|
|
from funasr.torch_utils.device_funcs import to_device
|
|
from funasr.torch_utils.device_funcs import to_device
|
|
|
from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard
|
|
from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard
|
|
|
-from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode
|
|
|
|
|
-from funasr.utils.whisper_utils.transcribe import transcribe
|
|
|
|
|
-from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
class Speech2Text:
|
|
class Speech2Text:
|
|
|
"""Speech2Text class
|
|
"""Speech2Text class
|
|
@@ -1923,9 +1921,13 @@ class Speech2TextWhisper:
|
|
|
**kwargs,
|
|
**kwargs,
|
|
|
):
|
|
):
|
|
|
|
|
|
|
|
|
|
+ from funasr.tasks.whisper import ASRTask
|
|
|
|
|
+ from funasr.utils.whisper_utils.transcribe import transcribe
|
|
|
|
|
+ from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
|
|
|
|
|
+ from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode
|
|
|
|
|
+
|
|
|
# 1. Build ASR model
|
|
# 1. Build ASR model
|
|
|
scorers = {}
|
|
scorers = {}
|
|
|
- from funasr.tasks.whisper import ASRTask
|
|
|
|
|
asr_model, asr_train_args = ASRTask.build_model_from_file(
|
|
asr_model, asr_train_args = ASRTask.build_model_from_file(
|
|
|
asr_train_config, asr_model_file, cmvn_file, device
|
|
asr_train_config, asr_model_file, cmvn_file, device
|
|
|
)
|
|
)
|