whisper_tokenizer.py 597 B

123456789101112131415161718192021222324
  1. try:
  2. from whisper.tokenizer import get_tokenizer
  3. except:
  4. print("If you want to use hugging, please `pip install -U transformers`")
  5. from funasr.register import tables
  6. @tables.register("tokenizer_classes", "WhisperTokenizer")
  7. def WhisperTokenizer(**kwargs):
  8. language = kwargs.get("language", None)
  9. task = kwargs.get("task", "transcribe")
  10. is_multilingual = kwargs.get("is_multilingual", True)
  11. num_languages = kwargs.get("num_languages", 99)
  12. tokenizer = get_tokenizer(
  13. multilingual=is_multilingual,
  14. num_languages=num_languages,
  15. language=language,
  16. task=task,
  17. )
  18. return tokenizer