verbalize.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. import pynini
  2. from fun_text_processing.text_normalization.zh.graph_utils import GraphFst
  3. from fun_text_processing.text_normalization.zh.verbalizers.cardinal import Cardinal
  4. from fun_text_processing.text_normalization.zh.verbalizers.char import Char
  5. from fun_text_processing.text_normalization.zh.verbalizers.date import Date
  6. from fun_text_processing.text_normalization.zh.verbalizers.fraction import Fraction
  7. from fun_text_processing.text_normalization.zh.verbalizers.math_symbol import MathSymbol
  8. from fun_text_processing.text_normalization.zh.verbalizers.measure import Measure
  9. from fun_text_processing.text_normalization.zh.verbalizers.money import Money
  10. from fun_text_processing.text_normalization.zh.verbalizers.time import Time
  11. from fun_text_processing.text_normalization.zh.verbalizers.whitelist import Whitelist
  12. class VerbalizeFst(GraphFst):
  13. """
  14. Composes other verbalizer grammars.
  15. For deployment, this grammar will be compiled and exported to OpenFst Finate State Archiv (FAR) File.
  16. More details to deployment at NeMo/tools/text_processing_deployment.
  17. Args:
  18. deterministic: if True will provide a single transduction option,
  19. for False multiple options (used for audio-based normalization)
  20. """
  21. def __init__(self, deterministic: bool = True):
  22. super().__init__(name="verbalize", kind="verbalize", deterministic=deterministic)
  23. date = Date(deterministic=deterministic)
  24. cardinal = Cardinal(deterministic=deterministic)
  25. char = Char(deterministic=deterministic)
  26. fraction = Fraction(deterministic=deterministic)
  27. math_symbol = MathSymbol(deterministic=deterministic)
  28. money = Money(deterministic=deterministic)
  29. measure = Measure(deterministic=deterministic)
  30. time = Time(deterministic=deterministic)
  31. whitelist = Whitelist(deterministic=deterministic)
  32. graph = pynini.union(
  33. date.fst,
  34. cardinal.fst,
  35. fraction.fst,
  36. char.fst,
  37. math_symbol.fst,
  38. money.fst,
  39. measure.fst,
  40. time.fst,
  41. whitelist.fst,
  42. )
  43. self.fst = graph.optimize()