verbalize.py 3.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. from fun_text_processing.text_normalization.en.graph_utils import GraphFst
  2. from fun_text_processing.text_normalization.en.verbalizers.abbreviation import AbbreviationFst
  3. from fun_text_processing.text_normalization.en.verbalizers.cardinal import CardinalFst
  4. from fun_text_processing.text_normalization.en.verbalizers.date import DateFst
  5. from fun_text_processing.text_normalization.en.verbalizers.decimal import DecimalFst
  6. from fun_text_processing.text_normalization.en.verbalizers.electronic import ElectronicFst
  7. from fun_text_processing.text_normalization.en.verbalizers.fraction import FractionFst
  8. from fun_text_processing.text_normalization.en.verbalizers.measure import MeasureFst
  9. from fun_text_processing.text_normalization.en.verbalizers.money import MoneyFst
  10. from fun_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst
  11. from fun_text_processing.text_normalization.en.verbalizers.roman import RomanFst
  12. from fun_text_processing.text_normalization.en.verbalizers.telephone import TelephoneFst
  13. from fun_text_processing.text_normalization.en.verbalizers.time import TimeFst
  14. from fun_text_processing.text_normalization.en.verbalizers.whitelist import WhiteListFst
  class VerbalizeFst(GraphFst):
      """
      Top-level verbalizer: the union of the individual verbalizer grammars.

      For deployment, this grammar will be compiled and exported to an OpenFst
      Finite State Archive (FAR) file.
      More details on deployment at NeMo/tools/text_processing_deployment.

      Args:
          deterministic: if True will provide a single transduction option,
              for False multiple options (used for audio-based normalization)
      """

      def __init__(self, deterministic: bool = True):
          super().__init__(name="verbalize", kind="verbalize", deterministic=deterministic)

          # Verbalizers that are passed to other verbalizers are kept as
          # objects; the rest are reduced to their FST right away.
          cardinal_verbalizer = CardinalFst(deterministic=deterministic)
          decimal_verbalizer = DecimalFst(cardinal=cardinal_verbalizer, deterministic=deterministic)
          ordinal_verbalizer = OrdinalFst(deterministic=deterministic)
          fraction_verbalizer = FractionFst(deterministic=deterministic)
          telephone = TelephoneFst(deterministic=deterministic).fst
          electronic = ElectronicFst(deterministic=deterministic).fst
          measure_verbalizer = MeasureFst(
              decimal=decimal_verbalizer,
              cardinal=cardinal_verbalizer,
              fraction=fraction_verbalizer,
              deterministic=deterministic,
          )
          time = TimeFst(deterministic=deterministic).fst
          date = DateFst(ordinal=ordinal_verbalizer, deterministic=deterministic).fst
          money = MoneyFst(decimal=decimal_verbalizer, deterministic=deterministic).fst
          whitelist = WhiteListFst(deterministic=deterministic).fst

          # Branches listed in the order in which they are unioned.
          branches = [
              time,
              date,
              money,
              measure_verbalizer.fst,
              ordinal_verbalizer.fst,
              decimal_verbalizer.fst,
              cardinal_verbalizer.fst,
              telephone,
              electronic,
              fraction_verbalizer.fst,
              whitelist,
          ]
          combined = branches[0]
          for branch in branches[1:]:
              combined = combined | branch

          # Roman numerals are always part of the grammar.
          combined |= RomanFst(deterministic=deterministic).fst

          # Abbreviations are only added in non-deterministic mode.
          if not deterministic:
              combined |= AbbreviationFst(deterministic=deterministic).fst

          self.fst = combined