# verbalize.py (3.1 KB)
  1. from fun_text_processing.text_normalization.de.taggers.cardinal import CardinalFst as CardinalTagger
  2. from fun_text_processing.text_normalization.de.verbalizers.cardinal import CardinalFst
  3. from fun_text_processing.text_normalization.de.verbalizers.date import DateFst
  4. from fun_text_processing.text_normalization.de.verbalizers.decimal import DecimalFst
  5. from fun_text_processing.text_normalization.de.verbalizers.electronic import ElectronicFst
  6. from fun_text_processing.text_normalization.de.verbalizers.fraction import FractionFst
  7. from fun_text_processing.text_normalization.de.verbalizers.measure import MeasureFst
  8. from fun_text_processing.text_normalization.de.verbalizers.money import MoneyFst
  9. from fun_text_processing.text_normalization.de.verbalizers.ordinal import OrdinalFst
  10. from fun_text_processing.text_normalization.de.verbalizers.telephone import TelephoneFst
  11. from fun_text_processing.text_normalization.de.verbalizers.time import TimeFst
  12. from fun_text_processing.text_normalization.en.graph_utils import GraphFst
  13. from fun_text_processing.text_normalization.en.verbalizers.whitelist import WhiteListFst
  class VerbalizeFst(GraphFst):
      """
      Union of the individual verbalizer grammars.

      Builds one verbalizer per supported class (cardinal, measure, decimal,
      ordinal, date, electronic, money, fraction, whitelist, telephone, time)
      and stores the union of their FSTs in ``self.fst``.
      For deployment, this grammar will be compiled and exported to an OpenFst
      Finite State Archive (FAR) file.
      More details on deployment at NeMo/tools/text_processing_deployment.

      Args:
          deterministic: if True will provide a single transduction option,
              for False multiple options (used for audio-based normalization)
      """

      def __init__(self, deterministic: bool = True):
          super().__init__(name="verbalize", kind="verbalize", deterministic=deterministic)

          # The time verbalizer is the only one here that takes the cardinal
          # *tagger* rather than a verbalizer.
          tagger_for_cardinals = CardinalTagger(deterministic=deterministic)

          # Verbalizers that are reused as building blocks by later ones.
          cardinal_verbalizer = CardinalFst(deterministic=deterministic)
          ordinal_verbalizer = OrdinalFst(deterministic=deterministic)
          decimal_verbalizer = DecimalFst(deterministic=deterministic)
          fraction_verbalizer = FractionFst(ordinal=ordinal_verbalizer, deterministic=deterministic)
          # NOTE(review): DateFst (and MoneyFst below) are built without
          # forwarding `deterministic` -- confirm this is intentional.
          date_verbalizer = DateFst(ordinal=ordinal_verbalizer)
          measure_verbalizer = MeasureFst(
              cardinal=cardinal_verbalizer,
              decimal=decimal_verbalizer,
              fraction=fraction_verbalizer,
              deterministic=deterministic,
          )

          # Stand-alone verbalizers.
          electronic_verbalizer = ElectronicFst(deterministic=deterministic)
          whitelist_verbalizer = WhiteListFst(deterministic=deterministic)
          money_verbalizer = MoneyFst(decimal=decimal_verbalizer)
          telephone_verbalizer = TelephoneFst(deterministic=deterministic)
          time_verbalizer = TimeFst(cardinal_tagger=tagger_for_cardinals, deterministic=deterministic)

          # Operands of the final union, in the order they are combined.
          branches = (
              cardinal_verbalizer.fst,
              measure_verbalizer.fst,
              decimal_verbalizer.fst,
              ordinal_verbalizer.fst,
              date_verbalizer.fst,
              electronic_verbalizer.fst,
              money_verbalizer.fst,
              fraction_verbalizer.fst,
              whitelist_verbalizer.fst,
              telephone_verbalizer.fst,
              time_verbalizer.fst,
          )

          # Left-to-right fold with `|`, equivalent to one chained `a | b | c ...`.
          combined, *remaining = branches
          for branch in remaining:
              combined = combined | branch

          self.fst = combined