measure.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import (
  3. DAMO_NON_BREAKING_SPACE,
  4. DAMO_SPACE,
  5. GraphFst,
  6. delete_space,
  7. )
  8. from fun_text_processing.text_normalization.ru.alphabet import RU_ALPHA
  9. from pynini.lib import pynutil
  10. class MeasureFst(GraphFst):
  11. """
  12. Finite state transducer for verbalizing measure, e.g.
  13. measure { cardinal { integer: "два килограма" } } -> "два килограма"
  14. Args:
  15. deterministic: if True will provide a single transduction option,
  16. for False multiple transduction are generated (used for audio-based normalization)
  17. """
  18. def __init__(self, deterministic: bool = True):
  19. super().__init__(name="measure", kind="verbalize", deterministic=deterministic)
  20. graph = (
  21. pynutil.delete(" cardinal { integer: \"")
  22. + pynini.closure(RU_ALPHA | DAMO_SPACE | DAMO_NON_BREAKING_SPACE)
  23. + pynutil.delete("\"")
  24. + delete_space
  25. + pynutil.delete("}")
  26. )
  27. delete_tokens = self.delete_tokens(graph)
  28. self.fst = delete_tokens.optimize()