cardinal.py 1.1 KB

1234567891011121314151617181920212223242526272829
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import DAMO_NOT_QUOTE, GraphFst
  3. from pynini.lib import pynutil
  class CardinalFst(GraphFst):
      """
      Finite state transducer for verbalizing cardinals: the serialized
      field markup is removed and only the spoken form is kept,
          e.g. cardinal { integer: "zwei" } -> "zwei"

      A leading `negative: "true" ` field, when present, is rewritten to
      the word "minus " in front of the number.

      Args:
          deterministic: if True will provide a single transduction option,
              for False multiple transductions are generated (used for
              audio-based normalization)
      """

      def __init__(self, deterministic: bool = True):
          super().__init__(name="cardinal", kind="verbalize", deterministic=deterministic)

          # One or more DAMO_NOT_QUOTE symbols form the field's payload
          # (presumably "any character except a double quote" -- confirm
          # against graph_utils).
          quoted_payload = pynini.closure(DAMO_NOT_QUOTE, 1)

          # ` "<payload>"` -> `<payload>`: drop the leading space and both quotes.
          self.integer = pynutil.delete(' "') + quoted_payload + pynutil.delete('"')

          # Same as above, but also consumes the `integer:` field name.
          self.numbers = pynutil.delete("integer:") + self.integer

          # The sign field may be absent, hence the 0..1 closure.
          sign = pynini.cross('negative: "true" ', "minus ")
          self.optional_sign = pynini.closure(sign, 0, 1)

          # Full verbalizer: optional sign followed by the number, with the
          # surrounding token wrapper handled by GraphFst.delete_tokens.
          verbalized = self.optional_sign + self.numbers
          self.fst = self.delete_tokens(verbalized).optimize()