# electronic.py (1.2 KB)
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import DAMO_NOT_QUOTE, GraphFst, delete_space
  3. from pynini.lib import pynutil
  4. class ElectronicFst(GraphFst):
  5. """
  6. Finite state transducer for verbalizing electronic
  7. e.g. tokens { electronic { username: "cdf1" domain: "abc.edu" } } -> cdf1@abc.edu
  8. """
  9. def __init__(self):
  10. super().__init__(name="electronic", kind="verbalize")
  11. user_name = (
  12. pynutil.delete("username:")
  13. + delete_space
  14. + pynutil.delete("\"")
  15. + pynini.closure(DAMO_NOT_QUOTE, 1)
  16. + pynutil.delete("\"")
  17. )
  18. domain = (
  19. pynutil.delete("domain:")
  20. + delete_space
  21. + pynutil.delete("\"")
  22. + pynini.closure(DAMO_NOT_QUOTE, 1)
  23. + pynutil.delete("\"")
  24. )
  25. protocol = (
  26. pynutil.delete("protocol:")
  27. + delete_space
  28. + pynutil.delete("\"")
  29. + pynini.closure(DAMO_NOT_QUOTE, 1)
  30. + pynutil.delete("\"")
  31. )
  32. graph = user_name + delete_space + pynutil.insert("@") + domain
  33. graph |= protocol
  34. delete_tokens = self.delete_tokens(graph)
  35. self.fst = delete_tokens.optimize()