# electronic.py

  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import DAMO_NOT_QUOTE, GraphFst, delete_space
  3. from pynini.lib import pynutil
  4. class ElectronicFst(GraphFst):
  5. """
  6. Finite state transducer for verbalizing electronic
  7. e.g. tokens { electronic { username: "cdf1" domain: "abc.edu" } } -> cdf1@abc.edu
  8. e.g. tokens { electronic { protocol: "www.abc.edu" } } -> www.abc.edu
  9. """
  10. def __init__(self):
  11. super().__init__(name="electronic", kind="verbalize")
  12. user_name = (
  13. pynutil.delete("username:")
  14. + delete_space
  15. + pynutil.delete("\"")
  16. + pynini.closure(DAMO_NOT_QUOTE, 1)
  17. + pynutil.delete("\"")
  18. )
  19. domain = (
  20. pynutil.delete("domain:")
  21. + delete_space
  22. + pynutil.delete("\"")
  23. + pynini.closure(DAMO_NOT_QUOTE, 1)
  24. + pynutil.delete("\"")
  25. )
  26. protocol = (
  27. pynutil.delete("protocol:")
  28. + delete_space
  29. + pynutil.delete("\"")
  30. + pynini.closure(DAMO_NOT_QUOTE, 1)
  31. + pynutil.delete("\"")
  32. )
  33. graph = user_name + delete_space + pynutil.insert("@") + domain
  34. graph |= protocol
  35. delete_tokens = self.delete_tokens(graph)
  36. self.fst = delete_tokens.optimize()