date.py 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import (
  3. DAMO_NOT_QUOTE,
  4. GraphFst,
  5. delete_extra_space,
  6. delete_space,
  7. insert_space,
  8. )
  9. from pynini.lib import pynutil
  10. class DateFst(GraphFst):
  11. """
  12. Finite state transducer for verbalizing date, e.g.
  13. date { day: "1" month: "enero" preserve_order: true } -> 1 de enero
  14. """
  15. def __init__(self):
  16. super().__init__(name="date", kind="verbalize")
  17. month = (
  18. pynutil.delete("month:")
  19. + delete_space
  20. + pynutil.delete("\"")
  21. + pynini.closure(DAMO_NOT_QUOTE, 1)
  22. + pynutil.delete("\"")
  23. )
  24. day = (
  25. pynutil.delete("day:")
  26. + delete_space
  27. + pynutil.delete("\"")
  28. + pynini.closure(DAMO_NOT_QUOTE, 1)
  29. + pynutil.delete("\"")
  30. )
  31. year = (
  32. pynutil.delete("year:")
  33. + delete_space
  34. + pynutil.delete("\"")
  35. + pynini.closure(DAMO_NOT_QUOTE, 1)
  36. + pynutil.delete("\"")
  37. )
  38. # day month
  39. graph_dmy = (
  40. day
  41. + delete_extra_space
  42. + pynutil.insert("de")
  43. + insert_space
  44. + month
  45. + (delete_extra_space + pynutil.insert("de") + insert_space + year).ques
  46. )
  47. graph_dmy |= (
  48. day
  49. + delete_space
  50. + pynutil.insert("/")
  51. + month
  52. + pynutil.delete(" morphosyntactic_features: \"/\"")
  53. + (delete_space + pynutil.insert("/") + year).ques
  54. )
  55. optional_preserve_order = pynini.closure(
  56. pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
  57. | pynutil.delete("field_order:")
  58. + delete_space
  59. + pynutil.delete("\"")
  60. + DAMO_NOT_QUOTE
  61. + pynutil.delete("\"")
  62. + delete_space
  63. )
  64. final_graph = graph_dmy + delete_space + optional_preserve_order
  65. delete_tokens = self.delete_tokens(final_graph)
  66. self.fst = delete_tokens.optimize()