date.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. import pynini
  2. from fun_text_processing.inverse_text_normalization.ko.graph_utils import (
  3. DAMO_NOT_QUOTE,
  4. GraphFst,
  5. delete_extra_space,
  6. delete_space,
  7. )
  8. from pynini.lib import pynutil
  9. class DateFst(GraphFst):
  10. """
  11. Finite state transducer for verbalizing date, e.g.
  12. date { month: "january" day: "5" year: "2012" preserve_order: true } -> february 5 2012
  13. date { day: "5" month: "january" year: "2012" preserve_order: true } -> 5 february 2012
  14. """
  15. def __init__(self):
  16. super().__init__(name="date", kind="verbalize")
  17. month = (
  18. pynutil.delete("month:")
  19. + delete_space
  20. + pynutil.delete("\"")
  21. + pynini.closure(DAMO_NOT_QUOTE, 1)
  22. + pynutil.delete("\"")
  23. + pynutil.insert(" ")
  24. )
  25. day = (
  26. pynutil.delete("day:")
  27. + delete_space
  28. + pynutil.delete("\"")
  29. + pynini.closure(DAMO_NOT_QUOTE, 1)
  30. + pynutil.delete("\"")
  31. + pynutil.insert(" ")
  32. )
  33. year = (
  34. pynutil.delete("year:")
  35. + delete_space
  36. + pynutil.delete("\"")
  37. + pynini.closure(DAMO_NOT_QUOTE, 1)
  38. + pynutil.delete("\"")
  39. + pynutil.insert(" ")
  40. )
  41. # month (day) year
  42. graph_mdy = (
  43. month + pynini.closure(delete_extra_space + day, 0, 1) + pynini.closure(delete_extra_space + year, 0, 1)
  44. )
  45. # (day) month year
  46. graph_dmy = (
  47. pynini.closure(day + delete_extra_space, 0, 1) + month + pynini.closure(delete_extra_space + year, 0, 1)
  48. )
  49. optional_preserve_order = pynini.closure(
  50. pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
  51. | pynutil.delete("field_order:") + delete_space + pynutil.delete("\"") + DAMO_NOT_QUOTE + pynutil.delete("\"") + delete_space
  52. )
  53. # year month day
  54. graph_ymd = (
  55. year + month + day
  56. )
  57. # month day
  58. graph_md = (
  59. month + day
  60. )
  61. # year month
  62. graph_ym = (
  63. year + month
  64. )
  65. # add some grammars
  66. final_graph = (graph_mdy | year | graph_dmy | graph_ymd | graph_md | graph_ym | month | day) + delete_space + optional_preserve_order
  67. delete_tokens = self.delete_tokens(final_graph)
  68. self.fst = delete_tokens.optimize()