# date.py (2.2 KB)
  1. import pynini
  2. from fun_text_processing.inverse_text_normalization.zh.graph_utils import (
  3. DAMO_NOT_QUOTE,
  4. GraphFst,
  5. delete_extra_space,
  6. delete_space,
  7. )
  8. from pynini.lib import pynutil
  9. class DateFst(GraphFst):
  10. """
  11. Finite state transducer for verbalizing date, e.g.
  12. date { month: "january" day: "5" year: "2012" preserve_order: true } -> february 5 2012
  13. date { day: "5" month: "january" year: "2012" preserve_order: true } -> 5 february 2012
  14. """
  15. def __init__(self):
  16. super().__init__(name="date", kind="verbalize")
  17. month = (
  18. pynutil.delete("month:")
  19. + delete_space
  20. + pynutil.delete("\"")
  21. + pynini.closure(DAMO_NOT_QUOTE, 1)
  22. + pynutil.delete("\"")
  23. )
  24. day = (
  25. pynutil.delete("day:")
  26. + delete_space
  27. + pynutil.delete("\"")
  28. + pynini.closure(DAMO_NOT_QUOTE, 1)
  29. + pynutil.delete("\"")
  30. )
  31. year = (
  32. pynutil.delete("year:")
  33. + delete_space
  34. + pynutil.delete("\"")
  35. + pynini.closure(DAMO_NOT_QUOTE, 1)
  36. + delete_space
  37. + pynutil.delete("\"")
  38. )
  39. graph_ymd = (
  40. year + pynini.cross(" ", "年") + month + pynini.cross(" ", "月") + day + pynutil.insert("日")
  41. )
  42. graph_md = (
  43. month + pynini.cross(" ", "月") + day + pynutil.insert("日")
  44. )
  45. graph_ym = (
  46. year + pynini.cross(" ", "年") + month + pynutil.insert("月")
  47. )
  48. graph_year = (
  49. year + pynutil.insert("年")
  50. )
  51. optional_preserve_order = pynini.closure(
  52. pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
  53. | pynutil.delete("field_order:")
  54. + delete_space
  55. + pynutil.delete("\"")
  56. + DAMO_NOT_QUOTE
  57. + pynutil.delete("\"")
  58. + delete_space
  59. )
  60. final_graph = (graph_ymd | graph_ym | graph_md | graph_year) + delete_space + optional_preserve_order
  61. delete_tokens = self.delete_tokens(final_graph)
  62. self.fst = delete_tokens.optimize()