date.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. import pynini
  2. from fun_text_processing.text_normalization.zh.graph_utils import FUN_NOT_QUOTE, GraphFst, delete_space
  3. from fun_text_processing.text_normalization.zh.utils import UNIT_1e01, get_abs_path
  4. from pynini.lib import pynutil
  5. class Date(GraphFst):
  6. '''
  7. tokens { date { year: "2002" month: "01" day: "28"} } -> 二零零二年一月二十八日
  8. tokens { date { year: "2002" } } -> 二零零八年
  9. '''
  10. def __init__(self, deterministic: bool = True, lm: bool = False):
  11. super().__init__(name="date", kind="verbalize", deterministic=deterministic)
  12. date_type0 = pynutil.delete('year: \"') + pynini.closure(FUN_NOT_QUOTE) + pynutil.delete('\"')
  13. graph_digit = pynini.string_file(get_abs_path("data/number/digit.tsv"))
  14. graph_teen = pynini.string_file(get_abs_path("data/number/digit_teen.tsv"))
  15. graph_zero = pynini.string_file(get_abs_path("data/number/zero.tsv"))
  16. graph_no_zero = pynini.cross("0", "")
  17. graph_year = pynini.closure(graph_digit | graph_zero, 2, 4)
  18. graph_digit_no_zero = graph_digit | graph_no_zero
  19. graph_2_digit_date = (graph_teen + pynutil.insert(UNIT_1e01) + graph_digit_no_zero) | (
  20. graph_no_zero + graph_digit
  21. )
  22. date_type1 = (
  23. pynutil.delete("year: \"")
  24. + graph_year
  25. + pynutil.insert("年")
  26. + pynutil.delete("\"")
  27. + delete_space
  28. + pynutil.delete("month: \"")
  29. + graph_2_digit_date
  30. + pynutil.insert("月")
  31. + pynutil.delete("\"")
  32. + delete_space
  33. + pynutil.delete("day: \"")
  34. + graph_2_digit_date
  35. + pynutil.insert("日")
  36. + pynutil.delete("\"")
  37. )
  38. date_type2 = (
  39. pynutil.delete("year: \"")
  40. + graph_year
  41. + pynutil.insert("年")
  42. + pynutil.delete("\"")
  43. + delete_space
  44. + pynutil.delete("month: \"")
  45. + graph_2_digit_date
  46. + pynutil.insert("月")
  47. + pynutil.delete("\"")
  48. )
  49. graph = date_type0 | date_type1 | date_type2
  50. self.fst = self.delete_tokens(graph).optimize()