time.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. import pynini
  2. from fun_text_processing.text_normalization.zh.graph_utils import FUN_NOT_QUOTE, GraphFst, delete_space
  3. from fun_text_processing.text_normalization.zh.utils import UNIT_1e01, get_abs_path
  4. from pynini.lib import pynutil
  5. class Time(GraphFst):
  6. '''
  7. tokens { time { h: "1" m: "02" s: "36" } } -> 一点零二分三十六秒
  8. tokens { time { suffix "am" hours: "1" minutes: "02" seconds: "36" } } -> 上午一点零二分三十六秒
  9. '''
  10. def __init__(self, deterministic: bool = True, lm: bool = False):
  11. super().__init__(name="time", kind="verbalize", deterministic=deterministic)
  12. graph_digit = pynini.string_file(get_abs_path("data/number/digit.tsv"))
  13. graph_teen = pynini.string_file(get_abs_path("data/number/digit_teen.tsv"))
  14. graph_zero = pynini.string_file(get_abs_path("data/number/zero.tsv"))
  15. graph_no_zero = pynini.cross("0", "")
  16. graph_digit_no_zero = graph_digit | graph_no_zero
  17. graph_2_digit_zero_none = pynini.cross("0", "") + pynini.cross("0", "")
  18. graph_2_digit_zero = pynini.cross("00", "零")
  19. graph_2_digit_time = (graph_teen + pynutil.insert(UNIT_1e01) + graph_digit_no_zero) | (
  20. graph_zero + graph_digit
  21. )
  22. h = graph_2_digit_time | graph_2_digit_zero | graph_digit
  23. m = graph_2_digit_time | graph_2_digit_zero
  24. s = graph_2_digit_time | graph_2_digit_zero
  25. # 6:25
  26. h_m = (
  27. pynutil.delete("hours: \"")
  28. + h
  29. + pynutil.insert("点")
  30. + pynutil.delete("\"")
  31. + delete_space
  32. + pynutil.delete("minutes: \"")
  33. + (graph_2_digit_time)
  34. + pynutil.insert("分")
  35. + pynutil.delete("\"")
  36. )
  37. # 23:00
  38. h_00 = (
  39. pynutil.delete("hours: \"")
  40. + h
  41. + pynutil.insert("点")
  42. + pynutil.delete("\"")
  43. + delete_space
  44. + pynutil.delete("minutes: \"")
  45. + (graph_2_digit_zero_none)
  46. + pynutil.delete("\"")
  47. )
  48. # 9:12:52
  49. h_m_s = (
  50. pynutil.delete("hours: \"")
  51. + h
  52. + pynutil.insert("点")
  53. + pynutil.delete("\"")
  54. + delete_space
  55. + pynutil.delete("minutes: \"")
  56. + m
  57. + pynutil.insert("分")
  58. + pynutil.delete("\"")
  59. + delete_space
  60. + pynutil.delete("seconds: \"")
  61. + s
  62. + pynutil.insert("秒")
  63. + pynutil.delete("\"")
  64. )
  65. graph = h_m | h_m_s | h_00
  66. graph_suffix = (
  67. pynutil.delete("suffix: \"") + pynini.closure(FUN_NOT_QUOTE) + pynutil.delete("\"") + delete_space + graph
  68. )
  69. graph |= graph_suffix
  70. self.fst = self.delete_tokens(graph).optimize()