time.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import (
  3. DAMO_NOT_QUOTE,
  4. DAMO_SIGMA,
  5. GraphFst,
  6. delete_space,
  7. insert_space,
  8. )
  9. from pynini.lib import pynutil
  10. class TimeFst(GraphFst):
  11. """
  12. Finite state transducer for verbalizing time, e.g.
  13. time { hours: "twelve" minutes: "thirty" suffix: "a m" zone: "e s t" } -> twelve thirty a m e s t
  14. time { hours: "twelve" } -> twelve o'clock
  15. Args:
  16. deterministic: if True will provide a single transduction option,
  17. for False multiple transduction are generated (used for audio-based normalization)
  18. """
  19. def __init__(self, deterministic: bool = True):
  20. super().__init__(name="time", kind="verbalize", deterministic=deterministic)
  21. hour = (
  22. pynutil.delete("hours:")
  23. + delete_space
  24. + pynutil.delete("\"")
  25. + pynini.closure(DAMO_NOT_QUOTE, 1)
  26. + pynutil.delete("\"")
  27. )
  28. minute = (
  29. pynutil.delete("minutes:")
  30. + delete_space
  31. + pynutil.delete("\"")
  32. + pynini.closure(DAMO_NOT_QUOTE, 1)
  33. + pynutil.delete("\"")
  34. )
  35. suffix = (
  36. pynutil.delete("suffix:")
  37. + delete_space
  38. + pynutil.delete("\"")
  39. + pynini.closure(DAMO_NOT_QUOTE, 1)
  40. + pynutil.delete("\"")
  41. )
  42. optional_suffix = pynini.closure(delete_space + insert_space + suffix, 0, 1)
  43. zone = (
  44. pynutil.delete("zone:")
  45. + delete_space
  46. + pynutil.delete("\"")
  47. + pynini.closure(DAMO_NOT_QUOTE, 1)
  48. + pynutil.delete("\"")
  49. )
  50. optional_zone = pynini.closure(delete_space + insert_space + zone, 0, 1)
  51. second = (
  52. pynutil.delete("seconds:")
  53. + delete_space
  54. + pynutil.delete("\"")
  55. + pynini.closure(DAMO_NOT_QUOTE, 1)
  56. + pynutil.delete("\"")
  57. )
  58. graph_hms = (
  59. hour
  60. + pynutil.insert(" hours ")
  61. + delete_space
  62. + minute
  63. + pynutil.insert(" minutes and ")
  64. + delete_space
  65. + second
  66. + pynutil.insert(" seconds")
  67. + optional_suffix
  68. + optional_zone
  69. )
  70. graph_hms @= pynini.cdrewrite(
  71. pynutil.delete("o ")
  72. | pynini.cross("one minutes", "one minute")
  73. | pynini.cross("one seconds", "one second")
  74. | pynini.cross("one hours", "one hour"),
  75. pynini.union(" ", "[BOS]"),
  76. "",
  77. DAMO_SIGMA,
  78. )
  79. graph = hour + delete_space + insert_space + minute + optional_suffix + optional_zone
  80. graph |= hour + insert_space + pynutil.insert("o'clock") + optional_zone
  81. graph |= hour + delete_space + insert_space + suffix + optional_zone
  82. graph |= graph_hms
  83. delete_tokens = self.delete_tokens(graph)
  84. self.fst = delete_tokens.optimize()