time.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import (
  3. DAMO_CHAR,
  4. DAMO_DIGIT,
  5. GraphFst,
  6. delete_space,
  7. insert_space,
  8. )
  9. from pynini.lib import pynutil
  10. class TimeFst(GraphFst):
  11. """
  12. Finite state transducer for verbalizing time,
  13. e.g. time { hours: "la 1" minutes: "10" } -> la 1:10
  14. e.g. time { hours: "la 1" minutes: "45" } -> la 1:45
  15. """
  16. def __init__(self):
  17. super().__init__(name="time", kind="verbalize")
  18. add_leading_zero_to_double_digit = (DAMO_DIGIT + DAMO_DIGIT) | (pynutil.insert("0") + DAMO_DIGIT)
  19. # hour includes preposition ("la" or "las")
  20. hour = (
  21. pynutil.delete("hours:")
  22. + delete_space
  23. + pynutil.delete("\"")
  24. + pynini.union("la ", "las ")
  25. + pynini.closure(DAMO_DIGIT, 1)
  26. + pynutil.delete("\"")
  27. )
  28. minute = (
  29. pynutil.delete("minutes:")
  30. + delete_space
  31. + pynutil.delete("\"")
  32. + pynini.closure(DAMO_DIGIT, 1)
  33. + pynutil.delete("\"")
  34. )
  35. suffix = (
  36. delete_space
  37. + insert_space
  38. + pynutil.delete("suffix:")
  39. + delete_space
  40. + pynutil.delete("\"")
  41. + pynini.closure(DAMO_CHAR - " ", 1)
  42. + pynutil.delete("\"")
  43. )
  44. optional_suffix = pynini.closure(suffix, 0, 1)
  45. zone = (
  46. delete_space
  47. + insert_space
  48. + pynutil.delete("zone:")
  49. + delete_space
  50. + pynutil.delete("\"")
  51. + pynini.closure(DAMO_CHAR - " ", 1)
  52. + pynutil.delete("\"")
  53. )
  54. optional_zone = pynini.closure(zone, 0, 1)
  55. graph = (
  56. hour
  57. + delete_space
  58. + pynutil.insert(":")
  59. + (minute @ add_leading_zero_to_double_digit)
  60. + optional_suffix
  61. + optional_zone
  62. )
  63. delete_tokens = self.delete_tokens(graph)
  64. self.fst = delete_tokens.optimize()