time.py 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. import pynini
  2. from fun_text_processing.inverse_text_normalization.fr.graph_utils import (
  3. DAMO_DIGIT,
  4. GraphFst,
  5. delete_extra_space,
  6. delete_space,
  7. )
  8. from fun_text_processing.inverse_text_normalization.fr.utils import get_abs_path
  9. from pynini.lib import pynutil
  10. class TimeFst(GraphFst):
  11. """
  12. Finite state transducer for verbalizing time, e.g.
  13. time { hours: "8" minutes: "30" suffix: "du matin"} -> 8 h 30
  14. time { hours: "8" minutes: "30" } -> 8 h 30
  15. time { hours: "8" minutes: "30" suffix: "du soir"} -> 20 h 30
  16. """
  17. def __init__(self):
  18. super().__init__(name="time", kind="verbalize")
  19. hour_to_night = pynini.string_file(get_abs_path("data/time/hour_to_night.tsv"))
  20. day_suffixes = pynutil.delete("suffix: \"am\"")
  21. night_suffixes = pynutil.delete("suffix: \"pm\"")
  22. hour = (
  23. pynutil.delete("hours:")
  24. + delete_space
  25. + pynutil.delete("\"")
  26. + pynini.closure(DAMO_DIGIT, 1, 2)
  27. + pynutil.delete("\"")
  28. )
  29. minute = (
  30. pynutil.delete("minutes:")
  31. + delete_extra_space
  32. + pynutil.delete("\"")
  33. + pynini.closure(DAMO_DIGIT, 1, 2)
  34. + pynutil.delete("\"")
  35. )
  36. graph = hour + delete_extra_space + pynutil.insert("h") + minute.ques + delete_space + day_suffixes.ques
  37. graph |= (
  38. hour @ hour_to_night
  39. + delete_extra_space
  40. + pynutil.insert("h")
  41. + minute.ques
  42. + delete_space
  43. + night_suffixes
  44. )
  45. delete_tokens = self.delete_tokens(graph)
  46. self.fst = delete_tokens.optimize()