measure.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. import pynini
  2. from fun_text_processing.inverse_text_normalization.vi.graph_utils import (
  3. DAMO_CHAR,
  4. DAMO_NOT_QUOTE,
  5. GraphFst,
  6. delete_space,
  7. )
  8. from pynini.lib import pynutil
  9. class MeasureFst(GraphFst):
  10. """
  11. Finite state transducer for verbalizing measure, e.g.
  12. measure { negative: "true" cardinal { integer: "12" } units: "kg" } -> -12 kg
  13. Args:
  14. decimal: DecimalFst
  15. cardinal: CardinalFst
  16. """
  17. def __init__(self, decimal: GraphFst, cardinal: GraphFst):
  18. super().__init__(name="measure", kind="verbalize")
  19. optional_sign = pynini.closure(pynini.cross('negative: "true"', "-"), 0, 1)
  20. unit = (
  21. pynutil.delete("units:")
  22. + delete_space
  23. + pynutil.delete('"')
  24. + pynini.closure(DAMO_CHAR - " ", 1)
  25. + pynutil.delete('"')
  26. + delete_space
  27. )
  28. graph_decimal = (
  29. pynutil.delete("decimal {")
  30. + delete_space
  31. + optional_sign
  32. + delete_space
  33. + decimal.numbers
  34. + delete_space
  35. + pynutil.delete("}")
  36. )
  37. graph_cardinal = (
  38. pynutil.delete("cardinal {")
  39. + delete_space
  40. + optional_sign
  41. + delete_space
  42. + cardinal.numbers
  43. + delete_space
  44. + pynutil.delete("}")
  45. )
  46. fractional = (
  47. pynutil.insert(".")
  48. + pynutil.delete("fractional_part:")
  49. + delete_space
  50. + pynutil.delete('"')
  51. + pynini.closure(DAMO_NOT_QUOTE, 1)
  52. + pynutil.delete('"')
  53. )
  54. optional_fractional = pynini.closure(fractional + delete_space, 0, 1)
  55. graph = (
  56. (graph_cardinal | graph_decimal)
  57. + delete_space
  58. + optional_fractional
  59. + pynutil.insert(" ")
  60. + unit
  61. + delete_space
  62. )
  63. delete_tokens = self.delete_tokens(graph)
  64. self.fst = delete_tokens.optimize()