measure.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. import pynini
  2. from fun_text_processing.inverse_text_normalization.zh.graph_utils import DAMO_CHAR, GraphFst, delete_space
  3. from pynini.lib import pynutil
  4. class MeasureFst(GraphFst):
  5. """
  6. Finite state transducer for verbalizing measure, e.g.
  7. measure { negative: "true" cardinal { integer: "12" } units: "kg" } -> -12 kg
  8. Args:
  9. decimal: DecimalFst
  10. cardinal: CardinalFst
  11. """
  12. def __init__(self, decimal: GraphFst, cardinal: GraphFst):
  13. super().__init__(name="measure", kind="verbalize")
  14. optional_sign = pynini.closure(pynini.cross("negative: \"true\"", "-"), 0, 1)
  15. unit = (
  16. pynutil.delete("units:")
  17. + delete_space
  18. + pynutil.delete("\"")
  19. + pynini.closure(DAMO_CHAR - " ", 1)
  20. + pynutil.delete("\"")
  21. + delete_space
  22. )
  23. graph_decimal = (
  24. pynutil.delete("decimal {")
  25. + delete_space
  26. + optional_sign
  27. + delete_space
  28. + decimal.numbers
  29. + delete_space
  30. + pynutil.delete("}")
  31. )
  32. graph_cardinal = (
  33. pynutil.delete("cardinal {")
  34. + delete_space
  35. + optional_sign
  36. + delete_space
  37. + cardinal.numbers
  38. + delete_space
  39. + pynutil.delete("}")
  40. )
  41. graph = (graph_cardinal | graph_decimal) + delete_space + unit
  42. delete_tokens = self.delete_tokens(graph)
  43. self.fst = delete_tokens.optimize()