# measure.py (1.2 KB)

  1. import pynini
  2. from fun_text_processing.text_normalization.zh.graph_utils import FUN_NOT_QUOTE, GraphFst, delete_space
  3. from pynini.lib import pynutil
  class Measure(GraphFst):
      """
      Verbalizer grammar for serialized ``measure`` tokens.

      Two serialized shapes are accepted. In both, the field names, braces and
      double quotes are deleted and only the quoted contents are emitted
      (examples assume ``delete_space`` consumes the blanks between fields):

          cardinal { integer: "一" } units: "千克"  ->  一千克
          decimal { integer_part: "五十" }          ->  百分之五十

      The union of the two shapes is handed to ``self.delete_tokens`` and the
      result is optimized and stored in ``self.fst``.
      NOTE(review): ``delete_tokens`` comes from ``GraphFst``; it presumably
      strips the enclosing ``measure { ... }`` wrapper -- confirm in graph_utils.

      Args:
          deterministic: forwarded unchanged to ``GraphFst.__init__``.
          lm: accepted for interface compatibility; not read in this constructor.
      """

      def __init__(self, deterministic: bool = True, lm: bool = False):
          super().__init__(name="measure", kind="verbalize", deterministic=deterministic)

          # Deletes the double quote that terminates a quoted field value.
          closing_quote = pynutil.delete('"')

          # cardinal { integer: "<number>" }  ->  <number>
          # The number may be empty here (closure with no lower bound).
          number = (
              pynutil.delete("cardinal {")
              + delete_space
              + pynutil.delete('integer: "')
              + pynini.closure(FUN_NOT_QUOTE)
              + closing_quote
              + delete_space
              + pynutil.delete("}")
          )

          # units: "<unit>"  ->  <unit>
          unit = pynutil.delete('units: "') + pynini.closure(FUN_NOT_QUOTE) + closing_quote

          cardinal_with_unit = number + delete_space + unit

          # decimal { integer_part: "<number>" }  ->  百分之<number>
          # Unlike the cardinal branch, the blank after "{" is part of the deleted
          # literal, and the number must be at least one symbol long.
          percent = (
              pynutil.delete("decimal { ")
              + pynutil.delete('integer_part: "')
              + pynutil.insert("百分之")
              + pynini.closure(FUN_NOT_QUOTE, 1)
              + closing_quote
              + delete_space
              + pynutil.delete("}")
          )

          combined = cardinal_with_unit | percent
          self.fst = self.delete_tokens(combined).optimize()