# decimal.py (2.2 KB)
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import DAMO_NOT_QUOTE, GraphFst, delete_space
  3. from pynini.lib import pynutil
  4. class DecimalFst(GraphFst):
  5. """
  6. Finite state transducer for verbalizing decimal,
  7. e.g. decimal { negative: "true" integer_part: "1" morphosyntactic_features: "," fractional_part: "26" } -> -1,26
  8. e.g. decimal { negative: "true" integer_part: "1" morphosyntactic_features: "." fractional_part: "26" } -> -1.26
  9. e.g. decimal { negative: "false" integer_part: "1" morphosyntactic_features: "," fractional_part: "26" quantity: "millón" } -> 1,26 millón
  10. e.g. decimal { negative: "false" integer_part: "2" quantity: "millones" } -> 2 millones
  11. """
  12. def __init__(self):
  13. super().__init__(name="decimal", kind="verbalize")
  14. optionl_sign = pynini.closure(pynini.cross("negative: \"true\"", "-") + delete_space, 0, 1)
  15. integer = (
  16. pynutil.delete("integer_part:")
  17. + delete_space
  18. + pynutil.delete("\"")
  19. + pynini.closure(DAMO_NOT_QUOTE, 1)
  20. + pynutil.delete("\"")
  21. )
  22. optional_integer = pynini.closure(integer + delete_space, 0, 1)
  23. decimal_point = pynini.cross("morphosyntactic_features: \",\"", ",")
  24. decimal_point |= pynini.cross("morphosyntactic_features: \".\"", ".")
  25. fractional = (
  26. decimal_point
  27. + delete_space
  28. + pynutil.delete("fractional_part:")
  29. + delete_space
  30. + pynutil.delete("\"")
  31. + pynini.closure(DAMO_NOT_QUOTE, 1)
  32. + pynutil.delete("\"")
  33. )
  34. optional_fractional = pynini.closure(fractional + delete_space, 0, 1)
  35. quantity = (
  36. pynutil.delete("quantity:")
  37. + delete_space
  38. + pynutil.delete("\"")
  39. + pynini.closure(DAMO_NOT_QUOTE, 1)
  40. + pynutil.delete("\"")
  41. )
  42. optional_quantity = pynini.closure(pynutil.insert(" ") + quantity + delete_space, 0, 1)
  43. graph = optional_integer + optional_fractional + optional_quantity
  44. self.numbers = graph
  45. graph = optionl_sign + graph
  46. delete_tokens = self.delete_tokens(graph)
  47. self.fst = delete_tokens.optimize()