measure.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import DAMO_NOT_QUOTE, GraphFst, delete_space, insert_space
  3. from pynini.lib import pynutil
  4. class MeasureFst(GraphFst):
  5. """
  6. Finite state transducer for verbalizing measure, e.g.
  7. measure { negative: "true" cardinal { integer: "twelve" } units: "kilograms" } -> minus twelve kilograms
  8. measure { decimal { integer_part: "twelve" fractional_part: "five" } units: "kilograms" } -> twelve point five kilograms
  9. tokens { measure { units: "covid" decimal { integer_part: "nineteen" fractional_part: "five" } } } -> covid nineteen point five
  10. Args:
  11. decimal: DecimalFst
  12. cardinal: CardinalFst
  13. fraction: FractionFst
  14. deterministic: if True will provide a single transduction option,
  15. for False multiple transduction are generated (used for audio-based normalization)
  16. """
  17. def __init__(self, decimal: GraphFst, cardinal: GraphFst, fraction: GraphFst, deterministic: bool = True):
  18. super().__init__(name="measure", kind="verbalize", deterministic=deterministic)
  19. optional_sign = cardinal.optional_sign
  20. unit = (
  21. pynutil.delete("units: \"")
  22. + pynini.difference(pynini.closure(DAMO_NOT_QUOTE, 1), pynini.union("address", "math"))
  23. + pynutil.delete("\"")
  24. + delete_space
  25. )
  26. if not deterministic:
  27. unit |= pynini.compose(unit, pynini.cross(pynini.union("inch", "inches"), "\""))
  28. graph_decimal = (
  29. pynutil.delete("decimal {")
  30. + delete_space
  31. + optional_sign
  32. + delete_space
  33. + decimal.numbers
  34. + delete_space
  35. + pynutil.delete("}")
  36. )
  37. graph_cardinal = (
  38. pynutil.delete("cardinal {")
  39. + delete_space
  40. + optional_sign
  41. + delete_space
  42. + cardinal.numbers
  43. + delete_space
  44. + pynutil.delete("}")
  45. )
  46. graph_fraction = (
  47. pynutil.delete("fraction {") + delete_space + fraction.graph + delete_space + pynutil.delete("}")
  48. )
  49. graph = (graph_cardinal | graph_decimal | graph_fraction) + delete_space + insert_space + unit
  50. # SH adds "preserve_order: true" by default
  51. preserve_order = pynutil.delete("preserve_order:") + delete_space + pynutil.delete("true") + delete_space
  52. graph |= unit + insert_space + (graph_cardinal | graph_decimal) + delete_space + pynini.closure(preserve_order)
  53. # for only unit
  54. graph |= (
  55. pynutil.delete("cardinal { integer: \"-\"")
  56. + delete_space
  57. + pynutil.delete("}")
  58. + delete_space
  59. + unit
  60. + pynini.closure(preserve_order)
  61. )
  62. address = (
  63. pynutil.delete("units: \"address\" ")
  64. + delete_space
  65. + graph_cardinal
  66. + delete_space
  67. + pynini.closure(preserve_order)
  68. )
  69. math = (
  70. pynutil.delete("units: \"math\" ")
  71. + delete_space
  72. + graph_cardinal
  73. + delete_space
  74. + pynini.closure(preserve_order)
  75. )
  76. graph |= address | math
  77. delete_tokens = self.delete_tokens(graph)
  78. self.fst = delete_tokens.optimize()