fraction.py 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import DAMO_NOT_QUOTE, DAMO_SIGMA, GraphFst, insert_space
  3. from fun_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst
  4. from pynini.examples import plurals
  5. from pynini.lib import pynutil
  6. class FractionFst(GraphFst):
  7. """
  8. Finite state transducer for verbalizing fraction
  9. e.g. tokens { fraction { integer: "twenty three" numerator: "four" denominator: "five" } } ->
  10. twenty three and four fifth
  11. Args:
  12. deterministic: if True will provide a single transduction option,
  13. for False multiple transduction are generated (used for audio-based normalization)
  14. """
  15. def __init__(self, deterministic: bool = True, lm: bool = False):
  16. super().__init__(name="fraction", kind="verbalize", deterministic=deterministic)
  17. suffix = OrdinalFst().suffix
  18. integer = pynutil.delete("integer_part: \"") + pynini.closure(DAMO_NOT_QUOTE) + pynutil.delete("\" ")
  19. denominator_one = pynini.cross("denominator: \"one\"", "over one")
  20. denominator_half = pynini.cross("denominator: \"two\"", "half")
  21. denominator_quarter = pynini.cross("denominator: \"four\"", "quarter")
  22. denominator_rest = (
  23. pynutil.delete("denominator: \"") + pynini.closure(DAMO_NOT_QUOTE) @ suffix + pynutil.delete("\"")
  24. )
  25. denominators = plurals._priority_union(
  26. denominator_one,
  27. plurals._priority_union(
  28. denominator_half,
  29. plurals._priority_union(denominator_quarter, denominator_rest, DAMO_SIGMA),
  30. DAMO_SIGMA,
  31. ),
  32. DAMO_SIGMA,
  33. ).optimize()
  34. if not deterministic:
  35. denominators |= pynutil.delete("denominator: \"") + (pynini.accep("four") @ suffix) + pynutil.delete("\"")
  36. numerator_one = pynutil.delete("numerator: \"") + pynini.accep("one") + pynutil.delete("\" ")
  37. numerator_one = numerator_one + insert_space + denominators
  38. numerator_rest = (
  39. pynutil.delete("numerator: \"")
  40. + (pynini.closure(DAMO_NOT_QUOTE) - pynini.accep("one"))
  41. + pynutil.delete("\" ")
  42. )
  43. numerator_rest = numerator_rest + insert_space + denominators
  44. numerator_rest @= pynini.cdrewrite(
  45. plurals._priority_union(pynini.cross("half", "halves"), pynutil.insert("s"), DAMO_SIGMA),
  46. "",
  47. "[EOS]",
  48. DAMO_SIGMA,
  49. )
  50. graph = numerator_one | numerator_rest
  51. conjunction = pynutil.insert("and ")
  52. if not deterministic and not lm:
  53. conjunction = pynini.closure(conjunction, 0, 1)
  54. integer = pynini.closure(integer + insert_space + conjunction, 0, 1)
  55. graph = integer + graph
  56. graph @= pynini.cdrewrite(
  57. pynini.cross("and one half", "and a half") | pynini.cross("over ones", "over one"), "", "[EOS]", DAMO_SIGMA
  58. )
  59. self.graph = graph
  60. delete_tokens = self.delete_tokens(self.graph)
  61. self.fst = delete_tokens.optimize()