money.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. import pynini
  2. from fun_text_processing.text_normalization.en.graph_utils import DAMO_NOT_QUOTE, GraphFst, delete_preserve_order
  3. from pynini.lib import pynutil
  4. class MoneyFst(GraphFst):
  5. """
  6. Finite state transducer for verbalizing money, e.g.
  7. money { currency_maj: "euro" integer_part: "ein"} -> "ein euro"
  8. money { currency_maj: "euro" integer_part: "eins" fractional_part: "null null eins"} -> "eins komma null null eins euro"
  9. money { integer_part: "ein" currency_maj: "pfund" fractional_part: "vierzig" preserve_order: true} -> "ein pfund vierzig"
  10. money { integer_part: "ein" currency_maj: "pfund" fractional_part: "vierzig" currency_min: "pence" preserve_order: true} -> "ein pfund vierzig pence"
  11. money { fractional_part: "ein" currency_min: "penny" preserve_order: true} -> "ein penny"
  12. money { currency_maj: "pfund" integer_part: "null" fractional_part: "null eins" quantity: "million"} -> "null komma null eins million pfund"
  13. Args:
  14. decimal: GraphFst
  15. deterministic: if True will provide a single transduction option,
  16. for False multiple transduction are generated (used for audio-based normalization)
  17. """
  18. def __init__(self, decimal: GraphFst, deterministic: bool = True):
  19. super().__init__(name="money", kind="verbalize", deterministic=deterministic)
  20. keep_space = pynini.accep(" ")
  21. maj = pynutil.delete("currency_maj: \"") + pynini.closure(DAMO_NOT_QUOTE, 1) + pynutil.delete("\"")
  22. min = pynutil.delete("currency_min: \"") + pynini.closure(DAMO_NOT_QUOTE, 1) + pynutil.delete("\"")
  23. fractional_part = (
  24. pynutil.delete("fractional_part: \"") + pynini.closure(DAMO_NOT_QUOTE, 1) + pynutil.delete("\"")
  25. )
  26. integer_part = pynutil.delete("integer_part: \"") + pynini.closure(DAMO_NOT_QUOTE, 1) + pynutil.delete("\"")
  27. optional_add_and = pynini.closure(pynutil.insert("und "), 0, 1)
  28. # *** currency_maj
  29. graph_integer = integer_part + keep_space + maj
  30. # *** currency_maj + (***) | ((und) *** current_min)
  31. graph_integer_with_minor = (
  32. integer_part
  33. + keep_space
  34. + maj
  35. + keep_space
  36. + (fractional_part | (optional_add_and + fractional_part + keep_space + min))
  37. + delete_preserve_order
  38. )
  39. # *** komma *** currency_maj
  40. graph_decimal = decimal.fst + keep_space + maj
  41. # *** current_min
  42. graph_minor = fractional_part + keep_space + min + delete_preserve_order
  43. graph = graph_integer | graph_integer_with_minor | graph_decimal | graph_minor
  44. delete_tokens = self.delete_tokens(graph)
  45. self.fst = delete_tokens.optimize()