| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- from fun_text_processing.text_normalization.en.graph_utils import GraphFst
- from fun_text_processing.text_normalization.en.verbalizers.abbreviation import AbbreviationFst
- from fun_text_processing.text_normalization.en.verbalizers.cardinal import CardinalFst
- from fun_text_processing.text_normalization.en.verbalizers.date import DateFst
- from fun_text_processing.text_normalization.en.verbalizers.decimal import DecimalFst
- from fun_text_processing.text_normalization.en.verbalizers.electronic import ElectronicFst
- from fun_text_processing.text_normalization.en.verbalizers.fraction import FractionFst
- from fun_text_processing.text_normalization.en.verbalizers.measure import MeasureFst
- from fun_text_processing.text_normalization.en.verbalizers.money import MoneyFst
- from fun_text_processing.text_normalization.en.verbalizers.ordinal import OrdinalFst
- from fun_text_processing.text_normalization.en.verbalizers.roman import RomanFst
- from fun_text_processing.text_normalization.en.verbalizers.telephone import TelephoneFst
- from fun_text_processing.text_normalization.en.verbalizers.time import TimeFst
- from fun_text_processing.text_normalization.en.verbalizers.whitelist import WhiteListFst
class VerbalizeFst(GraphFst):
    """
    Union of the individual verbalizer grammars.

    For deployment, this grammar will be compiled and exported to an
    OpenFst Finite State Archive (FAR) file.
    More details on deployment at NeMo/tools/text_processing_deployment.

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple options (used for audio-based normalization)
    """

    def __init__(self, deterministic: bool = True):
        super().__init__(name="verbalize", kind="verbalize", deterministic=deterministic)

        # Grammars that are reused as building blocks by later grammars keep
        # their wrapper object around; the others only contribute their FST.
        cardinal_fst = CardinalFst(deterministic=deterministic)
        cardinal_branch = cardinal_fst.fst

        decimal_fst = DecimalFst(cardinal=cardinal_fst, deterministic=deterministic)
        decimal_branch = decimal_fst.fst

        ordinal_fst = OrdinalFst(deterministic=deterministic)
        ordinal_branch = ordinal_fst.fst

        fraction_fst = FractionFst(deterministic=deterministic)
        fraction_branch = fraction_fst.fst

        telephone_branch = TelephoneFst(deterministic=deterministic).fst
        electronic_branch = ElectronicFst(deterministic=deterministic).fst

        measure_fst = MeasureFst(
            decimal=decimal_fst,
            cardinal=cardinal_fst,
            fraction=fraction_fst,
            deterministic=deterministic,
        )
        measure_branch = measure_fst.fst

        time_branch = TimeFst(deterministic=deterministic).fst
        date_branch = DateFst(ordinal=ordinal_fst, deterministic=deterministic).fst
        money_branch = MoneyFst(decimal=decimal_fst, deterministic=deterministic).fst
        whitelist_branch = WhiteListFst(deterministic=deterministic).fst

        # Branches listed in the order in which they are unioned.
        branches = [
            time_branch,
            date_branch,
            money_branch,
            measure_branch,
            ordinal_branch,
            decimal_branch,
            cardinal_branch,
            telephone_branch,
            electronic_branch,
            fraction_branch,
            whitelist_branch,
        ]

        # Left-to-right fold with `|`, starting from the first branch.
        verbalizer = branches[0]
        for branch in branches[1:]:
            verbalizer = verbalizer | branch

        # Roman numerals are always part of the union.
        verbalizer |= RomanFst(deterministic=deterministic).fst

        # Abbreviations are only added in non-deterministic mode.
        if not deterministic:
            verbalizer |= AbbreviationFst(deterministic=deterministic).fst

        self.fst = verbalizer
|