| 1234567891011121314151617181920212223242526272829303132333435363738 |
import os
from typing import Optional

import pynini
from pynini.lib import pynutil

from fun_text_processing.text_normalization.zh.graph_utils import (
    GraphFst,
    delete_space,
    generator_main,
)
from fun_text_processing.text_normalization.zh.verbalizers.postprocessor import PostProcessor
from fun_text_processing.text_normalization.zh.verbalizers.verbalize import VerbalizeFst
class VerbalizeFinalFst(GraphFst):
    """
    Finite state transducer that verbalizes an entire tokenized sentence.

    Strips the ``tokens { ... }`` wrapper around each token, applies the
    per-token verbalizer (:class:`VerbalizeFst`), and composes the result
    with the post-processor. When ``cache_dir`` is given, the compiled FST
    is cached to (and reloaded from) a FAR file.

    Args:
        deterministic: if True, provide a single transduction option;
            passed through to :class:`VerbalizeFst` and :class:`GraphFst`.
        cache_dir: directory in which to cache the compiled FAR file;
            ``None`` disables caching.
        overwrite_cache: if True, rebuild the FST even when a cached FAR
            file already exists.
    """

    def __init__(
        self,
        deterministic: bool = True,
        cache_dir: Optional[str] = None,
        overwrite_cache: bool = False,
    ):
        super().__init__(name="verbalize_final", kind="verbalize", deterministic=deterministic)

        far_file = None
        # NOTE(review): the literal string "None" is also treated as "no cache
        # dir" — presumably the value can arrive stringified from a CLI/config.
        if cache_dir is not None and cache_dir != "None":
            os.makedirs(cache_dir, exist_ok=True)
            far_file = os.path.join(
                cache_dir, f"zh_tn_{deterministic}_deterministic_verbalizer.far"
            )

        if not overwrite_cache and far_file and os.path.exists(far_file):
            # Reuse the previously compiled verbalizer FST from the FAR cache.
            self.fst = pynini.Far(far_file, mode="r")["verbalize"]
        else:
            token_graph = VerbalizeFst(deterministic=deterministic)
            # Delete the "tokens { ... }" wrapper and verbalize the content.
            token_verbalizer = (
                pynutil.delete("tokens {")
                + delete_space
                + token_graph.fst
                + delete_space
                + pynutil.delete(" }")
            )
            # Apply the token verbalizer to every token in the sentence.
            verbalizer = pynini.closure(delete_space + token_verbalizer + delete_space)

            postprocessor = PostProcessor(
                remove_puncts=False,
                to_upper=False,
                to_lower=False,
                tag_oov=False,
            )
            self.fst = (verbalizer @ postprocessor.fst).optimize()

            if far_file:
                # Persist the compiled FST so future runs can skip compilation.
                generator_main(far_file, {"verbalize": self.fst})
|