| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- # Copyright (c) Alibaba, Inc. and its affiliates.
- import os
- import shutil
- import ssl
- import nltk
# Make sure an NLTK data directory exists. When nltk.data.find('.') reports
# (via LookupError) that it found none, create the first directory from
# nltk.data.path that can actually be created.
try:
    nltk.data.find('.')
except LookupError:
    for dir_item in nltk.data.path:
        if os.path.isdir(dir_item):
            break
        try:
            # makedirs also creates missing parent directories and tolerates
            # another process creating the directory at the same time.
            os.makedirs(dir_item, exist_ok=True)
        except OSError:
            # Candidate is not creatable (e.g. no write permission); try the
            # next search path instead of aborting the import.
            continue
        break
# If no NLTK data directory can be found at this point, download one package
# (cmudict) through the NLTK downloader.
try:
    nltk.data.find('.')
except Exception:
    # NOTE(review): the assignment below replaces the default HTTPS context
    # factory of the ssl module with the unverified one, i.e. certificate
    # verification is switched off for the whole process, not only for this
    # download. Kept for backward compatibility; confirm it is still wanted.
    try:
        _create_unverified_https_context = ssl._create_unverified_context
    except AttributeError:
        # This ssl module has no unverified-context factory; keep defaults.
        pass
    else:
        ssl._create_default_https_context = _create_unverified_https_context
    nltk.download('cmudict', halt_on_error=False, raise_on_error=True)
# Deploy taggers/averaged_perceptron_tagger from the zip archive shipped in
# ../nltk_packages (relative to this file) when NLTK cannot find the resource.
try:
    nltk.data.find('taggers/averaged_perceptron_tagger')
except Exception:
    # Root of the NLTK data directory in use; raises LookupError if none.
    data_dir = nltk.data.find('.')
    target_dir = os.path.join(data_dir, 'taggers')
    # exist_ok avoids a failure when the directory appears between a check
    # and the creation.
    os.makedirs(target_dir, exist_ok=True)
    src_file = os.path.join(os.path.dirname(__file__), '..', 'nltk_packages',
                            'averaged_perceptron_tagger.zip')
    dst_file = os.path.join(target_dir, 'averaged_perceptron_tagger.zip')
    shutil.copyfile(src_file, dst_file)
    # Public API equivalent of the private shutil._unpack_zipfile helper.
    shutil.unpack_archive(dst_file, target_dir, 'zip')
# Deploy corpora/cmudict from the zip archive shipped in ../nltk_packages
# (relative to this file) when NLTK cannot find the resource.
try:
    nltk.data.find('corpora/cmudict')
except Exception:
    # Root of the NLTK data directory in use; raises LookupError if none.
    data_dir = nltk.data.find('.')
    target_dir = os.path.join(data_dir, 'corpora')
    # exist_ok avoids a failure when the directory appears between a check
    # and the creation.
    os.makedirs(target_dir, exist_ok=True)
    src_file = os.path.join(os.path.dirname(__file__), '..', 'nltk_packages',
                            'cmudict.zip')
    dst_file = os.path.join(target_dir, 'cmudict.zip')
    shutil.copyfile(src_file, dst_file)
    # Public API equivalent of the private shutil._unpack_zipfile helper.
    shutil.unpack_archive(dst_file, target_dir, 'zip')
# If taggers/averaged_perceptron_tagger still cannot be found, fetch it with
# the NLTK downloader.
try:
    nltk.data.find('taggers/averaged_perceptron_tagger')
except Exception:
    # NOTE(review): the assignment below switches off HTTPS certificate
    # verification for the whole process by replacing the default context
    # factory of the ssl module. Kept for backward compatibility; confirm it
    # is still wanted.
    try:
        _create_unverified_https_context = ssl._create_unverified_context
    except AttributeError:
        # This ssl module has no unverified-context factory; keep defaults.
        pass
    else:
        ssl._create_default_https_context = _create_unverified_https_context
    nltk.download('averaged_perceptron_tagger',
                  halt_on_error=False,
                  raise_on_error=True)
# If corpora/cmudict still cannot be found, fetch it with the NLTK downloader.
try:
    nltk.data.find('corpora/cmudict')
except Exception:
    # NOTE(review): the assignment below switches off HTTPS certificate
    # verification for the whole process by replacing the default context
    # factory of the ssl module. Kept for backward compatibility; confirm it
    # is still wanted.
    try:
        _create_unverified_https_context = ssl._create_unverified_context
    except AttributeError:
        # This ssl module has no unverified-context factory; keep defaults.
        pass
    else:
        ssl._create_default_https_context = _create_unverified_https_context
    nltk.download('cmudict', halt_on_error=False, raise_on_error=True)
|