# text_preprocessor.py (444 B)

  1. def split_to_mini_sentence(words: list, word_limit: int = 20):
  2. assert word_limit > 1
  3. if len(words) <= word_limit:
  4. return [words]
  5. sentences = []
  6. length = len(words)
  7. sentence_len = length // word_limit
  8. for i in range(sentence_len):
  9. sentences.append(words[i * word_limit:(i + 1) * word_limit])
  10. if length % word_limit > 0:
  11. sentences.append(words[sentence_len * word_limit:])
  12. return sentences