|
|
@@ -619,6 +619,22 @@ def inference_paraformer_vad_punc(
|
|
|
sorted_data = sorted(data_with_index, key=lambda x: x[0][1] - x[0][0])
|
|
|
results_sorted = []
|
|
|
|
|
|
+ if not len(sorted_data):
|
|
|
+ key = keys[0]
|
|
|
+ # no active segments after VAD
|
|
|
+ if writer is not None:
|
|
|
+ # Write empty results
|
|
|
+ ibest_writer["token"][key] = ""
|
|
|
+ ibest_writer["token_int"][key] = ""
|
|
|
+ ibest_writer["vad"][key] = ""
|
|
|
+ ibest_writer["text"][key] = ""
|
|
|
+ ibest_writer["text_with_punc"][key] = ""
|
|
|
+ if use_timestamp:
|
|
|
+ ibest_writer["time_stamp"][key] = ""
|
|
|
+
|
|
|
+ logging.info("decoding, utt: {}, empty speech".format(key))
|
|
|
+ continue
|
|
|
+
|
|
|
batch_size_token_ms = batch_size_token*60
|
|
|
if speech2text.device == "cpu":
|
|
|
batch_size_token_ms = 0
|