# test_rtf.py
  1. import time
  2. import sys
  3. import librosa
  4. backend=sys.argv[1]
  5. model_dir=sys.argv[2]
  6. wav_file=sys.argv[3]
  7. from torch_paraformer import Paraformer
  8. if backend == "onnxruntime":
  9. from rapid_paraformer import Paraformer
  10. model = Paraformer(model_dir, batch_size=1, device_id="-1")
  11. wav_file_f = open(wav_file, 'r')
  12. wav_files = wav_file_f.readlines()
  13. # warm-up
  14. total = 0.0
  15. num = 100
  16. wav_path = wav_files[0].split("\t")[1].strip() if "\t" in wav_files[0] else wav_files[0].split(" ")[1].strip()
  17. for i in range(num):
  18. beg_time = time.time()
  19. result = model(wav_path)
  20. end_time = time.time()
  21. duration = end_time-beg_time
  22. total += duration
  23. print(result)
  24. print("num: {}, time, {}, avg: {}, rtf: {}".format(len(wav_path), duration, total/(i+1), (total/(i+1))/5.53))
  25. # infer time
  26. beg_time = time.time()
  27. for i, wav_path_i in enumerate(wav_files):
  28. wav_path = wav_path_i.split("\t")[1].strip() if "\t" in wav_path_i else wav_path_i.split(" ")[1].strip()
  29. result = model(wav_path)
  30. end_time = time.time()
  31. duration = (end_time-beg_time)*1000
  32. print("total_time_comput_ms: {}".format(int(duration)))
  33. duration_time = 0.0
  34. for i, wav_path_i in enumerate(wav_files):
  35. wav_path = wav_path_i.split("\t")[1].strip() if "\t" in wav_path_i else wav_path_i.split(" ")[1].strip()
  36. waveform, _ = librosa.load(wav_path, sr=16000)
  37. duration_time += len(waveform)/16.0
  38. print("total_time_wav_ms: {}".format(int(duration_time)))
  39. print("total_rtf: {:.5}".format(duration/duration_time))