|
|
@@ -581,7 +581,22 @@ def inference_modelscope(
|
|
|
speech2text = Speech2TextExport(**speech2text_kwargs)
|
|
|
else:
|
|
|
speech2text = Speech2Text(**speech2text_kwargs)
|
|
|
-
|
|
|
+
|
|
|
def _load_bytes(input):
    """Decode raw 16-bit integer PCM bytes into a normalized float32 array.

    Args:
        input: Bytes-like object containing int16 samples. The bytes are
            interpreted with NumPy's native ``int16`` byte order.
            NOTE(review): presumably little-endian PCM audio -- confirm
            against the caller if big-endian hosts must be supported.
            (The name shadows the builtin ``input`` but is kept so existing
            keyword callers keep working.)

    Returns:
        A 1-D ``np.ndarray`` of dtype ``float32`` with one element per input
        sample, each scaled by ``1 / 2**15`` so values lie in ``[-1.0, 1.0)``.

    Raises:
        ValueError: Propagated from ``np.frombuffer`` when the buffer length
            is not a multiple of the 2-byte sample size.
    """
    samples = np.frombuffer(input, dtype=np.int16)

    # Derive the scale from the integer type itself instead of hard-coding
    # 32768: abs_max is the magnitude of the most negative value, and offset
    # re-centers the range around zero (it is 0 for signed types).
    int_info = np.iinfo(samples.dtype)
    abs_max = 2 ** (int_info.bits - 1)
    offset = int_info.min + abs_max

    # The arithmetic already produces a fresh float32 array, so no further
    # buffer reinterpretation is needed.
    return (samples.astype(np.float32) - offset) / abs_max
|
|
|
+
|
|
|
def _forward(
|
|
|
data_path_and_name_and_type,
|
|
|
raw_inputs: Union[np.ndarray, torch.Tensor] = None,
|
|
|
@@ -592,6 +607,9 @@ def inference_modelscope(
|
|
|
):
|
|
|
|
|
|
# 3. Build data-iterator
|
|
|
+ if data_path_and_name_and_type is not None and data_path_and_name_and_type[2] == "bytes":
|
|
|
+ raw_inputs = _load_bytes(data_path_and_name_and_type[0])
|
|
|
+ raw_inputs = torch.tensor(raw_inputs)
|
|
|
if data_path_and_name_and_type is None and raw_inputs is not None:
|
|
|
if isinstance(raw_inputs, np.ndarray):
|
|
|
raw_inputs = torch.tensor(raw_inputs)
|