# npy_scp.py
  1. import collections.abc
  2. from pathlib import Path
  3. from typing import Union
  4. import numpy as np
  5. from funasr.fileio.read_text import read_2column_text
  6. class NpyScpWriter:
  7. """Writer class for a scp file of numpy file.
  8. Examples:
  9. key1 /some/path/a.npy
  10. key2 /some/path/b.npy
  11. key3 /some/path/c.npy
  12. key4 /some/path/d.npy
  13. ...
  14. >>> writer = NpyScpWriter('./data/', './data/feat.scp')
  15. >>> writer['aa'] = numpy_array
  16. >>> writer['bb'] = numpy_array
  17. """
  18. def __init__(self, outdir: Union[Path, str], scpfile: Union[Path, str]):
  19. self.dir = Path(outdir)
  20. self.dir.mkdir(parents=True, exist_ok=True)
  21. scpfile = Path(scpfile)
  22. scpfile.parent.mkdir(parents=True, exist_ok=True)
  23. self.fscp = scpfile.open("w", encoding="utf-8")
  24. self.data = {}
  25. def get_path(self, key):
  26. return self.data[key]
  27. def __setitem__(self, key, value):
  28. assert isinstance(value, np.ndarray), type(value)
  29. p = self.dir / f"{key}.npy"
  30. p.parent.mkdir(parents=True, exist_ok=True)
  31. np.save(str(p), value)
  32. self.fscp.write(f"{key} {p}\n")
  33. # Store the file path
  34. self.data[key] = str(p)
  35. def __enter__(self):
  36. return self
  37. def __exit__(self, exc_type, exc_val, exc_tb):
  38. self.close()
  39. def close(self):
  40. self.fscp.close()
  41. class NpyScpReader(collections.abc.Mapping):
  42. """Reader class for a scp file of numpy file.
  43. Examples:
  44. key1 /some/path/a.npy
  45. key2 /some/path/b.npy
  46. key3 /some/path/c.npy
  47. key4 /some/path/d.npy
  48. ...
  49. >>> reader = NpyScpReader('npy.scp')
  50. >>> array = reader['key1']
  51. """
  52. def __init__(self, fname: Union[Path, str]):
  53. self.fname = Path(fname)
  54. self.data = read_2column_text(fname)
  55. def get_path(self, key):
  56. return self.data[key]
  57. def __getitem__(self, key) -> np.ndarray:
  58. p = self.data[key]
  59. return np.load(p)
  60. def __contains__(self, item):
  61. return item
  62. def __len__(self):
  63. return len(self.data)
  64. def __iter__(self):
  65. return iter(self.data)
  66. def keys(self):
  67. return self.data.keys()