# load_pretrained_model.py
  1. from typing import Any
  2. from typing import Dict
  3. from typing import Union
  4. from io import BytesIO
  5. import logging
  6. import torch
  7. import torch.nn
  8. import torch.optim
  9. def filter_state_dict(
  10. dst_state: Dict[str, Union[float, torch.Tensor]],
  11. src_state: Dict[str, Union[float, torch.Tensor]],
  12. ):
  13. """Filter name, size mismatch instances between dicts.
  14. Args:
  15. dst_state: reference state dict for filtering
  16. src_state: target state dict for filtering
  17. """
  18. match_state = {}
  19. for key, value in src_state.items():
  20. if key in dst_state and (dst_state[key].size() == src_state[key].size()):
  21. match_state[key] = value
  22. else:
  23. if key not in dst_state:
  24. logging.warning(
  25. f"Filter out {key} from pretrained dict"
  26. + " because of name not found in target dict"
  27. )
  28. else:
  29. logging.warning(
  30. f"Filter out {key} from pretrained dict"
  31. + " because of size mismatch"
  32. + f"({dst_state[key].size()}-{src_state[key].size()})"
  33. )
  34. return match_state
  35. def load_pretrained_model(
  36. init_param: str,
  37. model: torch.nn.Module,
  38. ignore_init_mismatch: bool,
  39. map_location: str = "cpu",
  40. oss_bucket=None,
  41. ):
  42. """Load a model state and set it to the model.
  43. Args:
  44. init_param: <file_path>:<src_key>:<dst_key>:<exclude_Keys>
  45. Examples:
  46. >>> load_pretrained_model("somewhere/model.pb", model)
  47. >>> load_pretrained_model("somewhere/model.pb:decoder:decoder", model)
  48. >>> load_pretrained_model("somewhere/model.pb:decoder:decoder:", model)
  49. >>> load_pretrained_model(
  50. ... "somewhere/model.pb:decoder:decoder:decoder.embed", model
  51. ... )
  52. >>> load_pretrained_model("somewhere/decoder.pb::decoder", model)
  53. """
  54. sps = init_param.split(":", 4)
  55. if len(sps) == 4:
  56. path, src_key, dst_key, excludes = sps
  57. elif len(sps) == 3:
  58. path, src_key, dst_key = sps
  59. excludes = None
  60. elif len(sps) == 2:
  61. path, src_key = sps
  62. dst_key, excludes = None, None
  63. else:
  64. (path,) = sps
  65. src_key, dst_key, excludes = None, None, None
  66. if src_key == "":
  67. src_key = None
  68. if dst_key == "":
  69. dst_key = None
  70. if dst_key is None:
  71. obj = model
  72. else:
  73. def get_attr(obj: Any, key: str):
  74. """Get an nested attribute.
  75. >>> class A(torch.nn.Module):
  76. ... def __init__(self):
  77. ... super().__init__()
  78. ... self.linear = torch.nn.Linear(10, 10)
  79. >>> a = A()
  80. >>> assert A.linear.weight is get_attr(A, 'linear.weight')
  81. """
  82. if key.strip() == "":
  83. return obj
  84. for k in key.split("."):
  85. obj = getattr(obj, k)
  86. return obj
  87. obj = get_attr(model, dst_key)
  88. if oss_bucket is None:
  89. src_state = torch.load(path, map_location=map_location)
  90. else:
  91. buffer = BytesIO(oss_bucket.get_object(path).read())
  92. src_state = torch.load(buffer, map_location=map_location)
  93. if excludes is not None:
  94. for e in excludes.split(","):
  95. src_state = {k: v for k, v in src_state.items() if not k.startswith(e)}
  96. if src_key is not None:
  97. src_state = {
  98. k[len(src_key) + 1 :]: v
  99. for k, v in src_state.items()
  100. if k.startswith(src_key)
  101. }
  102. dst_state = obj.state_dict()
  103. if ignore_init_mismatch:
  104. src_state = filter_state_dict(dst_state, src_state)
  105. logging.info("Loaded src_state keys: {}".format(src_state.keys()))
  106. dst_state.update(src_state)
  107. obj.load_state_dict(dst_state)