ascii85.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. """Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
  2. This code is in the public domain.
  3. """
  4. import re
  5. import struct
  6. # ascii85decode(data)
  7. def ascii85decode(data: bytes) -> bytes:
  8. """In ASCII85 encoding, every four bytes are encoded with five ASCII
  9. letters, using 85 different types of characters (as 256**4 < 85**5).
  10. When the length of the original bytes is not a multiple of 4, a special
  11. rule is used for round up.
  12. The Adobe's ASCII85 implementation is slightly different from
  13. its original in handling the last characters.
  14. """
  15. n = b = 0
  16. out = b""
  17. for i in iter(data):
  18. c = bytes((i,))
  19. if c >= b"!" and c <= b"u":
  20. n += 1
  21. b = b * 85 + (ord(c) - 33)
  22. if n == 5:
  23. out += struct.pack(">L", b)
  24. n = b = 0
  25. elif c == b"z":
  26. assert n == 0, str(n)
  27. out += b"\0\0\0\0"
  28. elif c == b"~":
  29. if n:
  30. for _ in range(5 - n):
  31. b = b * 85 + 84
  32. out += struct.pack(">L", b)[: n - 1]
  33. break
  34. return out
  35. # asciihexdecode(data)
  36. hex_re = re.compile(rb"([a-f\d]{2})", re.IGNORECASE)
  37. trail_re = re.compile(rb"^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$", re.IGNORECASE)
  38. def asciihexdecode(data: bytes) -> bytes:
  39. """ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1
  40. For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
  41. ASCIIHexDecode filter produces one byte of binary data. All white-space
  42. characters are ignored. A right angle bracket character (>) indicates
  43. EOD. Any other characters will cause an error. If the filter encounters
  44. the EOD marker after reading an odd number of hexadecimal digits, it
  45. will behave as if a 0 followed the last digit.
  46. """
  47. def decode(x: bytes) -> bytes:
  48. i = int(x, 16)
  49. return bytes((i,))
  50. out = b""
  51. for x in hex_re.findall(data):
  52. out += decode(x)
  53. m = trail_re.search(data)
  54. if m:
  55. out += decode(m.group(1) + b"0")
  56. return out