| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- """Python implementation of ASCII85/ASCIIHex decoder (Adobe version).
- This code is in the public domain.
- """
- import re
- import struct
- # ascii85decode(data)
def ascii85decode(data: bytes) -> bytes:
    """Decode ASCII85 data (Adobe variant).

    In ASCII85 encoding, every four bytes are encoded as five ASCII
    characters taken from the 85 characters ``!`` .. ``u`` (this works
    because 256**4 < 85**5). Adobe's implementation differs slightly from
    the original btoa encoding in how the final, partial group is handled.

    Decoding rules implemented here:

    * ``z`` is shorthand for four zero bytes and is only accepted between
      groups.
    * ``~`` ends the data. A pending partial group of ``n`` characters is
      padded with ``u`` up to five characters and contributes ``n - 1``
      bytes. Everything after ``~`` is ignored.
    * Any other byte (white space, for instance) is skipped.
    * If the input ends without ``~``, a pending partial group is dropped.

    Args:
        data: The ASCII85-encoded bytes.

    Returns:
        The decoded bytes.

    Raises:
        AssertionError: If ``z`` appears in the middle of a group.
        struct.error: If a group decodes to a value above 2**32 - 1.
    """
    group_len = 0  # number of characters collected for the current group
    value = 0  # base-85 value accumulated for the current group
    # bytearray gives amortised O(1) appends; ``bytes +=`` would copy the
    # whole output on every group.
    out = bytearray()
    for c in data:
        if 0x21 <= c <= 0x75:  # b"!" .. b"u"
            group_len += 1
            value = value * 85 + (c - 33)
            if group_len == 5:
                out += struct.pack(">L", value)
                group_len = value = 0
        elif c == 0x7A:  # b"z"
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``.
            if group_len != 0:
                raise AssertionError(str(group_len))
            out += b"\0\0\0\0"
        elif c == 0x7E:  # b"~"
            if group_len:
                # Pad the partial group with the highest digit ("u" == 84)
                # and keep only the bytes that carry real data.
                for _ in range(5 - group_len):
                    value = value * 85 + 84
                out += struct.pack(">L", value)[: group_len - 1]
            break
    return bytes(out)
- # asciihexdecode(data)
# One pair of ASCII hexadecimal digits (either case); the pair is captured.
hex_re = re.compile(rb"([a-f\d]{2})", flags=re.I)
# A lone hexadecimal digit left over after a run of complete pairs and/or
# white space, optionally followed by white space or ">" up to the end of
# the data; the lone digit is captured.
trail_re = re.compile(
    rb"^(?:[a-f\d]{2}|\s)*([a-f\d])[\s>]*$",
    flags=re.I,
)
def asciihexdecode(data: bytes) -> bytes:
    """ASCIIHexDecode filter: PDFReference v1.4 section 3.3.1

    For each pair of ASCII hexadecimal digits (0-9 and A-F or a-f), the
    ASCIIHexDecode filter produces one byte of binary data. All white-space
    characters are ignored, including white space that falls between the
    two digits of a pair. A right angle bracket character (>) indicates
    EOD; nothing after the first one is decoded. If the filter encounters
    the EOD marker (or the end of the input) after reading an odd number of
    hexadecimal digits, it behaves as if a 0 followed the last digit.

    The reference says any other character is an error; this decoder is
    deliberately lenient and skips such characters instead of raising.

    Args:
        data: The hex-encoded bytes.

    Returns:
        The decoded bytes.
    """
    # ">" is the EOD marker, so only the part before the first one counts.
    payload, _, _ = data.partition(b">")
    hex_digits = b"0123456789abcdefABCDEF"
    # Delete every byte that is not a hex digit in a single pass, so that
    # white space may appear anywhere, even inside a pair.
    non_hex = bytes(b for b in range(256) if b not in hex_digits)
    digits = payload.translate(None, non_hex)
    if len(digits) % 2:
        # Odd number of digits: act as if a 0 followed the last one.
        digits += b"0"
    # ``digits`` now holds ASCII hex characters only, so decoding cannot fail.
    return bytes.fromhex(digits.decode("ascii"))
|