| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015 |
- # -*- coding: utf-8 -*-
- import base64
- import re
- import zlib
- from collections import namedtuple
- from . import exceptions
- from .snippets import findall_regex, split_by_regex
# Parsed file header of a single diff: the SCM "Index:" path (when present)
# plus the old/new paths and versions.
header = namedtuple(
    'header',
    'index_path old_path old_version new_path new_version',
)

# One parsed diff: its header, its list of changes and its raw text.
diffobj = namedtuple('diffobj', 'header changes text')
# One changed line: old/new line numbers (None when the line is absent on
# that side), the line content and the index of the hunk it came from.
Change = namedtuple('Change', 'old new line hunk')

# "<path><separator><timestamp/version>" as used by ---/+++/*** header lines.
# NOTE(review): ' +' lets a single space act as the separator, so a path that
# contains a space is cut at that space -- confirm whether two or more
# spaces was intended here.
file_timestamp_str = '(.+?)(?:\t|:| +)(.*)'
# .+? was previously [^:\t\n\r\f\v]+

# general diff regex
diffcmd_header = re.compile('^diff.* (.+) (.+)$')
unified_header_index = re.compile('^Index: (.+)$')
unified_header_old_line = re.compile(r'^--- ' + file_timestamp_str + '$')
unified_header_new_line = re.compile(r'^\+\+\+ ' + file_timestamp_str + '$')
unified_hunk_start = re.compile(r'^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@(.*)$')
unified_change = re.compile('^([-+ ])(.*)$')

context_header_old_line = re.compile(r'^\*\*\* ' + file_timestamp_str + '$')
context_header_new_line = re.compile('^--- ' + file_timestamp_str + '$')
context_hunk_start = re.compile(r'^\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*$')
context_hunk_old = re.compile(r'^\*\*\* (\d+),?(\d*) \*\*\*\*$')
context_hunk_new = re.compile(r'^--- (\d+),?(\d*) ----$')
context_change = re.compile('^([-+ !]) (.*)$')

ed_hunk_start = re.compile(r'^(\d+),?(\d*)([acd])$')
# An ed text block ends with a line holding a single literal period.  The dot
# must be escaped: an unescaped '.' matches ANY one-character line (e.g. '}'),
# which would then be mistaken for the terminator and dropped.
ed_hunk_end = re.compile(r'^\.$')
# much like forward ed, but no 'c' type
rcs_ed_hunk_start = re.compile(r'^([ad])(\d+) ?(\d*)$')

default_hunk_start = re.compile(r'^(\d+),?(\d*)([acd])(\d+),?(\d*)$')
default_hunk_mid = re.compile('^---$')
default_change = re.compile('^([><]) (.*)$')

# Headers

# git has a special index header and no end part
git_diffcmd_header = re.compile('^diff --git a/(.+) b/(.+)$')
# The two object ids are separated by a literal '..'; escaping the dots keeps
# lines such as 'index abcdef' from matching.
git_header_index = re.compile(r'^index ([a-f0-9]+)\.\.([a-f0-9]+) ?(\d*)$')
git_header_old_line = re.compile('^--- (.+)$')
git_header_new_line = re.compile(r'^\+\+\+ (.+)$')
git_header_file_mode = re.compile(r'^(new|deleted) file mode \d{6}$')
git_header_binary_file = re.compile('^Binary files (.+) and (.+) differ')
git_binary_patch_start = re.compile(r'^GIT binary patch$')
git_binary_literal_start = re.compile(r'^literal (\d+)$')
git_binary_delta_start = re.compile(r'^delta (\d+)$')
base85string = re.compile(r'^[0-9A-Za-z!#$%&()*+;<=>?@^_`{|}~-]+$')

bzr_header_index = re.compile('=== (.+)')
bzr_header_old_line = unified_header_old_line
bzr_header_new_line = unified_header_new_line

svn_header_index = unified_header_index
svn_header_timestamp_version = re.compile(r'\((?:working copy|revision (\d+))\)')
svn_header_timestamp = re.compile(r'.*(\(.*\))$')

cvs_header_index = unified_header_index
cvs_header_rcs = re.compile(r'^RCS file: (.+)(?:,\w{1}$|$)')
cvs_header_timestamp = re.compile(r'(.+)\t([\d.]+)')
cvs_header_timestamp_colon = re.compile(r':([\d.]+)\t(.+)')
old_cvs_diffcmd_header = re.compile('^diff.* (.+):(.*) (.+):(.*)$')
def parse_patch(text):
    """Parse ``text`` and lazily yield one ``diffobj`` per diff found in it.

    Args:
        text: The patch, either as one string or as an iterable of line
            strings.

    Yields:
        ``diffobj(header, changes, text)`` for every chunk in which a header
        or a non-empty change list was recognised.
    """
    try:
        raw_lines = text.splitlines()
    except AttributeError:
        # Not a string: use it as an iterable of lines.
        raw_lines = text

    # maybe use this to nuke all of those line endings?
    # lines = [x.splitlines()[0] for x in lines]
    # Keep only the first physical line of every entry; empty entries are
    # passed through because ''.splitlines() would be an empty list.
    lines = []
    for raw in raw_lines:
        lines.append(raw.splitlines()[0] if len(raw) != 0 else raw)

    # Candidate "a new diff starts here" markers, tried in this order; the
    # first one that yields more than one chunk decides the split.
    boundaries = (
        unified_header_index,
        diffcmd_header,
        cvs_header_rcs,
        git_header_index,
        context_header_old_line,
        unified_header_old_line,
    )

    diffs = []
    for boundary in boundaries:
        diffs = split_by_regex(lines, boundary)
        if len(diffs) > 1:
            break

    for chunk in diffs:
        # Build the raw text first: the header parsers consume lines from
        # the chunk they are given.
        chunk_text = '\n'.join(chunk) + '\n'
        parsed_header = parse_header(chunk)
        parsed_changes = parse_diff(chunk)
        if parsed_header or parsed_changes:
            yield diffobj(
                header=parsed_header,
                changes=parsed_changes,
                text=chunk_text,
            )
def parse_header(text):
    """Return the header found in ``text``, or ``None``.

    SCM-specific headers are tried first via ``parse_scm_header``; only when
    that returns ``None`` is the plain diff header parser consulted.
    """
    scm_header = parse_scm_header(text)
    if scm_header is not None:
        return scm_header
    return parse_diff_header(text)
def parse_scm_header(text):
    """Parse a header written by a source-control tool (git, CVS, SVN).

    Args:
        text: A string or a list of lines.

    Returns:
        The ``header`` produced by the first matching SCM parser, or ``None``
        when no SCM marker is present.  When the text also contains a
        ``diff --git`` line, leading ``a/`` / ``b/`` are stripped from the
        old / new path of the parsed result.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    # (marker regex, parser) pairs, tried in order.
    candidates = (
        (git_header_index, parse_git_header),
        (old_cvs_diffcmd_header, parse_cvs_header),
        (cvs_header_rcs, parse_cvs_header),
        (svn_header_index, parse_svn_header),
    )

    for marker, parser in candidates:
        if not len(findall_regex(lines, marker)) > 0:
            continue

        has_git_cmd = len(findall_regex(lines, git_diffcmd_header)) > 0
        parsed = parser(lines)
        if not has_git_cmd or not parsed:
            # Nothing to normalise: hand back whatever the parser produced.
            return parsed

        # git prefixes the two sides with a/ and b/; drop those.
        old_path = parsed.old_path
        if old_path.startswith('a/'):
            old_path = old_path[2:]
        new_path = parsed.new_path
        if new_path.startswith('b/'):
            new_path = new_path[2:]

        return header(
            index_path=parsed.index_path,
            old_path=old_path,
            old_version=parsed.old_version,
            new_path=new_path,
            new_version=parsed.new_version,
        )

    return None
def parse_diff_header(text):
    """Parse a plain (non-SCM) diff header.

    Args:
        text: A string or a list of lines.

    Returns:
        The result of the first parser whose marker regex is found in the
        lines, or ``None`` when no marker is found.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    candidates = (
        (unified_header_new_line, parse_unified_header),
        (context_header_old_line, parse_context_header),
        (diffcmd_header, parse_diffcmd_header),
        # TODO:
        # git_header can handle version-less unified headers, but
        # will trim a/ and b/ in the paths if they exist...
        (git_header_new_line, parse_git_header),
    )

    for marker, parser in candidates:
        found = findall_regex(lines, marker)
        if len(found) > 0:
            return parser(lines)

    # no header?
    return None
def parse_diff(text):
    """Parse the change lines of a diff, detecting its format.

    Args:
        text: A string or a list of lines.

    Returns:
        Whatever the first matching format parser returns, or ``None`` when
        no hunk marker of a known format is found.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    # (hunk marker regex, format parser) pairs, tried in order.
    formats = (
        (unified_hunk_start, parse_unified_diff),
        (context_hunk_start, parse_context_diff),
        (default_hunk_start, parse_default_diff),
        (ed_hunk_start, parse_ed_diff),
        (rcs_ed_hunk_start, parse_rcs_ed_diff),
        (git_binary_patch_start, parse_git_binary_diff),
    )

    for hunk_marker, parser in formats:
        found = findall_regex(lines, hunk_marker)
        if len(found) > 0:
            return parser(lines)

    return None
def parse_git_header(text):
    """Parse a git-style diff header.

    Paths are taken from the ``---`` / ``+++`` lines or from a
    ``Binary files X and Y differ`` line.  If neither yields both paths, the
    paths of the ``diff --git`` command line are used, provided an ``index``
    line was seen as well.

    Args:
        text: A string or a list of lines.

    Returns:
        A ``header`` with ``index_path=None``, or ``None`` when the needed
        information is missing.  Leading ``a/`` / ``b/`` are stripped from
        the old / new path.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    old_version = None
    new_version = None
    old_path = None
    new_path = None
    cmd_old_path = None
    cmd_new_path = None
    for line in lines:
        hm = git_diffcmd_header.match(line)
        if hm:
            cmd_old_path = hm.group(1)
            cmd_new_path = hm.group(2)
            continue

        g = git_header_index.match(line)
        if g:
            old_version = g.group(1)
            new_version = g.group(2)
            continue

        # git always has its own special headers
        o = git_header_old_line.match(line)
        if o:
            old_path = o.group(1)

        n = git_header_new_line.match(line)
        if n:
            new_path = n.group(1)

        binary = git_header_binary_file.match(line)
        if binary:
            old_path = binary.group(1)
            new_path = binary.group(2)

        if old_path and new_path:
            if old_path.startswith('a/'):
                old_path = old_path[2:]
            if new_path.startswith('b/'):
                new_path = new_path[2:]
            return header(
                index_path=None,
                old_path=old_path,
                old_version=old_version,
                new_path=new_path,
                new_version=new_version,
            )

    # if we go through all of the text without finding our normal info,
    # use the cmd if available
    if cmd_old_path and cmd_new_path and old_version and new_version:
        if cmd_old_path.startswith('a/'):
            cmd_old_path = cmd_old_path[2:]

        if cmd_new_path.startswith('b/'):
            cmd_new_path = cmd_new_path[2:]

        # An all-zero object id marks a side that does not exist.  Accept
        # any abbreviation length (not only the 7-character default) so
        # full-length ids are recognised as well.
        old_missing = old_version.strip('0') == ''
        new_missing = new_version.strip('0') == ''

        return header(
            index_path=None,
            # wow, I kind of hate this:
            # assume /dev/null if the versions are zeroed out
            old_path='/dev/null' if old_missing else cmd_old_path,
            old_version=old_version,
            new_path='/dev/null' if new_missing else cmd_new_path,
            new_version=new_version,
        )

    return None
def _svn_path_and_revision(path, version):
    """Return ``(path, revision)`` for one side of an SVN header.

    The revision is taken from ``version`` when that is non-empty; otherwise
    a trailing ``(...)`` group is cut off ``path`` and searched instead.
    The revision is an ``int`` or ``None``.
    """
    if version:
        rev = svn_header_timestamp_version.match(version)
        if rev and rev.group(1):
            version = int(rev.group(1))
    elif path:
        stamp = svn_header_timestamp.match(path)
        if stamp:
            suffix = stamp.group(1)
            path = path[: -len(suffix)]
            rev = svn_header_timestamp_version.match(suffix)
            if rev and rev.group(1):
                version = int(rev.group(1))

    # Anything that did not resolve to a revision number is dropped.
    if not isinstance(version, int):
        version = None
    return path, version


def parse_svn_header(text):
    """Parse an SVN-style header introduced by an ``Index:`` line.

    Lines are consumed from the front of the given list until the
    ``Index:`` line has been removed; the remaining lines are then handed to
    ``parse_diff_header``.

    Args:
        text: A string or a list of lines (a list is modified in place).

    Returns:
        A ``header`` whose versions are integer revisions or ``None``; a
        header built from the index path alone when no diff header follows;
        ``None`` when there is no ``Index:`` line.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    if len(findall_regex(lines, svn_header_index)) == 0:
        return None

    while len(lines) > 0:
        index = svn_header_index.match(lines.pop(0))
        if not index:
            continue

        index_path = index.group(1)
        diff_header = parse_diff_header(lines)
        if not diff_header:
            return header(
                index_path=index_path,
                old_path=index_path,
                old_version=None,
                new_path=index_path,
                new_version=None,
            )

        old_path, old_rev = _svn_path_and_revision(
            diff_header.old_path, diff_header.old_version
        )
        new_path, new_rev = _svn_path_and_revision(
            diff_header.new_path, diff_header.new_version
        )
        return header(
            index_path=index_path,
            old_path=old_path,
            old_version=old_rev,
            new_path=new_path,
            new_version=new_rev,
        )

    return None
def parse_cvs_header(text):
    """Parse a CVS header in either RCS style or the old ``diff`` style.

    Lines are consumed from the front of the given list until the
    ``Index:`` line has been removed.

    Args:
        text: A string or a list of lines (a list is modified in place).

    Returns:
        A ``header``; when no usable diff header follows the ``Index:``
        line, a header built from the index path alone.  ``None`` when
        neither CVS marker or no ``Index:`` line is present.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    headers = findall_regex(lines, cvs_header_rcs)
    headers_old = findall_regex(lines, old_cvs_diffcmd_header)

    if headers:
        # parse rcs style headers
        while len(lines) > 0:
            i = cvs_header_index.match(lines[0])
            del lines[0]
            if not i:
                continue

            diff_header = parse_diff_header(lines)
            if diff_header:
                # Reduce "<timestamp>\t<rev>" / ":<rev>\t<timestamp>" to the
                # bare revision when either form matches.
                over = diff_header.old_version
                if over:
                    oend = cvs_header_timestamp.match(over)
                    oend_c = cvs_header_timestamp_colon.match(over)
                    if oend:
                        over = oend.group(2)
                    elif oend_c:
                        over = oend_c.group(1)

                nver = diff_header.new_version
                if nver:
                    nend = cvs_header_timestamp.match(nver)
                    nend_c = cvs_header_timestamp_colon.match(nver)
                    if nend:
                        nver = nend.group(2)
                    elif nend_c:
                        nver = nend_c.group(1)

                return header(
                    index_path=i.group(1),
                    old_path=diff_header.old_path,
                    old_version=over,
                    new_path=diff_header.new_path,
                    new_version=nver,
                )
            return header(
                index_path=i.group(1),
                old_path=i.group(1),
                old_version=None,
                new_path=i.group(1),
                new_version=None,
            )
    elif headers_old:
        # parse old style headers
        while len(lines) > 0:
            i = cvs_header_index.match(lines[0])
            del lines[0]
            if not i:
                continue

            # The Index: line may have been the last one; in that case there
            # is no diff command line to inspect (previously an IndexError).
            d = old_cvs_diffcmd_header.match(lines[0]) if lines else None
            if not d:
                return header(
                    index_path=i.group(1),
                    old_path=i.group(1),
                    old_version=None,
                    new_path=i.group(1),
                    new_version=None,
                )

            # will get rid of the useless stuff for us
            parse_diff_header(lines)
            over = d.group(2) if d.group(2) else None
            nver = d.group(4) if d.group(4) else None
            return header(
                index_path=i.group(1),
                old_path=d.group(1),
                old_version=over,
                new_path=d.group(3),
                new_version=nver,
            )

    return None
def parse_diffcmd_header(text):
    """Parse a ``diff ... OLD NEW`` command line into a header.

    Lines are consumed from the front of the given list up to and including
    the first matching command line.

    Args:
        text: A string or a list of lines (a list is modified in place).

    Returns:
        A ``header`` holding only the two paths, or ``None`` when no command
        line is present.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    if len(findall_regex(lines, diffcmd_header)) == 0:
        return None

    while len(lines) > 0:
        cmd = diffcmd_header.match(lines.pop(0))
        if not cmd:
            continue
        return header(
            index_path=None,
            old_path=cmd.group(1),
            old_version=None,
            new_path=cmd.group(2),
            new_version=None,
        )

    return None
def parse_unified_header(text):
    """Parse a unified-diff ``---`` / ``+++`` header pair.

    Lines are consumed from the front of the given list while searching for
    a ``---`` line that is immediately followed by a ``+++`` line.

    Args:
        text: A string or a list of lines (a list is modified in place).

    Returns:
        A ``header`` with ``index_path=None`` (empty versions become
        ``None``), or ``None`` when no such pair is found.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    if len(findall_regex(lines, unified_header_new_line)) == 0:
        return None

    # At least two lines are needed to hold an old/new pair.
    while len(lines) > 1:
        old_match = unified_header_old_line.match(lines.pop(0))
        if not old_match:
            continue

        # The line right after a '---' line is consumed even when it turns
        # out not to be a '+++' line.
        new_match = unified_header_new_line.match(lines.pop(0))
        if not new_match:
            continue

        old_version = old_match.group(2)
        new_version = new_match.group(2)
        return header(
            index_path=None,
            old_path=old_match.group(1),
            old_version=old_version if len(old_version) != 0 else None,
            new_path=new_match.group(1),
            new_version=new_version if len(new_version) != 0 else None,
        )

    return None
def parse_context_header(text):
    """Parse a context-diff ``***`` / ``---`` header pair.

    Lines are consumed from the front of the given list while searching for
    a ``***`` line that is immediately followed by a ``---`` line.

    Args:
        text: A string or a list of lines (a list is modified in place).

    Returns:
        A ``header`` with ``index_path=None`` (empty versions become
        ``None``), or ``None`` when no such pair is found.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    if len(findall_regex(lines, context_header_old_line)) == 0:
        return None

    # At least two lines are needed to hold an old/new pair.
    while len(lines) > 1:
        old_match = context_header_old_line.match(lines.pop(0))
        if not old_match:
            continue

        # The line right after a '***' line is consumed even when it turns
        # out not to be a '---' line.
        new_match = context_header_new_line.match(lines.pop(0))
        if not new_match:
            continue

        old_version = old_match.group(2)
        new_version = new_match.group(2)
        return header(
            index_path=None,
            old_path=old_match.group(1),
            old_version=old_version if len(old_version) != 0 else None,
            new_path=new_match.group(1),
            new_version=new_version if len(new_version) != 0 else None,
        )

    return None
def parse_default_diff(text):
    """Parse a default ("normal") diff made of ``<`` / ``>`` lines.

    Args:
        text: A string or a list of lines.

    Returns:
        A list of ``Change`` tuples, or ``None`` when no change was found.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    old_start = 0
    new_start = 0
    old_len = 0
    new_len = 0
    removed = 0
    added = 0

    changes = []

    for hunk_n, hunk in enumerate(split_by_regex(lines, default_hunk_start)):
        if not len(hunk):
            continue

        # per-hunk counters
        removed = 0
        added = 0
        while len(hunk) > 0:
            current = hunk.pop(0)
            start = default_hunk_start.match(current)
            change = default_change.match(current)

            if start:
                # "A[,B]<cmd>C[,D]": a missing end number gives length 0.
                old_start = int(start.group(1))
                old_end = start.group(2)
                old_len = int(old_end) - old_start + 1 if len(old_end) > 0 else 0

                new_start = int(start.group(4))
                new_end = start.group(5)
                new_len = int(new_end) - new_start + 1 if len(new_end) > 0 else 0
                continue

            if not change:
                continue

            kind = change.group(1)
            content = change.group(2)
            if kind == '<' and (removed != old_len or removed == 0):
                changes.append(
                    Change(old_start + removed, None, content, hunk_n)
                )
                removed += 1
            elif kind == '>' and (added != new_len or added == 0):
                changes.append(
                    Change(None, new_start + added, content, hunk_n)
                )
                added += 1

    return changes if len(changes) > 0 else None
def parse_unified_diff(text):
    """Parse the hunks of a unified diff.

    Args:
        text: A string or a list of lines.

    Returns:
        A list of ``Change`` tuples (removed, added and context lines), or
        ``None`` when no change was found.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    old_start = 0
    new_start = 0
    removed = 0
    added = 0
    old_len = 0
    new_len = 0

    changes = []

    for hunk_n, hunk in enumerate(split_by_regex(lines, unified_hunk_start)):
        # reset counters
        removed = 0
        added = 0

        # Drop everything up to and including the '@@ ... @@' line.
        while len(hunk) > 0:
            start = unified_hunk_start.match(hunk.pop(0))
            if not start:
                continue
            old_start = int(start.group(1))
            old_len = int(start.group(2)) if len(start.group(2)) > 0 else 0
            new_start = int(start.group(3))
            new_len = int(start.group(4)) if len(start.group(4)) > 0 else 0
            break

        for entry in hunk:
            change = unified_change.match(entry)
            if not change:
                continue

            kind = change.group(1)
            content = change.group(2)
            if kind == '-' and (removed != old_len or removed == 0):
                changes.append(
                    Change(old_start + removed, None, content, hunk_n)
                )
                removed += 1
            elif kind == '+' and (added != new_len or added == 0):
                changes.append(
                    Change(None, new_start + added, content, hunk_n)
                )
                added += 1
            elif kind == ' ':
                # Context lines always advance both counters, but are only
                # recorded while neither side has reached its length.
                if removed != old_len and added != new_len:
                    changes.append(
                        Change(
                            old_start + removed,
                            new_start + added,
                            content,
                            hunk_n,
                        )
                    )
                removed += 1
                added += 1

    return changes if len(changes) > 0 else None
def parse_context_diff(text):
    """Parse the hunks of a context diff.

    Each hunk is split at its ``--- N,M ----`` line into an old part and a
    new part, which are then walked separately or in lock-step.

    A range line carrying a single number (``*** 5 ****`` / ``--- 5 ----``)
    is treated as a one-line range.

    Args:
        text: A string or a list of lines.

    Returns:
        A list of ``Change`` tuples, or ``None`` when no change was found or
        when the old and new parts of a hunk cannot be reconciled.

    Raises:
        exceptions.ParseException: If a hunk does not split into exactly two
            parts, or if a one-sided hunk contains a change of the wrong
            kind.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    old = 0
    new = 0
    # Initialised up front so a hunk without a range line cannot hit an
    # unbound variable further down.
    old_len = 0
    new_len = 0
    j = 0
    k = 0

    changes = list()

    hunks = split_by_regex(lines, context_hunk_start)
    for hunk_n, hunk in enumerate(hunks):
        if not len(hunk):
            continue

        j = 0
        k = 0
        parts = split_by_regex(hunk, context_hunk_new)
        if len(parts) != 2:
            raise exceptions.ParseException('Context diff invalid', hunk_n)

        old_hunk = parts[0]
        new_hunk = parts[1]

        # Consume lines up to and including the range line of each part.
        while len(old_hunk) > 0:
            o = context_hunk_old.match(old_hunk[0])
            del old_hunk[0]

            if not o:
                continue

            old = int(o.group(1))
            # The end number is optional in the regex; without it the range
            # ends where it starts (int('') used to raise ValueError here).
            old_end = int(o.group(2)) if o.group(2) else old
            old_len = old_end + 1 - old
            while len(new_hunk) > 0:
                n = context_hunk_new.match(new_hunk[0])
                del new_hunk[0]

                if not n:
                    continue

                new = int(n.group(1))
                new_end = int(n.group(2)) if n.group(2) else new
                new_len = new_end + 1 - new
                break
            break

        # now have old and new set, can start processing?
        if len(old_hunk) > 0 and len(new_hunk) == 0:
            msg = 'Got unexpected change in removal hunk: '
            # only removes left?
            while len(old_hunk) > 0:
                c = context_change.match(old_hunk[0])
                del old_hunk[0]

                if not c:
                    continue

                kind = c.group(1)
                line = c.group(2)

                if kind == '-' and (j != old_len or j == 0):
                    changes.append(Change(old + j, None, line, hunk_n))
                    j += 1
                elif kind == ' ' and (
                    (j != old_len and k != new_len) or (j == 0 or k == 0)
                ):
                    changes.append(Change(old + j, new + k, line, hunk_n))
                    j += 1
                    k += 1
                elif kind == '+' or kind == '!':
                    raise exceptions.ParseException(msg + kind, hunk_n)

            continue

        if len(old_hunk) == 0 and len(new_hunk) > 0:
            # This branch handles insertions; the message used to say
            # "removal hunk" (copy-paste slip).
            msg = 'Got unexpected change in insertion hunk: '
            # only insertions left?
            while len(new_hunk) > 0:
                c = context_change.match(new_hunk[0])
                del new_hunk[0]

                if not c:
                    continue

                kind = c.group(1)
                line = c.group(2)

                if kind == '+' and (k != new_len or k == 0):
                    changes.append(Change(None, new + k, line, hunk_n))
                    k += 1
                elif kind == ' ' and (
                    (j != old_len and k != new_len) or (j == 0 or k == 0)
                ):
                    changes.append(Change(old + j, new + k, line, hunk_n))
                    j += 1
                    k += 1
                elif kind == '-' or kind == '!':
                    raise exceptions.ParseException(msg + kind, hunk_n)
            continue

        # both
        while len(old_hunk) > 0 and len(new_hunk) > 0:
            oc = context_change.match(old_hunk[0])
            nc = context_change.match(new_hunk[0])
            okind = None
            nkind = None

            if oc:
                okind = oc.group(1)
                oline = oc.group(2)

            if nc:
                nkind = nc.group(1)
                nline = nc.group(2)

            if not (oc or nc):
                del old_hunk[0]
                del new_hunk[0]
            elif okind == ' ' and nkind == ' ' and oline == nline:
                changes.append(Change(old + j, new + k, oline, hunk_n))
                j += 1
                k += 1
                del old_hunk[0]
                del new_hunk[0]
            # NOTE(review): the parentheses below spell out the existing
            # operator precedence ('and' binds tighter than 'or'), so the
            # length guard only applies to '!' lines -- confirm whether it
            # was meant to cover '-' / '+' lines as well.
            elif okind == '-' or (okind == '!' and (j != old_len or j == 0)):
                changes.append(Change(old + j, None, oline, hunk_n))
                j += 1
                del old_hunk[0]
            elif nkind == '+' or (nkind == '!' and (k != new_len or k == 0)):
                changes.append(Change(None, new + k, nline, hunk_n))
                k += 1
                del new_hunk[0]
            else:
                return None

    if len(changes) > 0:
        return changes

    return None
def parse_ed_diff(text):
    """Parse a forward ed script (``a`` / ``c`` / ``d`` commands).

    The hunks are processed in reverse order of appearance.

    Args:
        text: A string or a list of lines.

    Returns:
        A list of ``Change`` tuples (removed lines carry no content), or
        ``None`` when no change was found.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    old = 0
    j = 0
    k = 0

    # Running totals over ALL hunks (never reset): lines removed so far and
    # lines inserted so far.
    r = 0
    i = 0

    changes = []

    hunks = split_by_regex(lines, ed_hunk_start)
    hunks.reverse()
    for hunk_n, hunk in enumerate(hunks):
        if not len(hunk):
            continue

        # per-hunk counters
        j = 0
        k = 0
        while len(hunk) > 0:
            command = ed_hunk_start.match(hunk.pop(0))
            if not command:
                continue

            old = int(command.group(1))
            # A single address means the range ends where it starts.
            old_end = int(command.group(2)) if len(command.group(2)) else old
            hunk_kind = command.group(3)

            if hunk_kind == 'd':
                k = 0
                while old_end >= old:
                    changes.append(Change(old + k, None, None, hunk_n))
                    r += 1
                    k += 1
                    old_end -= 1
                continue

            # 'a' and 'c' commands are followed by text lines.
            while len(hunk) > 0:
                is_end = ed_hunk_end.match(hunk[0])

                if not is_end and hunk_kind == 'c':
                    # The removals are emitted on the first text line only;
                    # afterwards old_end < old and this loop is a no-op
                    # (k is still reset to 0 each time).
                    k = 0
                    while old_end >= old:
                        changes.append(Change(old + k, None, None, hunk_n))
                        r += 1
                        k += 1
                        old_end -= 1

                    # I basically have no idea why this works
                    # for these tests.
                    new_line_no = old - r + i + k + j
                    changes.append(Change(None, new_line_no, hunk[0], hunk_n))
                    i += 1
                    j += 1

                if not is_end and hunk_kind == 'a':
                    new_line_no = old - r + i + 1
                    changes.append(Change(None, new_line_no, hunk[0], hunk_n))
                    i += 1

                del hunk[0]

    return changes if len(changes) > 0 else None
def parse_rcs_ed_diff(text):
    """Parse an RCS-format ed script (``aN M`` / ``dN M`` commands).

    Args:
        text: A string or a list of lines.

    Returns:
        A list of ``Change`` tuples (removed lines carry no content), or
        ``None`` when no change was found.
    """
    # much like forward ed, but no 'c' type
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    old = 0
    j = 0
    size = 0
    # Net number of lines added so far over all hunks; shifts the target
    # line number of later additions.
    total_change_size = 0

    changes = []

    for hunk_n, hunk in enumerate(split_by_regex(lines, rcs_ed_hunk_start)):
        if not len(hunk):
            continue

        j = 0
        while len(hunk) > 0:
            command = rcs_ed_hunk_start.match(hunk.pop(0))
            if not command:
                continue

            hunk_kind = command.group(1)
            old = int(command.group(2))
            # NOTE(review): the regex allows the count to be empty, in which
            # case int('') raises ValueError here -- confirm whether such
            # input can occur.
            size = int(command.group(3))

            if hunk_kind == 'a':
                old += total_change_size + 1
                total_change_size += size
                # Take up to `size` following lines as the added text.
                while size > 0 and len(hunk) > 0:
                    changes.append(Change(None, old + j, hunk.pop(0), hunk_n))
                    j += 1
                    size -= 1
            elif hunk_kind == 'd':
                total_change_size -= size
                while size > 0:
                    changes.append(Change(old + j, None, None, hunk_n))
                    j += 1
                    size -= 1

    return changes if len(changes) > 0 else None
def parse_git_binary_diff(text):
    """Decode the ``literal`` sections of a ``GIT binary patch``.

    Base85 payload lines are collected after a ``literal <size>`` line and,
    at the next empty line, decoded and zlib-decompressed.  ``delta``
    sections are not supported and are skipped.

    Args:
        text: A string or a list of lines.

    Returns:
        A list of ``Change`` tuples (possibly empty).

    Raises:
        AssertionError: If a payload line has an unexpected length or the
            decompressed size differs from the announced one.
    """
    try:
        lines = text.splitlines()
    except AttributeError:
        lines = text

    changes: list[Change] = []

    # Header values are only recorded so that the diff command line and the
    # index line are each skipped once.
    old_version = None
    new_version = None
    cmd_old_path = None
    cmd_new_path = None

    # the sizes are used as latch-up
    new_size = 0
    old_size = 0
    old_encoded = ''
    new_encoded = ''

    for line in lines:
        if cmd_old_path is None and cmd_new_path is None:
            cmd = git_diffcmd_header.match(line)
            if cmd:
                cmd_old_path = cmd.group(1)
                cmd_new_path = cmd.group(2)
                continue

        if old_version is None and new_version is None:
            index = git_header_index.match(line)
            if index:
                old_version = index.group(1)
                new_version = index.group(2)
                continue

        # the first is added file
        if new_size == 0:
            literal = git_binary_literal_start.match(line)
            if literal:
                new_size = int(literal.group(1))
                continue
            if git_binary_delta_start.match(line):
                # not supported
                new_size = 0
                continue
        elif new_size > 0:
            if base85string.match(line):
                # The first character is skipped; the rest is base85 data in
                # groups of five characters.
                assert len(line) >= 6 and ((len(line) - 1) % 5) == 0
                new_encoded += line[1:]
            elif 0 == len(line):
                # An empty line closes the section.
                if new_encoded:
                    added_data = zlib.decompress(base64.b85decode(new_encoded))
                    assert new_size == len(added_data)
                    changes.append(Change(None, 0, added_data, None))
                new_size = 0
                new_encoded = ''
            else:
                # Invalid line format
                new_size = 0
                new_encoded = ''

        # the second is removed file
        if old_size == 0:
            literal = git_binary_literal_start.match(line)
            if literal:
                old_size = int(literal.group(1))
            if git_binary_delta_start.match(line):
                # not supported
                old_size = 0
                continue
        elif old_size > 0:
            if base85string.match(line):
                assert len(line) >= 6 and ((len(line) - 1) % 5) == 0
                old_encoded += line[1:]
            elif 0 == len(line):
                if old_encoded:
                    removed_data = zlib.decompress(base64.b85decode(old_encoded))
                    assert old_size == len(removed_data)
                    # NOTE(review): the removed bytes land in the `hunk`
                    # field here, while added bytes go in `line` -- confirm
                    # that this asymmetry is intended.
                    changes.append(Change(0, None, None, removed_data))
                old_size = 0
                old_encoded = ''
            else:
                # Invalid line format
                old_size = 0
                old_encoded = ''

    return changes
|