# patch.py — diff/patch text parsing module
  1. # -*- coding: utf-8 -*-
  2. import base64
  3. import re
  4. import zlib
  5. from collections import namedtuple
  6. from . import exceptions
  7. from .snippets import findall_regex, split_by_regex
  8. header = namedtuple(
  9. "header",
  10. "index_path old_path old_version new_path new_version",
  11. )
  12. diffobj = namedtuple("diffobj", "header changes text")
  13. Change = namedtuple("Change", "old new line hunk")
  14. file_timestamp_str = "(.+?)(?:\t|:| +)(.*)"
  15. # .+? was previously [^:\t\n\r\f\v]+
  16. # general diff regex
  17. diffcmd_header = re.compile("^diff.* (.+) (.+)$")
  18. unified_header_index = re.compile("^Index: (.+)$")
  19. unified_header_old_line = re.compile(r"^--- " + file_timestamp_str + "$")
  20. unified_header_new_line = re.compile(r"^\+\+\+ " + file_timestamp_str + "$")
  21. unified_hunk_start = re.compile(r"^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@(.*)$")
  22. unified_change = re.compile("^([-+ ])(.*)$")
  23. context_header_old_line = re.compile(r"^\*\*\* " + file_timestamp_str + "$")
  24. context_header_new_line = re.compile("^--- " + file_timestamp_str + "$")
  25. context_hunk_start = re.compile(r"^\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*$")
  26. context_hunk_old = re.compile(r"^\*\*\* (\d+),?(\d*) \*\*\*\*$")
  27. context_hunk_new = re.compile(r"^--- (\d+),?(\d*) ----$")
  28. context_change = re.compile("^([-+ !]) (.*)$")
  29. ed_hunk_start = re.compile(r"^(\d+),?(\d*)([acd])$")
  30. ed_hunk_end = re.compile("^.$")
  31. # much like forward ed, but no 'c' type
  32. rcs_ed_hunk_start = re.compile(r"^([ad])(\d+) ?(\d*)$")
  33. default_hunk_start = re.compile(r"^(\d+),?(\d*)([acd])(\d+),?(\d*)$")
  34. default_hunk_mid = re.compile("^---$")
  35. default_change = re.compile("^([><]) (.*)$")
  36. # Headers
  37. # git has a special index header and no end part
  38. git_diffcmd_header = re.compile("^diff --git a/(.+) b/(.+)$")
  39. git_header_index = re.compile(r"^index ([a-f0-9]+)..([a-f0-9]+) ?(\d*)$")
  40. git_header_old_line = re.compile("^--- (.+)$")
  41. git_header_new_line = re.compile(r"^\+\+\+ (.+)$")
  42. git_header_file_mode = re.compile(r"^(new|deleted) file mode \d{6}$")
  43. git_header_binary_file = re.compile("^Binary files (.+) and (.+) differ")
  44. git_binary_patch_start = re.compile(r"^GIT binary patch$")
  45. git_binary_literal_start = re.compile(r"^literal (\d+)$")
  46. git_binary_delta_start = re.compile(r"^delta (\d+)$")
  47. base85string = re.compile(r"^[0-9A-Za-z!#$%&()*+;<=>?@^_`{|}~-]+$")
  48. bzr_header_index = re.compile("=== (.+)")
  49. bzr_header_old_line = unified_header_old_line
  50. bzr_header_new_line = unified_header_new_line
  51. svn_header_index = unified_header_index
  52. svn_header_timestamp_version = re.compile(r"\((?:working copy|revision (\d+))\)")
  53. svn_header_timestamp = re.compile(r".*(\(.*\))$")
  54. cvs_header_index = unified_header_index
  55. cvs_header_rcs = re.compile(r"^RCS file: (.+)(?:,\w{1}$|$)")
  56. cvs_header_timestamp = re.compile(r"(.+)\t([\d.]+)")
  57. cvs_header_timestamp_colon = re.compile(r":([\d.]+)\t(.+)")
  58. old_cvs_diffcmd_header = re.compile("^diff.* (.+):(.*) (.+):(.*)$")
  59. def parse_patch(text):
  60. try:
  61. lines = text.splitlines()
  62. except AttributeError:
  63. lines = text
  64. # maybe use this to nuke all of those line endings?
  65. # lines = [x.splitlines()[0] for x in lines]
  66. lines = [x if len(x) == 0 else x.splitlines()[0] for x in lines]
  67. check = [
  68. unified_header_index,
  69. diffcmd_header,
  70. cvs_header_rcs,
  71. git_header_index,
  72. context_header_old_line,
  73. unified_header_old_line,
  74. ]
  75. diffs = []
  76. for c in check:
  77. diffs = split_by_regex(lines, c)
  78. if len(diffs) > 1:
  79. break
  80. for diff in diffs:
  81. difftext = "\n".join(diff) + "\n"
  82. h = parse_header(diff)
  83. d = parse_diff(diff)
  84. if h or d:
  85. yield diffobj(header=h, changes=d, text=difftext)
  86. def parse_header(text):
  87. h = parse_scm_header(text)
  88. if h is None:
  89. h = parse_diff_header(text)
  90. return h
  91. def parse_scm_header(text):
  92. try:
  93. lines = text.splitlines()
  94. except AttributeError:
  95. lines = text
  96. check = [
  97. (git_header_index, parse_git_header),
  98. (old_cvs_diffcmd_header, parse_cvs_header),
  99. (cvs_header_rcs, parse_cvs_header),
  100. (svn_header_index, parse_svn_header),
  101. ]
  102. for regex, parser in check:
  103. diffs = findall_regex(lines, regex)
  104. if len(diffs) > 0:
  105. git_opt = findall_regex(lines, git_diffcmd_header)
  106. if len(git_opt) > 0:
  107. res = parser(lines)
  108. if res:
  109. old_path = res.old_path
  110. new_path = res.new_path
  111. if old_path.startswith("a/"):
  112. old_path = old_path[2:]
  113. if new_path.startswith("b/"):
  114. new_path = new_path[2:]
  115. return header(
  116. index_path=res.index_path,
  117. old_path=old_path,
  118. old_version=res.old_version,
  119. new_path=new_path,
  120. new_version=res.new_version,
  121. )
  122. else:
  123. res = parser(lines)
  124. return res
  125. return None
  126. def parse_diff_header(text):
  127. try:
  128. lines = text.splitlines()
  129. except AttributeError:
  130. lines = text
  131. check = [
  132. (unified_header_new_line, parse_unified_header),
  133. (context_header_old_line, parse_context_header),
  134. (diffcmd_header, parse_diffcmd_header),
  135. # TODO:
  136. # git_header can handle version-less unified headers, but
  137. # will trim a/ and b/ in the paths if they exist...
  138. (git_header_new_line, parse_git_header),
  139. ]
  140. for regex, parser in check:
  141. diffs = findall_regex(lines, regex)
  142. if len(diffs) > 0:
  143. return parser(lines)
  144. return None # no header?
  145. def parse_diff(text):
  146. try:
  147. lines = text.splitlines()
  148. except AttributeError:
  149. lines = text
  150. check = [
  151. (unified_hunk_start, parse_unified_diff),
  152. (context_hunk_start, parse_context_diff),
  153. (default_hunk_start, parse_default_diff),
  154. (ed_hunk_start, parse_ed_diff),
  155. (rcs_ed_hunk_start, parse_rcs_ed_diff),
  156. (git_binary_patch_start, parse_git_binary_diff),
  157. ]
  158. for hunk, parser in check:
  159. diffs = findall_regex(lines, hunk)
  160. if len(diffs) > 0:
  161. return parser(lines)
  162. return None
  163. def parse_git_header(text):
  164. try:
  165. lines = text.splitlines()
  166. except AttributeError:
  167. lines = text
  168. old_version = None
  169. new_version = None
  170. old_path = None
  171. new_path = None
  172. cmd_old_path = None
  173. cmd_new_path = None
  174. for line in lines:
  175. hm = git_diffcmd_header.match(line)
  176. if hm:
  177. cmd_old_path = hm.group(1)
  178. cmd_new_path = hm.group(2)
  179. continue
  180. g = git_header_index.match(line)
  181. if g:
  182. old_version = g.group(1)
  183. new_version = g.group(2)
  184. continue
  185. # git always has it's own special headers
  186. o = git_header_old_line.match(line)
  187. if o:
  188. old_path = o.group(1)
  189. n = git_header_new_line.match(line)
  190. if n:
  191. new_path = n.group(1)
  192. binary = git_header_binary_file.match(line)
  193. if binary:
  194. old_path = binary.group(1)
  195. new_path = binary.group(2)
  196. if old_path and new_path:
  197. if old_path.startswith("a/"):
  198. old_path = old_path[2:]
  199. if new_path.startswith("b/"):
  200. new_path = new_path[2:]
  201. return header(
  202. index_path=None,
  203. old_path=old_path,
  204. old_version=old_version,
  205. new_path=new_path,
  206. new_version=new_version,
  207. )
  208. # if we go through all of the text without finding our normal info,
  209. # use the cmd if available
  210. if cmd_old_path and cmd_new_path and old_version and new_version:
  211. if cmd_old_path.startswith("a/"):
  212. cmd_old_path = cmd_old_path[2:]
  213. if cmd_new_path.startswith("b/"):
  214. cmd_new_path = cmd_new_path[2:]
  215. return header(
  216. index_path=None,
  217. # wow, I kind of hate this:
  218. # assume /dev/null if the versions are zeroed out
  219. old_path="/dev/null" if old_version == "0000000" else cmd_old_path,
  220. old_version=old_version,
  221. new_path="/dev/null" if new_version == "0000000" else cmd_new_path,
  222. new_version=new_version,
  223. )
  224. return None
  225. def parse_svn_header(text):
  226. try:
  227. lines = text.splitlines()
  228. except AttributeError:
  229. lines = text
  230. headers = findall_regex(lines, svn_header_index)
  231. if len(headers) == 0:
  232. return None
  233. while len(lines) > 0:
  234. i = svn_header_index.match(lines[0])
  235. del lines[0]
  236. if not i:
  237. continue
  238. diff_header = parse_diff_header(lines)
  239. if not diff_header:
  240. return header(
  241. index_path=i.group(1),
  242. old_path=i.group(1),
  243. old_version=None,
  244. new_path=i.group(1),
  245. new_version=None,
  246. )
  247. opath = diff_header.old_path
  248. over = diff_header.old_version
  249. if over:
  250. oend = svn_header_timestamp_version.match(over)
  251. if oend and oend.group(1):
  252. over = int(oend.group(1))
  253. elif opath:
  254. ts = svn_header_timestamp.match(opath)
  255. if ts:
  256. opath = opath[: -len(ts.group(1))]
  257. oend = svn_header_timestamp_version.match(ts.group(1))
  258. if oend and oend.group(1):
  259. over = int(oend.group(1))
  260. npath = diff_header.new_path
  261. nver = diff_header.new_version
  262. if nver:
  263. nend = svn_header_timestamp_version.match(diff_header.new_version)
  264. if nend and nend.group(1):
  265. nver = int(nend.group(1))
  266. elif npath:
  267. ts = svn_header_timestamp.match(npath)
  268. if ts:
  269. npath = npath[: -len(ts.group(1))]
  270. nend = svn_header_timestamp_version.match(ts.group(1))
  271. if nend and nend.group(1):
  272. nver = int(nend.group(1))
  273. if not isinstance(over, int):
  274. over = None
  275. if not isinstance(nver, int):
  276. nver = None
  277. return header(
  278. index_path=i.group(1),
  279. old_path=opath,
  280. old_version=over,
  281. new_path=npath,
  282. new_version=nver,
  283. )
  284. return None
  285. def parse_cvs_header(text):
  286. try:
  287. lines = text.splitlines()
  288. except AttributeError:
  289. lines = text
  290. headers = findall_regex(lines, cvs_header_rcs)
  291. headers_old = findall_regex(lines, old_cvs_diffcmd_header)
  292. if headers:
  293. # parse rcs style headers
  294. while len(lines) > 0:
  295. i = cvs_header_index.match(lines[0])
  296. del lines[0]
  297. if not i:
  298. continue
  299. diff_header = parse_diff_header(lines)
  300. if diff_header:
  301. over = diff_header.old_version
  302. if over:
  303. oend = cvs_header_timestamp.match(over)
  304. oend_c = cvs_header_timestamp_colon.match(over)
  305. if oend:
  306. over = oend.group(2)
  307. elif oend_c:
  308. over = oend_c.group(1)
  309. nver = diff_header.new_version
  310. if nver:
  311. nend = cvs_header_timestamp.match(nver)
  312. nend_c = cvs_header_timestamp_colon.match(nver)
  313. if nend:
  314. nver = nend.group(2)
  315. elif nend_c:
  316. nver = nend_c.group(1)
  317. return header(
  318. index_path=i.group(1),
  319. old_path=diff_header.old_path,
  320. old_version=over,
  321. new_path=diff_header.new_path,
  322. new_version=nver,
  323. )
  324. return header(
  325. index_path=i.group(1),
  326. old_path=i.group(1),
  327. old_version=None,
  328. new_path=i.group(1),
  329. new_version=None,
  330. )
  331. elif headers_old:
  332. # parse old style headers
  333. while len(lines) > 0:
  334. i = cvs_header_index.match(lines[0])
  335. del lines[0]
  336. if not i:
  337. continue
  338. d = old_cvs_diffcmd_header.match(lines[0])
  339. if not d:
  340. return header(
  341. index_path=i.group(1),
  342. old_path=i.group(1),
  343. old_version=None,
  344. new_path=i.group(1),
  345. new_version=None,
  346. )
  347. # will get rid of the useless stuff for us
  348. parse_diff_header(lines)
  349. over = d.group(2) if d.group(2) else None
  350. nver = d.group(4) if d.group(4) else None
  351. return header(
  352. index_path=i.group(1),
  353. old_path=d.group(1),
  354. old_version=over,
  355. new_path=d.group(3),
  356. new_version=nver,
  357. )
  358. return None
  359. def parse_diffcmd_header(text):
  360. try:
  361. lines = text.splitlines()
  362. except AttributeError:
  363. lines = text
  364. headers = findall_regex(lines, diffcmd_header)
  365. if len(headers) == 0:
  366. return None
  367. while len(lines) > 0:
  368. d = diffcmd_header.match(lines[0])
  369. del lines[0]
  370. if d:
  371. return header(
  372. index_path=None,
  373. old_path=d.group(1),
  374. old_version=None,
  375. new_path=d.group(2),
  376. new_version=None,
  377. )
  378. return None
  379. def parse_unified_header(text):
  380. try:
  381. lines = text.splitlines()
  382. except AttributeError:
  383. lines = text
  384. headers = findall_regex(lines, unified_header_new_line)
  385. if len(headers) == 0:
  386. return None
  387. while len(lines) > 1:
  388. o = unified_header_old_line.match(lines[0])
  389. del lines[0]
  390. if o:
  391. n = unified_header_new_line.match(lines[0])
  392. del lines[0]
  393. if n:
  394. over = o.group(2)
  395. if len(over) == 0:
  396. over = None
  397. nver = n.group(2)
  398. if len(nver) == 0:
  399. nver = None
  400. return header(
  401. index_path=None,
  402. old_path=o.group(1),
  403. old_version=over,
  404. new_path=n.group(1),
  405. new_version=nver,
  406. )
  407. return None
  408. def parse_context_header(text):
  409. try:
  410. lines = text.splitlines()
  411. except AttributeError:
  412. lines = text
  413. headers = findall_regex(lines, context_header_old_line)
  414. if len(headers) == 0:
  415. return None
  416. while len(lines) > 1:
  417. o = context_header_old_line.match(lines[0])
  418. del lines[0]
  419. if o:
  420. n = context_header_new_line.match(lines[0])
  421. del lines[0]
  422. if n:
  423. over = o.group(2)
  424. if len(over) == 0:
  425. over = None
  426. nver = n.group(2)
  427. if len(nver) == 0:
  428. nver = None
  429. return header(
  430. index_path=None,
  431. old_path=o.group(1),
  432. old_version=over,
  433. new_path=n.group(1),
  434. new_version=nver,
  435. )
  436. return None
  437. def parse_default_diff(text):
  438. try:
  439. lines = text.splitlines()
  440. except AttributeError:
  441. lines = text
  442. old = 0
  443. new = 0
  444. old_len = 0
  445. new_len = 0
  446. r = 0
  447. i = 0
  448. changes = list()
  449. hunks = split_by_regex(lines, default_hunk_start)
  450. for hunk_n, hunk in enumerate(hunks):
  451. if not len(hunk):
  452. continue
  453. r = 0
  454. i = 0
  455. while len(hunk) > 0:
  456. h = default_hunk_start.match(hunk[0])
  457. c = default_change.match(hunk[0])
  458. del hunk[0]
  459. if h:
  460. old = int(h.group(1))
  461. if len(h.group(2)) > 0:
  462. old_len = int(h.group(2)) - old + 1
  463. else:
  464. old_len = 0
  465. new = int(h.group(4))
  466. if len(h.group(5)) > 0:
  467. new_len = int(h.group(5)) - new + 1
  468. else:
  469. new_len = 0
  470. elif c:
  471. kind = c.group(1)
  472. line = c.group(2)
  473. if kind == "<" and (r != old_len or r == 0):
  474. changes.append(Change(old + r, None, line, hunk_n))
  475. r += 1
  476. elif kind == ">" and (i != new_len or i == 0):
  477. changes.append(Change(None, new + i, line, hunk_n))
  478. i += 1
  479. if len(changes) > 0:
  480. return changes
  481. return None
  482. def parse_unified_diff(text):
  483. try:
  484. lines = text.splitlines()
  485. except AttributeError:
  486. lines = text
  487. old = 0
  488. new = 0
  489. r = 0
  490. i = 0
  491. old_len = 0
  492. new_len = 0
  493. changes = list()
  494. hunks = split_by_regex(lines, unified_hunk_start)
  495. for hunk_n, hunk in enumerate(hunks):
  496. # reset counters
  497. r = 0
  498. i = 0
  499. while len(hunk) > 0:
  500. h = unified_hunk_start.match(hunk[0])
  501. del hunk[0]
  502. if h:
  503. old = int(h.group(1))
  504. if len(h.group(2)) > 0:
  505. old_len = int(h.group(2))
  506. else:
  507. old_len = 0
  508. new = int(h.group(3))
  509. if len(h.group(4)) > 0:
  510. new_len = int(h.group(4))
  511. else:
  512. new_len = 0
  513. h = None
  514. break
  515. for n in hunk:
  516. c = unified_change.match(n)
  517. if c:
  518. kind = c.group(1)
  519. line = c.group(2)
  520. if kind == "-" and (r != old_len or r == 0):
  521. changes.append(Change(old + r, None, line, hunk_n))
  522. r += 1
  523. elif kind == "+" and (i != new_len or i == 0):
  524. changes.append(Change(None, new + i, line, hunk_n))
  525. i += 1
  526. elif kind == " ":
  527. if r != old_len and i != new_len:
  528. changes.append(Change(old + r, new + i, line, hunk_n))
  529. r += 1
  530. i += 1
  531. if len(changes) > 0:
  532. return changes
  533. return None
  534. def parse_context_diff(text):
  535. try:
  536. lines = text.splitlines()
  537. except AttributeError:
  538. lines = text
  539. old = 0
  540. new = 0
  541. j = 0
  542. k = 0
  543. changes = list()
  544. hunks = split_by_regex(lines, context_hunk_start)
  545. for hunk_n, hunk in enumerate(hunks):
  546. if not len(hunk):
  547. continue
  548. j = 0
  549. k = 0
  550. parts = split_by_regex(hunk, context_hunk_new)
  551. if len(parts) != 2:
  552. raise exceptions.ParseException("Context diff invalid", hunk_n)
  553. old_hunk = parts[0]
  554. new_hunk = parts[1]
  555. while len(old_hunk) > 0:
  556. o = context_hunk_old.match(old_hunk[0])
  557. del old_hunk[0]
  558. if not o:
  559. continue
  560. old = int(o.group(1))
  561. old_len = int(o.group(2)) + 1 - old
  562. while len(new_hunk) > 0:
  563. n = context_hunk_new.match(new_hunk[0])
  564. del new_hunk[0]
  565. if not n:
  566. continue
  567. new = int(n.group(1))
  568. new_len = int(n.group(2)) + 1 - new
  569. break
  570. break
  571. # now have old and new set, can start processing?
  572. if len(old_hunk) > 0 and len(new_hunk) == 0:
  573. msg = "Got unexpected change in removal hunk: "
  574. # only removes left?
  575. while len(old_hunk) > 0:
  576. c = context_change.match(old_hunk[0])
  577. del old_hunk[0]
  578. if not c:
  579. continue
  580. kind = c.group(1)
  581. line = c.group(2)
  582. if kind == "-" and (j != old_len or j == 0):
  583. changes.append(Change(old + j, None, line, hunk_n))
  584. j += 1
  585. elif kind == " " and (
  586. (j != old_len and k != new_len) or (j == 0 or k == 0)
  587. ):
  588. changes.append(Change(old + j, new + k, line, hunk_n))
  589. j += 1
  590. k += 1
  591. elif kind == "+" or kind == "!":
  592. raise exceptions.ParseException(msg + kind, hunk_n)
  593. continue
  594. if len(old_hunk) == 0 and len(new_hunk) > 0:
  595. msg = "Got unexpected change in removal hunk: "
  596. # only insertions left?
  597. while len(new_hunk) > 0:
  598. c = context_change.match(new_hunk[0])
  599. del new_hunk[0]
  600. if not c:
  601. continue
  602. kind = c.group(1)
  603. line = c.group(2)
  604. if kind == "+" and (k != new_len or k == 0):
  605. changes.append(Change(None, new + k, line, hunk_n))
  606. k += 1
  607. elif kind == " " and (
  608. (j != old_len and k != new_len) or (j == 0 or k == 0)
  609. ):
  610. changes.append(Change(old + j, new + k, line, hunk_n))
  611. j += 1
  612. k += 1
  613. elif kind == "-" or kind == "!":
  614. raise exceptions.ParseException(msg + kind, hunk_n)
  615. continue
  616. # both
  617. while len(old_hunk) > 0 and len(new_hunk) > 0:
  618. oc = context_change.match(old_hunk[0])
  619. nc = context_change.match(new_hunk[0])
  620. okind = None
  621. nkind = None
  622. if oc:
  623. okind = oc.group(1)
  624. oline = oc.group(2)
  625. if nc:
  626. nkind = nc.group(1)
  627. nline = nc.group(2)
  628. if not (oc or nc):
  629. del old_hunk[0]
  630. del new_hunk[0]
  631. elif okind == " " and nkind == " " and oline == nline:
  632. changes.append(Change(old + j, new + k, oline, hunk_n))
  633. j += 1
  634. k += 1
  635. del old_hunk[0]
  636. del new_hunk[0]
  637. elif okind == "-" or okind == "!" and (j != old_len or j == 0):
  638. changes.append(Change(old + j, None, oline, hunk_n))
  639. j += 1
  640. del old_hunk[0]
  641. elif nkind == "+" or nkind == "!" and (k != new_len or k == 0):
  642. changes.append(Change(None, new + k, nline, hunk_n))
  643. k += 1
  644. del new_hunk[0]
  645. else:
  646. return None
  647. if len(changes) > 0:
  648. return changes
  649. return None
  650. def parse_ed_diff(text):
  651. try:
  652. lines = text.splitlines()
  653. except AttributeError:
  654. lines = text
  655. old = 0
  656. j = 0
  657. k = 0
  658. r = 0
  659. i = 0
  660. changes = list()
  661. hunks = split_by_regex(lines, ed_hunk_start)
  662. hunks.reverse()
  663. for hunk_n, hunk in enumerate(hunks):
  664. if not len(hunk):
  665. continue
  666. j = 0
  667. k = 0
  668. while len(hunk) > 0:
  669. o = ed_hunk_start.match(hunk[0])
  670. del hunk[0]
  671. if not o:
  672. continue
  673. old = int(o.group(1))
  674. old_end = int(o.group(2)) if len(o.group(2)) else old
  675. hunk_kind = o.group(3)
  676. if hunk_kind == "d":
  677. k = 0
  678. while old_end >= old:
  679. changes.append(Change(old + k, None, None, hunk_n))
  680. r += 1
  681. k += 1
  682. old_end -= 1
  683. continue
  684. while len(hunk) > 0:
  685. e = ed_hunk_end.match(hunk[0])
  686. if not e and hunk_kind == "c":
  687. k = 0
  688. while old_end >= old:
  689. changes.append(Change(old + k, None, None, hunk_n))
  690. r += 1
  691. k += 1
  692. old_end -= 1
  693. # I basically have no idea why this works
  694. # for these tests.
  695. changes.append(
  696. Change(
  697. None,
  698. old - r + i + k + j,
  699. hunk[0],
  700. hunk_n,
  701. )
  702. )
  703. i += 1
  704. j += 1
  705. if not e and hunk_kind == "a":
  706. changes.append(
  707. Change(
  708. None,
  709. old - r + i + 1,
  710. hunk[0],
  711. hunk_n,
  712. )
  713. )
  714. i += 1
  715. del hunk[0]
  716. if len(changes) > 0:
  717. return changes
  718. return None
  719. def parse_rcs_ed_diff(text):
  720. # much like forward ed, but no 'c' type
  721. try:
  722. lines = text.splitlines()
  723. except AttributeError:
  724. lines = text
  725. old = 0
  726. j = 0
  727. size = 0
  728. total_change_size = 0
  729. changes = list()
  730. hunks = split_by_regex(lines, rcs_ed_hunk_start)
  731. for hunk_n, hunk in enumerate(hunks):
  732. if len(hunk):
  733. j = 0
  734. while len(hunk) > 0:
  735. o = rcs_ed_hunk_start.match(hunk[0])
  736. del hunk[0]
  737. if not o:
  738. continue
  739. hunk_kind = o.group(1)
  740. old = int(o.group(2))
  741. size = int(o.group(3))
  742. if hunk_kind == "a":
  743. old += total_change_size + 1
  744. total_change_size += size
  745. while size > 0 and len(hunk) > 0:
  746. changes.append(Change(None, old + j, hunk[0], hunk_n))
  747. j += 1
  748. size -= 1
  749. del hunk[0]
  750. elif hunk_kind == "d":
  751. total_change_size -= size
  752. while size > 0:
  753. changes.append(Change(old + j, None, None, hunk_n))
  754. j += 1
  755. size -= 1
  756. if len(changes) > 0:
  757. return changes
  758. return None
  759. def parse_git_binary_diff(text):
  760. try:
  761. lines = text.splitlines()
  762. except AttributeError:
  763. lines = text
  764. changes: list[Change] = list()
  765. old_version = None
  766. new_version = None
  767. cmd_old_path = None
  768. cmd_new_path = None
  769. # the sizes are used as latch-up
  770. new_size = 0
  771. old_size = 0
  772. old_encoded = ""
  773. new_encoded = ""
  774. for line in lines:
  775. if cmd_old_path is None and cmd_new_path is None:
  776. hm = git_diffcmd_header.match(line)
  777. if hm:
  778. cmd_old_path = hm.group(1)
  779. cmd_new_path = hm.group(2)
  780. continue
  781. if old_version is None and new_version is None:
  782. g = git_header_index.match(line)
  783. if g:
  784. old_version = g.group(1)
  785. new_version = g.group(2)
  786. continue
  787. # the first is added file
  788. if new_size == 0:
  789. literal = git_binary_literal_start.match(line)
  790. if literal:
  791. new_size = int(literal.group(1))
  792. continue
  793. delta = git_binary_delta_start.match(line)
  794. if delta:
  795. # not supported
  796. new_size = 0
  797. continue
  798. elif new_size > 0:
  799. if base85string.match(line):
  800. assert len(line) >= 6 and ((len(line) - 1) % 5) == 0
  801. new_encoded += line[1:]
  802. elif 0 == len(line):
  803. if new_encoded:
  804. decoded = base64.b85decode(new_encoded)
  805. added_data = zlib.decompress(decoded)
  806. assert new_size == len(added_data)
  807. change = Change(None, 0, added_data, None)
  808. changes.append(change)
  809. new_size = 0
  810. new_encoded = ""
  811. else:
  812. # Invalid line format
  813. new_size = 0
  814. new_encoded = ""
  815. # the second is removed file
  816. if old_size == 0:
  817. literal = git_binary_literal_start.match(line)
  818. if literal:
  819. old_size = int(literal.group(1))
  820. delta = git_binary_delta_start.match(line)
  821. if delta:
  822. # not supported
  823. old_size = 0
  824. continue
  825. elif old_size > 0:
  826. if base85string.match(line):
  827. assert len(line) >= 6 and ((len(line) - 1) % 5) == 0
  828. old_encoded += line[1:]
  829. elif 0 == len(line):
  830. if old_encoded:
  831. decoded = base64.b85decode(old_encoded)
  832. removed_data = zlib.decompress(decoded)
  833. assert old_size == len(removed_data)
  834. change = Change(0, None, None, removed_data)
  835. changes.append(change)
  836. old_size = 0
  837. old_encoded = ""
  838. else:
  839. # Invalid line format
  840. old_size = 0
  841. old_encoded = ""
  842. return changes