25 * |
25 * |
26 * @access private |
26 * @access private |
27 */ |
27 */ |
28 class Text_Diff_Engine_native { |
28 class Text_Diff_Engine_native { |
29 |
29 |
30 function diff($from_lines, $to_lines) |
30 function diff($from_lines, $to_lines) |
31 { |
31 { |
32 array_walk($from_lines, array('Text_Diff', 'trimNewlines')); |
32 array_walk($from_lines, array('Text_Diff', 'trimNewlines')); |
33 array_walk($to_lines, array('Text_Diff', 'trimNewlines')); |
33 array_walk($to_lines, array('Text_Diff', 'trimNewlines')); |
34 |
34 |
35 $n_from = count($from_lines); |
35 $n_from = count($from_lines); |
36 $n_to = count($to_lines); |
36 $n_to = count($to_lines); |
37 |
37 |
38 $this->xchanged = $this->ychanged = array(); |
38 $this->xchanged = $this->ychanged = array(); |
39 $this->xv = $this->yv = array(); |
39 $this->xv = $this->yv = array(); |
40 $this->xind = $this->yind = array(); |
40 $this->xind = $this->yind = array(); |
41 unset($this->seq); |
41 unset($this->seq); |
42 unset($this->in_seq); |
42 unset($this->in_seq); |
43 unset($this->lcs); |
43 unset($this->lcs); |
44 |
44 |
45 // Skip leading common lines. |
45 // Skip leading common lines. |
46 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) { |
46 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) { |
47 if ($from_lines[$skip] !== $to_lines[$skip]) { |
47 if ($from_lines[$skip] !== $to_lines[$skip]) { |
48 break; |
48 break; |
49 } |
49 } |
50 $this->xchanged[$skip] = $this->ychanged[$skip] = false; |
50 $this->xchanged[$skip] = $this->ychanged[$skip] = false; |
51 } |
51 } |
52 |
52 |
53 // Skip trailing common lines. |
53 // Skip trailing common lines. |
54 $xi = $n_from; $yi = $n_to; |
54 $xi = $n_from; $yi = $n_to; |
55 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) { |
55 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) { |
56 if ($from_lines[$xi] !== $to_lines[$yi]) { |
56 if ($from_lines[$xi] !== $to_lines[$yi]) { |
57 break; |
57 break; |
58 } |
58 } |
59 $this->xchanged[$xi] = $this->ychanged[$yi] = false; |
59 $this->xchanged[$xi] = $this->ychanged[$yi] = false; |
60 } |
60 } |
61 |
61 |
62 // Ignore lines which do not exist in both files. |
62 // Ignore lines which do not exist in both files. |
63 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { |
63 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { |
64 $xhash[$from_lines[$xi]] = 1; |
64 $xhash[$from_lines[$xi]] = 1; |
65 } |
65 } |
66 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) { |
66 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) { |
67 $line = $to_lines[$yi]; |
67 $line = $to_lines[$yi]; |
68 if (($this->ychanged[$yi] = empty($xhash[$line]))) { |
68 if (($this->ychanged[$yi] = empty($xhash[$line]))) { |
69 continue; |
69 continue; |
70 } |
70 } |
71 $yhash[$line] = 1; |
71 $yhash[$line] = 1; |
72 $this->yv[] = $line; |
72 $this->yv[] = $line; |
73 $this->yind[] = $yi; |
73 $this->yind[] = $yi; |
74 } |
74 } |
75 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { |
75 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) { |
76 $line = $from_lines[$xi]; |
76 $line = $from_lines[$xi]; |
77 if (($this->xchanged[$xi] = empty($yhash[$line]))) { |
77 if (($this->xchanged[$xi] = empty($yhash[$line]))) { |
78 continue; |
78 continue; |
79 } |
79 } |
80 $this->xv[] = $line; |
80 $this->xv[] = $line; |
81 $this->xind[] = $xi; |
81 $this->xind[] = $xi; |
82 } |
82 } |
83 |
83 |
84 // Find the LCS. |
84 // Find the LCS. |
85 $this->_compareseq(0, count($this->xv), 0, count($this->yv)); |
85 $this->_compareseq(0, count($this->xv), 0, count($this->yv)); |
86 |
86 |
87 // Merge edits when possible. |
87 // Merge edits when possible. |
88 $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged); |
88 $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged); |
89 $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged); |
89 $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged); |
90 |
90 |
91 // Compute the edit operations. |
91 // Compute the edit operations. |
92 $edits = array(); |
92 $edits = array(); |
93 $xi = $yi = 0; |
93 $xi = $yi = 0; |
94 while ($xi < $n_from || $yi < $n_to) { |
94 while ($xi < $n_from || $yi < $n_to) { |
95 assert($yi < $n_to || $this->xchanged[$xi]); |
95 assert($yi < $n_to || $this->xchanged[$xi]); |
96 assert($xi < $n_from || $this->ychanged[$yi]); |
96 assert($xi < $n_from || $this->ychanged[$yi]); |
97 |
97 |
98 // Skip matching "snake". |
98 // Skip matching "snake". |
99 $copy = array(); |
99 $copy = array(); |
100 while ($xi < $n_from && $yi < $n_to |
100 while ($xi < $n_from && $yi < $n_to |
101 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { |
101 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) { |
102 $copy[] = $from_lines[$xi++]; |
102 $copy[] = $from_lines[$xi++]; |
103 ++$yi; |
103 ++$yi; |
104 } |
104 } |
105 if ($copy) { |
105 if ($copy) { |
106 $edits[] = &new Text_Diff_Op_copy($copy); |
106 $edits[] = &new Text_Diff_Op_copy($copy); |
107 } |
107 } |
108 |
108 |
109 // Find deletes & adds. |
109 // Find deletes & adds. |
110 $delete = array(); |
110 $delete = array(); |
111 while ($xi < $n_from && $this->xchanged[$xi]) { |
111 while ($xi < $n_from && $this->xchanged[$xi]) { |
112 $delete[] = $from_lines[$xi++]; |
112 $delete[] = $from_lines[$xi++]; |
113 } |
113 } |
114 |
114 |
115 $add = array(); |
115 $add = array(); |
116 while ($yi < $n_to && $this->ychanged[$yi]) { |
116 while ($yi < $n_to && $this->ychanged[$yi]) { |
117 $add[] = $to_lines[$yi++]; |
117 $add[] = $to_lines[$yi++]; |
118 } |
118 } |
119 |
119 |
120 if ($delete && $add) { |
120 if ($delete && $add) { |
121 $edits[] = &new Text_Diff_Op_change($delete, $add); |
121 $edits[] = &new Text_Diff_Op_change($delete, $add); |
122 } elseif ($delete) { |
122 } elseif ($delete) { |
123 $edits[] = &new Text_Diff_Op_delete($delete); |
123 $edits[] = &new Text_Diff_Op_delete($delete); |
124 } elseif ($add) { |
124 } elseif ($add) { |
125 $edits[] = &new Text_Diff_Op_add($add); |
125 $edits[] = &new Text_Diff_Op_add($add); |
126 } |
126 } |
127 } |
127 } |
128 |
128 |
129 return $edits; |
129 return $edits; |
130 } |
130 } |
131 |
131 |
132 /** |
132 /** |
133 * Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, |
133 * Divides the Largest Common Subsequence (LCS) of the sequences (XOFF, |
134 * XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized |
134 * XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized |
135 * segments. |
135 * segments. |
136 * |
136 * |
137 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of |
137 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of |
138 * NCHUNKS+1 (X, Y) indexes giving the diving points between sub |
138 * NCHUNKS+1 (X, Y) indexes giving the diving points between sub |
139 * sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), |
139 * sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1), |
140 * the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == |
140 * the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) == |
141 * (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). |
141 * (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM). |
142 * |
142 * |
143 * This function assumes that the first lines of the specified portions of |
143 * This function assumes that the first lines of the specified portions of |
144 * the two files do not match, and likewise that the last lines do not |
144 * the two files do not match, and likewise that the last lines do not |
145 * match. The caller must trim matching lines from the beginning and end |
145 * match. The caller must trim matching lines from the beginning and end |
146 * of the portions it is going to specify. |
146 * of the portions it is going to specify. |
147 */ |
147 */ |
148 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) |
148 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks) |
149 { |
149 { |
150 $flip = false; |
150 $flip = false; |
151 |
151 |
152 if ($xlim - $xoff > $ylim - $yoff) { |
152 if ($xlim - $xoff > $ylim - $yoff) { |
153 /* Things seems faster (I'm not sure I understand why) when the |
153 /* Things seems faster (I'm not sure I understand why) when the |
154 * shortest sequence is in X. */ |
154 * shortest sequence is in X. */ |
155 $flip = true; |
155 $flip = true; |
156 list ($xoff, $xlim, $yoff, $ylim) |
156 list ($xoff, $xlim, $yoff, $ylim) |
157 = array($yoff, $ylim, $xoff, $xlim); |
157 = array($yoff, $ylim, $xoff, $xlim); |
158 } |
158 } |
159 |
159 |
160 if ($flip) { |
160 if ($flip) { |
161 for ($i = $ylim - 1; $i >= $yoff; $i--) { |
161 for ($i = $ylim - 1; $i >= $yoff; $i--) { |
162 $ymatches[$this->xv[$i]][] = $i; |
162 $ymatches[$this->xv[$i]][] = $i; |
163 } |
163 } |
164 } else { |
164 } else { |
165 for ($i = $ylim - 1; $i >= $yoff; $i--) { |
165 for ($i = $ylim - 1; $i >= $yoff; $i--) { |
166 $ymatches[$this->yv[$i]][] = $i; |
166 $ymatches[$this->yv[$i]][] = $i; |
167 } |
167 } |
168 } |
168 } |
169 |
169 |
170 $this->lcs = 0; |
170 $this->lcs = 0; |
171 $this->seq[0]= $yoff - 1; |
171 $this->seq[0]= $yoff - 1; |
172 $this->in_seq = array(); |
172 $this->in_seq = array(); |
173 $ymids[0] = array(); |
173 $ymids[0] = array(); |
174 |
174 |
175 $numer = $xlim - $xoff + $nchunks - 1; |
175 $numer = $xlim - $xoff + $nchunks - 1; |
176 $x = $xoff; |
176 $x = $xoff; |
177 for ($chunk = 0; $chunk < $nchunks; $chunk++) { |
177 for ($chunk = 0; $chunk < $nchunks; $chunk++) { |
178 if ($chunk > 0) { |
178 if ($chunk > 0) { |
179 for ($i = 0; $i <= $this->lcs; $i++) { |
179 for ($i = 0; $i <= $this->lcs; $i++) { |
180 $ymids[$i][$chunk - 1] = $this->seq[$i]; |
180 $ymids[$i][$chunk - 1] = $this->seq[$i]; |
181 } |
181 } |
182 } |
182 } |
183 |
183 |
184 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks); |
184 $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks); |
185 for (; $x < $x1; $x++) { |
185 for (; $x < $x1; $x++) { |
186 $line = $flip ? $this->yv[$x] : $this->xv[$x]; |
186 $line = $flip ? $this->yv[$x] : $this->xv[$x]; |
187 if (empty($ymatches[$line])) { |
187 if (empty($ymatches[$line])) { |
188 continue; |
188 continue; |
189 } |
189 } |
190 $matches = $ymatches[$line]; |
190 $matches = $ymatches[$line]; |
191 foreach ($matches as $y) { |
191 foreach ($matches as $y) { |
192 if (empty($this->in_seq[$y])) { |
192 if (empty($this->in_seq[$y])) { |
193 $k = $this->_lcsPos($y); |
193 $k = $this->_lcsPos($y); |
194 assert($k > 0); |
194 assert($k > 0); |
195 $ymids[$k] = $ymids[$k - 1]; |
195 $ymids[$k] = $ymids[$k - 1]; |
196 break; |
196 break; |
197 } |
197 } |
198 } |
198 } |
199 |
199 |
200 while (list($junk, $y) = each($matches)) { |
200 while (list($junk, $y) = each($matches)) { |
201 if ($y > $this->seq[$k - 1]) { |
201 if ($y > $this->seq[$k - 1]) { |
202 assert($y < $this->seq[$k]); |
202 assert($y < $this->seq[$k]); |
203 /* Optimization: this is a common case: next match is |
203 /* Optimization: this is a common case: next match is |
204 * just replacing previous match. */ |
204 * just replacing previous match. */ |
205 $this->in_seq[$this->seq[$k]] = false; |
205 $this->in_seq[$this->seq[$k]] = false; |
206 $this->seq[$k] = $y; |
206 $this->seq[$k] = $y; |
207 $this->in_seq[$y] = 1; |
207 $this->in_seq[$y] = 1; |
208 } elseif (empty($this->in_seq[$y])) { |
208 } elseif (empty($this->in_seq[$y])) { |
209 $k = $this->_lcsPos($y); |
209 $k = $this->_lcsPos($y); |
210 assert($k > 0); |
210 assert($k > 0); |
211 $ymids[$k] = $ymids[$k - 1]; |
211 $ymids[$k] = $ymids[$k - 1]; |
212 } |
212 } |
213 } |
213 } |
214 } |
214 } |
215 } |
215 } |
216 |
216 |
217 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); |
217 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff); |
218 $ymid = $ymids[$this->lcs]; |
218 $ymid = $ymids[$this->lcs]; |
219 for ($n = 0; $n < $nchunks - 1; $n++) { |
219 for ($n = 0; $n < $nchunks - 1; $n++) { |
220 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); |
220 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks); |
221 $y1 = $ymid[$n] + 1; |
221 $y1 = $ymid[$n] + 1; |
222 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1); |
222 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1); |
223 } |
223 } |
224 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); |
224 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim); |
225 |
225 |
226 return array($this->lcs, $seps); |
226 return array($this->lcs, $seps); |
227 } |
227 } |
228 |
228 |
229 function _lcsPos($ypos) |
229 function _lcsPos($ypos) |
230 { |
230 { |
231 $end = $this->lcs; |
231 $end = $this->lcs; |
232 if ($end == 0 || $ypos > $this->seq[$end]) { |
232 if ($end == 0 || $ypos > $this->seq[$end]) { |
233 $this->seq[++$this->lcs] = $ypos; |
233 $this->seq[++$this->lcs] = $ypos; |
234 $this->in_seq[$ypos] = 1; |
234 $this->in_seq[$ypos] = 1; |
235 return $this->lcs; |
235 return $this->lcs; |
236 } |
236 } |
237 |
237 |
238 $beg = 1; |
238 $beg = 1; |
239 while ($beg < $end) { |
239 while ($beg < $end) { |
240 $mid = (int)(($beg + $end) / 2); |
240 $mid = (int)(($beg + $end) / 2); |
241 if ($ypos > $this->seq[$mid]) { |
241 if ($ypos > $this->seq[$mid]) { |
242 $beg = $mid + 1; |
242 $beg = $mid + 1; |
243 } else { |
243 } else { |
244 $end = $mid; |
244 $end = $mid; |
245 } |
245 } |
246 } |
246 } |
247 |
247 |
248 assert($ypos != $this->seq[$end]); |
248 assert($ypos != $this->seq[$end]); |
249 |
249 |
250 $this->in_seq[$this->seq[$end]] = false; |
250 $this->in_seq[$this->seq[$end]] = false; |
251 $this->seq[$end] = $ypos; |
251 $this->seq[$end] = $ypos; |
252 $this->in_seq[$ypos] = 1; |
252 $this->in_seq[$ypos] = 1; |
253 return $end; |
253 return $end; |
254 } |
254 } |
255 |
255 |
256 /** |
256 /** |
257 * Finds LCS of two sequences. |
257 * Finds LCS of two sequences. |
258 * |
258 * |
259 * The results are recorded in the vectors $this->{x,y}changed[], by |
259 * The results are recorded in the vectors $this->{x,y}changed[], by |
260 * storing a 1 in the element for each line that is an insertion or |
260 * storing a 1 in the element for each line that is an insertion or |
261 * deletion (ie. is not in the LCS). |
261 * deletion (ie. is not in the LCS). |
262 * |
262 * |
263 * The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1. |
263 * The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1. |
264 * |
264 * |
265 * Note that XLIM, YLIM are exclusive bounds. All line numbers are |
265 * Note that XLIM, YLIM are exclusive bounds. All line numbers are |
266 * origin-0 and discarded lines are not counted. |
266 * origin-0 and discarded lines are not counted. |
267 */ |
267 */ |
268 function _compareseq ($xoff, $xlim, $yoff, $ylim) |
268 function _compareseq ($xoff, $xlim, $yoff, $ylim) |
269 { |
269 { |
270 /* Slide down the bottom initial diagonal. */ |
270 /* Slide down the bottom initial diagonal. */ |
271 while ($xoff < $xlim && $yoff < $ylim |
271 while ($xoff < $xlim && $yoff < $ylim |
272 && $this->xv[$xoff] == $this->yv[$yoff]) { |
272 && $this->xv[$xoff] == $this->yv[$yoff]) { |
273 ++$xoff; |
273 ++$xoff; |
274 ++$yoff; |
274 ++$yoff; |
275 } |
275 } |
276 |
276 |
277 /* Slide up the top initial diagonal. */ |
277 /* Slide up the top initial diagonal. */ |
278 while ($xlim > $xoff && $ylim > $yoff |
278 while ($xlim > $xoff && $ylim > $yoff |
279 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) { |
279 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) { |
280 --$xlim; |
280 --$xlim; |
281 --$ylim; |
281 --$ylim; |
282 } |
282 } |
283 |
283 |
284 if ($xoff == $xlim || $yoff == $ylim) { |
284 if ($xoff == $xlim || $yoff == $ylim) { |
285 $lcs = 0; |
285 $lcs = 0; |
286 } else { |
286 } else { |
287 /* This is ad hoc but seems to work well. $nchunks = |
287 /* This is ad hoc but seems to work well. $nchunks = |
288 * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks = |
288 * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks = |
289 * max(2,min(8,(int)$nchunks)); */ |
289 * max(2,min(8,(int)$nchunks)); */ |
290 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; |
290 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1; |
291 list($lcs, $seps) |
291 list($lcs, $seps) |
292 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks); |
292 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks); |
293 } |
293 } |
294 |
294 |
295 if ($lcs == 0) { |
295 if ($lcs == 0) { |
296 /* X and Y sequences have no common subsequence: mark all |
296 /* X and Y sequences have no common subsequence: mark all |
297 * changed. */ |
297 * changed. */ |
298 while ($yoff < $ylim) { |
298 while ($yoff < $ylim) { |
299 $this->ychanged[$this->yind[$yoff++]] = 1; |
299 $this->ychanged[$this->yind[$yoff++]] = 1; |
300 } |
300 } |
301 while ($xoff < $xlim) { |
301 while ($xoff < $xlim) { |
302 $this->xchanged[$this->xind[$xoff++]] = 1; |
302 $this->xchanged[$this->xind[$xoff++]] = 1; |
303 } |
303 } |
304 } else { |
304 } else { |
305 /* Use the partitions to split this problem into subproblems. */ |
305 /* Use the partitions to split this problem into subproblems. */ |
306 reset($seps); |
306 reset($seps); |
307 $pt1 = $seps[0]; |
307 $pt1 = $seps[0]; |
308 while ($pt2 = next($seps)) { |
308 while ($pt2 = next($seps)) { |
309 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]); |
309 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]); |
310 $pt1 = $pt2; |
310 $pt1 = $pt2; |
311 } |
311 } |
312 } |
312 } |
313 } |
313 } |
314 |
314 |
315 /** |
315 /** |
316 * Adjusts inserts/deletes of identical lines to join changes as much as |
316 * Adjusts inserts/deletes of identical lines to join changes as much as |
317 * possible. |
317 * possible. |
318 * |
318 * |
319 * We do something when a run of changed lines include a line at one end |
319 * We do something when a run of changed lines include a line at one end |
320 * and has an excluded, identical line at the other. We are free to |
320 * and has an excluded, identical line at the other. We are free to |
321 * choose which identical line is included. `compareseq' usually chooses |
321 * choose which identical line is included. `compareseq' usually chooses |
322 * the one at the beginning, but usually it is cleaner to consider the |
322 * the one at the beginning, but usually it is cleaner to consider the |
323 * following identical line to be the "change". |
323 * following identical line to be the "change". |
324 * |
324 * |
325 * This is extracted verbatim from analyze.c (GNU diffutils-2.7). |
325 * This is extracted verbatim from analyze.c (GNU diffutils-2.7). |
326 */ |
326 */ |
327 function _shiftBoundaries($lines, &$changed, $other_changed) |
327 function _shiftBoundaries($lines, &$changed, $other_changed) |
328 { |
328 { |
329 $i = 0; |
329 $i = 0; |
330 $j = 0; |
330 $j = 0; |
331 |
331 |
332 assert('count($lines) == count($changed)'); |
332 assert('count($lines) == count($changed)'); |
333 $len = count($lines); |
333 $len = count($lines); |
334 $other_len = count($other_changed); |
334 $other_len = count($other_changed); |
335 |
335 |
336 while (1) { |
336 while (1) { |
337 /* Scan forward to find the beginning of another run of |
337 /* Scan forward to find the beginning of another run of |
338 * changes. Also keep track of the corresponding point in the |
338 * changes. Also keep track of the corresponding point in the |
339 * other file. |
339 * other file. |
340 * |
340 * |
341 * Throughout this code, $i and $j are adjusted together so that |
341 * Throughout this code, $i and $j are adjusted together so that |
342 * the first $i elements of $changed and the first $j elements of |
342 * the first $i elements of $changed and the first $j elements of |
343 * $other_changed both contain the same number of zeros (unchanged |
343 * $other_changed both contain the same number of zeros (unchanged |
344 * lines). |
344 * lines). |
345 * |
345 * |
346 * Furthermore, $j is always kept so that $j == $other_len or |
346 * Furthermore, $j is always kept so that $j == $other_len or |
347 * $other_changed[$j] == false. */ |
347 * $other_changed[$j] == false. */ |
348 while ($j < $other_len && $other_changed[$j]) { |
348 while ($j < $other_len && $other_changed[$j]) { |
349 $j++; |
349 $j++; |
350 } |
350 } |
351 |
351 |
352 while ($i < $len && ! $changed[$i]) { |
352 while ($i < $len && ! $changed[$i]) { |
353 assert('$j < $other_len && ! $other_changed[$j]'); |
353 assert('$j < $other_len && ! $other_changed[$j]'); |
354 $i++; $j++; |
354 $i++; $j++; |
355 while ($j < $other_len && $other_changed[$j]) { |
355 while ($j < $other_len && $other_changed[$j]) { |
356 $j++; |
356 $j++; |
357 } |
357 } |
358 } |
358 } |
359 |
359 |
360 if ($i == $len) { |
360 if ($i == $len) { |
361 break; |
361 break; |
362 } |
362 } |
363 |
363 |
364 $start = $i; |
364 $start = $i; |
365 |
365 |
366 /* Find the end of this run of changes. */ |
366 /* Find the end of this run of changes. */ |
367 while (++$i < $len && $changed[$i]) { |
367 while (++$i < $len && $changed[$i]) { |
368 continue; |
368 continue; |
369 } |
369 } |
370 |
370 |
371 do { |
371 do { |
372 /* Record the length of this run of changes, so that we can |
372 /* Record the length of this run of changes, so that we can |
373 * later determine whether the run has grown. */ |
373 * later determine whether the run has grown. */ |
374 $runlength = $i - $start; |
374 $runlength = $i - $start; |
375 |
375 |
376 /* Move the changed region back, so long as the previous |
376 /* Move the changed region back, so long as the previous |
377 * unchanged line matches the last changed one. This merges |
377 * unchanged line matches the last changed one. This merges |
378 * with previous changed regions. */ |
378 * with previous changed regions. */ |
379 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) { |
379 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) { |
380 $changed[--$start] = 1; |
380 $changed[--$start] = 1; |
381 $changed[--$i] = false; |
381 $changed[--$i] = false; |
382 while ($start > 0 && $changed[$start - 1]) { |
382 while ($start > 0 && $changed[$start - 1]) { |
383 $start--; |
383 $start--; |
384 } |
384 } |
385 assert('$j > 0'); |
385 assert('$j > 0'); |
386 while ($other_changed[--$j]) { |
386 while ($other_changed[--$j]) { |
387 continue; |
387 continue; |
388 } |
388 } |
389 assert('$j >= 0 && !$other_changed[$j]'); |
389 assert('$j >= 0 && !$other_changed[$j]'); |
390 } |
390 } |
391 |
391 |
392 /* Set CORRESPONDING to the end of the changed run, at the |
392 /* Set CORRESPONDING to the end of the changed run, at the |
393 * last point where it corresponds to a changed run in the |
393 * last point where it corresponds to a changed run in the |
394 * other file. CORRESPONDING == LEN means no such point has |
394 * other file. CORRESPONDING == LEN means no such point has |
395 * been found. */ |
395 * been found. */ |
396 $corresponding = $j < $other_len ? $i : $len; |
396 $corresponding = $j < $other_len ? $i : $len; |
397 |
397 |
398 /* Move the changed region forward, so long as the first |
398 /* Move the changed region forward, so long as the first |
399 * changed line matches the following unchanged one. This |
399 * changed line matches the following unchanged one. This |
400 * merges with following changed regions. Do this second, so |
400 * merges with following changed regions. Do this second, so |
401 * that if there are no merges, the changed region is moved |
401 * that if there are no merges, the changed region is moved |
402 * forward as far as possible. */ |
402 * forward as far as possible. */ |
403 while ($i < $len && $lines[$start] == $lines[$i]) { |
403 while ($i < $len && $lines[$start] == $lines[$i]) { |
404 $changed[$start++] = false; |
404 $changed[$start++] = false; |
405 $changed[$i++] = 1; |
405 $changed[$i++] = 1; |
406 while ($i < $len && $changed[$i]) { |
406 while ($i < $len && $changed[$i]) { |
407 $i++; |
407 $i++; |
408 } |
408 } |
409 |
409 |
410 assert('$j < $other_len && ! $other_changed[$j]'); |
410 assert('$j < $other_len && ! $other_changed[$j]'); |
411 $j++; |
411 $j++; |
412 if ($j < $other_len && $other_changed[$j]) { |
412 if ($j < $other_len && $other_changed[$j]) { |
413 $corresponding = $i; |
413 $corresponding = $i; |
414 while ($j < $other_len && $other_changed[$j]) { |
414 while ($j < $other_len && $other_changed[$j]) { |
415 $j++; |
415 $j++; |
416 } |
416 } |
417 } |
417 } |
418 } |
418 } |
419 } while ($runlength != $i - $start); |
419 } while ($runlength != $i - $start); |
420 |
420 |
421 /* If possible, move the fully-merged run of changes back to a |
421 /* If possible, move the fully-merged run of changes back to a |
422 * corresponding run in the other file. */ |
422 * corresponding run in the other file. */ |
423 while ($corresponding < $i) { |
423 while ($corresponding < $i) { |
424 $changed[--$start] = 1; |
424 $changed[--$start] = 1; |
425 $changed[--$i] = 0; |
425 $changed[--$i] = 0; |
426 assert('$j > 0'); |
426 assert('$j > 0'); |
427 while ($other_changed[--$j]) { |
427 while ($other_changed[--$j]) { |
428 continue; |
428 continue; |
429 } |
429 } |
430 assert('$j >= 0 && !$other_changed[$j]'); |
430 assert('$j >= 0 && !$other_changed[$j]'); |
431 } |
431 } |
432 } |
432 } |
433 } |
433 } |
434 |
434 |
435 } |
435 } |