No regular expressions were active.
1 |
|
// Diff_Ma
tch_Patch
v1.3
|
|
|
2 |
|
// Compute
s the diff
erence bet
ween two t
exts to cr
eate a pat
ch.
|
|
|
3 |
|
// Applies
the patch
onto anot
her text,
allowing f
or errors.
|
|
|
4 |
|
// Copyrig
ht (C) 200
6 Neil Fra
ser
|
|
|
5 |
|
// http://
neil.frase
r.name/sof
tware/diff
_match_pat
ch/
|
|
|
6 |
|
|
|
|
7 |
|
// This pr
ogram is f
ree softwa
re; you ca
n redistri
bute it an
d/or
|
|
|
8 |
|
// modify
it under t
he terms o
f the GNU
General Pu
blic Licen
se
|
|
|
9 |
|
// as publ
ished by t
he Free So
ftware Fou
ndation.
|
|
|
10 |
|
|
|
|
11 |
|
// This pr
ogram is d
istributed
in the ho
pe that it
will be u
seful,
|
|
|
12 |
|
// but WIT
HOUT ANY W
ARRANTY; w
ithout eve
n the impl
ied warran
ty of
|
|
|
13 |
|
// MERCHAN
TABILITY o
r FITNESS
FOR A PART
ICULAR PUR
POSE. See
the
|
|
|
14 |
|
// GNU Gen
eral Publi
c License
(www.gnu.o
rg) for mo
re details
.
|
|
|
15 |
|
|
|
|
16 |
|
|
|
|
17 |
|
// Constan
ts.
|
|
|
18 |
|
// Redefin
e these in
your prog
ram to ove
rride the
defaults.
|
|
|
19 |
|
|
|
|
20 |
|
// Number
of seconds
to map a
diff befor
e giving u
p. (0 for
infinity)
|
|
|
21 |
|
var DIFF_T
IMEOUT = 1
.0;
|
|
|
22 |
|
// Cost of
an empty
edit opera
tion in te
rms of edi
t characte
rs.
|
|
|
23 |
|
var DIFF_E
DIT_COST =
4;
|
|
|
24 |
|
// Tweak t
he relativ
e importan
ce (0.0 =
accuracy,
1.0 = prox
imity)
|
|
|
25 |
|
var MATCH_
BALANCE =
0.5;
|
|
|
26 |
|
// At what
point is
no match d
eclared (0
.0 = perfe
ction, 1.0
= very lo
ose)
|
|
|
27 |
|
var MATCH_
THRESHOLD
= 0.5;
|
|
|
28 |
|
// The min
and max c
utoffs use
d when com
puting tex
t lengths.
|
|
|
29 |
|
var MATCH_
MINLENGTH
= 100;
|
|
|
30 |
|
var MATCH_
MAXLENGTH
= 1000;
|
|
|
31 |
|
// Chunk s
ize for co
ntext leng
th.
|
|
|
32 |
|
var PATCH_
MARGIN = 4
;
|
|
|
33 |
|
|
|
|
34 |
|
|
|
|
35 |
|
////////
//////////
//////////
//////////
//////////
//////////
//////////
//
|
|
|
36 |
|
// Diff
/
/
|
|
|
37 |
|
//////////
//////////
//////////
//////////
//////////
//////////
//////////
|
|
|
38 |
|
|
|
|
39 |
|
// The dat
a structur
e represen
ting a dif
f is an ar
ray of tup
les:
|
|
|
40 |
|
// [[-1, "
Hello"], [
1, "Goodby
e"], [0, "
world."]]
|
|
|
41 |
|
// which m
eans: dele
te "Hello"
, add "Goo
dbye" and
keep " wor
ld."
|
|
|
42 |
|
|
|
|
43 |
|
|
|
|
44 |
|
function d
iff_main(t
ext1, text
2, checkli
nes) {
|
|
|
45 |
|
// Find
the differ
ences betw
een two te
xts. Retu
rn an arra
y of chang
es.
|
|
|
46 |
|
// If ch
ecklines i
s present
and false,
then don'
t run a li
ne-level d
iff first
to identif
y the chan
ged areas.
|
|
|
47 |
|
// Check
for equal
ity (speed
up)
|
|
|
48 |
|
if (text
1 == text2
)
|
|
|
49 |
|
return
[[0, text
1]];
|
|
|
50 |
|
|
|
|
51 |
|
if (type
of checkli
nes == 'un
defined')
|
|
|
52 |
|
checkl
ines = tru
e;
|
|
|
53 |
|
|
|
|
54 |
|
var a;
|
|
|
55 |
|
// Trim
off common
prefix (s
peedup)
|
|
|
56 |
|
a = diff
_prefix(te
xt1, text2
);
|
|
|
57 |
|
text1 =
a[0];
|
|
|
58 |
|
text2 =
a[1];
|
|
|
59 |
|
var comm
onprefix =
a[2];
|
|
|
60 |
|
|
|
|
61 |
|
// Trim
off common
suffix (s
peedup)
|
|
|
62 |
|
a = diff
_suffix(te
xt1, text2
);
|
|
|
63 |
|
text1 =
a[0];
|
|
|
64 |
|
text2 =
a[1];
|
|
|
65 |
|
var comm
onsuffix =
a[2];
|
|
|
66 |
|
|
|
|
67 |
|
var diff
, i;
|
|
|
68 |
|
var long
text = tex
t1.length
> text2.le
ngth ? tex
t1 : text2
;
|
|
|
69 |
|
var shor
ttext = te
xt1.length
> text2.l
ength ? te
xt2 : text
1;
|
|
|
70 |
|
|
|
|
71 |
|
if (!tex
t1) { //
Just add s
ome text (
speedup)
|
|
|
72 |
|
diff =
[[1, text
2]];
|
|
|
73 |
|
} else i
f (!text2)
{ // Just
delete so
me text (s
peedup)
|
|
|
74 |
|
diff =
[[-1, tex
t1]];
|
|
|
75 |
|
} else i
f ((i = lo
ngtext.ind
exOf(short
text)) !=
-1) {
|
|
|
76 |
|
// Sho
rter text
is inside
the longer
text (spe
edup)
|
|
|
77 |
|
diff =
[[1, long
text.subst
ring(0, i)
], [0, sho
rttext], [
1, longtex
t.substrin
g(i+shortt
ext.length
)]];
|
|
|
78 |
|
// Swa
p insertio
ns for del
etions if
diff is re
versed.
|
|
|
79 |
|
if (te
xt1.length
> text2.l
ength)
|
|
|
80 |
|
diff
[0][0] = d
iff[2][0]
= -1;
|
|
|
81 |
|
} else {
|
|
|
82 |
|
longte
xt = short
text = nul
l; // Garb
age collec
t
|
|
|
83 |
|
// Che
ck to see
if the pro
blem can b
e split in
two.
|
|
|
84 |
|
var hm
= diff_ha
lfmatch(te
xt1, text2
);
|
|
|
85 |
|
if (hm
) {
|
|
|
86 |
|
// A
half-matc
h was foun
d, sort ou
t the retu
rn data.
|
|
|
87 |
|
var
text1_a =
hm[0];
|
|
|
88 |
|
var
text1_b =
hm[1];
|
|
|
89 |
|
var
text2_a =
hm[2];
|
|
|
90 |
|
var
text2_b =
hm[3];
|
|
|
91 |
|
var
mid_common
= hm[4];
|
|
|
92 |
|
// S
end both p
airs off f
or separat
e processi
ng.
|
|
|
93 |
|
var
diff_a = d
iff_main(t
ext1_a, te
xt2_a, che
cklines);
|
|
|
94 |
|
var
diff_b = d
iff_main(t
ext1_b, te
xt2_b, che
cklines);
|
|
|
95 |
|
// M
erge the r
esults.
|
|
|
96 |
|
diff
= diff_a.
concat([[0
, mid_comm
on]], diff
_b);
|
|
|
97 |
|
} else
{
|
|
|
98 |
|
// P
erform a r
eal diff.
|
|
|
99 |
|
if (
checklines
&& text1.
length + t
ext2.lengt
h < 250)
|
|
|
100 |
|
ch
ecklines =
false; //
Too trivi
al for the
overhead.
|
|
|
101 |
|
if (
checklines
) {
|
|
|
102 |
|
//
Scan the
text on a
line-by-li
ne basis f
irst.
|
|
|
103 |
|
a
= diff_lin
es2chars(t
ext1, text
2);
|
|
|
104 |
|
te
xt1 = a[0]
;
|
|
|
105 |
|
te
xt2 = a[1]
;
|
|
|
106 |
|
va
r linearra
y = a[2];
|
|
|
107 |
|
}
|
|
|
108 |
|
diff
= diff_ma
p(text1, t
ext2);
|
|
|
109 |
|
if (
!diff) //
No accepta
ble result
.
|
|
|
110 |
|
di
ff = [[-1,
text1], [
1, text2]]
;
|
|
|
111 |
|
if (
checklines
) {
|
|
|
112 |
|
di
ff_chars2l
ines(diff,
linearray
); // Conv
ert the di
ff back to
original
text.
|
|
|
113 |
|
di
ff_cleanup
_semantic(
diff); //
Eliminate
freak matc
hes (e.g.
blank line
s)
|
|
|
114 |
|
|
|
|
115 |
|
//
Rediff an
y replacem
ent blocks
, this tim
e on chara
cter-by-ch
aracter ba
sis.
|
|
|
116 |
|
di
ff.push([0
, '']); /
/ Add a du
mmy entry
at the end
.
|
|
|
117 |
|
va
r pointer
= 0;
|
|
|
118 |
|
va
r count_de
lete = 0;
|
|
|
119 |
|
va
r count_in
sert = 0;
|
|
|
120 |
|
va
r text_del
ete = '';
|
|
|
121 |
|
va
r text_ins
ert = '';
|
|
|
122 |
|
wh
ile(pointe
r < diff.l
ength) {
|
|
|
123 |
|
if (diff[p
ointer][0]
== 1) {
|
|
|
124 |
|
count_in
sert++;
|
|
|
125 |
|
text_ins
ert += dif
f[pointer]
[1];
|
|
|
126 |
|
} else if
(diff[poin
ter][0] ==
-1) {
|
|
|
127 |
|
count_de
lete++;
|
|
|
128 |
|
text_del
ete += dif
f[pointer]
[1];
|
|
|
129 |
|
} else {
// Upon re
aching an
equality,
check for
prior redu
ndancies.
|
|
|
130 |
|
if (coun
t_delete >
= 1 && cou
nt_insert
>= 1) {
|
|
|
131 |
|
// Del
ete the of
fending re
cords and
add the me
rged ones.
|
|
|
132 |
|
a = di
ff_main(te
xt_delete,
text_inse
rt, false)
;
|
|
|
133 |
|
diff.s
plice(poin
ter - coun
t_delete -
count_ins
ert, count
_delete +
count_inse
rt);
|
|
|
134 |
|
pointe
r = pointe
r - count_
delete - c
ount_inser
t;
|
|
|
135 |
|
for (i
=a.length-
1; i>=0; i
--)
|
|
|
136 |
|
diff
.splice(po
inter, 0,
a[i]);
|
|
|
137 |
|
pointe
r = pointe
r + a.leng
th;
|
|
|
138 |
|
}
|
|
|
139 |
|
count_in
sert = 0;
|
|
|
140 |
|
count_de
lete = 0;
|
|
|
141 |
|
text_del
ete = '';
|
|
|
142 |
|
text_ins
ert = '';
|
|
|
143 |
|
}
|
|
|
144 |
|
pointer++;
|
|
|
145 |
|
}
|
|
|
146 |
|
di
ff.pop();
// Remove
the dummy
entry at
the end.
|
|
|
147 |
|
|
|
|
148 |
|
}
|
|
|
149 |
|
}
|
|
|
150 |
|
}
|
|
|
151 |
|
|
|
|
152 |
|
if (comm
onprefix)
|
|
|
153 |
|
diff.u
nshift([0,
commonpre
fix]);
|
|
|
154 |
|
if (comm
onsuffix)
|
|
|
155 |
|
diff.p
ush([0, co
mmonsuffix
]);
|
|
|
156 |
|
diff_cle
anup_merge
(diff);
|
|
|
157 |
|
return d
iff;
|
|
|
158 |
|
}
|
|
|
159 |
|
|
|
|
160 |
|
|
|
|
161 |
|
function d
iff_lines2
chars(text
1, text2)
{
|
|
|
162 |
|
// Split
text into
an array
of strings
.
|
|
|
163 |
|
// Reduc
e the text
s to a str
ing of has
hes where
each chara
cter repre
sents one
line.
|
|
|
164 |
|
var line
array = ne
w Array();
// linea
rray[4] ==
"Hello\n"
|
|
|
165 |
|
var line
hash = new
Object();
// lineh
ash["Hello
\n"] == 4
|
|
|
166 |
|
|
|
|
167 |
|
// "\x00
" is a val
id JavaScr
ipt charac
ter, but t
he Venkman
debugger
doesn't li
ke it (bug
335098)
|
|
|
168 |
|
// So we
'll insert
a junk en
try to avo
id generat
ing a null
character
.
|
|
|
169 |
|
linearra
y.push('')
;
|
|
|
170 |
|
|
|
|
171 |
|
function
diff_line
s2chars_mu
nge(text)
{
|
|
|
172 |
|
// My
first ever
closure!
|
|
|
173 |
|
var i,
line;
|
|
|
174 |
|
var ch
ars = '';
|
|
|
175 |
|
while
(text) {
|
|
|
176 |
|
i =
text.index
Of('\n');
|
|
|
177 |
|
if (
i == -1)
|
|
|
178 |
|
i
= text.len
gth;
|
|
|
179 |
|
line
= text.su
bstring(0,
i+1);
|
|
|
180 |
|
text
= text.su
bstring(i+
1);
|
|
|
181 |
|
if (
linehash.h
asOwnPrope
rty ? line
hash.hasOw
nProperty(
line) : (l
inehash[li
ne] !== un
defined))
{
|
|
|
182 |
|
ch
ars += Str
ing.fromCh
arCode(lin
ehash[line
]);
|
|
|
183 |
|
} el
se {
|
|
|
184 |
|
li
nearray.pu
sh(line);
|
|
|
185 |
|
li
nehash[lin
e] = linea
rray.lengt
h - 1;
|
|
|
186 |
|
ch
ars += Str
ing.fromCh
arCode(lin
earray.len
gth - 1);
|
|
|
187 |
|
}
|
|
|
188 |
|
}
|
|
|
189 |
|
return
chars;
|
|
|
190 |
|
}
|
|
|
191 |
|
|
|
|
192 |
|
var char
s1 = diff_
lines2char
s_munge(te
xt1);
|
|
|
193 |
|
var char
s2 = diff_
lines2char
s_munge(te
xt2);
|
|
|
194 |
|
return [
chars1, ch
ars2, line
array];
|
|
|
195 |
|
}
|
|
|
196 |
|
|
|
|
197 |
|
|
|
|
198 |
|
function d
iff_chars2
lines(diff
, linearra
y) {
|
|
|
199 |
|
// Rehyd
rate the t
ext in a d
iff from a
string of
line hash
es to real
lines of
text.
|
|
|
200 |
|
var char
s, text;
|
|
|
201 |
|
for (var
x=0; x<di
ff.length;
x++) {
|
|
|
202 |
|
chars
= diff[x][
1];
|
|
|
203 |
|
text =
'';
|
|
|
204 |
|
for (v
ar y=0; y<
chars.leng
th; y++)
|
|
|
205 |
|
text
+= linear
ray[chars.
charCodeAt
(y)];
|
|
|
206 |
|
diff[x
][1] = tex
t;
|
|
|
207 |
|
}
|
|
|
208 |
|
}
|
|
|
209 |
|
|
|
|
210 |
|
|
|
|
211 |
|
function d
iff_map(te
xt1, text2
) {
|
|
|
212 |
|
// Explo
re the int
ersection
points bet
ween the t
wo texts.
|
|
|
213 |
|
var now
= new Date
();
|
|
|
214 |
|
var ms_e
nd = now.g
etTime() +
DIFF_TIME
OUT * 1000
; // Don't
run for t
oo long.
|
|
|
215 |
|
var max
= (text1.l
ength + te
xt2.length
) / 2;
|
|
|
216 |
|
var v_ma
p1 = new A
rray();
|
|
|
217 |
|
var v_ma
p2 = new A
rray();
|
|
|
218 |
|
var v1 =
new Objec
t();
|
|
|
219 |
|
var v2 =
new Objec
t();
|
|
|
220 |
|
v1[1] =
0;
|
|
|
221 |
|
v2[1] =
0;
|
|
|
222 |
|
var x, y
;
|
|
|
223 |
|
var foot
step; // U
sed to tra
ck overlap
ping paths
.
|
|
|
224 |
|
var foot
steps = ne
w Object()
;
|
|
|
225 |
|
var done
= false;
|
|
|
226 |
|
var hasO
wnProperty
= !!(foot
steps.hasO
wnProperty
);
|
|
|
227 |
|
// If th
e total nu
mber of ch
aracters i
s odd, the
n the fron
t path wil
l collide
with the r
everse pat
h.
|
|
|
228 |
|
var fron
t = (text1
.length +
text2.leng
th) % 2;
|
|
|
229 |
|
for (var
d=0; d<ma
x; d++) {
|
|
|
230 |
|
now =
new Date()
;
|
|
|
231 |
|
if (DI
FF_TIMEOUT
> 0 && no
w.getTime(
) > ms_end
) // Timeo
ut reached
|
|
|
232 |
|
retu
rn null;
|
|
|
233 |
|
|
|
|
234 |
|
// Wal
k the fron
t path one
step.
|
|
|
235 |
|
v_map1
[d] = new
Object();
|
|
|
236 |
|
for (v
ar k=-d; k
<=d; k+=2)
{
|
|
|
237 |
|
if (
k == -d ||
k != d &&
v1[k-1] <
v1[k+1])
|
|
|
238 |
|
x
= v1[k+1];
|
|
|
239 |
|
else
|
|
|
240 |
|
x
= v1[k-1]+
1;
|
|
|
241 |
|
y =
x - k;
|
|
|
242 |
|
foot
step = x+"
,"+y;
|
|
|
243 |
|
if (
front && (
hasOwnProp
erty ? foo
tsteps.has
OwnPropert
y(footstep
) : (foots
teps[foots
tep] !== u
ndefined))
)
|
|
|
244 |
|
do
ne = true;
|
|
|
245 |
|
if (
!front)
|
|
|
246 |
|
fo
otsteps[fo
otstep] =
d;
|
|
|
247 |
|
whil
e (!done &
& x < text
1.length &
& y < text
2.length &
& text1.ch
arAt(x) ==
text2.cha
rAt(y)) {
|
|
|
248 |
|
x+
+; y++;
|
|
|
249 |
|
fo
otstep = x
+","+y;
|
|
|
250 |
|
if
(front &&
(hasOwnPr
operty ? f
ootsteps.h
asOwnPrope
rty(footst
ep) : (foo
tsteps[foo
tstep] !==
undefined
)))
|
|
|
251 |
|
done = tru
e;
|
|
|
252 |
|
if
(!front)
|
|
|
253 |
|
footsteps[
footstep]
= d;
|
|
|
254 |
|
}
|
|
|
255 |
|
v1[k
] = x;
|
|
|
256 |
|
v_ma
p1[d][x+",
"+y] = tru
e;
|
|
|
257 |
|
if (
done) {
|
|
|
258 |
|
//
Front pat
h ran over
reverse p
ath.
|
|
|
259 |
|
v_
map2 = v_m
ap2.slice(
0, footste
ps[footste
p]+1);
|
|
|
260 |
|
va
r a = diff
_path1(v_m
ap1, text1
.substring
(0, x), te
xt2.substr
ing(0, y))
;
|
|
|
261 |
|
re
turn a.con
cat(diff_p
ath2(v_map
2, text1.s
ubstring(x
), text2.s
ubstring(y
)));
|
|
|
262 |
|
}
|
|
|
263 |
|
}
|
|
|
264 |
|
|
|
|
265 |
|
// Wal
k the reve
rse path o
ne step.
|
|
|
266 |
|
v_map2
[d] = new
Object();
|
|
|
267 |
|
for (v
ar k=-d; k
<=d; k+=2)
{
|
|
|
268 |
|
if (
k == -d ||
k != d &&
v2[k-1] <
v2[k+1])
|
|
|
269 |
|
x
= v2[k+1];
|
|
|
270 |
|
else
|
|
|
271 |
|
x
= v2[k-1]+
1;
|
|
|
272 |
|
y =
x - k;
|
|
|
273 |
|
foot
step = (te
xt1.length
-x)+","+(t
ext2.lengt
h-y);
|
|
|
274 |
|
if (
!front &&
(hasOwnPro
perty ? fo
otsteps.ha
sOwnProper
ty(footste
p) : (foot
steps[foot
step] !==
undefined)
))
|
|
|
275 |
|
do
ne = true;
|
|
|
276 |
|
if (
front)
|
|
|
277 |
|
fo
otsteps[fo
otstep] =
d;
|
|
|
278 |
|
whil
e (!done &
& x < text
1.length &
& y < text
2.length &
& text1.ch
arAt(text1
.length-x-
1) == text
2.charAt(t
ext2.lengt
h-y-1)) {
|
|
|
279 |
|
x+
+; y++;
|
|
|
280 |
|
fo
otstep = (
text1.leng
th-x)+","+
(text2.len
gth-y);
|
|
|
281 |
|
if
(!front &
& (hasOwnP
roperty ?
footsteps.
hasOwnProp
erty(foots
tep) : (fo
otsteps[fo
otstep] !=
= undefine
d)))
|
|
|
282 |
|
done = tru
e;
|
|
|
283 |
|
if
(front)
|
|
|
284 |
|
footsteps[
footstep]
= d;
|
|
|
285 |
|
}
|
|
|
286 |
|
v2[k
] = x;
|
|
|
287 |
|
v_ma
p2[d][x+",
"+y] = tru
e;
|
|
|
288 |
|
if (
done) {
|
|
|
289 |
|
//
Reverse p
ath ran ov
er front p
ath.
|
|
|
290 |
|
v_
map1 = v_m
ap1.slice(
0, footste
ps[footste
p]+1);
|
|
|
291 |
|
va
r a = diff
_path1(v_m
ap1, text1
.substring
(0, text1.
length-x),
text2.sub
string(0,
text2.leng
th-y));
|
|
|
292 |
|
re
turn a.con
cat(diff_p
ath2(v_map
2, text1.s
ubstring(t
ext1.lengt
h-x), text
2.substrin
g(text2.le
ngth-y)));
|
|
|
293 |
|
}
|
|
|
294 |
|
}
|
|
|
295 |
|
}
|
|
|
296 |
|
// Numbe
r of diffs
equals nu
mber of ch
aracters,
no commona
lity at al
l.
|
|
|
297 |
|
return n
ull;
|
|
|
298 |
|
}
|
|
|
299 |
|
|
|
|
300 |
|
|
|
|
301 |
|
function d
iff_path1(
v_map, tex
t1, text2)
{
|
|
|
302 |
|
// Work
from the m
iddle back
to the st
art to det
ermine the
path.
|
|
|
303 |
|
var path
= [];
|
|
|
304 |
|
var x =
text1.leng
th;
|
|
|
305 |
|
var y =
text2.leng
th;
|
|
|
306 |
|
var last
_op = null
;
|
|
|
307 |
|
for (var
d=v_map.l
ength-2; d
>=0; d--)
{
|
|
|
308 |
|
while(
1) {
|
|
|
309 |
|
if (
v_map[d].h
asOwnPrope
rty ? v_ma
p[d].hasOw
nProperty(
(x-1)+","+
y) : (v_ma
p[d][(x-1)
+","+y] !=
= undefine
d)) {
|
|
|
310 |
|
x-
-;
|
|
|
311 |
|
if
(last_op
=== -1)
|
|
|
312 |
|
path[0][1]
= text1.c
harAt(x) +
path[0][1
];
|
|
|
313 |
|
el
se
|
|
|
314 |
|
path.unshi
ft([-1, te
xt1.charAt
(x)]);
|
|
|
315 |
|
la
st_op = -1
;
|
|
|
316 |
|
br
eak;
|
|
|
317 |
|
} el
se if (v_m
ap[d].hasO
wnProperty
? v_map[d
].hasOwnPr
operty(x+"
,"+(y-1))
: (v_map[d
][x+","+(y
-1)] !== u
ndefined))
{
|
|
|
318 |
|
y-
-;
|
|
|
319 |
|
if
(last_op
=== 1)
|
|
|
320 |
|
path[0][1]
= text2.c
harAt(y) +
path[0][1
];
|
|
|
321 |
|
el
se
|
|
|
322 |
|
path.unshi
ft([1, tex
t2.charAt(
y)]);
|
|
|
323 |
|
la
st_op = 1;
|
|
|
324 |
|
br
eak;
|
|
|
325 |
|
} el
se {
|
|
|
326 |
|
x-
-;
|
|
|
327 |
|
y-
-;
|
|
|
328 |
|
//
if (text1.
charAt(x)
!= text2.c
harAt(y))
|
|
|
329 |
|
//
return a
lert("No d
iagonal.
Can't happ
en. (diff_
path1)");
|
|
|
330 |
|
if
(last_op
=== 0)
|
|
|
331 |
|
path[0][1]
= text1.c
harAt(x) +
path[0][1
];
|
|
|
332 |
|
el
se
|
|
|
333 |
|
path.unshi
ft([0, tex
t1.charAt(
x)]);
|
|
|
334 |
|
la
st_op = 0;
|
|
|
335 |
|
}
|
|
|
336 |
|
}
|
|
|
337 |
|
}
|
|
|
338 |
|
return p
ath;
|
|
|
339 |
|
}
|
|
|
340 |
|
|
|
|
341 |
|
|
|
|
342 |
|
function d
iff_path2(
v_map, tex
t1, text2)
{
|
|
|
343 |
|
// Work
from the m
iddle back
to the en
d to deter
mine the p
ath.
|
|
|
344 |
|
var path
= [];
|
|
|
345 |
|
var x =
text1.leng
th;
|
|
|
346 |
|
var y =
text2.leng
th;
|
|
|
347 |
|
var last
_op = null
;
|
|
|
348 |
|
for (var
d=v_map.l
ength-2; d
>=0; d--)
{
|
|
|
349 |
|
while(
1) {
|
|
|
350 |
|
if (
v_map[d].h
asOwnPrope
rty ? v_ma
p[d].hasOw
nProperty(
(x-1)+","+
y) : (v_ma
p[d][(x-1)
+","+y] !=
= undefine
d)) {
|
|
|
351 |
|
x-
-;
|
|
|
352 |
|
if
(last_op
=== -1)
|
|
|
353 |
|
path[path.
length-1][
1] += text
1.charAt(t
ext1.lengt
h-x-1);
|
|
|
354 |
|
el
se
|
|
|
355 |
|
path.push(
[-1, text1
.charAt(te
xt1.length
-x-1)]);
|
|
|
356 |
|
la
st_op = -1
;
|
|
|
357 |
|
br
eak;
|
|
|
358 |
|
} el
se if (v_m
ap[d].hasO
wnProperty
? v_map[d
].hasOwnPr
operty(x+"
,"+(y-1))
: (v_map[d
][x+","+(y
-1)] !== u
ndefined))
{
|
|
|
359 |
|
y-
-;
|
|
|
360 |
|
if
(last_op
=== 1)
|
|
|
361 |
|
path[path.
length-1][
1] += text
2.charAt(t
ext2.lengt
h-y-1);
|
|
|
362 |
|
el
se
|
|
|
363 |
|
path.push(
[1, text2.
charAt(tex
t2.length-
y-1)]);
|
|
|
364 |
|
la
st_op = 1;
|
|
|
365 |
|
br
eak;
|
|
|
366 |
|
} el
se {
|
|
|
367 |
|
x-
-;
|
|
|
368 |
|
y-
-;
|
|
|
369 |
|
//
if (text1.
charAt(tex
t1.length-
x-1) != te
xt2.charAt
(text2.len
gth-y-1))
|
|
|
370 |
|
//
return a
lert("No d
iagonal.
Can't happ
en. (diff_
path2)");
|
|
|
371 |
|
if
(last_op
=== 0)
|
|
|
372 |
|
path[path.
length-1][
1] += text
1.charAt(t
ext1.lengt
h-x-1);
|
|
|
373 |
|
el
se
|
|
|
374 |
|
path.push(
[0, text1.
charAt(tex
t1.length-
x-1)]);
|
|
|
375 |
|
la
st_op = 0;
|
|
|
376 |
|
}
|
|
|
377 |
|
}
|
|
|
378 |
|
}
|
|
|
379 |
|
return p
ath;
|
|
|
380 |
|
}
|
|
|
381 |
|
|
|
|
382 |
|
|
|
|
383 |
|
function d
iff_prefix
(text1, te
xt2) {
|
|
|
384 |
|
// Trim
off common
prefix
|
|
|
385 |
|
var poin
termin = 0
;
|
|
|
386 |
|
var poin
termax = M
ath.min(te
xt1.length
, text2.le
ngth);
|
|
|
387 |
|
var poin
termid = p
ointermax;
|
|
|
388 |
|
while(po
intermin <
pointermi
d) {
|
|
|
389 |
|
if (te
xt1.substr
ing(0, poi
ntermid) =
= text2.su
bstring(0,
pointermi
d))
|
|
|
390 |
|
poin
termin = p
ointermid;
|
|
|
391 |
|
else
|
|
|
392 |
|
poin
termax = p
ointermid;
|
|
|
393 |
|
pointe
rmid = Mat
h.floor((p
ointermax
- pointerm
in) / 2 +
pointermin
);
|
|
|
394 |
|
}
|
|
|
395 |
|
var comm
onprefix =
text1.sub
string(0,
pointermid
);
|
|
|
396 |
|
text1 =
text1.subs
tring(poin
termid);
|
|
|
397 |
|
text2 =
text2.subs
tring(poin
termid);
|
|
|
398 |
|
return [
text1, tex
t2, common
prefix];
|
|
|
399 |
|
}
|
|
|
400 |
|
|
|
|
401 |
|
|
|
|
402 |
|
function d
iff_suffix
(text1, te
xt2) {
|
|
|
403 |
|
// Trim
off common
suffix
|
|
|
404 |
|
var poin
termin = 0
;
|
|
|
405 |
|
var poin
termax = M
ath.min(te
xt1.length
, text2.le
ngth);
|
|
|
406 |
|
var poin
termid = p
ointermax;
|
|
|
407 |
|
while(po
intermin <
pointermi
d) {
|
|
|
408 |
|
if (te
xt1.substr
ing(text1.
length-poi
ntermid) =
= text2.su
bstring(te
xt2.length
-pointermi
d))
|
|
|
409 |
|
poin
termin = p
ointermid;
|
|
|
410 |
|
else
|
|
|
411 |
|
poin
termax = p
ointermid;
|
|
|
412 |
|
pointe
rmid = Mat
h.floor((p
ointermax
- pointerm
in) / 2 +
pointermin
);
|
|
|
413 |
|
}
|
|
|
414 |
|
var comm
onsuffix =
text1.sub
string(tex
t1.length-
pointermid
);
|
|
|
415 |
|
text1 =
text1.subs
tring(0, t
ext1.lengt
h-pointerm
id);
|
|
|
416 |
|
text2 =
text2.subs
tring(0, t
ext2.lengt
h-pointerm
id);
|
|
|
417 |
|
return [
text1, tex
t2, common
suffix];
|
|
|
418 |
|
}
|
|
|
419 |
|
|
|
|
420 |
|
|
|
|
421 |
|
function d
iff_halfma
tch(text1,
text2) {
|
|
|
422 |
|
// Do th
e two text
s share a
substring
which is a
t least ha
lf the len
gth of the
longer te
xt?
|
|
|
423 |
|
var long
text = tex
t1.length
> text2.le
ngth ? tex
t1 : text2
;
|
|
|
424 |
|
var shor
ttext = te
xt1.length
> text2.l
ength ? te
xt2 : text
1;
|
|
|
425 |
|
if (long
text.lengt
h < 10 ||
shorttext.
length < 1
)
|
|
|
426 |
|
return
null; //
Pointless.
|
|
|
427 |
|
|
|
|
428 |
|
function
diff_half
match_i(lo
ngtext, sh
orttext, i
) {
|
|
|
429 |
|
// Sta
rt with a
1/4 length
substring
at positi
on i as a
seed.
|
|
|
430 |
|
var se
ed = longt
ext.substr
ing(i, i+M
ath.floor(
longtext.l
ength/4));
|
|
|
431 |
|
var j
= -1;
|
|
|
432 |
|
var be
st_common
= '';
|
|
|
433 |
|
var be
st_longtex
t_a, best_
longtext_b
, best_sho
rttext_a,
best_short
text_b;
|
|
|
434 |
|
while
((j = shor
ttext.inde
xOf(seed,
j+1)) != -
1) {
|
|
|
435 |
|
var
my_prefix
= diff_pre
fix(longte
xt.substri
ng(i), sho
rttext.sub
string(j))
;
|
|
|
436 |
|
var
my_suffix
= diff_suf
fix(longte
xt.substri
ng(0, i),
shorttext.
substring(
0, j));
|
|
|
437 |
|
if (
best_commo
n.length <
(my_suffi
x[2] + my_
prefix[2])
.length) {
|
|
|
438 |
|
be
st_common
= my_suffi
x[2] + my_
prefix[2];
|
|
|
439 |
|
be
st_longtex
t_a = my_s
uffix[0];
|
|
|
440 |
|
be
st_longtex
t_b = my_p
refix[0];
|
|
|
441 |
|
be
st_shortte
xt_a = my_
suffix[1];
|
|
|
442 |
|
be
st_shortte
xt_b = my_
prefix[1];
|
|
|
443 |
|
}
|
|
|
444 |
|
}
|
|
|
445 |
|
if (be
st_common.
length >=
longtext.l
ength/2)
|
|
|
446 |
|
retu
rn [best_l
ongtext_a,
best_long
text_b, be
st_shortte
xt_a, best
_shorttext
_b, best_c
ommon];
|
|
|
447 |
|
else
|
|
|
448 |
|
retu
rn null;
|
|
|
449 |
|
}
|
|
|
450 |
|
|
|
|
451 |
|
// First
check if
the second
quarter i
s the seed
for a hal
f-match.
|
|
|
452 |
|
var hm1
= diff_hal
fmatch_i(l
ongtext, s
horttext,
Math.ceil(
longtext.l
ength/4));
|
|
|
453 |
|
// Check
again bas
ed on the
third quar
ter.
|
|
|
454 |
|
var hm2
= diff_hal
fmatch_i(l
ongtext, s
horttext,
Math.ceil(
longtext.l
ength/2));
|
|
|
455 |
|
var hm;
|
|
|
456 |
|
if (!hm1
&& !hm2)
|
|
|
457 |
|
return
null;
|
|
|
458 |
|
else if
(!hm2)
|
|
|
459 |
|
hm = h
m1;
|
|
|
460 |
|
else if
(!hm1)
|
|
|
461 |
|
hm = h
m2;
|
|
|
462 |
|
else //
Both match
ed. Selec
t the long
est.
|
|
|
463 |
|
hm = h
m1[4].leng
th > hm2[4
].length ?
hm1 : hm2
;
|
|
|
464 |
|
|
|
|
465 |
|
// A hal
f-match wa
s found, s
ort out th
e return d
ata.
|
|
|
466 |
|
if (text
1.length >
text2.len
gth) {
|
|
|
467 |
|
var te
xt1_a = hm
[0];
|
|
|
468 |
|
var te
xt1_b = hm
[1];
|
|
|
469 |
|
var te
xt2_a = hm
[2];
|
|
|
470 |
|
var te
xt2_b = hm
[3];
|
|
|
471 |
|
} else {
|
|
|
472 |
|
var te
xt2_a = hm
[0];
|
|
|
473 |
|
var te
xt2_b = hm
[1];
|
|
|
474 |
|
var te
xt1_a = hm
[2];
|
|
|
475 |
|
var te
xt1_b = hm
[3];
|
|
|
476 |
|
}
|
|
|
477 |
|
var mid_
common = h
m[4];
|
|
|
478 |
|
return [
text1_a, t
ext1_b, te
xt2_a, tex
t2_b, mid_
common];
|
|
|
479 |
|
}
|
|
|
480 |
|
|
|
|
481 |
|
|
|
|
482 |
|
function d
iff_cleanu
p_semantic
(diff) {
|
|
|
483 |
|
// Reduc
e the numb
er of edit
s by elimi
nating sem
antically
trivial eq
ualities.
|
|
|
484 |
|
var chan
ges = fals
e;
|
|
|
485 |
|
var equa
lities = [
]; // Stac
k of indic
es where e
qualities
are found.
|
|
|
486 |
|
var last
equality =
null; //
Always equ
al to equa
lities[equ
alities.le
ngth-1][1]
|
|
|
487 |
|
var poin
ter = 0; /
/ Index of
current p
osition.
|
|
|
488 |
|
var leng
th_changes
1 = 0; //
Number of
characters
that chan
ged prior
to the equ
ality.
|
|
|
489 |
|
var leng
th_changes
2 = 0; //
Number of
characters
that chan
ged after
the equali
ty.
|
|
|
490 |
|
while (p
ointer < d
iff.length
) {
|
|
|
491 |
|
if (di
ff[pointer
][0] == 0)
{ // equa
lity found
|
|
|
492 |
|
equa
lities.pus
h(pointer)
;
|
|
|
493 |
|
leng
th_changes
1 = length
_changes2;
|
|
|
494 |
|
leng
th_changes
2 = 0;
|
|
|
495 |
|
last
equality =
diff[poin
ter][1];
|
|
|
496 |
|
} else
{ // an i
nsertion o
r deletion
|
|
|
497 |
|
leng
th_changes
2 += diff[
pointer][1
].length;
|
|
|
498 |
|
if (
lastequali
ty != null
&& (laste
quality.le
ngth <= le
ngth_chang
es1) && (l
astequalit
y.length <
= length_c
hanges2))
{
|
|
|
499 |
|
//
alert("Spl
itting: '"
+lastequal
ity+"'");
|
|
|
500 |
|
di
ff.splice(
equalities
[equalitie
s.length-1
], 0, [-1,
lastequal
ity]); //
Duplicate
record
|
|
|
501 |
|
di
ff[equalit
ies[equali
ties.lengt
h-1]+1][0]
= 1; // C
hange seco
nd copy to
insert.
|
|
|
502 |
|
eq
ualities.p
op(); //
Throw away
the equal
ity we jus
t deleted;
|
|
|
503 |
|
eq
ualities.p
op(); //
Throw away
the previ
ous equali
ty;
|
|
|
504 |
|
po
inter = eq
ualities.l
ength ? eq
ualities[e
qualities.
length-1]
: -1;
|
|
|
505 |
|
le
ngth_chang
es1 = 0; /
/ Reset th
e counters
.
|
|
|
506 |
|
le
ngth_chang
es2 = 0;
|
|
|
507 |
|
la
stequality
= null;
|
|
|
508 |
|
ch
anges = tr
ue;
|
|
|
509 |
|
}
|
|
|
510 |
|
}
|
|
|
511 |
|
pointe
r++;
|
|
|
512 |
|
}
|
|
|
513 |
|
|
|
|
514 |
|
if (chan
ges)
|
|
|
515 |
|
diff_c
leanup_mer
ge(diff);
|
|
|
516 |
|
}
|
|
|
517 |
|
|
|
|
518 |
|
|
|
|
519 |
|
function d
iff_cleanu
p_efficien
cy(diff) {
|
|
|
520 |
|
// Reduc
e the numb
er of edit
s by elimi
nating ope
rationally
trivial e
qualities.
|
|
|
521 |
|
var chan
ges = fals
e;
|
|
|
522 |
|
var equa
lities = [
]; // Stac
k of indic
es where e
qualities
are found.
|
|
|
523 |
|
var last
equality =
''; // Al
ways equal
to equali
ties[equal
ities.leng
th-1][1]
|
|
|
524 |
|
var poin
ter = 0; /
/ Index of
current p
osition.
|
|
|
525 |
|
var pre_
ins = fals
e; // Is t
here an in
sertion op
eration be
fore the l
ast equali
ty.
|
|
|
526 |
|
var pre_
del = fals
e; // Is t
here an de
letion ope
ration bef
ore the la
st equalit
y.
|
|
|
527 |
|
var post
_ins = fal
se; // Is
there an i
nsertion o
peration a
fter the l
ast equali
ty.
|
|
|
528 |
|
var post
_del = fal
se; // Is
there an d
eletion op
eration af
ter the la
st equalit
y.
|
|
|
529 |
|
while (p
ointer < d
iff.length
) {
|
|
|
530 |
|
if (di
ff[pointer
][0] == 0)
{ // equa
lity found
|
|
|
531 |
|
if (
diff[point
er][1].len
gth < DIFF
_EDIT_COST
&& (post_
ins || pos
t_del)) {
|
|
|
532 |
|
//
Candidate
found.
|
|
|
533 |
|
eq
ualities.p
ush(pointe
r);
|
|
|
534 |
|
pr
e_ins = po
st_ins;
|
|
|
535 |
|
pr
e_del = po
st_del;
|
|
|
536 |
|
la
stequality
= diff[po
inter][1];
|
|
|
537 |
|
} el
se {
|
|
|
538 |
|
//
Not a can
didate, an
d can neve
r become o
ne.
|
|
|
539 |
|
eq
ualities =
[];
|
|
|
540 |
|
la
stequality
= '';
|
|
|
541 |
|
}
|
|
|
542 |
|
post
_ins = pos
t_del = fa
lse;
|
|
|
543 |
|
} else
{ // an i
nsertion o
r deletion
|
|
|
544 |
|
if (
diff[point
er][0] ==
-1)
|
|
|
545 |
|
po
st_del = t
rue;
|
|
|
546 |
|
else
|
|
|
547 |
|
po
st_ins = t
rue;
|
|
|
548 |
|
// F
ive types
to be spli
t:
|
|
|
549 |
|
// <
ins>A</ins
><del>B</d
el>XY<ins>
C</ins><de
l>D</del>
|
|
|
550 |
|
// <
ins>A</ins
>X<ins>C</
ins><del>D
</del>
|
|
|
551 |
|
// <
ins>A</ins
><del>B</d
el>X<ins>C
</ins>
|
|
|
552 |
|
// <
ins>A</del
>X<ins>C</
ins><del>D
</del>
|
|
|
553 |
|
// <
ins>A</ins
><del>B</d
el>X<del>C
</del>
|
|
|
554 |
|
if (
lastequali
ty && ((pr
e_ins && p
re_del &&
post_ins &
& post_del
) || ((las
tequality.
length < D
IFF_EDIT_C
OST/2) &&
(pre_ins +
pre_del +
post_ins
+ post_del
) == 3)))
{
|
|
|
555 |
|
//
alert("Spl
itting: '"
+lastequal
ity+"'");
|
|
|
556 |
|
di
ff.splice(
equalities
[equalitie
s.length-1
], 0, [-1,
lastequal
ity]); //
Duplicate
record
|
|
|
557 |
|
di
ff[equalit
ies[equali
ties.lengt
h-1]+1][0]
= 1; // C
hange seco
nd copy to
insert.
|
|
|
558 |
|
eq
ualities.p
op(); //
Throw away
the equal
ity we jus
t deleted;
|
|
|
559 |
|
la
stequality
= '';
|
|
|
560 |
|
if
(pre_ins
&& pre_del
) {
|
|
|
561 |
|
// No chan
ges made w
hich could
affect pr
evious ent
ry, keep g
oing.
|
|
|
562 |
|
post_ins =
post_del
= true;
|
|
|
563 |
|
equalities
= [];
|
|
|
564 |
|
}
else {
|
|
|
565 |
|
equalities
.pop(); /
/ Throw aw
ay the pre
vious equa
lity;
|
|
|
566 |
|
pointer =
equalities
.length ?
equalities
[equalitie
s.length-1
] : -1;
|
|
|
567 |
|
post_ins =
post_del
= false;
|
|
|
568 |
|
}
|
|
|
569 |
|
ch
anges = tr
ue;
|
|
|
570 |
|
}
|
|
|
571 |
|
}
|
|
|
572 |
|
pointe
r++;
|
|
|
573 |
|
}
|
|
|
574 |
|
|
|
|
575 |
|
if (chan
ges)
|
|
|
576 |
|
diff_c
leanup_mer
ge(diff);
|
|
|
577 |
|
}
|
|
|
578 |
|
|
|
|
579 |
|
|
|
|
580 |
|
function d
iff_cleanu
p_merge(di
ff) {
|
|
|
581 |
|
// Reord
er and mer
ge like ed
it section
s. Merge
equalities
.
|
|
|
582 |
|
// Any e
dit sectio
n can move
as long a
s it doesn
't cross a
n equality
.
|
|
|
583 |
|
diff.pus
h([0, ''])
; // Add
a dummy en
try at the
end.
|
|
|
584 |
|
var poin
ter = 0;
|
|
|
585 |
|
var coun
t_delete =
0;
|
|
|
586 |
|
var coun
t_insert =
0;
|
|
|
587 |
|
var text
_delete =
'';
|
|
|
588 |
|
var text
_insert =
'';
|
|
|
589 |
|
var reco
rd_insert,
record_de
lete;
|
|
|
590 |
|
var my_x
fix;
|
|
|
591 |
|
while(po
inter < di
ff.length)
{
|
|
|
592 |
|
if (di
ff[pointer
][0] == 1)
{
|
|
|
593 |
|
coun
t_insert++
;
|
|
|
594 |
|
text
_insert +=
diff[poin
ter][1];
|
|
|
595 |
|
poin
ter++;
|
|
|
596 |
|
} else
if (diff[
pointer][0
] == -1) {
|
|
|
597 |
|
coun
t_delete++
;
|
|
|
598 |
|
text
_delete +=
diff[poin
ter][1];
|
|
|
599 |
|
poin
ter++;
|
|
|
600 |
|
} else
{ // Upo
n reaching
an equali
ty, check
for prior
redundanci
es.
|
|
|
601 |
|
if (
count_dele
te > 1 ||
count_inse
rt > 1) {
|
|
|
602 |
|
if
(count_de
lete > 1 &
& count_in
sert > 1)
{
|
|
|
603 |
|
// Factor
out any co
mmon prefi
xies.
|
|
|
604 |
|
my_xfix =
diff_prefi
x(text_ins
ert, text_
delete);
|
|
|
605 |
|
if (my_xfi
x[2] != ''
) {
|
|
|
606 |
|
if ((poi
nter - cou
nt_delete
- count_in
sert) > 0
&& diff[po
inter - co
unt_delete
- count_i
nsert - 1]
[0] == 0)
{
|
|
|
607 |
|
text_i
nsert = my
_xfix[0];
|
|
|
608 |
|
text_d
elete = my
_xfix[1];
|
|
|
609 |
|
diff[p
ointer - c
ount_delet
e - count_
insert - 1
][1] += my
_xfix[2];
|
|
|
610 |
|
}
|
|
|
611 |
|
}
|
|
|
612 |
|
// Factor
out any co
mmon suffi
xies.
|
|
|
613 |
|
my_xfix =
diff_suffi
x(text_ins
ert, text_
delete);
|
|
|
614 |
|
if (my_xfi
x[2] != ''
) {
|
|
|
615 |
|
text_ins
ert = my_x
fix[0];
|
|
|
616 |
|
text_del
ete = my_x
fix[1];
|
|
|
617 |
|
diff[poi
nter][1] =
my_xfix[2
] + diff[p
ointer][1]
;
|
|
|
618 |
|
}
|
|
|
619 |
|
}
|
|
|
620 |
|
//
Delete th
e offendin
g records
and add th
e merged o
nes.
|
|
|
621 |
|
if
(count_de
lete == 0)
|
|
|
622 |
|
diff.splic
e(pointer
- count_de
lete - cou
nt_insert,
count_del
ete + coun
t_insert,
[1, text_i
nsert]);
|
|
|
623 |
|
el
se if (cou
nt_insert
== 0)
|
|
|
624 |
|
diff.splic
e(pointer
- count_de
lete - cou
nt_insert,
count_del
ete + coun
t_insert,
[-1, text_
delete]);
|
|
|
625 |
|
el
se
|
|
|
626 |
|
diff.splic
e(pointer
- count_de
lete - cou
nt_insert,
count_del
ete + coun
t_insert,
[-1, text_
delete], [
1, text_in
sert]);
|
|
|
627 |
|
po
inter = po
inter - co
unt_delete
- count_i
nsert + (c
ount_delet
e ? 1 : 0)
+ (count_
insert ? 1
: 0) + 1;
|
|
|
628 |
|
} el
se if (poi
nter != 0
&& diff[po
inter-1][0
] == 0) {
|
|
|
629 |
|
//
Merge thi
s equality
with the
previous o
ne.
|
|
|
630 |
|
di
ff[pointer
-1][1] +=
diff[point
er][1];
|
|
|
631 |
|
di
ff.splice(
pointer, 1
);
|
|
|
632 |
|
} el
se {
|
|
|
633 |
|
po
inter++;
|
|
|
634 |
|
}
|
|
|
635 |
|
coun
t_insert =
0;
|
|
|
636 |
|
coun
t_delete =
0;
|
|
|
637 |
|
text
_delete =
'';
|
|
|
638 |
|
text
_insert =
'';
|
|
|
639 |
|
}
|
|
|
640 |
|
}
|
|
|
641 |
|
if (diff
[diff.leng
th-1][1] =
= '')
|
|
|
642 |
|
diff.p
op(); //
Remove the
dummy ent
ry at the
end.
|
|
|
643 |
|
}
|
|
|
644 |
|
|
|
|
645 |
|
|
|
|
646 |
|
function d
iff_addind
ex(diff) {
|
|
|
647 |
|
// Add a
n index to
each tupl
e, represe
nts where
the tuple
is located
in text2.
|
|
|
648 |
|
// e.g.
[[-1, 'h',
0], [1, '
c', 0], [0
, 'at', 1]
]
|
|
|
649 |
|
var i =
0;
|
|
|
650 |
|
for (var
x=0; x<di
ff.length;
x++) {
|
|
|
651 |
|
diff[x
].push(i);
|
|
|
652 |
|
if (di
ff[x][0] !
= -1)
|
|
|
653 |
|
i +=
diff[x][1
].length;
|
|
|
654 |
|
}
|
|
|
655 |
|
}
|
|
|
656 |
|
|
|
|
657 |
|
|
|
|
658 |
|
function d
iff_xindex
(diff, loc
) {
|
|
|
659 |
|
// loc i
s a locati
on in text
1, compute
and retur
n the equi
valent loc
ation in t
ext2.
|
|
|
660 |
|
// e.g.
"The cat"
vs "The bi
g cat", 1-
>1, 5->8
|
|
|
661 |
|
var char
s1 = 0;
|
|
|
662 |
|
var char
s2 = 0;
|
|
|
663 |
|
var last
_chars1 =
0;
|
|
|
664 |
|
var last
_chars2 =
0;
|
|
|
665 |
|
for (var
x=0; x<di
ff.length;
x++) {
|
|
|
666 |
|
if (di
ff[x][0] !
= 1) // Eq
uality or
deletion.
|
|
|
667 |
|
char
s1 += diff
[x][1].len
gth;
|
|
|
668 |
|
if (di
ff[x][0] !
= -1) // E
quality or
insertion
.
|
|
|
669 |
|
char
s2 += diff
[x][1].len
gth;
|
|
|
670 |
|
if (ch
ars1 > loc
) // Overs
hot the lo
cation.
|
|
|
671 |
|
brea
k;
|
|
|
672 |
|
last_c
hars1 = ch
ars1;
|
|
|
673 |
|
last_c
hars2 = ch
ars2;
|
|
|
674 |
|
}
|
|
|
675 |
|
if (diff
.length !=
x && diff
[x][0] ==
-1) // The
location
was delete
d.
|
|
|
676 |
|
return
last_char
s2;
|
|
|
677 |
|
// Add t
he remaini
ng charact
er length.
|
|
|
678 |
|
return l
ast_chars2
+ (loc -
last_chars
1);
|
|
|
679 |
|
}
|
|
|
680 |
|
|
|
|
681 |
|
|
|
|
682 |
|
function d
iff_pretty
html(diff)
{
|
|
|
683 |
|
// Conve
rt a diff
array into
a pretty
HTML repor
t.
|
|
|
684 |
|
diff_add
index(diff
);
|
|
|
685 |
|
var html
= '';
|
|
|
686 |
|
for (var
x=0; x<di
ff.length;
x++) {
|
|
|
687 |
|
var m
= diff[x][
0]; // Mod
e (-1=dele
te, 0=copy
, 1=add)
|
|
|
688 |
|
var t
= diff[x][
1]; // Tex
t of chang
e.
|
|
|
689 |
|
var i
= diff[x][
2]; // Ind
ex of chan
ge.
|
|
|
690 |
|
t = t.
replace(/&
/g, "&
").replace
(/</g, "&l
t;").repla
ce(/>/g, "
>");
|
|
|
691 |
|
t = t.
replace(/\
n/g, "&par
a;<BR>");
|
|
|
692 |
|
if (m
== -1)
|
|
|
693 |
|
html
+= "<DEL
STYLE='bac
kground:#F
FE6E6;' TI
TLE='i="+i
+"'>"+t+"<
/DEL>";
|
|
|
694 |
|
else i
f (m == 1)
|
|
|
695 |
|
html
+= "<INS
STYLE='bac
kground:#E
6FFE6;' TI
TLE='i="+i
+"'>"+t+"<
/INS>";
|
|
|
696 |
|
else
|
|
|
697 |
|
html
+= "<SPAN
TITLE='i=
"+i+"'>"+t
+"</SPAN>"
;
|
|
|
698 |
|
}
|
|
|
699 |
|
return h
tml;
|
|
|
700 |
|
}
|
|
|
701 |
|
|
|
|
702 |
|
|
|
|
703 |
|
////////
//////////
//////////
//////////
//////////
//////////
//////////
//
|
|
|
704 |
|
// Match
/
/
|
|
|
705 |
|
//////////
//////////
//////////
//////////
//////////
//////////
//////////
|
|
|
706 |
|
|
|
|
707 |
|
|
|
|
708 |
|
function m
atch_getma
xbits() {
|
|
|
709 |
|
// Compu
te the num
ber of bit
s in an in
t.
|
|
|
710 |
|
// The n
ormal answ
er for Jav
aScript is
32.
|
|
|
711 |
|
var maxb
its = 0;
|
|
|
712 |
|
var oldi
= 1;
|
|
|
713 |
|
var newi
= 2;
|
|
|
714 |
|
while (o
ldi != new
i) {
|
|
|
715 |
|
maxbit
s++;
|
|
|
716 |
|
oldi =
newi;
|
|
|
717 |
|
newi =
newi << 1
;
|
|
|
718 |
|
}
|
|
|
719 |
|
return m
axbits;
|
|
|
720 |
|
}
|
|
|
721 |
|
var MATCH_
MAXBITS =
match_getm
axbits();
|
|
|
722 |
|
|
|
|
723 |
|
|
|
|
724 |
|
function m
atch_main(
text, patt
ern, loc)
{
|
|
|
725 |
|
// Locat
e the best
instance
of 'patter
n' in 'tex
t' near 'l
oc'.
|
|
|
726 |
|
loc = Ma
th.max(0,
Math.min(l
oc, text.l
ength-patt
ern.length
));
|
|
|
727 |
|
if (text
== patter
n) {
|
|
|
728 |
|
// Sho
rtcut (pot
entially n
ot guarant
eed by the
algorithm
)
|
|
|
729 |
|
return
0;
|
|
|
730 |
|
} else i
f (text.le
ngth == 0)
{
|
|
|
731 |
|
// Not
hing to ma
tch.
|
|
|
732 |
|
return
null;
|
|
|
733 |
|
} else i
f (text.su
bstring(lo
c, loc + p
attern.len
gth) == pa
ttern) {
|
|
|
734 |
|
// Per
fect match
at the pe
rfect spot
! (Includ
es case of
null patt
ern)
|
|
|
735 |
|
return
loc;
|
|
|
736 |
|
} else {
|
|
|
737 |
|
// Do
a fuzzy co
mpare.
|
|
|
738 |
|
var ma
tch = matc
h_bitap(te
xt, patter
n, loc);
|
|
|
739 |
|
return
match;
|
|
|
740 |
|
}
|
|
|
741 |
|
}
|
|
|
742 |
|
|
|
|
743 |
|
|
|
|
744 |
|
function m
atch_bitap
(text, pat
tern, loc)
{
|
|
|
745 |
|
// Locat
e the best
instance
of 'patter
n' in 'tex
t' near 'l
oc' using
the Bitap
algorithm.
|
|
|
746 |
|
if (patt
ern.length
> MATCH_M
AXBITS)
|
|
|
747 |
|
return
alert("Pa
ttern too
long for t
his browse
r.");
|
|
|
748 |
|
|
|
|
749 |
|
// Initi
alise the
alphabet.
|
|
|
750 |
|
var s =
match_alph
abet(patte
rn);
|
|
|
751 |
|
|
|
|
752 |
|
var scor
e_text_len
gth = text
.length;
|
|
|
753 |
|
// Coerc
e the text
length be
tween reas
onable max
imums and
minimums.
|
|
|
754 |
|
score_te
xt_length
= Math.max
(score_tex
t_length,
MATCH_MINL
ENGTH);
|
|
|
755 |
|
score_te
xt_length
= Math.min
(score_tex
t_length,
MATCH_MAXL
ENGTH);
|
|
|
756 |
|
|
|
|
757 |
|
function
match_bit
ap_score (
e, x) {
|
|
|
758 |
|
// Com
pute and r
eturn the
score for
a match wi
th e error
s and x lo
cation.
|
|
|
759 |
|
var d
= Math.abs
(loc-x);
|
|
|
760 |
|
return
(e / patt
ern.length
/ MATCH_B
ALANCE) +
(d / score
_text_leng
th / (1.0
- MATCH_BA
LANCE));
|
|
|
761 |
|
}
|
|
|
762 |
|
|
|
|
763 |
|
// Highe
st score b
eyond whic
h we give
up.
|
|
|
764 |
|
var scor
e_threshol
d = MATCH_
THRESHOLD;
|
|
|
765 |
|
// Is th
ere a near
by exact m
atch? (spe
edup)
|
|
|
766 |
|
var best
_loc = tex
t.indexOf(
pattern, l
oc);
|
|
|
767 |
|
if (best
_loc != -1
)
|
|
|
768 |
|
score_
threshold
= Math.min
(match_bit
ap_score(0
, best_loc
), score_t
hreshold);
|
|
|
769 |
|
// What
about in t
he other d
irection?
(speedup)
|
|
|
770 |
|
best_loc
= text.la
stIndexOf(
pattern, l
oc+pattern
.length);
|
|
|
771 |
|
if (best
_loc != -1
)
|
|
|
772 |
|
score_
threshold
= Math.min
(match_bit
ap_score(0
, best_loc
), score_t
hreshold);
|
|
|
773 |
|
|
|
|
774 |
|
// Initi
alise the
bit arrays
.
|
|
|
775 |
|
var r =
Array();
|
|
|
776 |
|
var d =
-1;
|
|
|
777 |
|
var matc
hmask = Ma
th.pow(2,
pattern.le
ngth-1);
|
|
|
778 |
|
best_loc
= null;
|
|
|
779 |
|
|
|
|
780 |
|
var bin_
min, bin_m
id;
|
|
|
781 |
|
var bin_
max = Math
.max(loc+l
oc, text.l
ength);
|
|
|
782 |
|
var last
_rd;
|
|
|
783 |
|
for (var
d=0; d<pa
ttern.leng
th; d++) {
|
|
|
784 |
|
// Sca
n for the
best match
; each ite
ration all
ows for on
e more err
or.
|
|
|
785 |
|
var rd
= Array(t
ext.length
);
|
|
|
786 |
|
|
|
|
787 |
|
// Run
a binary
search to
determine
how far fr
om 'loc' w
e can stra
y at this
error leve
l.
|
|
|
788 |
|
bin_mi
n = loc;
|
|
|
789 |
|
bin_mi
d = bin_ma
x;
|
|
|
790 |
|
while(
bin_min <
bin_mid) {
|
|
|
791 |
|
if (
match_bita
p_score(d,
bin_mid)
< score_th
reshold)
|
|
|
792 |
|
bi
n_min = bi
n_mid;
|
|
|
793 |
|
else
|
|
|
794 |
|
bi
n_max = bi
n_mid;
|
|
|
795 |
|
bin_
mid = Math
.floor((bi
n_max - bi
n_min) / 2
+ bin_min
);
|
|
|
796 |
|
}
|
|
|
797 |
|
bin_ma
x = bin_mi
d; // Use
the result
from this
iteration
as the ma
ximum for
the next.
|
|
|
798 |
|
var st
art = Math
.max(0, lo
c - (bin_m
id - loc)
- 1);
|
|
|
799 |
|
var fi
nish = Mat
h.min(text
.length-1,
pattern.l
ength + bi
n_mid);
|
|
|
800 |
|
|
|
|
801 |
|
if (te
xt.charAt(
finish) ==
pattern.c
harAt(patt
ern.length
-1))
|
|
|
802 |
|
rd[f
inish] = M
ath.pow(2,
d+1)-1;
|
|
|
803 |
|
else
|
|
|
804 |
|
rd[f
inish] = M
ath.pow(2,
d)-1;
|
|
|
805 |
|
for (v
ar j=finis
h-1; j>=st
art; j--)
{
|
|
|
806 |
|
// T
he alphabe
t (s) is a
sparse ha
sh, so the
following
lines gen
erate warn
ings.
|
|
|
807 |
|
if (
d == 0) //
First pas
s: exact m
atch.
|
|
|
808 |
|
rd
[j] = ((rd
[j+1] << 1
) | 1) & s
[text.char
At(j)];
|
|
|
809 |
|
else
// Subseq
uent passe
s: fuzzy m
atch.
|
|
|
810 |
|
rd
[j] = ((rd
[j+1] << 1
) | 1) & s
[text.char
At(j)] | (
(last_rd[j
+1] << 1)
| 1) | ((l
ast_rd[j]
<< 1) | 1)
| last_rd
[j+1];
|
|
|
811 |
|
if (
rd[j] & ma
tchmask) {
|
|
|
812 |
|
va
r score =
match_bita
p_score(d,
j);
|
|
|
813 |
|
//
This matc
h will alm
ost certai
nly be bet
ter than a
ny existin
g match.
But check
anyway.
|
|
|
814 |
|
if
(score <=
score_thr
eshold) {
|
|
|
815 |
|
// Told yo
u so.
|
|
|
816 |
|
score_thre
shold = sc
ore;
|
|
|
817 |
|
best_loc =
j;
|
|
|
818 |
|
if (j > lo
c) {
|
|
|
819 |
|
// When
passing lo
c, don't e
xceed our
current di
stance fro
m loc.
|
|
|
820 |
|
start =
Math.max(0
, loc - (j
- loc));
|
|
|
821 |
|
} else {
|
|
|
822 |
|
// Alrea
dy passed
loc, downh
ill from h
ere on in.
|
|
|
823 |
|
break;
|
|
|
824 |
|
}
|
|
|
825 |
|
}
|
|
|
826 |
|
}
|
|
|
827 |
|
}
|
|
|
828 |
|
if (ma
tch_bitap_
score(d+1,
loc) > sc
ore_thresh
old) // No
hope for
a (better)
match at
greater er
ror levels
.
|
|
|
829 |
|
brea
k;
|
|
|
830 |
|
last_r
d = rd;
|
|
|
831 |
|
}
|
|
|
832 |
|
return b
est_loc;
|
|
|
833 |
|
}
|
|
|
834 |
|
|
|
|
835 |
|
|
|
|
836 |
|
function m
atch_alpha
bet(patter
n) {
|
|
|
837 |
|
// Initi
alise the
alphabet f
or the Bit
ap algorit
hm.
|
|
|
838 |
|
var s =
Object();
|
|
|
839 |
|
for (var
i=0; i<pa
ttern.leng
th; i++)
|
|
|
840 |
|
s[patt
ern.charAt
(i)] = 0;
|
|
|
841 |
|
for (var
i=0; i<pa
ttern.leng
th; i++)
|
|
|
842 |
|
s[patt
ern.charAt
(i)] |= Ma
th.pow(2,
pattern.le
ngth-i-1);
|
|
|
843 |
|
return s
;
|
|
|
844 |
|
}
|
|
|
845 |
|
|
|
|
846 |
|
|
|
|
847 |
|
////////
//////////
//////////
//////////
//////////
//////////
//////////
//
|
|
|
848 |
|
// Patch
/
/
|
|
|
849 |
|
//////////
//////////
//////////
//////////
//////////
//////////
//////////
|
|
|
850 |
|
|
|
|
851 |
|
|
|
|
852 |
|
function p
atch_obj()
{
|
|
|
853 |
|
// Const
ructor for
a patch o
bject.
|
|
|
854 |
|
this.dif
fs = [];
|
|
|
855 |
|
this.sta
rt1 = null
;
|
|
|
856 |
|
this.sta
rt2 = null
;
|
|
|
857 |
|
this.len
gth1 = 0;
|
|
|
858 |
|
this.len
gth2 = 0;
|
|
|
859 |
|
|
|
|
860 |
|
this.toS
tring = fu
nction() {
|
|
|
861 |
|
// Emm
ulate GNU
diff's for
mat.
|
|
|
862 |
|
// Hea
der: @@ -3
82,8 +481,
9 @@
|
|
|
863 |
|
// Ind
icies are
printed as
1-based,
not 0-base
d.
|
|
|
864 |
|
var co
ords1, coo
rds2;
|
|
|
865 |
|
if (th
is.length1
== 0)
|
|
|
866 |
|
coor
ds1 = this
.start1+",
0";
|
|
|
867 |
|
else i
f (this.le
ngth1 == 1
)
|
|
|
868 |
|
coor
ds1 = this
.start1+1;
|
|
|
869 |
|
else
|
|
|
870 |
|
coor
ds1 = (thi
s.start1+1
)+","+this
.length1;
|
|
|
871 |
|
if (th
is.length2
== 0)
|
|
|
872 |
|
coor
ds2 = this
.start2+",
0";
|
|
|
873 |
|
else i
f (this.le
ngth2 == 1
)
|
|
|
874 |
|
coor
ds2 = this
.start2+1;
|
|
|
875 |
|
else
|
|
|
876 |
|
coor
ds2 = (thi
s.start2+1
)+","+this
.length2;
|
|
|
877 |
|
var tx
t = "@@ -"
+coords1+"
+"+coords
2+" @@\n";
|
|
|
878 |
|
// Esc
ape the bo
dy of the
patch with
%xx notat
ion.
|
|
|
879 |
|
for (v
ar x=0; x<
this.diffs
.length; x
++)
|
|
|
880 |
|
txt
+= ("- +".
charAt(thi
s.diffs[x]
[0]+1)) +
encodeURI(
this.diffs
[x][1]) +
"\n";
|
|
|
881 |
|
return
txt.repla
ce(/%20/g,
' ');
|
|
|
882 |
|
}
|
|
|
883 |
|
|
|
|
884 |
|
this.tex
t1 = funct
ion() {
|
|
|
885 |
|
// Com
pute and r
eturn the
source tex
t (all equ
alities an
d deletion
s).
|
|
|
886 |
|
var tx
t = '';
|
|
|
887 |
|
for (v
ar x=0; x<
this.diffs
.length; x
++)
|
|
|
888 |
|
if (
this.diffs
[x][0] ==
0 || this.
diffs[x][0
] == -1)
|
|
|
889 |
|
tx
t += this.
diffs[x][1
];
|
|
|
890 |
|
return
txt;
|
|
|
891 |
|
}
|
|
|
892 |
|
|
|
|
893 |
|
this.tex
t2 = funct
ion() {
|
|
|
894 |
|
// Com
pute and r
eturn the
destinatio
n text (al
l equaliti
es and ins
ertions).
|
|
|
895 |
|
var tx
t = '';
|
|
|
896 |
|
for (v
ar x=0; x<
this.diffs
.length; x
++)
|
|
|
897 |
|
if (
this.diffs
[x][0] ==
0 || this.
diffs[x][0
] == 1)
|
|
|
898 |
|
tx
t += this.
diffs[x][1
];
|
|
|
899 |
|
return
txt;
|
|
|
900 |
|
}
|
|
|
901 |
|
}
|
|
|
902 |
|
|
|
|
903 |
|
|
|
|
904 |
|
function p
atch_addco
ntext(patc
h, text) {
|
|
|
905 |
|
var patt
ern = text
.substring
(patch.sta
rt2, patch
.start2+pa
tch.length
1);
|
|
|
906 |
|
var padd
ing = 0;
|
|
|
907 |
|
// Incre
ase the co
ntext unti
l we're un
ique (but
don't let
the patter
n expand b
eyond MATC
H_MAXBITS)
.
|
|
|
908 |
|
while (t
ext.indexO
f(pattern)
!= text.l
astIndexOf
(pattern)
&& pattern
.length <
MATCH_MAXB
ITS-PATCH_
MARGIN-PAT
CH_MARGIN)
{
|
|
|
909 |
|
paddin
g += PATCH
_MARGIN;
|
|
|
910 |
|
patter
n = text.s
ubstring(p
atch.start
2 - paddin
g, patch.s
tart2+patc
h.length1
+ padding)
;
|
|
|
911 |
|
}
|
|
|
912 |
|
// Add o
ne chunk f
or good lu
ck.
|
|
|
913 |
|
padding
+= PATCH_M
ARGIN;
|
|
|
914 |
|
// Add t
he prefix.
|
|
|
915 |
|
var pref
ix = text.
substring(
patch.star
t2 - paddi
ng, patch.
start2);
|
|
|
916 |
|
if (pref
ix != '')
|
|
|
917 |
|
patch.
diffs.unsh
ift([0, pr
efix]);
|
|
|
918 |
|
// Add t
he suffix
|
|
|
919 |
|
var suff
ix = text.
substring(
patch.star
t2+patch.l
ength1, pa
tch.start2
+patch.len
gth1 + pad
ding);
|
|
|
920 |
|
if (suff
ix != '')
|
|
|
921 |
|
patch.
diffs.push
([0, suffi
x]);
|
|
|
922 |
|
|
|
|
923 |
|
// Roll
back the s
tart point
s.
|
|
|
924 |
|
patch.st
art1 -= pr
efix.lengt
h;
|
|
|
925 |
|
patch.st
art2 -= pr
efix.lengt
h;
|
|
|
926 |
|
// Exten
d the leng
ths.
|
|
|
927 |
|
patch.le
ngth1 += p
refix.leng
th + suffi
x.length;
|
|
|
928 |
|
patch.le
ngth2 += p
refix.leng
th + suffi
x.length;
|
|
|
929 |
|
}
|
|
|
930 |
|
|
|
|
931 |
|
|
|
|
932 |
|
function p
atch_make(
text1, tex
t2, diff)
{
|
|
|
933 |
|
// Compu
te a list
of patches
to turn t
ext1 into
text2.
|
|
|
934 |
|
// Use d
iff if pro
vided, oth
erwise com
pute it ou
rselves.
|
|
|
935 |
|
if (type
of diff ==
'undefine
d') {
|
|
|
936 |
|
diff =
diff_main
(text1, te
xt2, true)
;
|
|
|
937 |
|
if (di
ff.length
> 2) {
|
|
|
938 |
|
diff
_cleanup_s
emantic(di
ff);
|
|
|
939 |
|
diff
_cleanup_e
fficiency(
diff);
|
|
|
940 |
|
}
|
|
|
941 |
|
}
|
|
|
942 |
|
if (diff
.length ==
0)
|
|
|
943 |
|
return
[]; // Ge
t rid of t
he null ca
se.
|
|
|
944 |
|
var patc
hes = [];
|
|
|
945 |
|
var patc
h = new pa
tch_obj();
|
|
|
946 |
|
var char
_count1 =
0; // Numb
er of char
acters int
o the text
1 string.
|
|
|
947 |
|
var char
_count2 =
0; // Numb
er of char
acters int
o the text
2 string.
|
|
|
948 |
|
var last
_type = nu
ll;
|
|
|
949 |
|
var prep
atch_text
= text1; /
/ Recreate
the patch
es to dete
rmine cont
ext info.
|
|
|
950 |
|
var post
patch_text
= text1;
|
|
|
951 |
|
for (var
x=0; x<di
ff.length;
x++) {
|
|
|
952 |
|
var di
ff_type =
diff[x][0]
;
|
|
|
953 |
|
var di
ff_text =
diff[x][1]
;
|
|
|
954 |
|
|
|
|
955 |
|
if (pa
tch.diffs.
length ==
0 && diff_
type != 0)
{
|
|
|
956 |
|
// A
new patch
starts he
re.
|
|
|
957 |
|
patc
h.start1 =
char_coun
t1;
|
|
|
958 |
|
patc
h.start2 =
char_coun
t2;
|
|
|
959 |
|
}
|
|
|
960 |
|
|
|
|
961 |
|
if (di
ff_type ==
1) {
|
|
|
962 |
|
// I
nsertion
|
|
|
963 |
|
patc
h.diffs.pu
sh(diff[x]
);
|
|
|
964 |
|
patc
h.length2
+= diff_te
xt.length;
|
|
|
965 |
|
post
patch_text
= postpat
ch_text.su
bstring(0,
char_coun
t2) + diff
_text + po
stpatch_te
xt.substri
ng(char_co
unt2);
|
|
|
966 |
|
} else
if (diff_
type == -1
) {
|
|
|
967 |
|
// D
eletion.
|
|
|
968 |
|
patc
h.length1
+= diff_te
xt.length;
|
|
|
969 |
|
patc
h.diffs.pu
sh(diff[x]
);
|
|
|
970 |
|
post
patch_text
= postpat
ch_text.su
bstring(0,
char_coun
t2) + post
patch_text
.substring
(char_coun
t2 + diff_
text.lengt
h);
|
|
|
971 |
|
} else
if (diff_
type == 0
&& diff_te
xt.length
<= 2*PATCH
_MARGIN &&
patch.dif
fs.length
!= 0 && di
ff.length
!= x+1) {
|
|
|
972 |
|
// S
mall equal
ity inside
a patch.
|
|
|
973 |
|
patc
h.diffs.pu
sh(diff[x]
);
|
|
|
974 |
|
patc
h.length1
+= diff_te
xt.length;
|
|
|
975 |
|
patc
h.length2
+= diff_te
xt.length;
|
|
|
976 |
|
}
|
|
|
977 |
|
|
|
|
978 |
|
last_t
ype = diff
_type;
|
|
|
979 |
|
if (di
ff_type ==
0 && diff
_text.leng
th >= 2*PA
TCH_MARGIN
) {
|
|
|
980 |
|
// T
ime for a
new patch.
|
|
|
981 |
|
if (
patch.diff
s.length !
= 0) {
|
|
|
982 |
|
pa
tch_addcon
text(patch
, prepatch
_text);
|
|
|
983 |
|
pa
tches.push
(patch);
|
|
|
984 |
|
va
r patch =
new patch_
obj();
|
|
|
985 |
|
la
st_type =
null;
|
|
|
986 |
|
pr
epatch_tex
t = postpa
tch_text;
|
|
|
987 |
|
}
|
|
|
988 |
|
}
|
|
|
989 |
|
|
|
|
990 |
|
// Upd
ate the cu
rrent char
acter coun
t.
|
|
|
991 |
|
if (di
ff_type !=
1)
|
|
|
992 |
|
char
_count1 +=
diff_text
.length;
|
|
|
993 |
|
if (di
ff_type !=
-1)
|
|
|
994 |
|
char
_count2 +=
diff_text
.length;
|
|
|
995 |
|
}
|
|
|
996 |
|
// Pick
up the lef
tover patc
h if not e
mpty.
|
|
|
997 |
|
if (patc
h.diffs.le
ngth != 0)
{
|
|
|
998 |
|
patch_
addcontext
(patch, pr
epatch_tex
t);
|
|
|
999 |
|
patche
s.push(pat
ch);
|
|
|
1000 |
|
}
|
|
|
1001 |
|
|
|
|
1002 |
|
return p
atches;
|
|
|
1003 |
|
}
|
|
|
1004 |
|
|
|
|
1005 |
|
|
|
|
1006 |
|
function p
atch_apply
(patches,
text) {
|
|
|
1007 |
|
// Merge
a set of
patches on
to the tex
t.
|
|
|
1008 |
|
// Retur
n a patche
d text, as
well as a
list of t
rue/false
values ind
icating wh
ich patche
s were app
lied.
|
|
|
1009 |
|
patch_sp
litmax(pat
ches);
|
|
|
1010 |
|
var resu
lts = [];
|
|
|
1011 |
|
var delt
a = 0;
|
|
|
1012 |
|
var expe
cted_loc,
start_loc;
|
|
|
1013 |
|
var text
1, text2;
|
|
|
1014 |
|
var diff
, mod, ind
ex1, index
2;
|
|
|
1015 |
|
for (var
x=0; x<pa
tches.leng
th; x++) {
|
|
|
1016 |
|
expect
ed_loc = p
atches[x].
start2 + d
elta;
|
|
|
1017 |
|
text1
= patches[
x].text1()
;
|
|
|
1018 |
|
start_
loc = matc
h_main(tex
t, text1,
expected_l
oc);
|
|
|
1019 |
|
if (st
art_loc ==
null) {
|
|
|
1020 |
|
// N
o match fo
und. :(
|
|
|
1021 |
|
resu
lts.push(f
alse);
|
|
|
1022 |
|
} else
{
|
|
|
1023 |
|
// F
ound a mat
ch. :)
|
|
|
1024 |
|
resu
lts.push(t
rue);
|
|
|
1025 |
|
delt
a = start_
loc - expe
cted_loc;
|
|
|
1026 |
|
text
2 = text.s
ubstring(s
tart_loc,
start_loc
+ text1.le
ngth);
|
|
|
1027 |
|
if (
text1 == t
ext2) {
|
|
|
1028 |
|
//
Perfect m
atch, just
shove the
replaceme
nt text in
.
|
|
|
1029 |
|
te
xt = text.
substring(
0, start_l
oc) + patc
hes[x].tex
t2() + tex
t.substrin
g(start_lo
c + text1.
length);
|
|
|
1030 |
|
} el
se {
|
|
|
1031 |
|
//
Imperfect
match. R
un a diff
to get a f
ramework o
f equivale
nt indicie
s.
|
|
|
1032 |
|
di
ff = diff_
main(text1
, text2, f
alse);
|
|
|
1033 |
|
in
dex1 = 0;
|
|
|
1034 |
|
fo
r (var y=0
; y<patche
s[x].diffs
.length; y
++) {
|
|
|
1035 |
|
mod = patc
hes[x].dif
fs[y];
|
|
|
1036 |
|
if (mod[0]
!= 0)
|
|
|
1037 |
|
index2 =
diff_xind
ex(diff, i
ndex1);
|
|
|
1038 |
|
if (mod[0]
== 1) //
Insertion
|
|
|
1039 |
|
text = t
ext.substr
ing(0, sta
rt_loc + i
ndex2) + m
od[1] + te
xt.substri
ng(start_l
oc + index
2);
|
|
|
1040 |
|
else if (m
od[0] == -
1) // Dele
tion
|
|
|
1041 |
|
text = t
ext.substr
ing(0, sta
rt_loc + i
ndex2) + t
ext.substr
ing(start_
loc + diff
_xindex(di
ff, index1
+ mod[1].
length));
|
|
|
1042 |
|
if (mod[0]
!= -1)
|
|
|
1043 |
|
index1 +
= mod[1].l
ength;
|
|
|
1044 |
|
}
|
|
|
1045 |
|
}
|
|
|
1046 |
|
}
|
|
|
1047 |
|
}
|
|
|
1048 |
|
return [
text, resu
lts];
|
|
|
1049 |
|
}
|
|
|
1050 |
|
|
|
|
1051 |
|
|
|
|
1052 |
|
function p
atch_split
max(patche
s) {
|
|
|
1053 |
|
// Look
through th
e patches
and break
up any whi
ch are lon
ger than t
he maximum
limit of
the match
algorithm.
|
|
|
1054 |
|
var bigp
atch, patc
h, patch_s
ize, start
1, start2,
diff_type
, diff_tex
t, precont
ext, postc
ontext, em
pty;
|
|
|
1055 |
|
for (var
x=0; x<pa
tches.leng
th; x++) {
|
|
|
1056 |
|
if (pa
tches[x].l
ength1 > M
ATCH_MAXBI
TS) {
|
|
|
1057 |
|
bigp
atch = pat
ches[x];
|
|
|
1058 |
|
// R
emove the
big old pa
tch.
|
|
|
1059 |
|
patc
hes.splice
(x, 1);
|
|
|
1060 |
|
patc
h_size = M
ATCH_MAXBI
TS;
|
|
|
1061 |
|
star
t1 = bigpa
tch.start1
;
|
|
|
1062 |
|
star
t2 = bigpa
tch.start2
;
|
|
|
1063 |
|
prec
ontext = '
';
|
|
|
1064 |
|
whil
e (bigpatc
h.diffs.le
ngth != 0)
{
|
|
|
1065 |
|
//
Create on
e of sever
al smaller
patches.
|
|
|
1066 |
|
pa
tch = new
patch_obj(
);
|
|
|
1067 |
|
em
pty = true
;
|
|
|
1068 |
|
pa
tch.start1
= start1
- preconte
xt.length;
|
|
|
1069 |
|
pa
tch.start2
= start2
- preconte
xt.length;
|
|
|
1070 |
|
if
(preconte
xt != '')
{
|
|
|
1071 |
|
patch.leng
th1 = patc
h.length2
= preconte
xt.length;
|
|
|
1072 |
|
patch.diff
s.push([0,
precontex
t]);
|
|
|
1073 |
|
}
|
|
|
1074 |
|
wh
ile (bigpa
tch.diffs.
length !=
0 && patch
.length1 <
patch_siz
e - PATCH_
MARGIN) {
|
|
|
1075 |
|
diff_type
= bigpatch
.diffs[0][
0];
|
|
|
1076 |
|
diff_text
= bigpatch
.diffs[0][
1];
|
|
|
1077 |
|
if (diff_t
ype == 1)
{
|
|
|
1078 |
|
// Inser
tions are
harmless.
|
|
|
1079 |
|
patch.le
ngth2 += d
iff_text.l
ength;
|
|
|
1080 |
|
start2 +
= diff_tex
t.length;
|
|
|
1081 |
|
patch.di
ffs.push(b
igpatch.di
ffs.shift(
));
|
|
|
1082 |
|
empty =
false;
|
|
|
1083 |
|
} else {
|
|
|
1084 |
|
// Delet
ion or equ
ality. On
ly take as
much as w
e can stom
ach.
|
|
|
1085 |
|
diff_tex
t = diff_t
ext.substr
ing(0, pat
ch_size -
patch.leng
th1 - PATC
H_MARGIN);
|
|
|
1086 |
|
patch.le
ngth1 += d
iff_text.l
ength;
|
|
|
1087 |
|
start1 +
= diff_tex
t.length;
|
|
|
1088 |
|
if (diff
_type == 0
) {
|
|
|
1089 |
|
patch.
length2 +=
diff_text
.length;
|
|
|
1090 |
|
start2
+= diff_t
ext.length
;
|
|
|
1091 |
|
} else {
|
|
|
1092 |
|
empty
= false;
|
|
|
1093 |
|
}
|
|
|
1094 |
|
patch.di
ffs.push([
diff_type,
diff_text
]);
|
|
|
1095 |
|
if (diff
_text == b
igpatch.di
ffs[0][1])
|
|
|
1096 |
|
bigpat
ch.diffs.s
hift();
|
|
|
1097 |
|
else
|
|
|
1098 |
|
bigpat
ch.diffs[0
][1] = big
patch.diff
s[0][1].su
bstring(di
ff_text.le
ngth);
|
|
|
1099 |
|
}
|
|
|
1100 |
|
}
|
|
|
1101 |
|
//
Compute t
he head co
ntext for
the next p
atch.
|
|
|
1102 |
|
pr
econtext =
patch.tex
t2();
|
|
|
1103 |
|
pr
econtext =
precontex
t.substrin
g(preconte
xt.length
- PATCH_MA
RGIN);
|
|
|
1104 |
|
//
Append th
e end cont
ext for th
is patch.
|
|
|
1105 |
|
po
stcontext
= bigpatch
.text1().s
ubstring(0
, PATCH_MA
RGIN);
|
|
|
1106 |
|
if
(postcont
ext != ''
) {
|
|
|
1107 |
|
patch.leng
th1 += pos
tcontext.l
ength;
|
|
|
1108 |
|
patch.leng
th2 += pos
tcontext.l
ength;
|
|
|
1109 |
|
if (patch.
diffs.leng
th > 0 &&
patch.diff
s[patch.di
ffs.length
-1][0] ==
0)
|
|
|
1110 |
|
patch.di
ffs[patch.
diffs.leng
th-1][1] +
= postcont
ext;
|
|
|
1111 |
|
else
|
|
|
1112 |
|
patch.di
ffs.push([
0, postcon
text]);
|
|
|
1113 |
|
}
|
|
|
1114 |
|
if
(!empty)
|
|
|
1115 |
|
patches.sp
lice(x++,
0, patch);
|
|
|
1116 |
|
}
|
|
|
1117 |
|
}
|
|
|
1118 |
|
}
|
|
|
1119 |
|
}
|
|
|
1120 |
|
|
|
|
1121 |
|
|
|
|
1122 |
|
function p
atch_totex
t(patches)
{
|
|
|
1123 |
|
// Take
a list of
patches an
d return a
textual r
epresentat
ion.
|
|
|
1124 |
|
var text
= '';
|
|
|
1125 |
|
for (var
x=0; x<pa
tches.leng
th; x++)
|
|
|
1126 |
|
text +
= patches[
x];
|
|
|
1127 |
|
return t
ext;
|
|
|
1128 |
|
}
|
|
|
1129 |
|
|
|
|
1130 |
|
|
|
|
1131 |
|
function p
atch_fromt
ext(text)
{
|
|
|
1132 |
|
// Take
a textual
representa
tion of pa
tches and
return a l
ist of pat
ch objects
.
|
|
|
1133 |
|
var patc
hes = [];
|
|
|
1134 |
|
text = t
ext.split(
'\n');
|
|
|
1135 |
|
var patc
h, m, char
s1, chars2
, sign, li
ne;
|
|
|
1136 |
|
while (t
ext.length
!= 0) {
|
|
|
1137 |
|
m = te
xt[0].matc
h(/^@@ -(\
d+),?(\d*)
\+(\d+),?
(\d*) @@$/
);
|
|
|
1138 |
|
if (!m
)
|
|
|
1139 |
|
retu
rn alert("
Invalid pa
tch string
:\n"+text[
0]);
|
|
|
1140 |
|
patch
= new patc
h_obj();
|
|
|
1141 |
|
patche
s.push(pat
ch);
|
|
|
1142 |
|
patch.
start1 = p
arseInt(m[
1]);
|
|
|
1143 |
|
if (m[
2] == '')
{
|
|
|
1144 |
|
patc
h.start1--
;
|
|
|
1145 |
|
patc
h.length1
= 1;
|
|
|
1146 |
|
} else
if (m[2]
== '0') {
|
|
|
1147 |
|
patc
h.length1
= 0;
|
|
|
1148 |
|
} else
{
|
|
|
1149 |
|
patc
h.start1--
;
|
|
|
1150 |
|
patc
h.length1
= parseInt
(m[2]);
|
|
|
1151 |
|
}
|
|
|
1152 |
|
|
|
|
1153 |
|
patch.
start2 = p
arseInt(m[
3]);
|
|
|
1154 |
|
if (m[
4] == '')
{
|
|
|
1155 |
|
patc
h.start2--
;
|
|
|
1156 |
|
patc
h.length2
= 1;
|
|
|
1157 |
|
} else
if (m[4]
== '0') {
|
|
|
1158 |
|
patc
h.length2
= 0;
|
|
|
1159 |
|
} else
{
|
|
|
1160 |
|
patc
h.start2--
;
|
|
|
1161 |
|
patc
h.length2
= parseInt
(m[4]);
|
|
|
1162 |
|
}
|
|
|
1163 |
|
text.s
hift();
|
|
|
1164 |
|
|
|
|
1165 |
|
while
(text.leng
th != 0) {
|
|
|
1166 |
|
sign
= text[0]
.charAt(0)
;
|
|
|
1167 |
|
line
= decodeU
RIComponen
t(text[0].
substring(
1));
|
|
|
1168 |
|
if (
sign == '-
') {
|
|
|
1169 |
|
//
Deletion.
|
|
|
1170 |
|
pa
tch.diffs.
push([-1,
line]);
|
|
|
1171 |
|
} el
se if (sig
n == '+')
{
|
|
|
1172 |
|
//
Insertion
.
|
|
|
1173 |
|
pa
tch.diffs.
push([1, l
ine]);
|
|
|
1174 |
|
} el
se if (sig
n == ' ')
{
|
|
|
1175 |
|
//
Minor equ
ality.
|
|
|
1176 |
|
pa
tch.diffs.
push([0, l
ine]);
|
|
|
1177 |
|
} el
se if (sig
n == '@')
{
|
|
|
1178 |
|
//
Start of
next patch
.
|
|
|
1179 |
|
br
eak;
|
|
|
1180 |
|
} el
se if (sig
n == '') {
|
|
|
1181 |
|
//
Blank lin
e? Whatev
er.
|
|
|
1182 |
|
} el
se {
|
|
|
1183 |
|
//
WTF?
|
|
|
1184 |
|
re
turn alert
("Invalid
patch mode
: '"+sign+
"'\n"+line
);
|
|
|
1185 |
|
}
|
|
|
1186 |
|
text
.shift();
|
|
|
1187 |
|
}
|
|
|
1188 |
|
}
|
|
|
1189 |
|
return p
atches;
|
|
|
1190 |
|
}
|
|
|
1191 |
|
|
|
|
1192 |
|
// EOF
|
|
|