No regular expressions were active.
1 |
|
<?php
|
|
1 |
|
<?php
|
2 |
|
/* vim: se
t expandta
b tabstop=
4 shiftwid
th=4 softt
abstop=4:
*/
|
|
2 |
|
/* vim: se
t expandta
b tabstop=
4 shiftwid
th=4 softt
abstop=4:
*/
|
3 |
|
|
|
3 |
|
|
4 |
|
/**
|
|
4 |
|
/**
|
5 |
|
* SafeHTM
L Parser
|
|
5 |
|
* SafeHTM
L Parser
|
6 |
|
*
|
|
6 |
|
*
|
7 |
|
* PHP ver
sions 4 an
d 5
|
|
7 |
|
* PHP ver
sions 4 an
d 5
|
8 |
|
*
|
|
8 |
|
*
|
9 |
|
* @catego
ry HTML
|
|
9 |
|
* @catego
ry HTML
|
10 |
|
* @packag
e SafeH
TML
|
|
10 |
|
* @packag
e SafeH
TML
|
11 |
|
* @author
Roman
Ivanov <t
hingol@mai
l.ru>
|
|
11 |
|
* @author
Roman
Ivanov <t
hingol@mai
l.ru>
|
12 |
|
* @copyri
ght 2004-
2005 Roman
Ivanov
|
|
12 |
|
* @copyri
ght 2004-
2005 Roman
Ivanov
|
13 |
|
* @licens
e http:
//www.debi
an.org/mis
c/bsd.lice
nse BSD L
icense (3
Clause)
|
|
13 |
|
* @licens
e http:
//www.debi
an.org/mis
c/bsd.lice
nse BSD L
icense (3
Clause)
|
14 |
|
* @versio
n 1.3.7
|
|
14 |
|
* @versio
n 1.3.7
|
15 |
|
* @link
http:
//pixel-ap
es.com/saf
ehtml/
|
|
15 |
|
* @link
http:
//pixel-ap
es.com/saf
ehtml/
|
16 |
|
*/
|
|
16 |
|
*/
|
17 |
|
|
|
17 |
|
|
18 |
|
|
|
18 |
|
|
19 |
|
/**
|
|
19 |
|
/**
|
20 |
|
* This pa
ckage requ
ires HTMLS
ax3 packag
e
|
|
20 |
|
* This pa
ckage requ
ires HTMLS
ax3 packag
e
|
21 |
|
*/
|
|
21 |
|
*/
|
22 |
|
require_on
ce(
"{$dir['pl
ugins
'
]}
safehtml/H
TMLSax3.ph
p
"
);
|
|
22 |
|
require_on
ce(
BX_DIRECTO
RY_PATH_PL
UGINS
.
'
safehtml/H
TMLSax3.ph
p
'
);
|
23 |
|
|
|
23 |
|
|
24 |
|
|
|
24 |
|
|
25 |
|
/**
|
|
25 |
|
/**
|
26 |
|
*
|
|
26 |
|
*
|
27 |
|
* SafeHTM
L Parser
|
|
27 |
|
* SafeHTM
L Parser
|
28 |
|
*
|
|
28 |
|
*
|
29 |
|
* This pa
rser strip
s down all
potential
ly dangero
us content
within HT
ML:
|
|
29 |
|
* This pa
rser strip
s down all
potential
ly dangero
us content
within HT
ML:
|
30 |
|
* <ul>
|
|
30 |
|
* <ul>
|
31 |
|
* <li>ope
ning tag w
ithout its
closing t
ag</li>
|
|
31 |
|
* <li>ope
ning tag w
ithout its
closing t
ag</li>
|
32 |
|
* <li>clo
sing tag w
ithout its
opening t
ag</li>
|
|
32 |
|
* <li>clo
sing tag w
ithout its
opening t
ag</li>
|
33 |
|
* <li>any
of these
tags: "bas
e", "basef
ont", "hea
d", "html"
, "body",
"applet",
|
|
33 |
|
* <li>any
of these
tags: "bas
e", "basef
ont", "hea
d", "html"
, "body",
"applet",
|
34 |
|
* "object
", "iframe
", "frame"
, "framese
t", "scrip
t", "layer
", "ilayer
", "embed"
,
|
|
34 |
|
* "object
", "iframe
", "frame"
, "framese
t", "scrip
t", "layer
", "ilayer
", "embed"
,
|
35 |
|
* "bgsoun
d", "link"
, "meta",
"style", "
title", "b
link", "xm
l" etc.</l
i>
|
|
35 |
|
* "bgsoun
d", "link"
, "meta",
"style", "
title", "b
link", "xm
l" etc.</l
i>
|
36 |
|
* <li>any
of these
attributes
: on*, dat
a*, dynsrc
</li>
|
|
36 |
|
* <li>any
of these
attributes
: on*, dat
a*, dynsrc
</li>
|
37 |
|
* <li>jav
ascript:/v
bscript:/a
bout: etc.
protocols
</li>
|
|
37 |
|
* <li>jav
ascript:/v
bscript:/a
bout: etc.
protocols
</li>
|
38 |
|
* <li>exp
ression/be
havior etc
. in style
s</li>
|
|
38 |
|
* <li>exp
ression/be
havior etc
. in style
s</li>
|
39 |
|
* <li>any
other act
ive conten
t</li>
|
|
39 |
|
* <li>any
other act
ive conten
t</li>
|
40 |
|
* </ul>
|
|
40 |
|
* </ul>
|
41 |
|
* It also
tries to
convert co
de to XHTM
L valid, b
ut htmltid
y is far b
etter
|
|
41 |
|
* It also
tries to
convert co
de to XHTM
L valid, b
ut htmltid
y is far b
etter
|
42 |
|
* solutio
n for this
task.
|
|
42 |
|
* solutio
n for this
task.
|
43 |
|
*
|
|
43 |
|
*
|
44 |
|
* <b>Exam
ple:</b>
|
|
44 |
|
* <b>Exam
ple:</b>
|
45 |
|
* <pre>
|
|
45 |
|
* <pre>
|
46 |
|
* $parser
=& new Sa
feHTML();
|
|
46 |
|
* $parser
=& new Sa
feHTML();
|
47 |
|
* $result
= $parser
->parse($d
oc);
|
|
47 |
|
* $result
= $parser
->parse($d
oc);
|
48 |
|
* </pre>
|
|
48 |
|
* </pre>
|
49 |
|
*
|
|
49 |
|
*
|
50 |
|
* @catego
ry HTML
|
|
50 |
|
* @catego
ry HTML
|
51 |
|
* @packag
e SafeH
TML
|
|
51 |
|
* @packag
e SafeH
TML
|
52 |
|
* @author
Roman
Ivanov <t
hingol@mai
l.ru>
|
|
52 |
|
* @author
Roman
Ivanov <t
hingol@mai
l.ru>
|
53 |
|
* @copyri
ght 1997-
2005 Roman
Ivanov
|
|
53 |
|
* @copyri
ght 1997-
2005 Roman
Ivanov
|
54 |
|
* @licens
e http:
//www.debi
an.org/mis
c/bsd.lice
nse BSD L
icense (3
Clause)
|
|
54 |
|
* @licens
e http:
//www.debi
an.org/mis
c/bsd.lice
nse BSD L
icense (3
Clause)
|
55 |
|
* @versio
n Relea
se: @packa
ge_version
@
|
|
55 |
|
* @versio
n Relea
se: @packa
ge_version
@
|
56 |
|
* @link
http:
//pear.php
.net/packa
ge/SafeHTM
L
|
|
56 |
|
* @link
http:
//pear.php
.net/packa
ge/SafeHTM
L
|
57 |
|
*/
|
|
57 |
|
*/
|
58 |
|
class Safe
HTML
|
|
58 |
|
class Safe
HTML
|
59 |
|
{
|
|
59 |
|
{
|
60 |
|
/**
|
|
60 |
|
/**
|
61 |
|
* Sto
rage for r
esulting H
TML output
|
|
61 |
|
* Sto
rage for r
esulting H
TML output
|
62 |
|
*
|
|
62 |
|
*
|
63 |
|
* @va
r string
|
|
63 |
|
* @va
r string
|
64 |
|
* @ac
cess priva
te
|
|
64 |
|
* @ac
cess priva
te
|
65 |
|
*/
|
|
65 |
|
*/
|
66 |
|
var $_
xhtml = ''
;
|
|
66 |
|
var $_
xhtml = ''
;
|
67 |
|
|
|
67 |
|
|
68 |
|
/**
|
|
68 |
|
/**
|
69 |
|
* Arr
ay of coun
ters for e
ach tag
|
|
69 |
|
* Arr
ay of coun
ters for e
ach tag
|
70 |
|
*
|
|
70 |
|
*
|
71 |
|
* @va
r array
|
|
71 |
|
* @va
r array
|
72 |
|
* @ac
cess priva
te
|
|
72 |
|
* @ac
cess priva
te
|
73 |
|
*/
|
|
73 |
|
*/
|
74 |
|
var $_
counter =
array();
|
|
74 |
|
var $_
counter =
array();
|
75 |
|
|
|
75 |
|
|
76 |
|
/**
|
|
76 |
|
/**
|
77 |
|
* Sta
ck of uncl
osed tags
|
|
77 |
|
* Sta
ck of uncl
osed tags
|
78 |
|
*
|
|
78 |
|
*
|
79 |
|
* @va
r array
|
|
79 |
|
* @va
r array
|
80 |
|
* @ac
cess priva
te
|
|
80 |
|
* @ac
cess priva
te
|
81 |
|
*/
|
|
81 |
|
*/
|
82 |
|
var $_
stack = ar
ray();
|
|
82 |
|
var $_
stack = ar
ray();
|
83 |
|
|
|
83 |
|
|
84 |
|
/**
|
|
84 |
|
/**
|
85 |
|
* Arr
ay of coun
ters for t
ags that m
ust be del
eted with
all conten
t
|
|
85 |
|
* Arr
ay of coun
ters for t
ags that m
ust be del
eted with
all conten
t
|
86 |
|
*
|
|
86 |
|
*
|
87 |
|
* @va
r array
|
|
87 |
|
* @va
r array
|
88 |
|
* @ac
cess priva
te
|
|
88 |
|
* @ac
cess priva
te
|
89 |
|
*/
|
|
89 |
|
*/
|
90 |
|
var $_
dcCounter
= array();
|
|
90 |
|
var $_
dcCounter
= array();
|
91 |
|
|
|
91 |
|
|
92 |
|
/**
|
|
92 |
|
/**
|
93 |
|
* Sta
ck of uncl
osed tags
that must
be deleted
with all
content
|
|
93 |
|
* Sta
ck of uncl
osed tags
that must
be deleted
with all
content
|
94 |
|
*
|
|
94 |
|
*
|
95 |
|
* @va
r array
|
|
95 |
|
* @va
r array
|
96 |
|
* @ac
cess priva
te
|
|
96 |
|
* @ac
cess priva
te
|
97 |
|
*/
|
|
97 |
|
*/
|
98 |
|
var $_
dcStack =
array();
|
|
98 |
|
var $_
dcStack =
array();
|
99 |
|
|
|
99 |
|
|
100 |
|
/**
|
|
100 |
|
/**
|
101 |
|
* Sto
res level
of list (o
l/ul) nest
ing
|
|
101 |
|
* Sto
res level
of list (o
l/ul) nest
ing
|
102 |
|
*
|
|
102 |
|
*
|
103 |
|
* @va
r int
|
|
103 |
|
* @va
r int
|
104 |
|
* @ac
cess priva
te
|
|
104 |
|
* @ac
cess priva
te
|
105 |
|
*/
|
|
105 |
|
*/
|
106 |
|
var $_
listScope
= 0;
|
|
106 |
|
var $_
listScope
= 0;
|
107 |
|
|
|
107 |
|
|
108 |
|
/**
|
|
108 |
|
/**
|
109 |
|
* Sta
ck of uncl
osed list
tags
|
|
109 |
|
* Sta
ck of uncl
osed list
tags
|
110 |
|
*
|
|
110 |
|
*
|
111 |
|
* @va
r array
|
|
111 |
|
* @va
r array
|
112 |
|
* @ac
cess priva
te
|
|
112 |
|
* @ac
cess priva
te
|
113 |
|
*/
|
|
113 |
|
*/
|
114 |
|
var $_
liStack =
array();
|
|
114 |
|
var $_
liStack =
array();
|
115 |
|
|
|
115 |
|
|
116 |
|
/**
|
|
116 |
|
/**
|
117 |
|
* Arr
ay of prep
ared regul
ar express
ions for p
rotocols (
schemas) m
atching
|
|
117 |
|
* Arr
ay of prep
ared regul
ar express
ions for p
rotocols (
schemas) m
atching
|
118 |
|
*
|
|
118 |
|
*
|
119 |
|
* @va
r array
|
|
119 |
|
* @va
r array
|
120 |
|
* @ac
cess priva
te
|
|
120 |
|
* @ac
cess priva
te
|
121 |
|
*/
|
|
121 |
|
*/
|
122 |
|
var $_
protoRegex
ps = array
();
|
|
122 |
|
var $_
protoRegex
ps = array
();
|
123 |
|
|
|
123 |
|
|
124 |
|
/**
|
|
124 |
|
/**
|
125 |
|
* Arr
ay of prep
ared regul
ar express
ions for C
SS matchin
g
|
|
125 |
|
* Arr
ay of prep
ared regul
ar express
ions for C
SS matchin
g
|
126 |
|
*
|
|
126 |
|
*
|
127 |
|
* @va
r array
|
|
127 |
|
* @va
r array
|
128 |
|
* @ac
cess priva
te
|
|
128 |
|
* @ac
cess priva
te
|
129 |
|
*/
|
|
129 |
|
*/
|
130 |
|
var $_
cssRegexps
= array()
;
|
|
130 |
|
var $_
cssRegexps
= array()
;
|
131 |
|
|
|
131 |
|
|
132 |
|
/**
|
|
132 |
|
/**
|
133 |
|
* Lis
t of singl
e tags ("<
tag />")
|
|
133 |
|
* Lis
t of singl
e tags ("<
tag />")
|
134 |
|
*
|
|
134 |
|
*
|
135 |
|
* @va
r array
|
|
135 |
|
* @va
r array
|
136 |
|
* @ac
cess publi
c
|
|
136 |
|
* @ac
cess publi
c
|
137 |
|
*/
|
|
137 |
|
*/
|
138 |
|
var $s
ingleTags
= array('a
rea', 'br'
, 'img', '
input', 'h
r', 'wbr',
);
|
|
138 |
|
var $s
ingleTags
= array('a
rea', 'br'
, 'img', '
input', 'h
r', 'wbr',
);
|
139 |
|
|
|
139 |
|
|
140 |
|
/**
|
|
140 |
|
/**
|
141 |
|
* Lis
t of dange
rous tags
(such tags
will be d
eleted)
|
|
141 |
|
* Lis
t of dange
rous tags
(such tags
will be d
eleted)
|
142 |
|
*
|
|
142 |
|
*
|
143 |
|
* @va
r array
|
|
143 |
|
* @va
r array
|
144 |
|
* @ac
cess publi
c
|
|
144 |
|
* @ac
cess publi
c
|
145 |
|
*/
|
|
145 |
|
*/
|
146 |
|
var $d
eleteTags
= array(
|
|
146 |
|
var $d
eleteTags
= array(
|
147 |
|
'a
pplet', 'b
ase', 'b
asefont',
'bgsound',
'blink',
'body',
|
|
147 |
|
'a
pplet', 'b
ase', 'b
asefont',
'bgsound',
'blink',
'body',
|
148 |
|
'e
mbed', 'f
rame', 'f
rameset',
'head',
'html',
'ilayer',
|
|
148 |
|
'e
mbed', 'f
rame', 'f
rameset',
'head',
'html',
'ilayer',
|
149 |
|
'i
frame', 'l
ayer', 'l
ink',
'meta',
'object',
'style',
|
|
149 |
|
'i
frame', 'l
ayer', 'l
ink',
'meta',
'object',
'style',
|
150 |
|
't
itle', 's
cript',
|
|
150 |
|
't
itle', 's
cript',
|
151 |
|
);
|
|
151 |
|
);
|
152 |
|
|
|
152 |
|
|
153 |
|
/**
|
|
153 |
|
/**
|
154 |
|
* Lis
t of dange
rous tags
(such tags
will be d
eleted, an
d all cont
ent
|
|
154 |
|
* Lis
t of dange
rous tags
(such tags
will be d
eleted, an
d all cont
ent
|
155 |
|
* ins
ide this t
ags will b
e also rem
oved)
|
|
155 |
|
* ins
ide this t
ags will b
e also rem
oved)
|
156 |
|
*
|
|
156 |
|
*
|
157 |
|
* @va
r array
|
|
157 |
|
* @va
r array
|
158 |
|
* @ac
cess publi
c
|
|
158 |
|
* @ac
cess publi
c
|
159 |
|
*/
|
|
159 |
|
*/
|
160 |
|
var $d
eleteTagsC
ontent = a
rray('scri
pt', 'styl
e', 'title
', 'xml',
);
|
|
160 |
|
var $d
eleteTagsC
ontent = a
rray('scri
pt', 'styl
e', 'title
', 'xml',
);
|
161 |
|
|
|
161 |
|
|
162 |
|
/**
|
|
162 |
|
/**
|
163 |
|
* Typ
e of proto
cols filte
ring ('whi
te' or 'bl
ack')
|
|
163 |
|
* Typ
e of proto
cols filte
ring ('whi
te' or 'bl
ack')
|
164 |
|
*
|
|
164 |
|
*
|
165 |
|
* @va
r string
|
|
165 |
|
* @va
r string
|
166 |
|
* @ac
cess publi
c
|
|
166 |
|
* @ac
cess publi
c
|
167 |
|
*/
|
|
167 |
|
*/
|
168 |
|
var $p
rotocolFil
tering = '
white';
|
|
168 |
|
var $p
rotocolFil
tering = '
white';
|
169 |
|
|
|
169 |
|
|
170 |
|
/**
|
|
170 |
|
/**
|
171 |
|
* Lis
t of "dang
erous" pro
tocols (us
ed for bla
cklist-fil
tering)
|
|
171 |
|
* Lis
t of "dang
erous" pro
tocols (us
ed for bla
cklist-fil
tering)
|
172 |
|
*
|
|
172 |
|
*
|
173 |
|
* @va
r array
|
|
173 |
|
* @va
r array
|
174 |
|
* @ac
cess publi
c
|
|
174 |
|
* @ac
cess publi
c
|
175 |
|
*/
|
|
175 |
|
*/
|
176 |
|
var $b
lackProtoc
ols = arra
y(
|
|
176 |
|
var $b
lackProtoc
ols = arra
y(
|
177 |
|
'a
bout', '
chrome',
'data',
'di
sk', '
hcp',
|
|
177 |
|
'a
bout', '
chrome',
'data',
'di
sk', '
hcp',
|
178 |
|
'h
elp', '
javascript
', 'livesc
ript', 'ly
nxcgi', '
lynxexec',
|
|
178 |
|
'h
elp', '
javascript
', 'livesc
ript', 'ly
nxcgi', '
lynxexec',
|
179 |
|
'm
s-help', '
ms-its',
'mhtml'
, 'mo
cha', '
opera',
|
|
179 |
|
'm
s-help', '
ms-its',
'mhtml'
, 'mo
cha', '
opera',
|
180 |
|
'r
es', '
resource',
'shell'
, 'vb
script', '
view-sourc
e',
|
|
180 |
|
'r
es', '
resource',
'shell'
, 'vb
script', '
view-sourc
e',
|
181 |
|
'v
nd.ms.radi
o',
'wysiwy
g',
|
|
181 |
|
'v
nd.ms.radi
o',
'wysiwy
g',
|
182 |
|
);
|
|
182 |
|
);
|
183 |
|
|
|
183 |
|
|
184 |
|
/**
|
|
184 |
|
/**
|
185 |
|
* Lis
t of "safe
" protocol
s (used fo
r whitelis
t-filterin
g)
|
|
185 |
|
* Lis
t of "safe
" protocol
s (used fo
r whitelis
t-filterin
g)
|
186 |
|
*
|
|
186 |
|
*
|
187 |
|
* @va
r array
|
|
187 |
|
* @va
r array
|
188 |
|
* @ac
cess publi
c
|
|
188 |
|
* @ac
cess publi
c
|
189 |
|
*/
|
|
189 |
|
*/
|
190 |
|
var $w
hiteProtoc
ols = arra
y(
|
|
190 |
|
var $w
hiteProtoc
ols = arra
y(
|
191 |
|
'e
d2k', 'f
ile', 'ftp
', 'gophe
r', 'http'
, 'https'
,
|
|
191 |
|
'e
d2k', 'f
ile', 'ftp
', 'gophe
r', 'http'
, 'https'
,
|
192 |
|
'i
rc', 'm
ailto', 'n
ews', 'nnt
p', 'telne
t', 'webca
l',
|
|
192 |
|
'i
rc', 'm
ailto', 'n
ews', 'nnt
p', 'telne
t', 'webca
l',
|
193 |
|
'x
mpp', 'c
allto',
|
|
193 |
|
'x
mpp', 'c
allto',
|
194 |
|
);
|
|
194 |
|
);
|
195 |
|
|
|
195 |
|
|
196 |
|
/**
|
|
196 |
|
/**
|
197 |
|
* Lis
t of attri
butes that
can conta
in protoco
ls
|
|
197 |
|
* Lis
t of attri
butes that
can conta
in protoco
ls
|
198 |
|
*
|
|
198 |
|
*
|
199 |
|
* @va
r array
|
|
199 |
|
* @va
r array
|
200 |
|
* @ac
cess publi
c
|
|
200 |
|
* @ac
cess publi
c
|
201 |
|
*/
|
|
201 |
|
*/
|
202 |
|
var $p
rotocolAtt
ributes =
array(
|
|
202 |
|
var $p
rotocolAtt
ributes =
array(
|
203 |
|
'a
ction', 'b
ackground'
, 'codebas
e', 'dynsr
c', 'href'
, 'lowsrc'
, 'src',
|
|
203 |
|
'a
ction', 'b
ackground'
, 'codebas
e', 'dynsr
c', 'href'
, 'lowsrc'
, 'src',
|
204 |
|
);
|
|
204 |
|
);
|
205 |
|
|
|
205 |
|
|
206 |
|
/**
|
|
206 |
|
/**
|
207 |
|
* Lis
t of dange
rous CSS k
eywords
|
|
207 |
|
* Lis
t of dange
rous CSS k
eywords
|
208 |
|
*
|
|
208 |
|
*
|
209 |
|
* Who
le style="
" attribut
e will be
removed, i
f parser w
ill find o
ne of
|
|
209 |
|
* Who
le style="
" attribut
e will be
removed, i
f parser w
ill find o
ne of
|
210 |
|
* the
se keyword
s
|
|
210 |
|
* the
se keyword
s
|
211 |
|
*
|
|
211 |
|
*
|
212 |
|
* @va
r array
|
|
212 |
|
* @va
r array
|
213 |
|
* @ac
cess publi
c
|
|
213 |
|
* @ac
cess publi
c
|
214 |
|
*/
|
|
214 |
|
*/
|
215 |
|
var $c
ssKeywords
= array(
|
|
215 |
|
var $c
ssKeywords
= array(
|
216 |
|
'a
bsolute',
'behavior'
, 'b
ehaviour',
'conten
t', 'expre
ssion',
|
|
216 |
|
'a
bsolute',
'behavior'
, 'b
ehaviour',
'conten
t', 'expre
ssion',
|
217 |
|
'f
ixed',
'include-s
ource', 'm
oz-binding
',
|
|
217 |
|
'f
ixed',
'include-s
ource', 'm
oz-binding
',
|
218 |
|
);
|
|
218 |
|
);
|
219 |
|
|
|
219 |
|
|
220 |
|
/**
|
|
220 |
|
/**
|
221 |
|
* Lis
t of tags
that can h
ave no "cl
osing tag"
|
|
221 |
|
* Lis
t of tags
that can h
ave no "cl
osing tag"
|
222 |
|
*
|
|
222 |
|
*
|
223 |
|
* @va
r array
|
|
223 |
|
* @va
r array
|
224 |
|
* @ac
cess publi
c
|
|
224 |
|
* @ac
cess publi
c
|
225 |
|
* @de
precated X
HTML does
not allow
such tags
|
|
225 |
|
* @de
precated X
HTML does
not allow
such tags
|
226 |
|
*/
|
|
226 |
|
*/
|
227 |
|
var $n
oClose = a
rray();
|
|
227 |
|
var $n
oClose = a
rray();
|
228 |
|
|
|
228 |
|
|
229 |
|
/**
|
|
229 |
|
/**
|
230 |
|
* Lis
t of block
-level tag
s that ter
minates pa
ragraph
|
|
230 |
|
* Lis
t of block
-level tag
s that ter
minates pa
ragraph
|
231 |
|
*
|
|
231 |
|
*
|
232 |
|
* Par
agraph wil
l be close
d when thi
s tags ope
ned
|
|
232 |
|
* Par
agraph wil
l be close
d when thi
s tags ope
ned
|
233 |
|
*
|
|
233 |
|
*
|
234 |
|
* @va
r array
|
|
234 |
|
* @va
r array
|
235 |
|
* @ac
cess publi
c
|
|
235 |
|
* @ac
cess publi
c
|
236 |
|
*/
|
|
236 |
|
*/
|
237 |
|
var $c
loseParagr
aph = arra
y(
|
|
237 |
|
var $c
loseParagr
aph = arra
y(
|
238 |
|
'a
ddress', '
blockquote
', 'center
', 'dd',
'dir',
'di
v',
|
|
238 |
|
'a
ddress', '
blockquote
', 'center
', 'dd',
'dir',
'di
v',
|
239 |
|
'd
l', '
dt',
'h1',
'h2',
'h3',
'h4
',
|
|
239 |
|
'd
l', '
dt',
'h1',
'h2',
'h3',
'h4
',
|
240 |
|
'h
5', '
h6',
'hr',
'isinde
x', 'listi
ng', 'ma
rquee',
|
|
240 |
|
'h
5', '
h6',
'hr',
'isinde
x', 'listi
ng', 'ma
rquee',
|
241 |
|
'm
enu', '
multicol',
'ol',
'p',
'plain
text', 'pr
e',
|
|
241 |
|
'm
enu', '
multicol',
'ol',
'p',
'plain
text', 'pr
e',
|
242 |
|
't
able', '
ul',
'xmp',
|
|
242 |
|
't
able', '
ul',
'xmp',
|
243 |
|
);
|
|
243 |
|
);
|
244 |
|
|
|
244 |
|
|
245 |
|
/**
|
|
245 |
|
/**
|
246 |
|
* Lis
t of table
tags, all
table tag
s outside
a table wi
ll be remo
ved
|
|
246 |
|
* Lis
t of table
tags, all
table tag
s outside
a table wi
ll be remo
ved
|
247 |
|
*
|
|
247 |
|
*
|
248 |
|
* @va
r array
|
|
248 |
|
* @va
r array
|
249 |
|
* @ac
cess publi
c
|
|
249 |
|
* @ac
cess publi
c
|
250 |
|
*/
|
|
250 |
|
*/
|
251 |
|
var $t
ableTags =
array(
|
|
251 |
|
var $t
ableTags =
array(
|
252 |
|
'c
aption', '
col', 'col
group', 't
body', 'td
', 'tfoot'
, 'th',
|
|
252 |
|
'c
aption', '
col', 'col
group', 't
body', 'td
', 'tfoot'
, 'th',
|
253 |
|
't
head', '
tr',
|
|
253 |
|
't
head', '
tr',
|
254 |
|
);
|
|
254 |
|
);
|
255 |
|
|
|
255 |
|
|
256 |
|
/**
|
|
256 |
|
/**
|
257 |
|
* Lis
t of list
tags
|
|
257 |
|
* Lis
t of list
tags
|
258 |
|
*
|
|
258 |
|
*
|
259 |
|
* @va
r array
|
|
259 |
|
* @va
r array
|
260 |
|
* @ac
cess publi
c
|
|
260 |
|
* @ac
cess publi
c
|
261 |
|
*/
|
|
261 |
|
*/
|
262 |
|
var $l
istTags =
array('dir
', 'menu',
'ol', 'ul
', 'dl', )
;
|
|
262 |
|
var $l
istTags =
array('dir
', 'menu',
'ol', 'ul
', 'dl', )
;
|
263 |
|
|
|
263 |
|
|
264 |
|
/**
|
|
264 |
|
/**
|
265 |
|
* Lis
t of dange
rous attri
butes
|
|
265 |
|
* Lis
t of dange
rous attri
butes
|
266 |
|
*
|
|
266 |
|
*
|
267 |
|
* @va
r array
|
|
267 |
|
* @va
r array
|
268 |
|
* @ac
cess publi
c
|
|
268 |
|
* @ac
cess publi
c
|
269 |
|
*/
|
|
269 |
|
*/
|
270 |
|
var $a
ttributes
= array('d
ynsrc', 'i
d', 'name'
, );
|
|
270 |
|
var $a
ttributes
= array('d
ynsrc', 'i
d', 'name'
, );
|
271 |
|
|
|
271 |
|
|
272 |
|
/**
|
|
272 |
|
/**
|
273 |
|
* Lis
t of allow
ed "namesp
aced" attr
ibutes
|
|
273 |
|
* Lis
t of allow
ed "namesp
aced" attr
ibutes
|
274 |
|
*
|
|
274 |
|
*
|
275 |
|
* @va
r array
|
|
275 |
|
* @va
r array
|
276 |
|
* @ac
cess publi
c
|
|
276 |
|
* @ac
cess publi
c
|
277 |
|
*/
|
|
277 |
|
*/
|
278 |
|
var $a
ttributesN
S = array(
'xml:lang'
, );
|
|
278 |
|
var $a
ttributesN
S = array(
'xml:lang'
, );
|
279 |
|
|
|
279 |
|
|
280 |
|
/**
|
|
280 |
|
/**
|
281 |
|
* Con
structs cl
ass
|
|
281 |
|
* Con
structs cl
ass
|
282 |
|
*
|
|
282 |
|
*
|
283 |
|
* @ac
cess publi
c
|
|
283 |
|
* @ac
cess publi
c
|
284 |
|
*/
|
|
284 |
|
*/
|
285 |
|
functi
on SafeHTM
L()
|
|
285 |
|
functi
on SafeHTM
L()
|
286 |
|
{
|
|
286 |
|
{
|
287 |
|
//
making reg
ular expre
ssions bas
ed on Prot
o & CSS ar
rays
|
|
287 |
|
//
making reg
ular expre
ssions bas
ed on Prot
o & CSS ar
rays
|
288 |
|
fo
reach ($th
is->blackP
rotocols a
s $proto)
{
|
|
288 |
|
fo
reach ($th
is->blackP
rotocols a
s $proto)
{
|
289 |
|
$preg =
"/[\s\x01-
\x1F]*";
|
|
289 |
|
$preg =
"/[\s\x01-
\x1F]*";
|
290 |
|
for ($i=
0; $i<strl
en($proto)
; $i++) {
|
|
290 |
|
for ($i=
0; $i<strl
en($proto)
; $i++) {
|
291 |
|
$pre
g .= $prot
o{$i} . "[
\s\x01-\x1
F]*";
|
|
291 |
|
$pre
g .= $prot
o{$i} . "[
\s\x01-\x1
F]*";
|
292 |
|
}
|
|
292 |
|
}
|
293 |
|
$preg .=
":/i";
|
|
293 |
|
$preg .=
":/i";
|
294 |
|
$this->_
protoRegex
ps[] = $pr
eg;
|
|
294 |
|
$this->_
protoRegex
ps[] = $pr
eg;
|
295 |
|
}
|
|
295 |
|
}
|
296 |
|
|
|
296 |
|
|
297 |
|
fo
reach ($th
is->cssKey
words as $
css) {
|
|
297 |
|
fo
reach ($th
is->cssKey
words as $
css) {
|
298 |
|
$this->_
cssRegexps
[] = '/' .
$css . '/
i';
|
|
298 |
|
$this->_
cssRegexps
[] = '/' .
$css . '/
i';
|
299 |
|
}
|
|
299 |
|
}
|
300 |
|
re
turn true;
|
|
300 |
|
re
turn true;
|
301 |
|
}
|
|
301 |
|
}
|
302 |
|
|
|
302 |
|
|
303 |
|
/**
|
|
303 |
|
/**
|
304 |
|
* Han
dles the w
riting of
attributes
- called
from $this
->_openHan
dler()
|
|
304 |
|
* Han
dles the w
riting of
attributes
- called
from $this
->_openHan
dler()
|
305 |
|
*
|
|
305 |
|
*
|
306 |
|
* @pa
ram array
$attrs arr
ay of attr
ibutes $na
me => $val
ue
|
|
306 |
|
* @pa
ram array
$attrs arr
ay of attr
ibutes $na
me => $val
ue
|
307 |
|
* @re
turn boole
an
|
|
307 |
|
* @re
turn boole
an
|
308 |
|
* @ac
cess priva
te
|
|
308 |
|
* @ac
cess priva
te
|
309 |
|
*/
|
|
309 |
|
*/
|
310 |
|
functi
on _writeA
ttrs ($att
rs)
|
|
310 |
|
functi
on _writeA
ttrs ($att
rs)
|
311 |
|
{
|
|
311 |
|
{
|
312 |
|
if
(is_array
($attrs))
{
|
|
312 |
|
if
(is_array
($attrs))
{
|
313 |
|
foreach
($attrs as
$name =>
$value) {
|
|
313 |
|
foreach
($attrs as
$name =>
$value) {
|
314 |
|
|
|
314 |
|
|
315 |
|
$nam
e = strtol
ower($name
);
|
|
315 |
|
$nam
e = strtol
ower($name
);
|
316 |
|
|
|
316 |
|
|
317 |
|
if (
strpos($na
me, 'on')
=== 0) {
|
|
317 |
|
if (
strpos($na
me, 'on')
=== 0) {
|
318 |
|
continue;
|
|
318 |
|
continue;
|
319 |
|
}
|
|
319 |
|
}
|
320 |
|
if (
strpos($na
me, 'data'
) === 0) {
|
|
320 |
|
if (
strpos($na
me, 'data'
) === 0) {
|
321 |
|
continue;
|
|
321 |
|
continue;
|
322 |
|
}
|
|
322 |
|
}
|
323 |
|
if (
in_array($
name, $thi
s->attribu
tes)) {
|
|
323 |
|
if (
in_array($
name, $thi
s->attribu
tes)) {
|
324 |
|
continue;
|
|
324 |
|
continue;
|
325 |
|
}
|
|
325 |
|
}
|
326 |
|
if (
!preg_matc
h("/^[a-z0
-9]+$/i",
$name)) {
|
|
326 |
|
if (
!preg_matc
h("/^[a-z0
-9]+$/i",
$name)) {
|
327 |
|
if (!in_ar
ray($name,
$this->at
tributesNS
))
|
|
327 |
|
if (!in_ar
ray($name,
$this->at
tributesNS
))
|
328 |
|
{
|
|
328 |
|
{
|
329 |
|
contin
ue;
|
|
329 |
|
contin
ue;
|
330 |
|
}
|
|
330 |
|
}
|
331 |
|
}
|
|
331 |
|
}
|
332 |
|
|
|
332 |
|
|
333 |
|
if (
($value ==
= TRUE) ||
(is_null(
$value)))
{
|
|
333 |
|
if (
($value ==
= TRUE) ||
(is_null(
$value)))
{
|
334 |
|
$value = $
name;
|
|
334 |
|
$value = $
name;
|
335 |
|
}
|
|
335 |
|
}
|
336 |
|
|
|
336 |
|
|
337 |
|
if (
$name == '
style') {
|
|
337 |
|
if (
$name == '
style') {
|
338 |
|
|
|
338 |
|
|
339 |
|
/
/ removes
insignific
ant backsl
ahes
|
|
339 |
|
/
/ removes
insignific
ant backsl
ahes
|
340 |
|
$
value = st
r_replace(
"\\", '',
$value);
|
|
340 |
|
$
value = st
r_replace(
"\\", '',
$value);
|
341 |
|
|
|
341 |
|
|
342 |
|
/
/ removes
CSS commen
ts
|
|
342 |
|
/
/ removes
CSS commen
ts
|
343 |
|
w
hile (1)
|
|
343 |
|
w
hile (1)
|
344 |
|
{
|
|
344 |
|
{
|
345 |
|
$_value =
preg_repl
ace("!/\*.
*?\*/!s",
'', $value
);
|
|
345 |
|
$_value =
preg_repl
ace("!/\*.
*?\*/!s",
'', $value
);
|
346 |
|
if ($_val
ue == $val
ue) break;
|
|
346 |
|
if ($_val
ue == $val
ue) break;
|
347 |
|
$value =
$_value;
|
|
347 |
|
$value =
$_value;
|
348 |
|
}
|
|
348 |
|
}
|
349 |
|
|
|
349 |
|
|
350 |
|
/
/ replace
all & to &
amp;
|
|
350 |
|
/
/ replace
all & to &
amp;
|
351 |
|
$
value = st
r_replace(
'&', '
&', $value
);
|
|
351 |
|
$
value = st
r_replace(
'&', '
&', $value
);
|
352 |
|
$
value = st
r_replace(
'&', '&
;', $value
);
|
|
352 |
|
$
value = st
r_replace(
'&', '&
;', $value
);
|
353 |
|
|
|
353 |
|
|
354 |
|
f
oreach ($t
his->_cssR
egexps as
$css) {
|
|
354 |
|
f
oreach ($t
his->_cssR
egexps as
$css) {
|
355 |
|
if (pre
g_match($c
ss, $value
)) {
|
|
355 |
|
if (pre
g_match($c
ss, $value
)) {
|
356 |
|
con
tinue 2;
|
|
356 |
|
con
tinue 2;
|
357 |
|
}
|
|
357 |
|
}
|
358 |
|
}
|
|
358 |
|
}
|
359 |
|
f
oreach ($t
his->_prot
oRegexps a
s $proto)
{
|
|
359 |
|
f
oreach ($t
his->_prot
oRegexps a
s $proto)
{
|
360 |
|
if (pre
g_match($p
roto, $val
ue)) {
|
|
360 |
|
if (pre
g_match($p
roto, $val
ue)) {
|
361 |
|
con
tinue 2;
|
|
361 |
|
con
tinue 2;
|
362 |
|
}
|
|
362 |
|
}
|
363 |
|
}
|
|
363 |
|
}
|
364 |
|
}
|
|
364 |
|
}
|
365 |
|
|
|
365 |
|
|
366 |
|
$tem
pval = pre
g_replace(
'/&#(\d+);
?/me', "ch
r('\\1')",
$value);
//"'
|
|
366 |
|
$tem
pval = pre
g_replace(
'/&#(\d+);
?/me', "ch
r('\\1')",
$value);
//"'
|
367 |
|
$tem
pval = pre
g_replace(
'/&#x([0-9
a-f]+);?/m
ei', "chr(
hexdec('\\
1'))", $te
mpval);
|
|
367 |
|
$tem
pval = pre
g_replace(
'/&#x([0-9
a-f]+);?/m
ei', "chr(
hexdec('\\
1'))", $te
mpval);
|
368 |
|
|
|
368 |
|
|
369 |
|
if (
(in_array(
$name, $th
is->protoc
olAttribut
es)) &&
|
|
369 |
|
if (
(in_array(
$name, $th
is->protoc
olAttribut
es)) &&
|
370 |
|
(strpos($t
empval, ':
') !== fal
se))
|
|
370 |
|
(strpos($t
empval, ':
') !== fal
se))
|
371 |
|
{
|
|
371 |
|
{
|
372 |
|
if ($this-
>protocolF
iltering =
= 'black')
{
|
|
372 |
|
if ($this-
>protocolF
iltering =
= 'black')
{
|
373 |
|
foreac
h ($this->
_protoRege
xps as $pr
oto) {
|
|
373 |
|
foreac
h ($this->
_protoRege
xps as $pr
oto) {
|
374 |
|
if
(preg_mat
ch($proto,
$tempval)
) continue
2;
|
|
374 |
|
if
(preg_mat
ch($proto,
$tempval)
) continue
2;
|
375 |
|
}
|
|
375 |
|
}
|
376 |
|
} else {
|
|
376 |
|
} else {
|
377 |
|
$_temp
val = expl
ode(':', $
tempval);
|
|
377 |
|
$_temp
val = expl
ode(':', $
tempval);
|
378 |
|
$proto
= $_tempv
al[0];
|
|
378 |
|
$proto
= $_tempv
al[0];
|
379 |
|
if (!i
n_array($p
roto, $thi
s->whitePr
otocols))
{
|
|
379 |
|
if (!i
n_array($p
roto, $thi
s->whitePr
otocols))
{
|
380 |
|
co
ntinue;
|
|
380 |
|
co
ntinue;
|
381 |
|
}
|
|
381 |
|
}
|
382 |
|
}
|
|
382 |
|
}
|
383 |
|
}
|
|
383 |
|
}
|
384 |
|
|
|
384 |
|
|
385 |
|
$val
ue = str_r
eplace("\"
", ""
", $value)
;
|
|
385 |
|
$val
ue = str_r
eplace("\"
", ""
", $value)
;
|
386 |
|
$thi
s->_xhtml
.= ' ' . $
name . '="
' . $value
. '"';
|
|
386 |
|
$thi
s->_xhtml
.= ' ' . $
name . '="
' . $value
. '"';
|
387 |
|
}
|
|
387 |
|
}
|
388 |
|
}
|
|
388 |
|
}
|
389 |
|
re
turn true;
|
|
389 |
|
re
turn true;
|
390 |
|
}
|
|
390 |
|
}
|
391 |
|
|
|
391 |
|
|
392 |
|
/**
|
|
392 |
|
/**
|
393 |
|
* Ope
ning tag h
andler - c
alled from
HTMLSax
|
|
393 |
|
* Ope
ning tag h
andler - c
alled from
HTMLSax
|
394 |
|
*
|
|
394 |
|
*
|
395 |
|
* @pa
ram object
$parser H
TML Parser
|
|
395 |
|
* @pa
ram object
$parser H
TML Parser
|
396 |
|
* @pa
ram string
$name t
ag name
|
|
396 |
|
* @pa
ram string
$name t
ag name
|
397 |
|
* @pa
ram array
$attrs t
ag attribu
tes
|
|
397 |
|
* @pa
ram array
$attrs t
ag attribu
tes
|
398 |
|
* @re
turn boole
an
|
|
398 |
|
* @re
turn boole
an
|
399 |
|
* @ac
cess priva
te
|
|
399 |
|
* @ac
cess priva
te
|
400 |
|
*/
|
|
400 |
|
*/
|
401 |
|
functi
on _openHa
ndler(&$pa
rser, $nam
e, $attrs)
|
|
401 |
|
functi
on _openHa
ndler(&$pa
rser, $nam
e, $attrs)
|
402 |
|
{
|
|
402 |
|
{
|
403 |
|
$n
ame = strt
olower($na
me);
|
|
403 |
|
$n
ame = strt
olower($na
me);
|
404 |
|
|
|
404 |
|
|
405 |
|
if
(in_array
($name, $t
his->delet
eTagsConte
nt)) {
|
|
405 |
|
if
(in_array
($name, $t
his->delet
eTagsConte
nt)) {
|
406 |
|
array_pu
sh($this->
_dcStack,
$name);
|
|
406 |
|
array_pu
sh($this->
_dcStack,
$name);
|
407 |
|
$this->_
dcCounter[
$name] = i
sset($this
->_dcCount
er[$name])
? $this->
_dcCounter
[$name]+1
: 1;
|
|
407 |
|
$this->_
dcCounter[
$name] = i
sset($this
->_dcCount
er[$name])
? $this->
_dcCounter
[$name]+1
: 1;
|
408 |
|
}
|
|
408 |
|
}
|
409 |
|
if
(count($t
his->_dcSt
ack) != 0)
{
|
|
409 |
|
if
(count($t
his->_dcSt
ack) != 0)
{
|
410 |
|
return t
rue;
|
|
410 |
|
return t
rue;
|
411 |
|
}
|
|
411 |
|
}
|
412 |
|
|
|
412 |
|
|
413 |
|
if
(in_array
($name, $t
his->delet
eTags)) {
|
|
413 |
|
if
(in_array
($name, $t
his->delet
eTags)) {
|
414 |
|
return t
rue;
|
|
414 |
|
return t
rue;
|
415 |
|
}
|
|
415 |
|
}
|
416 |
|
|
|
416 |
|
|
417 |
|
if
(!preg_ma
tch("/^[a-
z0-9]+$/i"
, $name))
{
|
|
417 |
|
if
(!preg_ma
tch("/^[a-
z0-9]+$/i"
, $name))
{
|
418 |
|
if (preg
_match("!(
?:\@|://)!
i", $name)
) {
|
|
418 |
|
if (preg
_match("!(
?:\@|://)!
i", $name)
) {
|
419 |
|
$thi
s->_xhtml
.= '<'
. $name .
'>';
|
|
419 |
|
$thi
s->_xhtml
.= '<'
. $name .
'>';
|
420 |
|
}
|
|
420 |
|
}
|
421 |
|
return t
rue;
|
|
421 |
|
return t
rue;
|
422 |
|
}
|
|
422 |
|
}
|
423 |
|
|
|
423 |
|
|
424 |
|
if
(in_array
($name, $t
his->singl
eTags)) {
|
|
424 |
|
if
(in_array
($name, $t
his->singl
eTags)) {
|
425 |
|
$this->_
xhtml .= '
<' . $name
;
|
|
425 |
|
$this->_
xhtml .= '
<' . $name
;
|
426 |
|
$this->_
writeAttrs
($attrs);
|
|
426 |
|
$this->_
writeAttrs
($attrs);
|
427 |
|
$this->_
xhtml .= '
/>';
|
|
427 |
|
$this->_
xhtml .= '
/>';
|
428 |
|
return t
rue;
|
|
428 |
|
return t
rue;
|
429 |
|
}
|
|
429 |
|
}
|
430 |
|
|
|
430 |
|
|
431 |
|
//
TABLES: c
annot open
table ele
ments when
we are no
t inside t
able
|
|
431 |
|
//
TABLES: c
annot open
table ele
ments when
we are no
t inside t
able
|
432 |
|
if
((isset($
this->_cou
nter['tabl
e'])) && (
$this->_co
unter['tab
le'] <= 0)
|
|
432 |
|
if
((isset($
this->_cou
nter['tabl
e'])) && (
$this->_co
unter['tab
le'] <= 0)
|
433 |
|
&& (in_a
rray($name
, $this->t
ableTags))
)
|
|
433 |
|
&& (in_a
rray($name
, $this->t
ableTags))
)
|
434 |
|
{
|
|
434 |
|
{
|
435 |
|
return t
rue;
|
|
435 |
|
return t
rue;
|
436 |
|
}
|
|
436 |
|
}
|
437 |
|
|
|
437 |
|
|
438 |
|
//
PARAGRAPH
S: close p
aragraph w
hen closeP
aragraph t
ags openin
g
|
|
438 |
|
//
PARAGRAPH
S: close p
aragraph w
hen closeP
aragraph t
ags openin
g
|
439 |
|
if
((in_arra
y($name, $
this->clos
eParagraph
)) && (in_
array('p',
$this->_s
tack))) {
|
|
439 |
|
if
((in_arra
y($name, $
this->clos
eParagraph
)) && (in_
array('p',
$this->_s
tack))) {
|
440 |
|
$this->_
closeHandl
er($parser
, 'p');
|
|
440 |
|
$this->_
closeHandl
er($parser
, 'p');
|
441 |
|
}
|
|
441 |
|
}
|
442 |
|
|
|
442 |
|
|
443 |
|
//
LISTS: we
should cl
ose <li> i
f <li> of
the same l
evel openi
ng
|
|
443 |
|
//
LISTS: we
should cl
ose <li> i
f <li> of
the same l
evel openi
ng
|
444 |
|
if
($name ==
'li' && c
ount($this
->_liStack
) &&
|
|
444 |
|
if
($name ==
'li' && c
ount($this
->_liStack
) &&
|
445 |
|
$this->_
listScope
== $this->
_liStack[c
ount($this
->_liStack
)-1])
|
|
445 |
|
$this->_
listScope
== $this->
_liStack[c
ount($this
->_liStack
)-1])
|
446 |
|
{
|
|
446 |
|
{
|
447 |
|
$this->_
closeHandl
er($parser
, 'li');
|
|
447 |
|
$this->_
closeHandl
er($parser
, 'li');
|
448 |
|
}
|
|
448 |
|
}
|
449 |
|
|
|
449 |
|
|
450 |
|
//
LISTS: we
want to k
now on wha
t nesting
level of l
ists we ar
e
|
|
450 |
|
//
LISTS: we
want to k
now on wha
t nesting
level of l
ists we ar
e
|
451 |
|
if
(in_array
($name, $t
his->listT
ags)) {
|
|
451 |
|
if
(in_array
($name, $t
his->listT
ags)) {
|
452 |
|
$this->_
listScope+
+;
|
|
452 |
|
$this->_
listScope+
+;
|
453 |
|
}
|
|
453 |
|
}
|
454 |
|
if
($name ==
'li') {
|
|
454 |
|
if
($name ==
'li') {
|
455 |
|
array_pu
sh($this->
_liStack,
$this->_li
stScope);
|
|
455 |
|
array_pu
sh($this->
_liStack,
$this->_li
stScope);
|
456 |
|
}
|
|
456 |
|
}
|
457 |
|
|
|
457 |
|
|
458 |
|
$t
his->_xhtm
l .= '<' .
$name;
|
|
458 |
|
$t
his->_xhtm
l .= '<' .
$name;
|
459 |
|
$t
his->_writ
eAttrs($at
trs);
|
|
459 |
|
$t
his->_writ
eAttrs($at
trs);
|
460 |
|
$t
his->_xhtm
l .= '>';
|
|
460 |
|
$t
his->_xhtm
l .= '>';
|
461 |
|
ar
ray_push($
this->_sta
ck,$name);
|
|
461 |
|
ar
ray_push($
this->_sta
ck,$name);
|
462 |
|
$t
his->_coun
ter[$name]
= isset($
this->_cou
nter[$name
]) ? $this
->_counter
[$name]+1
: 1;
|
|
462 |
|
$t
his->_coun
ter[$name]
= isset($
this->_cou
nter[$name
]) ? $this
->_counter
[$name]+1
: 1;
|
463 |
|
re
turn true;
|
|
463 |
|
re
turn true;
|
464 |
|
}
|
|
464 |
|
}
|
465 |
|
|
|
465 |
|
|
466 |
|
/**
|
|
466 |
|
/**
|
467 |
|
* Clo
sing tag h
andler - c
alled from
HTMLSax
|
|
467 |
|
* Clo
sing tag h
andler - c
alled from
HTMLSax
|
468 |
|
*
|
|
468 |
|
*
|
469 |
|
* @pa
ram object
$parsers
HTML parse
r
|
|
469 |
|
* @pa
ram object
$parsers
HTML parse
r
|
470 |
|
* @pa
ram string
$name
tag name
|
|
470 |
|
* @pa
ram string
$name
tag name
|
471 |
|
* @re
turn boole
an
|
|
471 |
|
* @re
turn boole
an
|
472 |
|
* @ac
cess priva
te
|
|
472 |
|
* @ac
cess priva
te
|
473 |
|
*/
|
|
473 |
|
*/
|
474 |
|
functi
on _closeH
andler(&$p
arser, $na
me)
|
|
474 |
|
functi
on _closeH
andler(&$p
arser, $na
me)
|
475 |
|
{
|
|
475 |
|
{
|
476 |
|
|
|
476 |
|
|
477 |
|
$n
ame = strt
olower($na
me);
|
|
477 |
|
$n
ame = strt
olower($na
me);
|
478 |
|
|
|
478 |
|
|
479 |
|
if
(isset($t
his->_dcCo
unter[$nam
e]) && ($t
his->_dcCo
unter[$nam
e] > 0) &&
|
|
479 |
|
if
(isset($t
his->_dcCo
unter[$nam
e]) && ($t
his->_dcCo
unter[$nam
e] > 0) &&
|
480 |
|
(in_arra
y($name, $
this->dele
teTagsCont
ent)))
|
|
480 |
|
(in_arra
y($name, $
this->dele
teTagsCont
ent)))
|
481 |
|
{
|
|
481 |
|
{
|
482 |
|
while ($n
ame != ($t
ag = array
_pop($this
->_dcStack
))) {
|
|
482 |
|
while ($n
ame != ($t
ag = array
_pop($this
->_dcStack
))) {
|
483 |
|
$this->_
dcCounter[
$tag]--;
|
|
483 |
|
$this->_
dcCounter[
$tag]--;
|
484 |
|
}
|
|
484 |
|
}
|
485 |
|
|
|
485 |
|
|
486 |
|
$this->_d
cCounter[$
name]--;
|
|
486 |
|
$this->_d
cCounter[$
name]--;
|
487 |
|
}
|
|
487 |
|
}
|
488 |
|
|
|
488 |
|
|
489 |
|
if
(count($t
his->_dcSt
ack) != 0)
{
|
|
489 |
|
if
(count($t
his->_dcSt
ack) != 0)
{
|
490 |
|
return t
rue;
|
|
490 |
|
return t
rue;
|
491 |
|
}
|
|
491 |
|
}
|
492 |
|
|
|
492 |
|
|
493 |
|
if
((isset($
this->_cou
nter[$name
])) && ($t
his->_coun
ter[$name]
> 0)) {
|
|
493 |
|
if
((isset($
this->_cou
nter[$name
])) && ($t
his->_coun
ter[$name]
> 0)) {
|
494 |
|
while ($n
ame != ($t
ag = array
_pop($this
->_stack))
) {
|
|
494 |
|
while ($n
ame != ($t
ag = array
_pop($this
->_stack))
) {
|
495 |
|
$this
->_closeTa
g($tag);
|
|
495 |
|
$this
->_closeTa
g($tag);
|
496 |
|
}
|
|
496 |
|
}
|
497 |
|
|
|
497 |
|
|
498 |
|
$this->_c
loseTag($n
ame);
|
|
498 |
|
$this->_c
loseTag($n
ame);
|
499 |
|
}
|
|
499 |
|
}
|
500 |
|
re
turn true;
|
|
500 |
|
re
turn true;
|
501 |
|
}
|
|
501 |
|
}
|
502 |
|
|
|
502 |
|
|
503 |
|
/**
|
|
503 |
|
/**
|
504 |
|
* Clo
ses tag
|
|
504 |
|
* Clo
ses tag
|
505 |
|
*
|
|
505 |
|
*
|
506 |
|
* @pa
ram string
$tag tag
name
|
|
506 |
|
* @pa
ram string
$tag tag
name
|
507 |
|
* @re
turn boole
an
|
|
507 |
|
* @re
turn boole
an
|
508 |
|
* @ac
cess priva
te
|
|
508 |
|
* @ac
cess priva
te
|
509 |
|
*/
|
|
509 |
|
*/
|
510 |
|
functi
on _closeT
ag($tag)
|
|
510 |
|
functi
on _closeT
ag($tag)
|
511 |
|
{
|
|
511 |
|
{
|
512 |
|
if
(!in_arra
y($tag, $t
his->noClo
se)) {
|
|
512 |
|
if
(!in_arra
y($tag, $t
his->noClo
se)) {
|
513 |
|
$this->_
xhtml .= '
</' . $tag
. '>';
|
|
513 |
|
$this->_
xhtml .= '
</' . $tag
. '>';
|
514 |
|
}
|
|
514 |
|
}
|
515 |
|
|
|
515 |
|
|
516 |
|
$t
his->_coun
ter[$tag]-
-;
|
|
516 |
|
$t
his->_coun
ter[$tag]-
-;
|
517 |
|
|
|
517 |
|
|
518 |
|
if
(in_array
($tag, $th
is->listTa
gs)) {
|
|
518 |
|
if
(in_array
($tag, $th
is->listTa
gs)) {
|
519 |
|
$this->_
listScope-
-;
|
|
519 |
|
$this->_
listScope-
-;
|
520 |
|
}
|
|
520 |
|
}
|
521 |
|
|
|
521 |
|
|
522 |
|
if
($tag ==
'li') {
|
|
522 |
|
if
($tag ==
'li') {
|
523 |
|
array_po
p($this->_
liStack);
|
|
523 |
|
array_po
p($this->_
liStack);
|
524 |
|
}
|
|
524 |
|
}
|
525 |
|
re
turn true;
|
|
525 |
|
re
turn true;
|
526 |
|
}
|
|
526 |
|
}
|
527 |
|
|
|
527 |
|
|
528 |
|
/**
|
|
528 |
|
/**
|
529 |
|
* Cha
racter dat
a handler
- called f
rom HTMLSa
x
|
|
529 |
|
* Cha
racter dat
a handler
- called f
rom HTMLSa
x
|
530 |
|
*
|
|
530 |
|
*
|
531 |
|
* @pa
ram object
$parser H
TML parser
|
|
531 |
|
* @pa
ram object
$parser H
TML parser
|
532 |
|
* @pa
ram string
$data t
extual dat
a
|
|
532 |
|
* @pa
ram string
$data t
extual dat
a
|
533 |
|
* @re
turn boole
an
|
|
533 |
|
* @re
turn boole
an
|
534 |
|
* @ac
cess priva
te
|
|
534 |
|
* @ac
cess priva
te
|
535 |
|
*/
|
|
535 |
|
*/
|
536 |
|
functi
on _dataHa
ndler(&$pa
rser, $dat
a)
|
|
536 |
|
functi
on _dataHa
ndler(&$pa
rser, $dat
a)
|
537 |
|
{
|
|
537 |
|
{
|
538 |
|
if
(count($t
his->_dcSt
ack) == 0)
{
|
|
538 |
|
if
(count($t
his->_dcSt
ack) == 0)
{
|
539 |
|
$this->_
xhtml .= $
data;
|
|
539 |
|
$this->_
xhtml .= $
data;
|
540 |
|
}
|
|
540 |
|
}
|
541 |
|
re
turn true;
|
|
541 |
|
re
turn true;
|
542 |
|
}
|
|
542 |
|
}
|
543 |
|
|
|
543 |
|
|
544 |
|
/**
|
|
544 |
|
/**
|
545 |
|
* Esc
ape handle
r - called
from HTML
Sax
|
|
545 |
|
* Esc
ape handle
r - called
from HTML
Sax
|
546 |
|
*
|
|
546 |
|
*
|
547 |
|
* @pa
ram object
$parser H
TML parser
|
|
547 |
|
* @pa
ram object
$parser H
TML parser
|
548 |
|
* @pa
ram string
$data c
omments or
other typ
e of data
|
|
548 |
|
* @pa
ram string
$data c
omments or
other typ
e of data
|
549 |
|
* @re
turn boole
an
|
|
549 |
|
* @re
turn boole
an
|
550 |
|
* @ac
cess priva
te
|
|
550 |
|
* @ac
cess priva
te
|
551 |
|
*/
|
|
551 |
|
*/
|
552 |
|
functi
on _escape
Handler(&$
parser, $d
ata)
|
|
552 |
|
functi
on _escape
Handler(&$
parser, $d
ata)
|
553 |
|
{
|
|
553 |
|
{
|
554 |
|
re
turn true;
|
|
554 |
|
re
turn true;
|
555 |
|
}
|
|
555 |
|
}
|
556 |
|
|
|
556 |
|
|
557 |
|
/**
|
|
557 |
|
/**
|
558 |
|
* Ret
urns the X
HTML docum
ent
|
|
558 |
|
* Ret
urns the X
HTML docum
ent
|
559 |
|
*
|
|
559 |
|
*
|
560 |
|
* @re
turn strin
g Processe
d (X)HTML
document
|
|
560 |
|
* @re
turn strin
g Processe
d (X)HTML
document
|
561 |
|
* @ac
cess publi
c
|
|
561 |
|
* @ac
cess publi
c
|
562 |
|
*/
|
|
562 |
|
*/
|
563 |
|
functi
on getXHTM
L ()
|
|
563 |
|
functi
on getXHTM
L ()
|
564 |
|
{
|
|
564 |
|
{
|
565 |
|
wh
ile ($tag
= array_po
p($this->_
stack)) {
|
|
565 |
|
wh
ile ($tag
= array_po
p($this->_
stack)) {
|
566 |
|
$this->_
closeTag($
tag);
|
|
566 |
|
$this->_
closeTag($
tag);
|
567 |
|
}
|
|
567 |
|
}
|
568 |
|
|
|
568 |
|
|
569 |
|
re
turn $this
->_xhtml;
|
|
569 |
|
re
turn $this
->_xhtml;
|
570 |
|
}
|
|
570 |
|
}
|
571 |
|
|
|
571 |
|
|
572 |
|
/**
|
|
572 |
|
/**
|
573 |
|
* Cle
ars curren
t document
data
|
|
573 |
|
* Cle
ars curren
t document
data
|
574 |
|
*
|
|
574 |
|
*
|
575 |
|
* @re
turn boole
an
|
|
575 |
|
* @re
turn boole
an
|
576 |
|
* @ac
cess publi
c
|
|
576 |
|
* @ac
cess publi
c
|
577 |
|
*/
|
|
577 |
|
*/
|
578 |
|
functi
on clear()
|
|
578 |
|
functi
on clear()
|
579 |
|
{
|
|
579 |
|
{
|
580 |
|
$t
his->_xhtm
l = '';
|
|
580 |
|
$t
his->_xhtm
l = '';
|
581 |
|
re
turn true;
|
|
581 |
|
re
turn true;
|
582 |
|
}
|
|
582 |
|
}
|
583 |
|
|
|
583 |
|
|
584 |
|
/**
|
|
584 |
|
/**
|
585 |
|
* Mai
n parsing
fuction
|
|
585 |
|
* Mai
n parsing
fuction
|
586 |
|
*
|
|
586 |
|
*
|
587 |
|
* @pa
ram string
$doc HTML
document
for proces
sing
|
|
587 |
|
* @pa
ram string
$doc HTML
document
for proces
sing
|
588 |
|
* @re
turn strin
g Processe
d (X)HTML
document
|
|
588 |
|
* @re
turn strin
g Processe
d (X)HTML
document
|
589 |
|
* @ac
cess publi
c
|
|
589 |
|
* @ac
cess publi
c
|
590 |
|
*/
|
|
590 |
|
*/
|
591 |
|
functi
on parse($
doc)
|
|
591 |
|
functi
on parse($
doc)
|
592 |
|
{
|
|
592 |
|
{
|
593 |
|
|
|
593 |
|
|
594 |
|
//
Save all '
<' symbols
|
|
594 |
|
//
Save all '
<' symbols
|
595 |
|
$do
c = preg_r
eplace("/<
(?=[^a-zA-
Z\/\!\?\%]
)/", '<
', $doc);
|
|
595 |
|
$do
c = preg_r
eplace("/<
(?=[^a-zA-
Z\/\!\?\%]
)/", '<
', $doc);
|
596 |
|
|
|
596 |
|
|
597 |
|
//
Web docume
nts should
n't contai
ns \x00 sy
mbol
|
|
597 |
|
//
Web docume
nts should
n't contai
ns \x00 sy
mbol
|
598 |
|
$do
c = str_re
place("\x0
0", '', $d
oc);
|
|
598 |
|
$do
c = str_re
place("\x0
0", '', $d
oc);
|
599 |
|
|
|
599 |
|
|
600 |
|
//
Opera6 bug
workaroun
d
|
|
600 |
|
//
Opera6 bug
workaroun
d
|
601 |
|
$do
c = str_re
place("\xC
0\xBC", '&
lt;', $doc
);
|
|
601 |
|
$do
c = str_re
place("\xC
0\xBC", '&
lt;', $doc
);
|
602 |
|
|
|
602 |
|
|
603 |
|
//
UTF-7 enco
ding ASCII
decode
|
|
603 |
|
//
UTF-7 enco
ding ASCII
decode
|
604 |
|
$do
c = $this-
>repackUTF
7($doc);
|
|
604 |
|
$do
c = $this-
>repackUTF
7($doc);
|
605 |
|
|
|
605 |
|
|
606 |
|
//
Instantiat
e the pars
er
|
|
606 |
|
//
Instantiat
e the pars
er
|
607 |
|
$pa
rser=& new
XML_HTMLS
ax3();
|
|
607 |
|
$pa
rser=& new
XML_HTMLS
ax3();
|
608 |
|
|
|
608 |
|
|
609 |
|
//
Set up the
parser
|
|
609 |
|
//
Set up the
parser
|
610 |
|
$pa
rser->set_
object($th
is);
|
|
610 |
|
$pa
rser->set_
object($th
is);
|
611 |
|
|
|
611 |
|
|
612 |
|
$pa
rser->set_
element_ha
ndler('_op
enHandler'
,'_closeHa
ndler');
|
|
612 |
|
$pa
rser->set_
element_ha
ndler('_op
enHandler'
,'_closeHa
ndler');
|
613 |
|
$pa
rser->set_
data_handl
er('_dataH
andler');
|
|
613 |
|
$pa
rser->set_
data_handl
er('_dataH
andler');
|
614 |
|
$pa
rser->set_
escape_han
dler('_esc
apeHandler
');
|
|
614 |
|
$pa
rser->set_
escape_han
dler('_esc
apeHandler
');
|
615 |
|
|
|
615 |
|
|
616 |
|
$pa
rser->pars
e($doc);
|
|
616 |
|
$pa
rser->pars
e($doc);
|
617 |
|
|
|
617 |
|
|
618 |
|
ret
urn $this-
>getXHTML(
);
|
|
618 |
|
ret
urn $this-
>getXHTML(
);
|
619 |
|
|
|
619 |
|
|
620 |
|
}
|
|
620 |
|
}
|
621 |
|
|
|
621 |
|
|
622 |
|
|
|
622 |
|
|
623 |
|
/**
|
|
623 |
|
/**
|
624 |
|
* UTF
-7 decodin
g fuction
|
|
624 |
|
* UTF
-7 decodin
g fuction
|
625 |
|
*
|
|
625 |
|
*
|
626 |
|
* @pa
ram string
$str HTML
document
for recode
ASCII par
t of UTF-7
back to A
SCII
|
|
626 |
|
* @pa
ram string
$str HTML
document
for recode
ASCII par
t of UTF-7
back to A
SCII
|
627 |
|
* @re
turn strin
g Decoded
document
|
|
627 |
|
* @re
turn strin
g Decoded
document
|
628 |
|
* @ac
cess priva
te
|
|
628 |
|
* @ac
cess priva
te
|
629 |
|
*/
|
|
629 |
|
*/
|
630 |
|
functi
on repackU
TF7($str)
|
|
630 |
|
functi
on repackU
TF7($str)
|
631 |
|
{
|
|
631 |
|
{
|
632 |
|
ret
urn preg_r
eplace_cal
lback('!\+
([0-9a-zA-
Z/]+)\-!',
array($th
is, 'repac
kUTF7Callb
ack'), $st
r);
|
|
632 |
|
ret
urn preg_r
eplace_cal
lback('!\+
([0-9a-zA-
Z/]+)\-!',
array($th
is, 'repac
kUTF7Callb
ack'), $st
r);
|
633 |
|
}
|
|
633 |
|
}
|
634 |
|
|
|
634 |
|
|
635 |
|
/**
|
|
635 |
|
/**
|
636 |
|
* Add
itional UT
F-7 decodi
ng fuction
|
|
636 |
|
* Add
itional UT
F-7 decodi
ng fuction
|
637 |
|
*
|
|
637 |
|
*
|
638 |
|
* @pa
ram string
$str Stri
ng for rec
ode ASCII
part of UT
F-7 back t
o ASCII
|
|
638 |
|
* @pa
ram string
$str Stri
ng for rec
ode ASCII
part of UT
F-7 back t
o ASCII
|
639 |
|
* @re
turn strin
g Recoded
string
|
|
639 |
|
* @re
turn strin
g Recoded
string
|
640 |
|
* @ac
cess priva
te
|
|
640 |
|
* @ac
cess priva
te
|
641 |
|
*/
|
|
641 |
|
*/
|
642 |
|
functi
on repackU
TF7Callbac
k($str)
|
|
642 |
|
functi
on repackU
TF7Callbac
k($str)
|
643 |
|
{
|
|
643 |
|
{
|
644 |
|
$st
r = base64
_decode($s
tr[1]);
|
|
644 |
|
$st
r = base64
_decode($s
tr[1]);
|
645 |
|
$st
r = preg_r
eplace_cal
lback('/^(
(?:\x00.)*
)((?:[^\x0
0].)+)/',
array($thi
s, 'repack
UTF7Back')
, $str);
|
|
645 |
|
$st
r = preg_r
eplace_cal
lback('/^(
(?:\x00.)*
)((?:[^\x0
0].)+)/',
array($thi
s, 'repack
UTF7Back')
, $str);
|
646 |
|
ret
urn preg_r
eplace('/\
x00(.)/',
'$1', $str
);
|
|
646 |
|
ret
urn preg_r
eplace('/\
x00(.)/',
'$1', $str
);
|
647 |
|
}
|
|
647 |
|
}
|
648 |
|
|
|
648 |
|
|
649 |
|
/**
|
|
649 |
|
/**
|
650 |
|
* Add
itional UT
F-7 encodi
ng fuction
|
|
650 |
|
* Add
itional UT
F-7 encodi
ng fuction
|
651 |
|
*
|
|
651 |
|
*
|
652 |
|
* @pa
ram string
$str Stri
ng for rec
ode ASCII
part of UT
F-7 back t
o ASCII
|
|
652 |
|
* @pa
ram string
$str Stri
ng for rec
ode ASCII
part of UT
F-7 back t
o ASCII
|
653 |
|
* @re
turn strin
g Recoded
string
|
|
653 |
|
* @re
turn strin
g Recoded
string
|
654 |
|
* @ac
cess priva
te
|
|
654 |
|
* @ac
cess priva
te
|
655 |
|
*/
|
|
655 |
|
*/
|
656 |
|
functi
on repackU
TF7Back($s
tr)
|
|
656 |
|
functi
on repackU
TF7Back($s
tr)
|
657 |
|
{
|
|
657 |
|
{
|
658 |
|
ret
urn $str[1
].'+'.rtri
m(base64_e
ncode($str
[2]), '=')
.'-';
|
|
658 |
|
ret
urn $str[1
].'+'.rtri
m(base64_e
ncode($str
[2]), '=')
.'-';
|
659 |
|
}
|
|
659 |
|
}
|
660 |
|
}
|
|
660 |
|
}
|
661 |
|
|
|
661 |
|
|
662 |
|
/*
|
|
662 |
|
/*
|
663 |
|
* Local v
ariables:
|
|
663 |
|
* Local v
ariables:
|
664 |
|
* tab-wid
th: 4
|
|
664 |
|
* tab-wid
th: 4
|
665 |
|
* c-basic
-offset: 4
|
|
665 |
|
* c-basic
-offset: 4
|
666 |
|
* c-hangi
ng-comment
-ender-p:
nil
|
|
666 |
|
* c-hangi
ng-comment
-ender-p:
nil
|
667 |
|
* End:
|
|
667 |
|
* End:
|
668 |
|
*/
|
|
668 |
|
*/
|
669 |
|
|
|
669 |
|
|
670 |
|
?>
|
|
670 |
|
?>
|