 * @param string $text
 * @param string $element
 * @return string
 */
function fixTagAttributes( $text, $element ) {
  // A blank attribute string yields no attributes at all.
  if ( trim( $text ) == '' ) {
    return '';
  }

  // Parse the raw attribute text into name => value pairs, then let
  // validateTagAttributes() filter them for the given element.
  $stripped = validateTagAttributes(
    decodeTagAttributes( $text ), $element );

  $attribs = array();
  foreach ( $stripped as $attribute => $value ) {
    $encAttribute = htmlspecialchars( $attribute );
    $encValue = safeEncodeAttribute( $value );

    // Always emit name="value" with double quotes.
    $attribs[] = $encAttribute . '="' . $encValue . '"';
  }

  // The leading space lets the caller append the result directly after
  // the tag name.
  return count( $attribs ) ? ' ' . implode( ' ', $attribs ) : '';
}
77 |
77 |
/**
 * Encode an attribute value for HTML tags, with extra armoring
 * against further wiki processing.
 *
 * The value is first HTML-encoded by encodeAttribute(); characters and
 * keywords that later wiki parsing could act on are then replaced by
 * numeric character references.
 *
 * @param string $text Raw attribute value
 * @return string HTML-encoded text fragment
 */
function safeEncodeAttribute( $text ) {
  $encValue = encodeAttribute( $text );

  # Templates and links may be expanded in later parsing,
  # creating invalid or dangerous output. Suppress this.
  $encValue = strtr( $encValue, array(
    '<'    => '&lt;',   // This should never happen,
    '>'    => '&gt;',   // we've received invalid input
    '"'    => '&quot;', // which should have been escaped.
    '{'    => '&#123;',
    '['    => '&#91;',
    "''"   => '&#39;&#39;',
    // Only the first character is encoded; that is enough to break the
    // keyword or the double underscore.
    'ISBN' => '&#73;SBN',
    'RFC'  => '&#82;FC',
    'PMID' => '&#80;MID',
    '|'    => '&#124;',
    '__'   => '&#95;_',
  ) );

  return $encValue;
}
105 |
105 |
/**
 * Encode an attribute value for HTML output.
 *
 * Entities that are already encoded are decoded first and the result is
 * then encoded once, so "&amp;" in the input stays "&amp;" instead of
 * becoming "&amp;amp;". Newlines, carriage returns and tabs are emitted
 * as numeric character references.
 *
 * @param string $text Raw attribute value
 * @return string HTML-encoded text fragment
 */
function encodeAttribute( $text ) {

  // In Enano 1.0.3, added this cheapo hack to keep ampersands
  // from being double-sanitized. Thanks to markybob from #deluge.
  $decoded = strtr( $text, array(
    '&amp;'  => '&',
    '&quot;' => '"',
    '&lt;'   => '<',
    '&gt;'   => '>',
    '&#039;' => "'"
  ) );

  // htmlspecialchars() the "manual" way. This must run on the decoded
  // text; running it on $text would throw the pass above away.
  $encValue = strtr( $decoded, array(
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;',
    "'" => '&#039;'
  ) );

  // Whitespace is normalized during attribute decoding,
  // so if we've been passed non-spaces we must encode them
  // ahead of time or they won't be preserved.
  $encValue = strtr( $encValue, array(
    "\n" => '&#10;',
    "\r" => '&#13;',
    "\t" => '&#9;',
  ) );

  return $encValue;
}
145 |
145 |
/**
 * Restore stripped content in $text using the global strip state:
 * general markers first, then nowiki markers.
 *
 * @param string $text Text containing strip markers
 * @return string
 */
function unstripForHTML( $text ) {
  global $mStripState;
  // Order matters: unstripNoWiki() is documented to run after unstrip().
  $text = unstrip( $text, $mStripState );
  $text = unstripNoWiki( $text, $mStripState );
  return $text;
}
152 |
152 |
/**
 * Replace nowiki strip markers in $text with their stored output.
 * Always call this after unstrip() to preserve the order
 *
 * @param string $text Text containing strip markers
 * @param array $state Strip state; only $state['nowiki'] (marker => output) is read
 * @return string $text unchanged when no nowiki state has been recorded
 * @private
 */
function unstripNoWiki( $text, &$state ) {
  if ( !isset( $state['nowiki'] ) ) {
    return $text;
  }

  # TODO: good candidate for FSS
  $text = strtr( $text, $state['nowiki'] );

  return $text;
}
168 |
168 |
/**
 * Take an array of attribute names and values and normalize or discard
 * illegal values for the given element type.
 * @return string
 * @access public
 * @static
 */
function decodeCharReferences( $text ) {
  // Every match of MW_CHAR_REFS_REGEX is handed to
  // decodeCharReferencesCallback(), whose return value replaces it.
  return preg_replace_callback(
    MW_CHAR_REFS_REGEX,
    'decodeCharReferencesCallback',
    $text );
}
254 |
254 |
/**
 * Fetch the whitelist of acceptable attributes for a given
 * element name.
 *
 * The full whitelist is built once per request by
 * setupAttributeWhitelist() and cached in a static variable.
 *
 * @param string $element
 * @return array Allowed attribute names; empty for an unknown element
 */
function attributeWhitelist( $element ) {
  static $list;
  if ( !isset( $list ) ) {
    $list = setupAttributeWhitelist();
  }
  return isset( $list[$element] )
    ? $list[$element]
    : array();
}
271 |
271 |
/**
 * Build the map of element name => list of allowed attribute names.
 *
 * Plugins can extend the map through the 'html_attribute_whitelist' hook;
 * hook code runs inside this function and can modify $whitelist.
 *
 * @return array
 */
function setupAttributeWhitelist() {
  // Not all of these are used directly below, but the eval()'d hook code
  // runs in this scope and may rely on them, so they must stay.
  global $db, $session, $paths, $template, $plugins;
  $common = array( 'id', 'class', 'lang', 'dir', 'title', 'style' );
  $block = array_merge( $common, array( 'align' ) );
  $tablealign = array( 'align', 'char', 'charoff', 'valign' );
  $tablecell = array( 'abbr',
                      'axis',
                      'headers',
                      'scope',
                      'rowspan',
                      'colspan',
                      'nowrap', # deprecated
                      'width',  # deprecated
                      'height', # deprecated
                      'bgcolor' # deprecated
                      );

  # Numbers refer to sections in HTML 4.01 standard describing the element.
  # See: http://www.w3.org/TR/html4/
  $whitelist = array (
    # 7.5.4
    'div'        => $block,
    'center'     => $common, # deprecated
    'span'       => $block, # ??

    # 7.5.5
    'h1'         => $block,
    'h2'         => $block,
    'h3'         => $block,
    'h4'         => $block,
    'h5'         => $block,
    'h6'         => $block,

    # 7.5.6
    # address

    # 8.2.4
    # bdo

    # 9.2.1
    'em'         => $common,
    'strong'     => $common,
    'cite'       => $common,
    # dfn
    'code'       => $common,
    # samp
    # kbd
    'var'        => $common,
    # abbr
    # acronym

    # 9.2.2
    'blockquote' => array_merge( $common, array( 'cite' ) ),
    # q

    # 9.2.3
    'sub'        => $common,
    'sup'        => $common,

    # 9.3.1
    'p'          => $block,

    # 9.3.2
    'br'         => array( 'id', 'class', 'title', 'style', 'clear' ),

    # 9.3.4
    'pre'        => array_merge( $common, array( 'width' ) ),

    # 9.4
    'ins'        => array_merge( $common, array( 'cite', 'datetime' ) ),
    'del'        => array_merge( $common, array( 'cite', 'datetime' ) ),

    # 10.2
    'ul'         => array_merge( $common, array( 'type' ) ),
    'ol'         => array_merge( $common, array( 'type', 'start' ) ),
    'li'         => array_merge( $common, array( 'type', 'value' ) ),

    # 10.3
    'dl'         => $common,
    'dd'         => $common,
    'dt'         => $common,

    # 11.2.1
    'table'      => array_merge( $common,
                      array( 'summary', 'width', 'border', 'frame',
                             'rules', 'cellspacing', 'cellpadding',
                             'align', 'bgcolor',
                      ) ),

    # 11.2.2
    'caption'    => array_merge( $common, array( 'align' ) ),

    # 11.2.3
    'thead'      => array_merge( $common, $tablealign ),
    'tfoot'      => array_merge( $common, $tablealign ),
    'tbody'      => array_merge( $common, $tablealign ),

    # 11.2.4
    'colgroup'   => array_merge( $common, array( 'span', 'width' ), $tablealign ),
    'col'        => array_merge( $common, array( 'span', 'width' ), $tablealign ),

    # 11.2.5
    'tr'         => array_merge( $common, array( 'bgcolor' ), $tablealign ),

    # 11.2.6
    'td'         => array_merge( $common, $tablecell, $tablealign ),
    'th'         => array_merge( $common, $tablecell, $tablealign ),

    # 12.2
    # added by dan
    'a'          => array_merge( $common, array( 'href', 'name' ) ),

    # 13.2
    # added by dan
    'img'        => array_merge( $common, array( 'src', 'width', 'height', 'alt' ) ),

    # 15.2.1
    'tt'         => $common,
    'b'          => $common,
    'i'          => $common,
    'big'        => $common,
    'small'      => $common,
    'strike'     => $common,
    's'          => $common,
    'u'          => $common,

    # 15.2.2
    'font'       => array_merge( $common, array( 'size', 'color', 'face' ) ),
    # basefont

    # 15.3
    'hr'         => array_merge( $common, array( 'noshade', 'size', 'width' ) ),

    # XHTML Ruby annotation text module, simple ruby only.
    # http://www.w3c.org/TR/ruby/
    'ruby'       => $common,
    # rbc
    # rtc
    'rb'         => $common,
    'rt'         => $common, #array_merge( $common, array( 'rbspan' ) ),
    'rp'         => $common,

    # For compatibility with the XHTML parser.
    'nowiki'     => array(),
    'noinclude'  => array(),
    'nodisplay'  => array(),
    'lang'       => array('code'),

    # XHTML stuff
    'acronym'    => $common
    );

  // custom tags can be added by plugins
  // NOTE(review): each hook entry is executed with eval() in this scope.
  // That is only safe if hook code comes exclusively from trusted,
  // installed plugins -- confirm against the plugin loader.
  $code = $plugins->setHook('html_attribute_whitelist');
  foreach ( $code as $cmd )
  {
    eval($cmd);
  }

  return $whitelist;
}
437 |
437 |
/**
 * Given a value escape it so that it can be used in an id attribute and
 * return it, this does not validate the value however (see first link)
 *
 * @param string $id
 * @return string
 */
function escapeId( $id ) {
  // Applied in order after urlencode(): an encoded colon is turned back
  // into ':' first, then every remaining '%' escape prefix becomes '.'.
  static $replace = array(
    '%3A' => ':',
    '%' => '.'
  );

  // Spaces become underscores and character references are decoded
  // before the value is URL-encoded.
  $id = urlencode( decodeCharReferences( strtr( $id, ' ', '_' ) ) );

  return str_replace( array_keys( $replace ), array_values( $replace ), $id );
}
464 |
464 |
/**
 * More or less "markup-safe" explode()
 * Ignores any instances of the separator inside <...>
 *
 * Separators found inside a <...> span are swapped for a NUL placeholder,
 * the text is split, and the placeholder is swapped back in each piece.
 *
 * @param string $separator
 * @param string $text
 * @return array
 */
function wfExplodeMarkup( $separator, $text ) {
  $placeholder = "\x00";

  // Just in case...
  // (a NUL already in the input would be mistaken for the placeholder)
  $text = str_replace( $placeholder, '', $text );

  // Hide separators that occur inside <...>
  $replacer = new ReplacerCallback( $separator, $placeholder );
  $cleaned = preg_replace_callback( '/(<.*?>)/', array( $replacer, 'go' ), $text );

  $items = explode( $separator, $cleaned );
  foreach ( $items as $i => $str ) {
    // Put the hidden separators back.
    $items[$i] = str_replace( $placeholder, $separator, $str );
  }

  return $items;
}
489 |
489 |
/**
 * Callback object for preg_replace_callback(): replaces one fixed string
 * with another inside the first capture group of each match.
 */
class ReplacerCallback {
  /** @var string Text to search for */
  var $from;
  /** @var string Replacement text */
  var $to;

  /**
   * @param string $from Text to search for
   * @param string $to Replacement text
   */
  function __construct( $from, $to ) {
    $this->from = $from;
    $this->to = $to;
  }

  /**
   * Legacy PHP 4 style constructor name. PHP 8 no longer treats a method
   * named after the class as the constructor, so initialization lives in
   * __construct(); this stays for code that calls it explicitly.
   */
  function ReplacerCallback( $from, $to ) {
    $this->__construct( $from, $to );
  }

  /**
   * @param array $matches Match array from preg_replace_callback(); index 1 is used
   * @return string
   */
  function go( $matches ) {
    return str_replace( $this->from, $this->to, $matches[1] );
  }
}
500 |
500 |
/**
 * Return an associative array of attribute names and values from
 * a partial tag string. Attribute names are forced to lowercase,
 * used to prevent stripping of <gallery> when saving (fixes bug 2700)
 *
 * @access private
 */
function mwStrip( $text, &$state, $stripcomments = false , $dontstrip = array () ) {
  // $wgRawHtml is read by the 'html' case below, so it has to be declared
  // global here; otherwise it is an undefined local.
  global $wgRandomKey, $wgRawHtml;
  $render = true;

  // Fresh random marker prefix for this pass, published through the global.
  $wgRandomKey = "\x07UNIQ" . dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff));
  $uniq_prefix =& $wgRandomKey;
  $commentState = array();

  $elements = array( 'nowiki', 'gallery' );

  # Removing $dontstrip tags from $elements list (currently only 'gallery', fixing bug 2700)
  foreach ( $elements as $k => $v ) {
    if ( in_array( $v, $dontstrip ) ) {
      unset( $elements[$k] );
    }
  }

  $matches = array();
  $text = extractTagsAndParams( $elements, $text, $matches, $uniq_prefix );

  foreach ( $matches as $marker => $data ) {
    list( $element, $content, $params, $tag ) = $data;

    // Reset for every marker. The switch below assigns nothing for
    // elements it does not handle (e.g. gallery); without this reset
    // such a marker would reuse the output of the previous one.
    $output = '';

    if ( $render ) {
      $tagName = strtolower( $element );
      switch ( $tagName ) {
        case '!--':
          // Comment
          if ( substr( $tag, -3 ) == '-->' ) {
            $output = $tag;
          } else {
            // Unclosed comment in input.
            // Close it so later stripping can remove it
            $output = "$tag-->";
          }
          break;
        case 'html':
          if ( $wgRawHtml ) {
            $output = $content;
            break;
          }
          // Shouldn't happen otherwise. :)
          // Deliberate fall-through: escape as for nowiki.
        case 'nowiki':
          $output = wfEscapeHTMLTagsOnly( $content );
          break;
        default:
          // No renderer for this element; $output stays empty.
      }
    } else {
      // Just stripping tags; keep the source
      $output = $tag;
    }

    // Unstrip the output, because unstrip() is no longer recursive so
    // it won't do it itself
    $output = unstrip( $output, $state );

    if ( !$stripcomments && $element == '!--' ) {
      $commentState[$marker] = $output;
    } elseif ( $element == 'html' || $element == 'nowiki' ) {
      $state['nowiki'][$marker] = $output;
    } else {
      $state['general'][$marker] = $output;
    }
  }

  # Unstrip comments unless explicitly told otherwise.
  # (The comments are always stripped prior to this point, so as to
  # not invoke any extension tags / parser hooks contained within
  # a comment.)
  if ( !$stripcomments ) {
    // Put them all back and forget them
    $text = strtr( $text, $commentState );
  }

  return $text;
}
659 |
659 |
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a random marker and returns the next text. The output
 *
 * @access private
 * @static
 */
function extractTagsAndParams($elements, $text, &$matches, $uniq_prefix = ''){
  // Marker counter; static, so it keeps counting across calls.
  static $n = 1;
  $stripped = '';
  $matches = array();

  $taglist = implode( '|', $elements );
  // Groups 1-3: tag name, attribute text, closing ">" or "/>".
  // Group 4: "!--" when the match is the start of a comment.
  $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

  while ( '' != $text ) {
    $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
    $stripped .= $p[0];
    if ( count( $p ) < 5 ) {
      // No further opening tag or comment: the rest is plain text.
      break;
    }
    if ( count( $p ) > 5 ) {
      // comment
      $element    = $p[4];
      $attributes = '';
      $close      = '';
      $inside     = $p[5];
    } else {
      // tag
      $element    = $p[1];
      $attributes = $p[2];
      $close      = $p[3];
      $inside     = $p[4];
    }

    $marker = "$uniq_prefix-$element-" . sprintf('%08X', $n++) . '-QINU';
    $stripped .= $marker;

    if ( $close === '/>' ) {
      // Empty element tag, <tag />
      $content = null;
      $text = $inside;
      $tail = null;
    } else {
      if ( $element == '!--' ) {
        $end = '/(-->)/';
      } else {
        $end = "/(<\\/$element\\s*>)/i";
      }
      $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
      $content = $q[0];
      if ( count( $q ) < 3 ) {
        # No end tag -- let it run out to the end of the text.
        $tail = '';
        $text = '';
      } else {
        $tail = $q[1];
        $text = $q[2];
      }
    }

    // Per marker: element name, inner content, decoded attributes, and
    // the full source text of the tag.
    $matches[$marker] = array( $element,
      $content,
      decodeTagAttributes( $attributes ),
      "<$element$attributes$close$content$tail" );
  }
  return $stripped;
}
739 |
739 |
/**
 * Escape html tags
 * Basically replacing " > and < with HTML entities ( &quot;, &gt;, &lt;)
 *
 * Ampersands are left alone, so entities already present in the input
 * are preserved.
 *
 * @param $in String: text that might contain HTML tags.
 * @return string Escaped string
 */
function wfEscapeHTMLTagsOnly( $in ) {
  return str_replace(
    array( '"', '>', '<' ),
    array( '&quot;', '&gt;', '&lt;' ),
    $in );
}
753 |
753 |
/**
 * Restores pre, math, and other extensions removed by strip()
 *
 * Replaces the markers recorded in $state['general'] with their stored
 * output.
 *
 * always call unstripNoWiki() after this one
 *
 * @param string $text Text containing strip markers
 * @param array $state Strip state; only $state['general'] (marker => output) is read
 * @return string $text unchanged when no general state has been recorded
 * @private
 */
function unstrip( $text, &$state ) {
  if ( !isset( $state['general'] ) ) {
    return $text;
  }

  # TODO: good candidate for FSS
  $text = strtr( $text, $state['general'] );

  return $text;
}
770 |
770 |
/**
 * Return UTF-8 string for a codepoint if that is a valid
 * character reference, otherwise U+FFFD REPLACEMENT CHARACTER.
 *
 * @param int $codepoint
 * @return string
 * @private
 */
function decodeChar( $codepoint ) {
  if ( validateCodepoint( $codepoint ) ) {
    return codepointToUtf8( $codepoint );
  } else {
    return UTF8_REPLACEMENT;
  }
}
785 |
785 |
/**
 * If the named entity is defined in the HTML 4.0/XHTML 1.0 DTD,
 * return the UTF-8 encoding of that character. Otherwise, returns