includes/functions.php
changeset 162 ad00dc1f8706
parent 145 6f0bbf88c325
child 163 54c79adfb694
equal deleted inserted replaced
161:e1a22031b5bd 162:ad00dc1f8706
  1787  * @return string cleaned HTML
  1787  * @return string cleaned HTML
  1788  */
  1788  */
  1789 
  1789 
  1790 function sanitize_html($html, $filter_php = true)
  1790 function sanitize_html($html, $filter_php = true)
  1791 {
  1791 {
       
  1792   // Random seed for substitution
       
  1793   $rand_seed = md5( sha1(microtime()) . mt_rand() );
       
  1794   
       
  1795   // Strip out comments that are already escaped
       
  1796   preg_match_all('/<!--(.*?)-->/', $html, $comment_match);
       
  1797   $i = 0;
       
  1798   foreach ( $comment_match[0] as $comment )
       
  1799   {
       
  1800     $html = str_replace_once($comment, "{HTMLCOMMENT:$i:$rand_seed}", $html);
       
  1801     $i++;
       
  1802   }
       
  1803   
       
  1804   // Strip out code sections that will be postprocessed by Text_Wiki
       
  1805   preg_match_all(';^<code(\s[^>]*)?>((?:(?R)|.)*?)\n</code>(\s|$);msi', $html, $code_match);
       
  1806   $i = 0;
       
  1807   foreach ( $code_match[0] as $code )
       
  1808   {
       
  1809     $html = str_replace_once($code, "{TW_CODE:$i:$rand_seed}", $html);
       
  1810     $i++;
       
  1811   }
  1792 
  1812 
  1793   $html = preg_replace('#<([a-z]+)([\s]+)([^>]+?)'.htmlalternatives('javascript:').'(.+?)>(.*?)</\\1>#is', '&lt;\\1\\2\\3javascript:\\59&gt;\\60&lt;/\\1&gt;', $html);
  1813   $html = preg_replace('#<([a-z]+)([\s]+)([^>]+?)'.htmlalternatives('javascript:').'(.+?)>(.*?)</\\1>#is', '&lt;\\1\\2\\3javascript:\\59&gt;\\60&lt;/\\1&gt;', $html);
  1794   $html = preg_replace('#<([a-z]+)([\s]+)([^>]+?)'.htmlalternatives('javascript:').'(.+?)>#is', '&lt;\\1\\2\\3javascript:\\59&gt;', $html);
  1814   $html = preg_replace('#<([a-z]+)([\s]+)([^>]+?)'.htmlalternatives('javascript:').'(.+?)>#is', '&lt;\\1\\2\\3javascript:\\59&gt;', $html);
  1795 
  1815 
  1796   if($filter_php)
  1816   if($filter_php)
  1898   // The rule is so specific because everything else will have been filtered by now
  1918   // The rule is so specific because everything else will have been filtered by now
  1899   $html = preg_replace('/<(script|iframe)(.+?)src=([^>]*)</i', '&lt;\\1\\2src=\\3&lt;', $html);
  1919   $html = preg_replace('/<(script|iframe)(.+?)src=([^>]*)</i', '&lt;\\1\\2src=\\3&lt;', $html);
  1900 
  1920 
  1901   // Unstrip comments
  1921   // Unstrip comments
  1902   $html = preg_replace('/&lt;!--([^>]*?)--&gt;/i', '', $html);
  1922   $html = preg_replace('/&lt;!--([^>]*?)--&gt;/i', '', $html);
       
  1923   
       
  1924   // Restore stripped comments
       
  1925   $i = 0;
       
  1926   foreach ( $comment_match[0] as $comment )
       
  1927   {
       
  1928     $html = str_replace_once("{HTMLCOMMENT:$i:$rand_seed}", $comment, $html);
       
  1929     $i++;
       
  1930   }
       
  1931   
       
  1932   // Restore stripped code
       
  1933   $i = 0;
       
  1934   foreach ( $code_match[0] as $code )
       
  1935   {
       
  1936     $html = str_replace_once("{TW_CODE:$i:$rand_seed}", $code, $html);
       
  1937     $i++;
       
  1938   }
  1903 
  1939 
  1904   return $html;
  1940   return $html;
  1905 
  1941 
  1906 }
  1942 }
  1907 
  1943