includes/wikiengine/parse_mediawiki.php
changeset 1142 c522ea3871a3
parent 1138 a7b490f0c418
child 1156 417e66a664d0
equal deleted inserted replaced
1141:5a858d6f3634 1142:c522ea3871a3
   180     // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html + some Enano extensions)
   180     // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html + some Enano extensions)
   181     $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
   181     $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
   182     
   182     
   183     // Wrap all block level tags
   183     // Wrap all block level tags
   184     RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
   184     RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
       
   185     
   185     // I'm not sure why I had to go through all these alternatives. Trying to bring it
   186     // I'm not sure why I had to go through all these alternatives. Trying to bring it
   186     // all down to one by ?'ing subpatterns was causing things to return empty and throwing
   187     // all down to one by ?'ing subpatterns was causing things to return empty and throwing
   187     // errors in the parser. Eventually, around ~3:57AM I just settled on this motherf---er
   188     // errors in the parser. Eventually, around ~3:57AM I just settled on this motherf---er
   188     // of a regular expression.
   189     // of a regular expression.
       
   190     
       
   191     // FIXME: This regexp triggers a known PHP stack size issue under win32 and possibly
       
   192     // other platforms (<http://bugs.php.net/bug.php?id=47689>). The workaround is going to
       
   193     // involve writing our own parser that takes care of recursion without using the stack,
       
   194     // which is going to be a bitch, and may not make it in until Caoineag RCs.
       
   195     
   189     $regex = ";
   196     $regex = ";
   190               <($blocklevel)
   197               <($blocklevel)
   191               (?:
   198               (?:
   192                 # self closing, no attributes
   199                 # self closing, no attributes
   193                 [ ]*/>
   200                 [ ]*/>
   204                 >
   211                 >
   205                 (?: (?R) | .*? )*</\\1>
   212                 (?: (?R) | .*? )*</\\1>
   206               )
   213               )
   207                 ;sx";
   214                 ;sx";
   208                 
   215                 
   209     // Using preg_replace here sometimes gives us empty strings, probably because we're using $0
       
   210     // in the replace formatter. so we'll just take care of it explicitly here with preg_match_all
       
   211     // and good ole str_replace_once.
       
   212     
       
   213     // FIXME: this regexp can cause crashes under win32 PHP due to some Apache limitations... possibly
       
   214     // write a non-regexp based replacement. same bug as the comment block above, apparently
       
   215     
       
   216     // oh. and we're using this tokens thing because for identical matches, the first match will
   216     // oh. and we're using this tokens thing because for identical matches, the first match will
   217     // get wrapped X number of times instead of all matches getting wrapped once; replacing each
   217     // get wrapped X number of times instead of all matches getting wrapped once; replacing each
   218     // with a unique token id remedies this
   218     // with a unique token id remedies this
   219     
   219     
   220     $tokens = array();
   220     $tokens = array();