includes/wikiengine/parse_mediawiki.php
author Dan Fuhry <dan@enanocms.org>
Tue, 16 Nov 2010 12:31:41 -0500
branch1.1.7-maintenance
changeset 1317 6012710ae538
parent 1174 def792dd9b1b
child 1217 feeb49aa6270
permissions -rw-r--r--
SECURITY: Fix SQL injection in banlist check (1.1.7 branch)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     1
<?php
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     2
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     3
/*
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     4
 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
1081
745200a9cc2a Fixed some upgrade bugs; added support for choosing one's own date/time formats; rebrand as 1.1.7
Dan
parents: 1078
diff changeset
     5
 * Copyright (C) 2006-2009 Dan Fuhry
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     6
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     7
 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     8
 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
     9
 *
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    10
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    11
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    12
 */
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    13
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    14
class Carpenter_Parse_MediaWiki
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    15
{
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    16
  public $rules = array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    17
    'bold'   => "/'''(.+?)'''/",
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    18
    'italic' => "/''(.+?)''/",
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    19
    'underline' => '/__(.+?)__/',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    20
    'externalwithtext' => '#\[((?:https?|irc|ftp)://.+?) (.+?)\]#',
1106
01315acbc22b Wikitext: added horizontal rule support
Dan
parents: 1081
diff changeset
    21
    'externalnotext' => '#\[((?:https?|irc|ftp)://.+?)\]#',
1156
417e66a664d0 Wikitext parser: re-added mailto support
Dan
parents: 1138
diff changeset
    22
    'mailtonotext' => '#\[mailto:([^ \]]+?)\]#',
417e66a664d0 Wikitext parser: re-added mailto support
Dan
parents: 1138
diff changeset
    23
    'mailtowithtext' => '#\[mailto:([^ \]]+?) (.+?)\]#',
1174
def792dd9b1b Wikitext parer: re-added the <code> tag thing
Dan
parents: 1156
diff changeset
    24
    'hr' => '/^[-]{4,} *$/m',
def792dd9b1b Wikitext parer: re-added the <code> tag thing
Dan
parents: 1156
diff changeset
    25
    'code' => '/^<code>(?:\r?\n)?(.+?)(?:\r?\n)?<\/code>$/mis'
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    26
  );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    27
  
1078
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
    28
  private $blockquote_rand_id;
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
    29
  
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    30
  public function lang(&$text)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    31
  {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    32
    global $lang;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    33
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    34
    preg_match_all('/<lang (?:code|id)="([a-z0-9_-]+)">([\w\W]+?)<\/lang>/', $text, $langmatch);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    35
    foreach ( $langmatch[0] as $i => $match )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    36
    {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    37
      if ( $langmatch[1][$i] == $lang->lang_code )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    38
      {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    39
        $text = str_replace_once($match, $langmatch[2][$i], $text);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    40
      }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    41
      else
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    42
      {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    43
        $text = str_replace_once($match, '', $text);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    44
      }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    45
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    46
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    47
    return array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    48
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    49
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    50
  public function templates(&$text)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    51
  {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    52
    $template_regex = "/\{\{(.+)((\n|\|[ ]*([A-z0-9]+)[ ]*=[ ]*(.+))*)\}\}/isU";
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    53
    $i = 0;
1054
e6b14d33ac55 Renderer: added "smart paragraphs" for templates. <p><b>Foo</b> {bar}</p> where bar is multiline is basically turned into proper XHTML paragraphs.
Dan
parents: 1044
diff changeset
    54
    while ( preg_match($template_regex, $text, $match) )
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    55
    {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    56
      $i++;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    57
      if ( $i == 5 )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    58
        break;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    59
      $text = RenderMan::include_templates($text);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    60
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    61
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    62
    return array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    63
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    64
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    65
  public function heading(&$text)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    66
  {
1031
8a4b75e73137 Wiki formatting: Headings: tolerate spaces after line; added disable_rule method (required for rev. 1029)
Dan
parents: 1027
diff changeset
    67
    if ( !preg_match_all('/^(={1,6}) *(.+?) *\\1 *$/m', $text, $results) )
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    68
      return array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    69
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    70
    $headings = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    71
    foreach ( $results[0] as $i => $match )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    72
    {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    73
      $headings[] = array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    74
          'level' => strlen($results[1][$i]),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    75
          'text' => $results[2][$i]
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    76
        );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    77
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    78
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    79
    $text = Carpenter::tokenize($text, $results[0]);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    80
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    81
    return $headings;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    82
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    83
  
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    84
  public function multilist(&$text)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    85
  {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    86
    // Match entire lists
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    87
    $regex = '/^
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    88
                ([:#\*])+     # Initial list delimiter
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    89
                [ ]*
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    90
                .+?
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    91
                (?:
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    92
                  \r?\n
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    93
                  (?:\\1|[ ]{2,})
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    94
                  [ ]*
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    95
                  .+?)*
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    96
                $/mx';
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    97
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    98
    if ( !preg_match_all($regex, $text, $lists) )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
    99
      return array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   100
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   101
    $types = array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   102
        '*' => 'unordered',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   103
        '#' => 'ordered',
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   104
        ':' => 'indent'
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   105
      );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   106
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   107
    $pieces = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   108
    foreach ( $lists[0] as $i => $list )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   109
    {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   110
      $token = $lists[1][$i];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   111
      $piece = array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   112
          'type' => $types[$token],
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   113
          'items' => array()
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   114
        );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   115
      
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   116
      // convert windows newlines to unix
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   117
      $list = str_replace("\r\n", "\n", $list);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   118
      $items_pre = explode("\n", $list);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   119
      $items = array();
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   120
      // first pass, go through and combine items that are newlined
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   121
      foreach ( $items_pre as $item )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   122
      {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   123
        if ( substr($item, 0, 1) == $token )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   124
        {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   125
          $items[] = $item;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   126
        }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   127
        else
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   128
        {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   129
          // it's a continuation of the previous LI. Don't need to worry about
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   130
          // undefined indices here since the regex should filter out all invalid
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   131
          // markup. Just append this line to the previous.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   132
          $items[ count($items) - 1 ] .= "\n" . trim($item);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   133
        }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   134
      }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   135
      
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   136
      // second pass, separate items and tokens
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   137
      unset($items_pre);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   138
      foreach ( $items as $item )
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   139
      {
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   140
        // get the depth
1073
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   141
        $itemtoken = preg_replace('/^([#:\*]+).*$/s', '$1', $item);
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   142
        // get the text
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   143
        $itemtext = trim(substr($item, strlen($itemtoken)));
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   144
        $piece['items'][] = array(
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   145
            // depth starts at 1
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   146
            'depth' => strlen($itemtoken),
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   147
            'text' => $itemtext
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   148
          );
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   149
      }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   150
      $pieces[] = $piece;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   151
    }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   152
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   153
    $text = Carpenter::tokenize($text, $lists[0]);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   154
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   155
    return $pieces;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   156
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   157
  
1073
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   158
  public function blockquote(&$text)
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   159
  {
1078
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   160
    $rand_id = hexencode(AESCrypt::randkey(16), '', '');
1073
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   161
    
1078
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   162
    while ( preg_match_all('/^(?:(>+) *.+(?:\r?\n|$))+/m', $text, $quotes) )
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   163
    {
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   164
      foreach ( $quotes[0] as $quote )
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   165
      {
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   166
        $piece = trim(preg_replace('/^> */m', '', $quote));
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   167
        $text = str_replace_once($quote, "{blockquote:$rand_id}\n$piece\n{/blockquote:$rand_id}\n", $text);
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   168
      }
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   169
    }
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   170
    //die('<pre>' . htmlspecialchars($text) . '</pre>');
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   171
    
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   172
    $this->blockquote_rand_id = $rand_id;
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   173
  }
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   174
  
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   175
  public function blockquotepost(&$text)
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   176
  {
67a4c839c7e1 Blockquote functionality in wikitext parser now allows rendering of other block level elements properly
Dan
parents: 1073
diff changeset
   177
    return $this->blockquote_rand_id;
1073
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   178
  }
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   179
  
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   180
  public function paragraph(&$text)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   181
  {
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   182
    // The trick with paragraphs is to not turn things into them when a block level element already wraps the block of text.
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   183
    // First we need a list of block level elements (http://htmlhelp.com/reference/html40/block.html + some Enano extensions)
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   184
    $blocklevel = 'address|blockquote|center|code|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|li|ol|p|pre|table|ul|tr|td|th|tbody|thead|tfoot';
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   185
    
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   186
    // Wrap all block level tags
1073
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   187
    RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
1138
a7b490f0c418 parse_mediawiki: Marked the paragraph bug as non-blocker, delayed until RC1. I have higher priorities than a minor win32 only parsing issue.
Dan
parents: 1134
diff changeset
   188
    
1127
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   189
    // I'm not sure why I had to go through all these alternatives. Trying to bring it
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   190
    // all down to one by ?'ing subpatterns was causing things to return empty and throwing
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   191
    // errors in the parser. Eventually, around ~3:57AM I just settled on this motherf---er
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   192
    // of a regular expression.
1138
a7b490f0c418 parse_mediawiki: Marked the paragraph bug as non-blocker, delayed until RC1. I have higher priorities than a minor win32 only parsing issue.
Dan
parents: 1134
diff changeset
   193
    
a7b490f0c418 parse_mediawiki: Marked the paragraph bug as non-blocker, delayed until RC1. I have higher priorities than a minor win32 only parsing issue.
Dan
parents: 1134
diff changeset
   194
    // FIXME: This regexp triggers a known PHP stack size issue under win32 and possibly
a7b490f0c418 parse_mediawiki: Marked the paragraph bug as non-blocker, delayed until RC1. I have higher priorities than a minor win32 only parsing issue.
Dan
parents: 1134
diff changeset
   195
    // other platforms (<http://bugs.php.net/bug.php?id=47689>). The workaround is going to
a7b490f0c418 parse_mediawiki: Marked the paragraph bug as non-blocker, delayed until RC1. I have higher priorities than a minor win32 only parsing issue.
Dan
parents: 1134
diff changeset
   196
    // involve writing our own parser that takes care of recursion without using the stack,
a7b490f0c418 parse_mediawiki: Marked the paragraph bug as non-blocker, delayed until RC1. I have higher priorities than a minor win32 only parsing issue.
Dan
parents: 1134
diff changeset
   197
    // which is going to be a bitch, and may not make it in until Caoineag RCs.
a7b490f0c418 parse_mediawiki: Marked the paragraph bug as non-blocker, delayed until RC1. I have higher priorities than a minor win32 only parsing issue.
Dan
parents: 1134
diff changeset
   198
    
1127
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   199
    $regex = ";
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   200
              <($blocklevel)
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   201
              (?:
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   202
                # self closing, no attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   203
                [ ]*/>
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   204
              |
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   205
                # self closing, attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   206
                [ ][^>]+? />
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   207
              |
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   208
                # with inner text, no attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   209
                >
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   210
                (?: (?R) | .*? )*</\\1>
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   211
              |
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   212
                # with inner text and attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   213
                [ ][^>]+?     # attributes
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   214
                >
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   215
                (?: (?R) | .*? )*</\\1>
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   216
              )
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   217
                ;sx";
4b858862c35c More parser work: fixed a few bugs with [[intlinks]] in headers, a bug that caused the paragraph parser to return an empty string, and added a warning/backup-and-restore for when a render stage returns an empty string.
Dan
parents: 1106
diff changeset
   218
                
1131
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   219
    // oh. and we're using this tokens thing because for identical matches, the first match will
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   220
    // get wrapped X number of times instead of all matches getting wrapped once; replacing each
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   221
    // with a unique token id remedies this
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   222
    
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   223
    $tokens = array();
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   224
    $rand_id = sha1(microtime() . mt_rand());
1134
4fdd92da4fe8 Hack: temporarily disabled PCRE recursion for Win32 in the paragraph block tag parser until a better parser can be written/adapted
dan
parents: 1131
diff changeset
   225
    
4fdd92da4fe8 Hack: temporarily disabled PCRE recursion for Win32 in the paragraph block tag parser until a better parser can be written/adapted
dan
parents: 1131
diff changeset
   226
    // Temporary hack to fix crashes under win32. Sometime I'll write a loop based
4fdd92da4fe8 Hack: temporarily disabled PCRE recursion for Win32 in the paragraph block tag parser until a better parser can be written/adapted
dan
parents: 1131
diff changeset
   227
    // parser for this whole section. Maybe. Perhaps the Apache folks will fix their
4fdd92da4fe8 Hack: temporarily disabled PCRE recursion for Win32 in the paragraph block tag parser until a better parser can be written/adapted
dan
parents: 1131
diff changeset
   228
    // Windows binaries first.
4fdd92da4fe8 Hack: temporarily disabled PCRE recursion for Win32 in the paragraph block tag parser until a better parser can be written/adapted
dan
parents: 1131
diff changeset
   229
    if ( PHP_OS == 'WIN32' || PHP_OS == 'WINNT' )
4fdd92da4fe8 Hack: temporarily disabled PCRE recursion for Win32 in the paragraph block tag parser until a better parser can be written/adapted
dan
parents: 1131
diff changeset
   230
    {
4fdd92da4fe8 Hack: temporarily disabled PCRE recursion for Win32 in the paragraph block tag parser until a better parser can be written/adapted
dan
parents: 1131
diff changeset
   231
      $regex = str_replace("(?: (?R) | .*? )*", "(?: .*? )", $regex);
4fdd92da4fe8 Hack: temporarily disabled PCRE recursion for Win32 in the paragraph block tag parser until a better parser can be written/adapted
dan
parents: 1131
diff changeset
   232
    }
1130
c308b471ed82 OK, I'm done with the preg_replace() in the paragraph parser. It's too buggy. Replaced with preg_match_all()/str_replace_once().
Dan
parents: 1127
diff changeset
   233
    if ( preg_match_all($regex, $text, $matches) )
c308b471ed82 OK, I'm done with the preg_replace() in the paragraph parser. It's too buggy. Replaced with preg_match_all()/str_replace_once().
Dan
parents: 1127
diff changeset
   234
    {
1131
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   235
      foreach ( $matches[0] as $i => $match )
1130
c308b471ed82 OK, I'm done with the preg_replace() in the paragraph parser. It's too buggy. Replaced with preg_match_all()/str_replace_once().
Dan
parents: 1127
diff changeset
   236
      {
1131
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   237
        $text = str_replace_once($match, "{_pb_:$rand_id:$i}", $text);
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   238
        $tokens[$i] = '<_paragraph_bypass>' . $match . '</_paragraph_bypass>';
1130
c308b471ed82 OK, I'm done with the preg_replace() in the paragraph parser. It's too buggy. Replaced with preg_match_all()/str_replace_once().
Dan
parents: 1127
diff changeset
   239
      }
c308b471ed82 OK, I'm done with the preg_replace() in the paragraph parser. It's too buggy. Replaced with preg_match_all()/str_replace_once().
Dan
parents: 1127
diff changeset
   240
    }
c308b471ed82 OK, I'm done with the preg_replace() in the paragraph parser. It's too buggy. Replaced with preg_match_all()/str_replace_once().
Dan
parents: 1127
diff changeset
   241
    
1131
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   242
    foreach ( $tokens as $i => $match )
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   243
    {
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   244
      $text = str_replace_once("{_pb_:$rand_id:$i}", $match, $text);
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   245
    }
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   246
    
adfbe522c95f Another fix to paragraph bypass behavior, for when the same substring appears more than once in the text
Dan
parents: 1130
diff changeset
   247
    // die('<pre>' . htmlspecialchars($text) . '</pre>');
1134
4fdd92da4fe8 Hack: temporarily disabled PCRE recursion for Win32 in the paragraph block tag parser until a better parser can be written/adapted
dan
parents: 1131
diff changeset
   248
	
1073
b19a9bcb6a45 More work on rendering engine. Fixed some bugs with paragraph skipping and added (incomplete) support for blockquotes.
Dan
parents: 1054
diff changeset
   249
    RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw, true);
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   250
    
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   251
    // This is potentially a hack. It allows the parser to stick in <_paragraph_bypass> tags
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   252
    // to prevent the paragraph parser from interfering with pretty HTML generated elsewhere.
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   253
    RenderMan::tag_strip('_paragraph_bypass', $text, $_nw);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   254
    
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   255
    $startcond = "(?!(?:[\\r\\n]|\{_paragraph_bypass:[a-f0-9]{32}:[0-9]+\}|[ ]*<\/?(?:$blocklevel)(?: .+>|>)))";
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   256
    $regex = "/^
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   257
                $startcond        # line start condition - do not match if the line starts with the condition above
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   258
                .+?               # body text
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   259
                (?:
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   260
                  \\n             # additional lines
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   261
                  $startcond      # make sure of only one newline in a row, and end the paragraph if a new line fails the start condition
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   262
                  .*?
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   263
                )*                # keep going until it fails
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   264
              $
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   265
              /mx";
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   266
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   267
    if ( !preg_match_all($regex, $text, $matches) )
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   268
    {
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   269
      RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   270
      return array();
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   271
    }
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   272
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   273
    // Debugging :)
1044
ad6a22377507 Wiki engine: improved behavior in block level element finding/wrapping algorithm
Dan
parents: 1031
diff changeset
   274
    // die('<pre>' . htmlspecialchars($text) . "\n-----------------------------------------------------------\n" . htmlspecialchars(print_r($matches, true)) . '</pre>');
1027
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   275
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   276
    // restore stripped
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   277
    RenderMan::tag_unstrip('_paragraph_bypass', $text, $_nw);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   278
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   279
    // tokenize
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   280
    $text = Carpenter::tokenize($text, $matches[0]);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   281
    
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   282
    return $matches[0];
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   283
  }
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   284
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   285
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   286
function parser_mediawiki_xhtml_image($text)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   287
{
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   288
  $text = RenderMan::process_image_tags($text, $taglist);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   289
  $text = RenderMan::process_imgtags_stage2($text, $taglist);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   290
  return $text;
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   291
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   292
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   293
function parser_mediawiki_xhtml_tables($text)
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   294
{
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   295
  return process_tables($text);
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   296
}
98c052fc3337 First implementation of new parser; Text_Wiki is now gone. VERY BETA! WiP.
Dan
parents:
diff changeset
   297