<?php
/**
*
* Parse for URLS in the source text.
*
* @category Text
*
* @package Text_Wiki
*
* @author Paul M. Jones <pmjones@php.net>
*
* @license LGPL
*
* @version $Id: Url.php,v 1.3 2005/02/23 17:38:29 pmjones Exp $
*
*/
/**
*
* Parse for URLS in the source text.
*
* Various URL markings are supported: inline (the URL by itself),
* numbered or footnote reference (where the URL is enclosed in square
* brackets), and named reference (where the URL is enclosed in square
* brackets and has a name included inside the brackets). E.g.:
*
* inline -- http://example.com
* numbered -- [http://example.com]
* described -- [http://example.com Example Description]
*
* When rendering a URL token, this will convert URLs pointing to a .gif,
* .jpg, or .png image into an inline <img /> tag (for the 'xhtml'
* format).
*
* Token options are:
*
* 'type' => ['inline'|'footnote'|'descr'] the type of URL
*
* 'href' => the URL link href portion
*
* 'text' => the displayed text of the URL link
*
* @category Text
*
* @package Text_Wiki
*
* @author Paul M. Jones <pmjones@php.net>
*
*/
class Text_Wiki_Parse_Url extends Text_Wiki_Parse {
/**
*
* Keeps a running count of numbered-reference URLs.
*
* @access public
*
* @var int
*
*/
var $footnoteCount = 0;
/**
*
* URL schemes recognized by this rule.
*
* @access public
*
* @var array
*
*/
var $conf = array(
'schemes' => array(
'http://',
'https://',
'ftp://',
'gopher://',
'news://',
'mailto:'
)
);
/**
*
* Constructor.
*
* We override the constructor so we can comment the regex nicely.
*
* @access public
*
*/
function Text_Wiki_Parse_Url(&$obj)
{
parent::Text_Wiki_Parse($obj);
// convert the list of recognized schemes to a regex-safe string,
// where the pattern delim is a slash
$tmp = array();
$list = $this->getConf('schemes', array());
foreach ($list as $val) {
$tmp[] = preg_quote($val, '/');
}
$schemes = implode('|', $tmp);
// build the regex
$this->regex =
"($schemes)" . // allowed schemes
"(" . // start pattern
"[^ \\/".'"'."\'{$this->wiki->delim}]*\\/" . // no spaces, backslashes, slashes, double-quotes, single quotes, or delimiters;
")*" . // end pattern
"[^ \\t\\n\\/".'"'."\'{$this->wiki->delim}]*" .
"[A-Za-z0-9\\/?=&~_]";
}
/**
*
* Find three different kinds of URLs in the source text.
*
* @access public
*
*/
function parse()
{
// -------------------------------------------------------------
//
// Described-reference (named) URLs.
//
// the regular expression for this kind of URL
$tmp_regex = '/\[(' . $this->regex . ') ([^\]]+)\]/';
// use a custom callback processing method to generate
// the replacement text for matches.
$this->wiki->source = preg_replace_callback(
$tmp_regex,
array(&$this, 'processDescr'),
$this->wiki->source
);
// -------------------------------------------------------------
//
// Numbered-reference (footnote-style) URLs.
//
// the regular expression for this kind of URL
$tmp_regex = '/\[(' . $this->regex . ')\]/U';
// use a custom callback processing method to generate
// the replacement text for matches.
$this->wiki->source = preg_replace_callback(
$tmp_regex,
array(&$this, 'processFootnote'),
$this->wiki->source
);
// -------------------------------------------------------------
//
// Normal inline URLs.
//
// the regular expression for this kind of URL
$tmp_regex = '/(^|[^A-Za-z])(' . $this->regex . ')(.*?)/';
// use the standard callback for inline URLs
$this->wiki->source = preg_replace_callback(
$tmp_regex,
array(&$this, 'process'),
$this->wiki->source
);
}
/**
*
* Process inline URLs.
*
* @param array &$matches
*
* @param array $matches An array of matches from the parse() method
* as generated by preg_replace_callback. $matches[0] is the full
* matched string, $matches[1] is the first matched pattern,
* $matches[2] is the second matched pattern, and so on.
*
* @return string The processed text replacement.
*
*/
function process(&$matches)
{
// set options
$options = array(
'type' => 'inline',
'href' => $matches[2],
'text' => $matches[2]
);
// tokenize
return $matches[1] . $this->wiki->addToken($this->rule, $options) . $matches[5];
}
/**
*
* Process numbered (footnote) URLs.
*
* Token options are:
* @param array &$matches
*
* @param array $matches An array of matches from the parse() method
* as generated by preg_replace_callback. $matches[0] is the full
* matched string, $matches[1] is the first matched pattern,
* $matches[2] is the second matched pattern, and so on.
*
* @return string The processed text replacement.
*
*/
function processFootnote(&$matches)
{
// keep a running count for footnotes
$this->footnoteCount++;
// set options
$options = array(
'type' => 'footnote',
'href' => $matches[1],
'text' => $this->footnoteCount
);
// tokenize
return $this->wiki->addToken($this->rule, $options);
}
/**
*
* Process described-reference (named-reference) URLs.
*
* Token options are:
* 'type' => ['inline'|'footnote'|'descr'] the type of URL
* 'href' => the URL link href portion
* 'text' => the displayed text of the URL link
*
* @param array &$matches
*
* @param array $matches An array of matches from the parse() method
* as generated by preg_replace_callback. $matches[0] is the full
* matched string, $matches[1] is the first matched pattern,
* $matches[2] is the second matched pattern, and so on.
*
* @return string The processed text replacement.
*
*/
function processDescr(&$matches)
{
// set options
$options = array(
'type' => 'descr',
'href' => $matches[1],
'text' => $matches[4]
);
// tokenize
return $this->wiki->addToken($this->rule, $options);
}
}
?>