1
|
1 |
<?php
|
|
2 |
|
|
3 |
/**
|
|
4 |
*
|
|
5 |
* Parse for URLs in the source text.
|
|
6 |
*
|
|
7 |
* @category Text
|
|
8 |
*
|
|
9 |
* @package Text_Wiki
|
|
10 |
*
|
|
11 |
* @author Paul M. Jones <pmjones@php.net>
|
|
12 |
*
|
|
13 |
* @author Moritz Venn <moritz.venn@freaque.net>
|
|
14 |
*
|
|
15 |
* @license LGPL
|
|
16 |
*
|
|
17 |
* @version $Id: Url.php,v 1.1 2005/12/06 15:54:56 ritzmo Exp $
|
|
18 |
*
|
|
19 |
*/
|
|
20 |
|
|
21 |
/**
|
|
22 |
*
|
|
23 |
* Parse for URLs in the source text.
|
|
24 |
*
|
|
25 |
* Various URL markings are supported: inline (the URL by itself),
|
|
26 |
* undescribed (where the URL is enclosed in square brackets), and named
|
|
27 |
* reference (where the URL is enclosed in square brackets and has a
|
|
28 |
* name included inside the brackets). E.g.:
|
|
29 |
*
|
|
30 |
* inline -- http://example.com
|
|
31 |
* undescribed -- [http://example.com]
|
|
32 |
* described -- [http://example.com Example Description]
|
|
33 |
* described -- [http://www.example.com|Example Description]
|
|
34 |
*
|
|
35 |
* When rendering a URL token, this will convert URLs pointing to a .gif,
|
|
36 |
* .jpg, or .png image into an inline <img /> tag (for the 'xhtml'
|
|
37 |
* format).
|
|
38 |
*
|
|
39 |
* Token options are:
|
|
40 |
*
|
|
41 |
* 'type' => ['inline'|'footnote'|'descr'] the type of URL
|
|
42 |
*
|
|
43 |
* 'href' => the URL link href portion
|
|
44 |
*
|
|
45 |
* 'text' => the displayed text of the URL link
|
|
46 |
*
|
|
47 |
* @category Text
|
|
48 |
*
|
|
49 |
* @package Text_Wiki
|
|
50 |
*
|
|
51 |
* @author Paul M. Jones <pmjones@php.net>
|
|
52 |
*
|
|
53 |
* @author Moritz Venn <moritz.venn@freaque.net>
|
|
54 |
*
|
|
55 |
*/
|
|
56 |
|
|
57 |
class Text_Wiki_Parse_Url extends Text_Wiki_Parse {


    /**
     *
     * Running counter incremented by processFootnote() and
     * processOrdinary() each time they tokenize a bracketed URL.
     *
     * Only processFootnote() uses the value (as the link text).
     *
     * @access public
     *
     * @var int
     *
     */

    var $footnoteCount = 0;


    /**
     *
     * Default configuration: the URL scheme prefixes recognized by this
     * rule. The constructor reads the list through getConf('schemes').
     *
     * @access public
     *
     * @var array
     *
     */

    var $conf = array(
        'schemes' => array(
            'http://',
            'https://',
            'ftp://',
            'gopher://',
            'news://',
            'mailto:',
            'irc://'
        )
    );


    /**
     *
     * Constructor.
     *
     * We override the constructor so the URL regex fragment can be built
     * from the configured schemes and commented piece by piece. The
     * result is stored in $this->regex WITHOUT delimiters; parse() wraps
     * it in '/' delimiters together with the bracket syntax.
     *
     * The fragment contributes two capture groups of its own (the scheme
     * and the repeated path-segment group), which is why the callbacks
     * below read their description / trailing text from a higher index
     * than one might expect.
     *
     * NOTE(review): this is a PHP 4-style constructor (method named after
     * the class) and it calls the parent's same-style constructor; both
     * would have to be migrated together to run as constructors on PHP 8.
     *
     * @access public
     *
     * @param object &$obj Passed straight through to the parent
     * constructor (presumably the calling Text_Wiki object -- confirm
     * against Text_Wiki_Parse).
     *
     */

    function Text_Wiki_Parse_Url(&$obj)
    {
        parent::Text_Wiki_Parse($obj);

        // convert the list of recognized schemes to a regex-safe
        // alternation, where the pattern delimiter is a slash
        $quoted = array();
        $schemeList = $this->getConf('schemes', array());
        foreach ($schemeList as $scheme) {
            $quoted[] = preg_quote($scheme, '/');
        }
        $schemes = implode('|', $quoted);

        // build the regex
        $this->regex =
            "($schemes)" . // allowed schemes
            "(" . // start of a path segment (repeated zero or more times)
            "[^ \\/\"\'{$this->wiki->delim}]*\\/" . // no spaces, slashes, double-quotes, single-quotes, or delimiters; then one slash
            ")*" . // end of the repeated path segment
            "[^ \\t\\n\\/\"\'{$this->wiki->delim}]*" . // last segment: additionally stops at the \t and \n escapes
            "[A-Za-z0-9\\/?=&~_]"; // must end on one of these characters (so no trailing punctuation)
        // fix for jEdit syntax highlighting bug: \"
    }


    /**
     *
     * Find the bracketed kinds of URLs in the source text and replace
     * them with tokens.
     *
     * Two passes run over $this->wiki->source, in this order:
     *
     * 1. described URLs   -- [http://example.com Some text] or
     *                        [http://example.com|Some text]
     * 2. undescribed URLs -- [http://example.com]
     *
     * Detection of bare inline URLs, protocol-less host names and
     * <user@host> e-mail addresses is disabled for Enano (see the
     * commented-out section at the end of the method).
     *
     * NOTE(review): the callbacks declare $matches by reference although
     * none of them modifies it; newer PHP versions may warn when
     * preg_replace_callback() passes the array by value -- confirm before
     * changing the signatures.
     *
     * @access public
     *
     */

    function parse()
    {
        // -------------------------------------------------------------
        //
        // Described-reference (named) URLs.
        //

        // the URL, then a single space or pipe, then the description
        // (anything up to the closing bracket)
        $tmp_regex = '/\[(' . $this->regex . ')[ |]([^\]]+)\]/';

        // use a custom callback processing method to generate
        // the replacement text for matches.
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processDescr'),
            $this->wiki->source
        );


        // -------------------------------------------------------------
        //
        // Unnamed-reference ('Ordinary'-style) URLs.
        //

        // the URL alone between square brackets (ungreedy match)
        $tmp_regex = '/\[(' . $this->regex . ')\]/U';

        // processOrdinary() shows the URL itself as the link text;
        // processFootnote() (numbered links) is kept in the class but is
        // not wired in here.
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processOrdinary'),
            $this->wiki->source
        );


        // -------------------------------------------------------------
        //
        // Normal inline URLs.
        //

        /*

        ## DISABLED FOR ENANO
        ## This messes up HTML links.

        // the regular expression for this kind of URL

        $tmp_regex = '/(^|[^A-Za-z])(' . $this->regex . ')(.*?)/';

        // use the standard callback for inline URLs
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'process'),
            $this->wiki->source
        );

        //$tmp_regex = '/(^|[^A-Za-z])([a-zA-Z])(.*?)/';
        $tmp_regex = '/(^|\s)([a-zA-Z0-9\-]+\.[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)($|\s)/';

        // use the standard callback for inline URLs
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processWithoutProtocol'),
            $this->wiki->source
        );

        $tmp_regex = '/(^|\s|'.$this->wiki->delim.')<([a-zA-Z0-9\-\.%_\+\!\*\'\(\)\,]+@[a-zA-Z0-9\-]+(\.[a-zA-Z0-9\-]+)+)>(\s|'.$this->wiki->delim.'|$)/';

        // use the standard callback for inline URLs
        $this->wiki->source = preg_replace_callback(
            $tmp_regex,
            array(&$this, 'processInlineEmail'),
            $this->wiki->source
        );
        */
    }


    /**
     *
     * Process inline URLs.
     *
     * Callback for the (currently disabled) inline-URL pattern in
     * parse(). Creates an 'inline' token whose href and text are both
     * the matched URL.
     *
     * @param array &$matches An array of matches as generated by
     * preg_replace_callback. $matches[1] is the text captured just
     * before the URL, $matches[2] is the URL itself and $matches[5] is
     * the trailing capture (indexes 3 and 4 belong to the groups inside
     * $this->regex). The array is only read.
     *
     * @return string The leading text, the token, and the trailing text.
     *
     */

    function process(&$matches)
    {
        $url = $matches[2];

        // set options
        $options = array(
            'type' => 'inline',
            'href' => $url,
            'text' => $url
        );

        // tokenize, keeping the text captured around the URL
        $token = $this->wiki->addToken($this->rule, $options);
        return $matches[1] . $token . $matches[5];
    }


    /**
     *
     * Process inline host names written without a scheme.
     *
     * Callback for the (currently disabled) protocol-less pattern in
     * parse(). Creates an 'inline' token that links to the host with
     * 'http://' prepended while displaying the text as written.
     *
     * @param array &$matches An array of matches as generated by
     * preg_replace_callback. $matches[1] is the leading whitespace (or
     * empty at start of text), $matches[2] is the host name and
     * $matches[4] is the trailing whitespace (or empty at end of text).
     * The array is only read.
     *
     * @return string The leading text, the token, and the trailing text.
     *
     */

    function processWithoutProtocol(&$matches)
    {
        $host = $matches[2];

        // set options
        $options = array(
            'type' => 'inline',
            'href' => 'http://' . $host,
            'text' => $host
        );

        // tokenize, keeping the surrounding whitespace
        $token = $this->wiki->addToken($this->rule, $options);
        return $matches[1] . $token . $matches[4];
    }


    /**
     *
     * Process inline e-mail addresses written as <user@example.com>.
     *
     * Callback for the (currently disabled) e-mail pattern in parse().
     * Creates an 'inline' token that links to the address through the
     * mailto scheme while displaying the bare address.
     *
     * @param array &$matches An array of matches as generated by
     * preg_replace_callback. $matches[1] is the text captured before the
     * opening '<', $matches[2] is the address and $matches[4] is the text
     * captured after the closing '>'. The array is only read.
     *
     * @return string The leading text, the token, and the trailing text.
     *
     */

    function processInlineEmail(&$matches)
    {
        $address = $matches[2];

        // set options
        $options = array(
            'type' => 'inline',
            // mailto has no "//" authority part; this also matches the
            // 'mailto:' entry in the configured schemes
            'href' => 'mailto:' . $address,
            'text' => $address
        );

        // tokenize, keeping the text captured around the address
        $token = $this->wiki->addToken($this->rule, $options);
        return $matches[1] . $token . $matches[4];
    }


    /**
     *
     * Process numbered (footnote) URLs.
     *
     * Not referenced by parse() at present; processOrdinary() is used
     * for undescribed URLs instead.
     *
     * Token options are:
     * 'type' => 'footnote'
     * 'href' => the URL link href portion
     * 'text' => the running footnote number
     *
     * @param array &$matches An array of matches as generated by
     * preg_replace_callback. $matches[0] is the full matched string and
     * $matches[1] is the URL between the brackets. The array is only
     * read.
     *
     * @return string The processed text replacement.
     *
     */

    function processFootnote(&$matches)
    {
        // keep a running count for footnotes
        $this->footnoteCount++;

        // set options
        $options = array(
            'type' => 'footnote',
            'href' => $matches[1],
            'text' => $this->footnoteCount
        );

        // tokenize
        return $this->wiki->addToken($this->rule, $options);
    }


    /**
     *
     * Process undescribed URLs: [http://example.com].
     *
     * Token options are:
     * 'type' => 'descr'
     * 'href' => the URL link href portion
     * 'text' => the same URL, used as the displayed text
     *
     * @param array &$matches An array of matches as generated by
     * preg_replace_callback. $matches[0] is the full matched string and
     * $matches[1] is the URL between the brackets. The array is only
     * read.
     *
     * @return string The processed text replacement.
     *
     */

    function processOrdinary(&$matches)
    {
        // The counter is not used for the token built here; the
        // increment is kept only because $footnoteCount is public state
        // that this method has always advanced.
        $this->footnoteCount++;

        $url = $matches[1];

        // set options
        $options = array(
            'type' => 'descr',
            'href' => $url,
            'text' => $url
        );

        // tokenize
        return $this->wiki->addToken($this->rule, $options);
    }


    /**
     *
     * Process described-reference (named-reference) URLs.
     *
     * Token options are:
     * 'type' => ['inline'|'footnote'|'descr'] the type of URL
     * 'href' => the URL link href portion
     * 'text' => the displayed text of the URL link
     *
     * @param array &$matches An array of matches as generated by
     * preg_replace_callback. $matches[0] is the full matched string,
     * $matches[1] is the URL and $matches[4] is the description
     * (indexes 2 and 3 belong to the groups inside $this->regex). The
     * array is only read.
     *
     * @return string The processed text replacement.
     *
     */

    function processDescr(&$matches)
    {
        // set options
        $options = array(
            'type' => 'descr',
            'href' => $matches[1],
            'text' => $matches[4]
        );

        // tokenize
        return $this->wiki->addToken($this->rule, $options);
    }
}
|
|
355 |
?> |