| 26 | | # input html |
|---|
| 27 | | var $html = ''; |
|---|
| 28 | | # output markdown |
|---|
| 29 | | var $outtext = ''; |
|---|
| 30 | | # some control structures |
|---|
| 31 | | var $p_p = 0; |
|---|
| 32 | | var $start = 1; |
|---|
| 33 | | var $space = 0; |
|---|
| 34 | | var $force_html = false; |
|---|
| 35 | | var $force_html_start = array('tag'=>'','parents'=>0); |
|---|
| 36 | | # links |
|---|
| 37 | | var $a = array(); |
|---|
| 38 | | var $astack = array(); |
|---|
| 39 | | var $acount = 0; |
|---|
| 40 | | # lists |
|---|
| 41 | | var $list = array (); |
|---|
| 42 | | var $list_depth = 0; |
|---|
| 43 | | var $lastWasNL = false; |
|---|
| 44 | | # indenting and appending |
|---|
| 45 | | var $append = ''; |
|---|
| 46 | | var $indent = ''; |
|---|
| 47 | | # these elements will be dropped with all subelements |
|---|
| 48 | | var $drop = array( |
|---|
| 49 | | 'script', |
|---|
| 50 | | 'head', |
|---|
| 51 | | 'style', |
|---|
| 52 | | 'form', |
|---|
| 53 | | ); |
|---|
| 54 | | # these elements will be quietly ignored, their children will be parsed |
|---|
| 55 | | var $ignore = array( |
|---|
| 56 | | 'wrapper', # important! |
|---|
| 57 | | 'html', # closing html tag |
|---|
| 58 | | 'body', # closing body tag |
|---|
| 59 | | 'thead', |
|---|
| 60 | | 'tbody', |
|---|
| 61 | | 'tfoot', |
|---|
| 62 | | ); |
|---|
| 63 | | # these elements can have some attributes |
|---|
| 64 | | var $has_attrs = array( |
|---|
| 65 | | # tag => list of allowed attrs |
|---|
| 66 | | 'h1' => array('id'), |
|---|
| 67 | | 'h2' => array('id'), |
|---|
| 68 | | 'h3' => array('id'), |
|---|
| 69 | | 'h4' => array('id'), |
|---|
| 70 | | 'h5' => array('id'), |
|---|
| 71 | | 'h6' => array('id'), |
|---|
| 72 | | 'a' => array('href','title'), |
|---|
| 73 | | 'img' => array('src','alt','title'), |
|---|
| 74 | | # tables |
|---|
| 75 | | 'th' => array('align'), |
|---|
| 76 | | 'td' => array('align'), |
|---|
| 77 | | # footnotes |
|---|
| 78 | | 'sup' => array('id'), |
|---|
| 79 | | 'footnote' => array('nr'), |
|---|
| 80 | | # abbreviations |
|---|
| 81 | | 'abbr' => array('title'), |
|---|
| 82 | | 'acronym' => array('title'), |
|---|
| 83 | | ); |
|---|
| 84 | | # table |
|---|
| 85 | | var $max_len = array(); |
|---|
| 86 | | var $align = array(); |
|---|
| 87 | | var $cols = array(); |
|---|
| 88 | | var $rows = array(); |
|---|
| 89 | | var $col = 0; |
|---|
| 90 | | var $row = 0; |
|---|
| 91 | | var $header = array(); |
|---|
| 92 | | # parents |
|---|
| 93 | | var $parents = array(); |
|---|
| 94 | | # abbreviations |
|---|
| 95 | | var $abbrs = array(); |
|---|
| 96 | | # buffer |
|---|
| 97 | | var $buffer = array(); |
|---|
| 98 | | var $buffer_lvl = 0; |
|---|
| 99 | | # options |
|---|
| 100 | | var $LINKS_EACH_PARAGRAPH; |
|---|
| 101 | | var $BODY_WIDTH; |
|---|
| 102 | | var $KEEP_HTML; |
|---|
| 103 | | # global xml parser |
|---|
| 104 | | var $xml_parser; |
|---|
/**
 * Store the conversion options and set up the XML parser.
 *
 * @param bool    $links_each_paragraph true: the list of links is displayed
 *                                      after each paragraph; false: it is
 *                                      displayed at the end of the file
 *                                      (default true)
 * @param integer $body_width           when greater than 0 the output text is
 *                                      wrapped to that width in characters
 *                                      (default 0)
 * @param bool    $keep_html            true: unrecognized html tags are kept;
 *                                      false: they are removed (default true)
 * @return void
 */
function html2text($links_each_paragraph = true,$body_width = 0,$keep_html = true) {
    # build and configure the parser first, then publish it on the instance
    $parser = xml_parser_create();
    xml_set_object($parser, $this);
    # case folding off: tag names reach the handlers as written
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_set_element_handler($parser, 'starttag', 'endtag');
    xml_set_character_data_handler($parser, 'handle_data');
    xml_set_default_handler($parser,'handle_default');
    $this->xml_parser = $parser;

    # conversion options
    $this->LINKS_EACH_PARAGRAPH = $links_each_paragraph;
    $this->BODY_WIDTH = $body_width;
    $this->KEEP_HTML = $keep_html;
}
|---|
/**
 * Convert an html string to text.
 *
 * The markup is first massaged with a series of regular expressions, then
 * wrapped in a fake <wrapper> root element, stored in $this->html and handed
 * to parse(). The value returned is whatever close() returns.
 *
 * @param string $html
 * @return string '' when the trimmed input is empty, else the result of close()
 */
function load_string($html) {
    $html = trim($html);
    if(empty($html)){
        return '';
    }
    # use unix style newlines
    $html = str_replace("\r","\n",str_replace("\r\n","\n",$html));
    # remove doctype and xml tags: everything up to and including the opening
    # <body> tag is replaced
    $html = preg_replace('#^.*<body[^>]*>#Us','<html><body>',$html);
    /*
     * cope with bad html
     *
     * NOTE(review): several replacements in this method map a string onto
     * itself (e.g. '<' => '<'). That looks like named html entities inside the
     * literals were decoded somewhere on the way into this copy of the file -
     * TODO confirm against the upstream source. The literals are kept exactly
     * as found.
     */
    $html = preg_replace('#&(?!amp;)#','&',$html);
    $html = str_replace('<','<',$html);
    $html = preg_replace('#<([a-z]+[^>]*) ?/>#Us','<$1 />',$html);
    # unmatched tags (poor performance)
    preg_match_all('#<(([a-z]|h[1-6])+)(?= |>)#',$html,$matches);
    foreach($matches[1] as $tag){
        # $tag consists of [a-z0-9] only (see pattern above), no quoting needed
        $html = preg_replace('#<'.$tag.'( |>)(.*)</'.$tag.'>#Us','<'.$tag.'$1$2</'.$tag.'>',$html,1);
    }
    # encode the content of <pre>|<code> (see NOTE(review) above)
    # a closure replaces create_function(), which was removed in PHP 8.0
    $html = preg_replace_callback('#(<pre[^>]*>\s*<code[^>]*>|<code[^>]*>|<pre[^>]*>)(.*)(</pre>\s*</code>|</code>|</pre>)#Us',
        function($matches){
            return $matches[1].str_replace('<','<',$matches[2]).$matches[3];
        },$html);
    # handle empty attributes (e.g. <input checked>)
    $html = preg_replace_callback('#<([a-z]+)(?>[^>]* [^=]+(?> [^>]*)?) ?/?>#s',array($this,'parse_empty_attribs'),$html);
    # fake wrapper
    $html = '<wrapper>'.$html.'</wrapper>';
    # footnotes
    $html = preg_replace_callback('#<div class="footnotes">\s*<hr />\s*<ol>\s*(<li id="fn:\d+">.+</li>)\s*</ol>\s*</div>#Us',array($this,'footnotes'),$html);
    # last newline inside <pre> should not be parsed
    $html = preg_replace('#\n</code></pre>#s','</code></pre>',$html);
    # some html elements should not be parsed if their children wont be parsed:
    if($this->KEEP_HTML){
        # <ul|ol><li class="asdf">, complex because we need to handle nested lists
        if(preg_match('#<li [^>]+>#',$html)){
            preg_match_all('#(?:<li [^>]+>|</?(?:ul|ol)[^>]*>)#',$html,$matches,PREG_OFFSET_CAPTURE);
            $lists = array();      # stack of the currently open <ul>/<ol> tags
            $insert_at = array();  # positions (in the unmodified string) to mark
            $ins = ' forcehtml="1"';
            foreach($matches[0] as $a){
                if(substr($a[0],0,3) == '<li'){
                    if(empty($lists)){
                        # <li> with attributes outside of any list: nothing to mark
                        continue;
                    }
                    $top = count($lists)-1;
                    if(!$lists[$top]['forced']){
                        $lists[$top]['forced'] = true;
                        $insert_at[] = $lists[$top]['offset'];
                    }
                } elseif(substr($a[0],0,2) == '</'){ # close tag
                    array_pop($lists);
                } else { # open tag
                    array_push($lists,array(
                        'offset' => $a[1]+3, # directly behind "<ul" / "<ol"
                        'forced' => strstr($a[0],'forcehtml='),
                    ));
                }
            }
            # insert from back to front so the positions recorded against the
            # unmodified string stay valid; a running offset is wrong as soon
            # as an outer list gets marked after one of its nested lists
            rsort($insert_at);
            foreach($insert_at as $pos){
                $html = substr_replace($html,$ins,$pos,0);
            }
        }
        # <pre><code class="asdf">
        $html = preg_replace('#(?><pre>)\s*(<code .+>)#Us','<pre forcehtml="1">$1',$html);
    }
    $this->html = $html;
    # ok, now lets start parsing!
    #echo dump($html);
    $this->parse();
    return $this->close();
}
|---|
/**
 * Rewrite a matched footnote list into <footnotes>/<footnote nr="N"> markup.
 *
 * Callback for preg_replace_callback().
 *
 * @param array $matches index 1 holds the <li id="fn:N">...</li> items
 * @return string
 */
function footnotes($matches){
    $items = $matches[1];
    # strip the link pointing back to the footnote reference
    $items = preg_replace('@<a href="#fnref:\d+" rev="footnote"[^>]*>&#8617;</a>@U','',$items);
    # drop empty paragraphs
    $items = str_replace('<p></p>','',$items);
    # wrap in a footnotes tag and turn every
    # <li id="fn:1">... into <footnote nr="1">...
    $wrapped = str_replace('<li id="fn:','<footnote nr="','<footnotes>'.$items.'</footnotes>');
    # a </li> in front of the next footnote or the end of the list closes a footnote
    return preg_replace('#</li>\s*(<footnote|</footnotes)#s','</footnote>$1',$wrapped);
}
|---|
/**
 * Give value-less attributes (e.g. <input checked>) a value, or drop them.
 *
 * Callback for preg_replace_callback(). Only the full match ($matches[0]) is
 * used. A tag whose attributes are all lowercase name="value" / name='value'
 * pairs is returned untouched. Otherwise every attribute without a value
 * becomes name="name" when KEEP_HTML is set and is removed when it is not;
 * attributes that already have a value are left as they are.
 *
 * The previous implementation printed a debug dump and called die() here,
 * which ended the whole script, and its remaining code read a $matches[2]
 * that the calling pattern never captures.
 *
 * @param array $matches
 * @return string the (possibly rewritten) tag
 */
function parse_empty_attribs($matches){
    $tag = $matches[0];
    if(preg_match('#^<[a-z]+(?: [a-z]+=(?:"[^"]*"|\'[^\']*\'))+ ?/?>$#s',$tag)){
        # mismatch, this tag is correct
        return $tag;
    }
    # split into tag name, attribute string and optional self-closing slash
    if(!preg_match('#^<([a-z]+)(.*?)\s*(/?)>$#s',$tag,$parts)){
        # not something we know how to rewrite, leave it alone
        return $tag;
    }
    $keep = $this->KEEP_HTML;
    # walk attribute by attribute; a quoted value is consumed as a whole, so
    # words inside a value are never mistaken for attributes
    $attrs = preg_replace_callback(
        '#\s+([^\s"\'=<>/]+)(\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s"\'>]+))?#s',
        function($a) use ($keep){
            if(isset($a[2]) && $a[2] !== ''){
                # attribute already has a value
                return $a[0];
            }
            return $keep ? ' '.$a[1].'="'.$a[1].'"' : '';
        },
        $parts[2]
    );
    return '<'.$parts[1].$attrs.($parts[3] === '' ? '' : ' /').'>';
}
|---|
/**
 * Convert the contents of an html file to text.
 *
 * @param string $file path handed to file_get_contents()
 * @return string|false result of load_string(); false (after an
 *                      E_USER_WARNING) when the file could not be read
 */
function load_file($file) {
    $contents = file_get_contents($file);
    # compare against false explicitly: an empty file or a file containing
    # just "0" is readable and must not be reported as a read failure
    if($contents === false){
        trigger_error('could not open XML input',E_USER_WARNING);
        return false;
    }
    return $this->load_string($contents);
}
|---|
/**
 * Feed $this->html to the XML parser, one line at a time.
 *
 * A line the parser rejects raises an E_USER_WARNING carrying the error
 * code, the error text, the parser's current line number and the escaped
 * line; the remaining lines are still fed to the parser.
 *
 * @param void
 * @return void
 */
function parse() {
    foreach (explode("\n", $this->html) as $chunk) {
        if (xml_parse($this->xml_parser, $chunk . "\n")) {
            continue;
        }
        $code = xml_get_error_code($this->xml_parser);
        $message = sprintf(
            "XML error #%d: %s at line %d:<br /><pre><code>%s</code></pre>",
            $code,
            xml_error_string($code),
            xml_get_current_line_number($this->xml_parser),
            htmlspecialchars($chunk)
        );
        trigger_error($message, E_USER_WARNING);
        # deliberately no return here: keep feeding the remaining lines
    }
}
|---|
/**
 * Free the parser, flush pending output and return the cleaned up text.
 *
 * @param void
 * @return string right-trimmed result of optwrap()
 */
function close() {
    xml_parser_free($this->xml_parser);
    $this->pbr();
    $this->o('', false, 'end');
    $this->out("\n");
    $this->links();

    $text = $this->outtext;
    # blockquotes: collapse the "> " markers at the start of a line
    $text = preg_replace_callback('#^(\s*)((> )+)#m',array(&$this,'cleanup_bq'),$text);
    # cleanup, applied in the order '>', '<', '&'
    # NOTE(review): each search string equals its replacement here; looks
    # like html entities in these literals were decoded in this copy of the
    # file - TODO confirm against the upstream source
    $text = str_replace('>','>',$text);
    $text = str_replace('<','<',$text);
    $text = str_replace('&','&',$text);
    # empty lines (not preformatted)
    $text = preg_replace('#^\s{1,4}$#m','',$text);
    # empty quoted lines
    $text = preg_replace('#^(>+)\s{1,5}$#m','$1',$text);
    $this->outtext = $text;

    return rtrim($this->optwrap($this->outtext));
}
|---|
/**
 * Collapse spaced blockquote markers: "> > > " becomes ">>> ".
 *
 * Callback for preg_replace_callback().
 *
 * @param array $m matches: [1] leading whitespace, [2] the run of "> " markers
 * @return string
 */
function cleanup_bq($m){
    $leading = $m[1];
    # every marker is two characters wide ("> ")
    $depth = strlen($m[2])/2;
    return $leading.str_repeat('>',$depth).' ';
}
|---|
/**
 * Default handler of the XML parser: copies html comments to the output.
 *
 * Data that does not start with "<!--" and end with "-->" is ignored.
 *
 * @param resource $parser
 * @param string $data
 * @return void
 */
function handle_default($parser,$data){
    $is_comment = substr($data,0,4) == '<!--' && substr($data,-3) == '-->';
    if(!$is_comment){
        return;
    }
    # appended to the output directly, set off by newlines
    $this->outtext .= "\n\n".$data."\n";
}
|---|
/**
 * Character data handler of the XML parser (e.g. the DATA in <p>DATA</p>).
 *
 * Passes the data on to o() with true as second argument.
 *
 * @param resource $parser
 * @param string $data
 * @return void
 */
function handle_data($parser, $data) {
    $second_arg = true;
    $this->o($data, $second_arg);
}
|---|
/**
 * Start element handler of the XML parser (e.g. <p>).
 *
 * Delegates to handle_tag() with $start = true.
 *
 * @param resource $parser
 * @param string $tag
 * @param array $attrs
 * @return void
 */
function starttag($parser, $tag, $attrs) {
    $is_start = true;
    $this->handle_tag($tag, $attrs, $is_start);
}
|---|
/**
 * End element handler of the XML parser (e.g. </p>).
 *
 * Delegates to handle_tag() with $start = false; an end tag carries no
 * attributes, so null is passed for them.
 *
 * @param resource $parser
 * @param string $tag
 * @return void
 */
function endtag($parser, $tag) {
    $no_attrs = null;
    $is_start = false;
    $this->handle_tag($tag, $no_attrs, $is_start);
}
|---|
/**
 * Switch on forced html output and remember where it started.
 *
 * Records the tag together with the string length of $this->parents[$tag]
 * (0 when there is no entry for the tag).
 * NOTE(review): presumably that length is the nesting depth of $tag, which
 * would let handle_tag() recognize the matching end tag - confirm against
 * the code that maintains $this->parents.
 *
 * @param string $tag
 * @return void
 */
function force_html($tag){
    $depth = 0;
    if(isset($this->parents[$tag])){
        $depth = strlen($this->parents[$tag]);
    }
    $this->force_html = true;
    $this->force_html_start = array(
        'tag' => $tag,
        'parents' => $depth
    );
}
|---|
| 357 | | /** |
|---|
| 358 | | * parsing logic based on tag name |
|---|
| 359 | | * |
|---|
| 360 | | * @param string $tag |
|---|
| 361 | | * @param array $attrs |
|---|
| 362 | | * @param bool $start |
|---|
| 363 | | * @return void |
|---|
| 364 | | */ |
|---|
| 365 | | function handle_tag($tag, $attrs, $start) { |
|---|
| 366 | | if(in_array($tag,$this->drop)){ # drop tags with content |
|---|
| 367 | | if($start){ |
|---|
| 368 | | $this->buffer(); |
|---|
| 369 | | } else { |
|---|
| 370 | | $this->unbuffer(); |
|---|
| 371 | | } |
|---|
| 372 | | return; |
|---|
| 373 | | } |
|---|
| 374 | | if(in_array($tag,$this->ignore)){ # drop tags but keep content |
|---|
| 375 | | return; |
|---|
| 376 | | } |
|---|
| 377 | | # keeping the original html |
|---|
| 378 | | if($this->KEEP_HTML){ |
|---|
| 379 | | if($start){ |
|---|
| 380 | | # is the force html attr set? |
|---|
| 381 | | if(!$this->force_html && isset($attrs['forcehtml'])){ |
|---|
| 382 | | $this->force_html($tag); |
|---|
| 383 | | } |
|---|
| 384 | | # we'll have to keep this tag |
|---|
| 385 | | if($this->force_html) { |
|---|
| 386 | | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| 387 | | return; |
|---|
| 388 | | } else { |
|---|
| 389 | | # tag has attrs which can't be converted |
|---|
| 390 | | if(!empty($attrs) && $this->keep_tag($tag,$attrs,$start)){ |
|---|
| 391 | | return; |
|---|
| 392 | | } |
|---|
| 393 | | } |
|---|
| 394 | | } else { |
|---|
| 395 | | if($this->force_html){ |
|---|
| 396 | | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| 397 | | if($tag == $this->force_html_start['tag'] && strlen($this->parents[$tag]) == $this->force_html_start['parents']){ |
|---|
| 398 | | $this->force_html = false; |
|---|
| 399 | | } |
|---|
| 400 | | return; |
|---|
| 401 | | } elseif($this->parent($tag,'kept') && $this->keep_tag($tag,$attrs,$start)) { |
|---|
| 402 | | return; |
|---|
| 403 | | } |
|---|
| 404 | | } |
|---|
| 405 | | } |
|---|
| 406 | | switch ($tag) { |
|---|
| 407 | | case 'h1' : |
|---|
| 408 | | case 'h2' : |
|---|
| 409 | | case 'h3' : |
|---|
| 410 | | case 'h4' : |
|---|
| 411 | | case 'h5' : |
|---|
| 412 | | case 'h6' : |
|---|
| 413 | | $this->p(); |
|---|
| 414 | | if ($start) { |
|---|
| 415 | | $this->o(str_repeat('#', intval($tag[1])) . ' '); |
|---|
| 416 | | if(!empty($attrs['id'])){ |
|---|
| 417 | | $this->append = ' {#'.$attrs['id'].'}'; |
|---|
| 418 | | } |
|---|
| 419 | | } else { |
|---|
| 420 | | $this->out($this->append); |
|---|
| 421 | | $this->append = ''; |
|---|
| 422 | | } |
|---|
| 423 | | break; |
|---|
| 424 | | case 'div' : |
|---|
| 425 | | $this->p(); |
|---|
| 426 | | break; |
|---|
| 427 | | case 'p' : |
|---|
| 428 | | $this->p(); |
|---|
| 429 | | break; |
|---|
| 430 | | case 'br' : |
|---|
| 431 | | if ($start) { |
|---|
| 432 | | $this->o(" \n"); |
|---|
| 433 | | } |
|---|
| 434 | | break; |
|---|
| 435 | | case 'hr' : |
|---|
| 436 | | if ($start) { |
|---|
| 437 | | $this->p(); |
|---|
| 438 | | $this->o('* * *'); |
|---|
| 439 | | $this->p(); |
|---|
| 440 | | } |
|---|
| 441 | | break; |
|---|
| 442 | | case 'blockquote' : |
|---|
| 443 | | $this->indent('> ',$start); |
|---|
| 444 | | if ($start) { |
|---|
| 445 | | $this->start = true; |
|---|
| 446 | | $this->out("\n\n".$this->indent); |
|---|
| 447 | | } |
|---|
| 448 | | break; |
|---|
| 449 | | case 'em' : |
|---|
| 450 | | case 'i' : |
|---|
| 451 | | case 'u' : |
|---|
| 452 | | $this->o('_'); |
|---|
| 453 | | break; |
|---|
| 454 | | case 'strong' : |
|---|
| 455 | | case 'b' : |
|---|
| 456 | | $this->o('**'); |
|---|
| 457 | | break; |
|---|
| 458 | | # footnotes |
|---|
| 459 | | case 'sup': |
|---|
| 460 | | if($start){ |
|---|
| 461 | | if(count($attrs) != 1 || !isset($attrs['id']) || !preg_match('#^fnref:(\d+)$#',$attrs['id'],$matches)){ |
|---|
| 462 | | # keep tag |
|---|
| 463 | | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| 464 | | return; |
|---|
| 465 | | } |
|---|
| 466 | | # parse footnote |
|---|
| 467 | | $this->out('[^'.$matches[1].']'); |
|---|
| 468 | | # omit output of link (<a href="#fn:1" rel="footnote">1</a>) |
|---|
| 469 | | $this->buffer(); |
|---|
| 470 | | } else { |
|---|
| 471 | | # last sup was not parsed -> keep tag |
|---|
| 472 | | if(!$this->parent('sup')){ |
|---|
| 473 | | $this->keep_tag($tag,$attrs,$start); |
|---|
| 474 | | return; |
|---|
| 475 | | } |
|---|
| 476 | | # sup was parsed -> reset buffer |
|---|
| 477 | | $this->unbuffer(); |
|---|
| 478 | | } |
|---|
| 479 | | break; |
|---|
| 480 | | case 'footnotes': |
|---|
| 481 | | $this->p(); |
|---|
| 482 | | break; |
|---|
| 483 | | case 'footnote': |
|---|
| 484 | | if($start){ |
|---|
| 485 | | $this->o('[^'.$attrs['nr']."]:\n".$this->indent.' '); |
|---|
| 486 | | $this->start = true; |
|---|
| 487 | | } |
|---|
| 488 | | $this->indent(' ',$start); |
|---|
| 489 | | break; |
|---|
| 490 | | case 'a': |
|---|
| 491 | | if($start) { |
|---|
| 492 | | # buffer to check for inline links like <foo@bar.com> and the like |
|---|
| 493 | | if (isset ($attrs['href'])) { |
|---|
| 494 | | $this->buffer(); |
|---|
| 495 | | array_push($this->astack, $attrs); |
|---|
| 496 | | } else { |
|---|
| 497 | | array_push($this->astack, null); |
|---|
| 498 | | } |
|---|
| 499 | | } else { |
|---|
| 500 | | if($this->astack) { |
|---|
| 501 | | $a = array_pop($this->astack); |
|---|
| 502 | | if ($a) { |
|---|
| 503 | | # for emails |
|---|
| 504 | | $a['href'] = $this->decode($a['href']); |
|---|
| 505 | | $buffer = $this->unbuffer(); |
|---|
| 506 | | $buffer_check = $this->decode(trim($buffer)); |
|---|
| 507 | | if((substr($a['href'],0,7) == 'mailto:' && 'mailto:'.$buffer_check == $a['href']) || $a['href'] == $buffer_check){ |
|---|
| 508 | | # inline link |
|---|
| 509 | | $this->out('<'.$buffer_check.'>',true); |
|---|
| 510 | | } else { |
|---|
| 511 | | # block link |
|---|
| 512 | | $this->previousIndex($a); |
|---|
| 513 | | $this->out('['.$buffer.']['.$a['count'].']',true); |
|---|
| 514 | | } |
|---|
| 515 | | } |
|---|
| 516 | | } |
|---|
| 517 | | } |
|---|
| 518 | | break; |
|---|
| 519 | | # abbrevations |
|---|
| 520 | | case 'abbr': |
|---|
| 521 | | case 'acronym': |
|---|
| 522 | | if($start){ |
|---|
| 523 | | $this->buffer(); |
|---|
| 524 | | array_push($this->abbrs,isset($attrs['title'])?$attrs['title']:''); |
|---|
| 525 | | } else { |
|---|
| 526 | | $abbr = $this->unbuffer(); |
|---|
| 527 | | $def = array_pop($this->abbrs); |
|---|
| 528 | | # only add abbr if its not already defined |
|---|
| 529 | | if(!isset($this->abbrs[$abbr])){ |
|---|
| 530 | | $this->abbrs[$abbr] = $def; |
|---|
| 531 | | } |
|---|
| 532 | | $this->o($abbr); |
|---|
| 533 | | } |
|---|
| 534 | | break; |
|---|
| 535 | | case 'img' : |
|---|
| 536 | | if ($start) { |
|---|
| 537 | | if (isset ($attrs['src'])) { |
|---|
| 538 | | $attrs['href'] = $attrs['src']; |
|---|
| 539 | | $alt = ''; |
|---|
| 540 | | if (isset ($attrs['alt'])) { |
|---|
| 541 | | $alt = $attrs['alt']; |
|---|
| 542 | | } elseif(isset($attrs['title'])){ |
|---|
| 543 | | $alt = $attrs['title']; |
|---|
| 544 | | } |
|---|
| 545 | | $this->previousIndex($attrs); |
|---|
| 546 | | $this->o('!['.$alt.'][' . $attrs['count'] . ']'); |
|---|
| 547 | | } |
|---|
| 548 | | } |
|---|
| 549 | | break; |
|---|
| 550 | | case 'code': |
|---|
| 551 | | # do we have to keep this tag? |
|---|
| 552 | | # or is a parent <pre> element existing? |
|---|
| 553 | | if($this->keep_tag($tag,$attrs,$start) || $this->parent('pre')){ |
|---|
| 554 | | return; |
|---|
| 555 | | } |
|---|
| 556 | | # convert to `code` and handle backticks inside code block |
|---|
| 557 | | # <code>foo`bar</code> has to get ``foo`bar`` and so forth |
|---|
| 558 | | if($start){ |
|---|
| 559 | | $this->buffer(); |
|---|
| 560 | | } else { |
|---|
| 561 | | $str = $this->unbuffer(); |
|---|
| 562 | | preg_match_all('#`+#',$str,$matches); |
|---|
| 563 | | if(!empty($matches[0])){ |
|---|
| 564 | | rsort($matches[0]); |
|---|
| 565 | | $len = strlen($matches[0][0])+1; |
|---|
| 566 | | } else { |
|---|
| 567 | | $len = 1; |
|---|
| 568 | | } |
|---|
| 569 | | $ticks = str_repeat('`',$len); |
|---|
| 570 | | $this->out($ticks.$str.$ticks); |
|---|
| 571 | | } |
|---|
| 572 | | break; |
|---|
| 573 | | case 'dl' : |
|---|
| 574 | | # note: if <dl> gets parsed, its direct children (<dd> and <dt>) will be parsed as well |
|---|
| 575 | | if ($start) { |
|---|
| 576 | | $this->p(); |
|---|
| 577 | | } |
|---|
| 578 | | break; |
|---|
| 579 | | case 'dd' : |
|---|
| 580 | | # is the parent dl parsed? |
|---|
| 581 | | if(!$this->parent('dl')){ |
|---|
| 582 | | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| 583 | | return; |
|---|
| 584 | | } |
|---|
| 585 | | if ($start) { |
|---|
| 586 | | $this->o(': '); |
|---|
| 587 | | $this->start = true; |
|---|
| 588 | | } else { |
|---|
| 589 | | $this->outtext .= "\n"; |
|---|
| 590 | | $this->pbr(); |
|---|
| 591 | | } |
|---|
| 592 | | $this->indent(' ',$start); |
|---|
| 593 | | break; |
|---|
| 594 | | case 'dt' : |
|---|
| 595 | | # is the parent dl parsed? |
|---|
| 596 | | if(!$this->parent('dl')){ |
|---|
| 597 | | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| 598 | | return; |
|---|
| 599 | | } |
|---|
| 600 | | if (!$start) { |
|---|
| 601 | | $this->pbr(); |
|---|
| 602 | | } |
|---|
| 603 | | break; |
|---|
| 604 | | case 'ol' : |
|---|
| 605 | | case 'ul' : |
|---|
| 606 | | # note: if this element gets parsed, its direct children <li>s will be parsed as well |
|---|
| 607 | | if ($start) { |
|---|
| 608 | | array_push($this->list, array ( |
|---|
| 609 | | 'name' => $tag, |
|---|
| 610 | | 'num' => 0 |
|---|
| 611 | | )); |
|---|
| 612 | | } else { |
|---|
| 613 | | array_pop($this->list); |
|---|
| 614 | | $this->pbr(); |
|---|
| 615 | | } |
|---|
| 616 | | break; |
|---|
| 617 | | case 'li' : |
|---|
| 618 | | if ($this->list) { |
|---|
| 619 | | $li = &$this->list[count($this->list) - 1]; |
|---|
| 620 | | } |
|---|
| 621 | | # not inside a list or the list tag was not parsed |
|---|
| 622 | | if(!isset($li) || !$this->parent($li['name'])){ |
|---|
| 623 | | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| 624 | | return; |
|---|
| 625 | | } |
|---|
| 626 | | if ($start) { |
|---|
| 627 | | $this->pbr(); |
|---|
| 628 | | if($li['name'] == 'ul'){ |
|---|
| 629 | | $this->o('* '); |
|---|
| 630 | | } else { |
|---|
| 631 | | $li['num']++; |
|---|
| 632 | | /** |
|---|
| 633 | | * @todo line up <ol><li>s > 9 correctly. |
|---|
| 634 | | */ |
|---|
| 635 | | $this->o($li['num'].'. '); |
|---|
| 636 | | } |
|---|
| 637 | | $this->start = true; |
|---|
| 638 | | $this->indent(' ',$start); |
|---|
| 639 | | } else { |
|---|
| 640 | | $this->indent(' ',$start); |
|---|
| 641 | | } |
|---|
| 642 | | break; |
|---|
| 643 | | case 'table': |
|---|
| 644 | | # NOTE: if the <table> tag gets parsed, all its children will be as well! |
|---|
| | 35 | # input html |
|---|
| | 36 | var $html = ''; |
|---|
| | 37 | # output markdown |
|---|
| | 38 | var $outtext = ''; |
|---|
| | 39 | # some control structures |
|---|
| | 40 | var $p_p = 0; |
|---|
| | 41 | var $start = 1; |
|---|
| | 42 | var $space = 0; |
|---|
| | 43 | var $force_html = false; |
|---|
| | 44 | var $force_html_start = array('tag'=>'','parents'=>0); |
|---|
| | 45 | # links |
|---|
| | 46 | var $a = array(); |
|---|
| | 47 | var $astack = array(); |
|---|
| | 48 | var $acount = 0; |
|---|
| | 49 | # lists |
|---|
| | 50 | var $list = array (); |
|---|
| | 51 | var $list_depth = 0; |
|---|
| | 52 | var $lastWasNL = false; |
|---|
| | 53 | # indenting and appending |
|---|
| | 54 | var $append = ''; |
|---|
| | 55 | var $indent = ''; |
|---|
| | 56 | # these elements will be dropped with all subelements |
|---|
| | 57 | var $drop = array( |
|---|
| | 58 | 'script', |
|---|
| | 59 | 'head', |
|---|
| | 60 | 'style', |
|---|
| | 61 | 'form', |
|---|
| | 62 | ); |
|---|
| | 63 | # these elements will be quietly ignored, their children will be parsed |
|---|
| | 64 | var $ignore = array( |
|---|
| | 65 | 'wrapper', # important! |
|---|
| | 66 | 'html', # closing html tag |
|---|
| | 67 | 'body', # closing body tag |
|---|
| | 68 | 'thead', |
|---|
| | 69 | 'tbody', |
|---|
| | 70 | 'tfoot', |
|---|
| | 71 | ); |
|---|
| | 72 | # these elements can have some attributes |
|---|
| | 73 | var $has_attrs = array( |
|---|
| | 74 | # tag => list of allowed attrs |
|---|
| | 75 | 'h1' => array('id'), |
|---|
| | 76 | 'h2' => array('id'), |
|---|
| | 77 | 'h3' => array('id'), |
|---|
| | 78 | 'h4' => array('id'), |
|---|
| | 79 | 'h5' => array('id'), |
|---|
| | 80 | 'h6' => array('id'), |
|---|
| | 81 | 'a' => array('href','title'), |
|---|
| | 82 | 'img' => array('src','alt','title'), |
|---|
| | 83 | # tables |
|---|
| | 84 | 'th' => array('align'), |
|---|
| | 85 | 'td' => array('align'), |
|---|
| | 86 | # footnotes |
|---|
| | 87 | 'sup' => array('id'), |
|---|
| | 88 | 'footnote' => array('nr'), |
|---|
| | 89 | # abbreviations |
|---|
| | 90 | 'abbr' => array('title'), |
|---|
| | 91 | 'acronym' => array('title'), |
|---|
| | 92 | ); |
|---|
| | 93 | # table |
|---|
| | 94 | var $max_len = array(); |
|---|
| | 95 | var $align = array(); |
|---|
| | 96 | var $cols = array(); |
|---|
| | 97 | var $rows = array(); |
|---|
| | 98 | var $col = 0; |
|---|
| | 99 | var $row = 0; |
|---|
| | 100 | var $header = array(); |
|---|
| | 101 | # parents |
|---|
| | 102 | var $parents = array(); |
|---|
| | 103 | # abbreviations |
|---|
| | 104 | var $abbrs = array(); |
|---|
| | 105 | # buffer |
|---|
| | 106 | var $buffer = array(); |
|---|
| | 107 | var $buffer_lvl = 0; |
|---|
| | 108 | # options |
|---|
| | 109 | var $LINKS_EACH_PARAGRAPH; |
|---|
| | 110 | var $BODY_WIDTH; |
|---|
| | 111 | var $KEEP_HTML; |
|---|
| | 112 | # global xml parser |
|---|
| | 113 | var $xml_parser; |
|---|
/**
 * Store the conversion options and set up the XML parser.
 *
 * @param bool    $links_each_paragraph true: the list of links is displayed
 *                                      after each paragraph; false: it is
 *                                      displayed at the end of the file
 *                                      (default true)
 * @param integer $body_width           when greater than 0 the output text is
 *                                      wrapped to that width in characters
 *                                      (default 0)
 * @param bool    $keep_html            true: unrecognized html tags are kept;
 *                                      false: they are removed (default true)
 * @return void
 */
function html2text($links_each_paragraph = true,$body_width = 0,$keep_html = true) {
    # build and configure the parser first, then publish it on the instance
    $parser = xml_parser_create();
    xml_set_object($parser, $this);
    # case folding off: tag names reach the handlers as written
    xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
    xml_set_element_handler($parser, 'starttag', 'endtag');
    xml_set_character_data_handler($parser, 'handle_data');
    xml_set_default_handler($parser,'handle_default');
    $this->xml_parser = $parser;

    # conversion options
    $this->LINKS_EACH_PARAGRAPH = $links_each_paragraph;
    $this->BODY_WIDTH = $body_width;
    $this->KEEP_HTML = $keep_html;
}
|---|
| | 140 | /** |
|---|
| | 141 | * parse a html string to text |
|---|
| | 142 | * |
|---|
| | 143 | * @param string $html |
|---|
| | 144 | * @return string |
|---|
| | 145 | */ |
|---|
function load_string($html) {
    # Convert an HTML string to text: normalise the markup with a series of
    # regex passes so the XML parser accepts it, store it in $this->html,
    # run parse() and return whatever close() produces.
    # Returns '' for input that is empty after trimming.
    $html = trim($html);
    # explicit comparison: empty('0') is true and would discard the valid input "0"
    if($html === ''){
        return '';
    }
    # use unix style newlines
    $html = str_replace("\r","\n",str_replace("\r\n","\n",$html));
    # remove doctype and xml tags (everything up to and including <body ...>)
    $html = preg_replace('#^.*<body[^>]*>#Us','<html><body>',$html);
    /*
     * cope with bad html
     */
    # self-close <img> tags
    $html = preg_replace('/<img(?!.*\/>)([^>]*)>/Us','<img$1 />',$html);
    # NOTE(review): as written, the replacements below (including the
    # "unmatched tags" loop and the <pre>/<code> callback) substitute text with
    # itself. The literals look like they lost an HTML entity such as &amp; or
    # &lt; somewhere along the way - confirm against the upstream source.
    $html = preg_replace('#&(?!amp;)#','&',$html);
    $html = str_replace('<','<',$html);
    $html = preg_replace('#<([a-z]+[^>]*) ?/>#Us','<$1 />',$html);
    # unmatched tags (poor performance)
    preg_match_all('#<(([a-z]|h[1-6])+)(?= |>)#',$html,$matches);
    foreach($matches[1] as $tag){
        $html = preg_replace('#<'.$tag.'( |>)(.*)</'.$tag.'>#Us','<'.$tag.'$1$2</'.$tag.'>',$html,1);
    }
    # encode special characters inside <pre>|<code>
    # (anonymous function instead of create_function(), which no longer exists in PHP 8)
    $html = preg_replace_callback('#(<pre[^>]*>\s*<code[^>]*>|<code[^>]*>|<pre[^>]*>)(.*)(</pre>\s*</code>|</code>|</pre>)#Us',
        function($matches){
            return $matches[1].str_replace('<','<',$matches[2]).$matches[3];
        },$html);
    # handle empty attributes (e.g. <input checked>)
    $html = preg_replace_callback('#<([a-z]+)(?>[^>]* [^=]+(?> [^>]*)?) ?/?>#s',array($this,'parse_empty_attribs'),$html);
    # fake wrapper
    $html = '<wrapper>'.$html.'</wrapper>';
    # footnotes
    $html = preg_replace_callback('#<div class="footnotes">\s*<hr />\s*<ol>\s*(<li id="fn:\d+">.+</li>)\s*</ol>\s*</div>#Us',array($this,'footnotes'),$html);
    # last newline inside <pre> should not be parsed
    $html = preg_replace('#\n</code></pre>#s','</code></pre>',$html);
    # some html elements should not be parsed if their children wont be parsed:
    if($this->KEEP_HTML){
        # <ul|ol><li class="asdf">, complex because we need to handle nested lists
        if(preg_match('#<li [^>]+>#',$html)){
            preg_match_all('#(?:<li [^>]+>|</?(?:ul|ol)[^>]*>)#',$html,$matches,PREG_OFFSET_CAPTURE);
            $lists = array();   # stack of currently open <ul>/<ol> tags
            $offset = 0;        # characters inserted so far; shifts the captured offsets
            $ins = ' forcehtml="1"';
            $add = strlen($ins);
            foreach($matches[0] as $a){
                if(substr($a[0],0,3) == '<li'){
                    # an <li> with attributes outside of any list has no list tag to mark
                    if(empty($lists)){
                        continue;
                    }
                    $list = &$lists[count($lists)-1];
                    if(!$list['forced']){
                        $list['forced'] = true;
                        $html = substr_replace($html,$ins,$list['offset']+$offset,0);
                        $offset += $add;
                    }
                    # drop the reference so later stack operations cannot alias it
                    unset($list);
                } elseif(substr($a[0],0,2) == '</'){ # close tag
                    array_pop($lists);
                } else { # open tag
                    array_push($lists,array(
                        # position right behind "<ul" / "<ol"
                        'offset' => $a[1]+3,
                        'forced' => strpos($a[0],'forcehtml=') !== false,
                    ));
                }
            }
        }
        # <pre><code class="asdf">
        $html = preg_replace('#(?><pre>)\s*(<code .+>)#Us','<pre forcehtml="1">$1',$html);
    }
    $this->html = $html;
    # ok, now lets start parsing!
    $this->parse();
    return $this->close();
}
|---|
| | 219 | /** |
|---|
| | 220 | * clean up footnotes |
|---|
| | 221 | * |
|---|
| | 222 | * @param array $matches |
|---|
| | 223 | * @return string |
|---|
| | 224 | */ |
|---|
function footnotes($matches){
    # preg_replace_callback() handler: rewrites the <li id="fn:N"> items captured
    # in $matches[1] into <footnotes><footnote nr="N">...</footnote></footnotes>.
    $items = $matches[1];
    # strip the back-reference link of every footnote
    $items = preg_replace('@<a href="#fnref:\d+" rev="footnote"[^>]*>&#8617;</a>@U','',$items);
    # drop paragraphs left empty and wrap everything in one <footnotes> element
    $items = '<footnotes>'.str_replace('<p></p>','',$items).'</footnotes>';
    # <li id="fn:1">...</li> -> <footnote nr="1">...</footnote>
    $items = str_replace('<li id="fn:','<footnote nr="',$items);
    # only a </li> directly followed by the next footnote or the wrapper end closes a footnote
    return preg_replace('#</li>\s*(<footnote|</footnotes)#s','</footnote>$1',$items);
}
|---|
| | 236 | /** |
|---|
| | 237 | * @param array $matches |
|---|
| | 238 | * @return string |
|---|
| | 239 | */ |
|---|
function parse_empty_attribs($matches){
    # preg_replace_callback() handler for tags that may carry value-less
    # attributes (e.g. <input checked>).
    # $matches[0] is the whole tag, $matches[1] the tag name as captured by the caller.
    # Value-less attributes become name="name" when KEEP_HTML is set and are
    # dropped otherwise; attributes that already have a value are left untouched.
    if(preg_match('#^<[a-z]+(?: [a-z]+=(?:"[^"]*"|\'[^\']*\'))+ ?/?>$#s',$matches[0])){
        # mismatch, this tag is correct
        return $matches[0];
    }
    # everything behind "<tagname": the attribute part and an optional self-closing slash
    $rest = substr($matches[0],strlen($matches[1])+1);
    if(!preg_match('#^(.*?)\s*(/?)>$#s',$rest,$parts)){
        # not a shape we understand, leave the tag alone
        return $matches[0];
    }
    $keep = !empty($this->KEEP_HTML);
    # walk over the attributes one by one so quoted values containing spaces stay intact
    $attrs = preg_replace_callback(
        '#\s+([^\s=/>"\']+)(\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+))?#s',
        function($attr) use ($keep){
            if(isset($attr[2]) && $attr[2] !== ''){
                # attribute already has a value
                return $attr[0];
            }
            return $keep ? ' '.$attr[1].'="'.$attr[1].'"' : '';
        },
        $parts[1]
    );
    return '<'.$matches[1].$attrs.($parts[2] !== '' ? ' /' : '').'>';
}
|---|
| | 250 | /** |
|---|
| | 251 | * parse a html file to text |
|---|
| | 252 | * |
|---|
| | 253 | * @param string $file |
|---|
| | 254 | * @return string |
|---|
| | 255 | */ |
|---|
function load_file($file) {
    # Read $file and convert its contents via load_string().
    # Returns the converted text, or false (after raising an E_USER_WARNING)
    # when the file could not be read.
    $contents = file_get_contents($file);
    # only a real read failure is an error; an empty file is valid input
    # and is handled by load_string()
    if($contents === false){
        trigger_error('could not open XML input',E_USER_WARNING);
        return false;
    }
    return $this->load_string($contents);
}
|---|
| | 264 | /** |
|---|
| | 265 | * start parsing html to text |
|---|
| | 266 | * |
|---|
| | 267 | * @param void |
|---|
| | 268 | * @return void |
|---|
| | 269 | */ |
|---|
function parse() {
    # Feed $this->html to the XML parser one line at a time. A line the parser
    # rejects raises an E_USER_WARNING; parsing then continues with the next line.
    foreach (explode("\n", $this->html) as $line) {
        if (xml_parse($this->xml_parser, $line . "\n")) {
            continue;
        }
        $errcode = xml_get_error_code($this->xml_parser);
        $message = sprintf(
            "XML error #%d: %s at line %d:<br /><pre><code>%s</code></pre>",
            $errcode,
            xml_error_string($errcode),
            xml_get_current_line_number($this->xml_parser),
            htmlspecialchars($line)
        );
        trigger_error($message, E_USER_WARNING);
    }
}
|---|
| | 280 | /** |
|---|
| | 281 | * close parser and return text |
|---|
| | 282 | * |
|---|
| | 283 | * @param void |
|---|
| | 284 | * @return string |
|---|
| | 285 | */ |
|---|
function close() {
    # Free the XML parser, flush pending output and links, post-process the
    # collected text and return it wrapped (optwrap) and right-trimmed.
    xml_parser_free($this->xml_parser);
    $this->pbr();
    $this->o('', false, 'end');
    $this->out("\n");
    $this->links();

    $text = $this->outtext;
    # blockquotes: "> > > " -> ">>> "
    $text = preg_replace_callback('#^(\s*)((> )+)#m',array($this,'cleanup_bq'),$text);
    # cleanup
    # NOTE(review): each call replaces a character with itself; the search
    # strings were presumably entities (&gt;, &lt;, &amp;) - confirm upstream.
    $text = str_replace('>','>',$text);
    $text = str_replace('<','<',$text);
    $text = str_replace('&','&',$text);
    # empty lines (not preformatted)
    $text = preg_replace('#^\s{1,4}$#m','',$text);
    # empty quoted lines
    $text = preg_replace('#^(>+)\s{1,5}$#m','$1',$text);
    $this->outtext = $text;

    return rtrim($this->optwrap($this->outtext));
}
|---|
| | 302 | /** |
|---|
| | 303 | * replace "> > > " with ">>> " |
|---|
| | 304 | * |
|---|
| | 305 | * @param array $m matches |
|---|
| | 306 | * @return string |
|---|
| | 307 | */ |
|---|
function cleanup_bq($m){
    # $m[1] is the leading whitespace, $m[2] a run of "> " markers;
    # every marker contributes exactly one '>' to the compact form
    $depth = substr_count($m[2],'>');
    return $m[1].str_repeat('>',$depth).' ';
}
|---|
| | 311 | /** |
|---|
| | 312 | * handles html comments |
|---|
| | 313 | * |
|---|
| | 314 | * @param resource $parser |
|---|
| | 315 | * @param string $data |
|---|
| | 316 | * @return void |
|---|
| | 317 | */ |
|---|
function handle_default($parser,$data){
    # Default XML handler: only complete html comments are copied to the
    # output (on their own paragraph), everything else is ignored.
    $is_comment = strncmp($data,'<!--',4) === 0 && substr($data,-3) == '-->';
    if(!$is_comment){
        return;
    }
    $this->outtext .= "\n\n".$data."\n";
}
|---|
| | 323 | /** |
|---|
| | 324 | * adds pure data to the output (e.g. <p>DATA</p>) |
|---|
| | 325 | * |
|---|
| | 326 | * @param resource $parser |
|---|
| | 327 | * @param string $data |
|---|
| | 328 | * @return void |
|---|
| | 329 | */ |
|---|
function handle_data($parser, $data) {
    # character data handler of the XML parser: forward the text to o()
    # flagged as pure data
    $puredata = true;
    $this->o($data, $puredata);
}
|---|
| | 333 | /** |
|---|
| | 334 | * start tags (e.g. <p>) |
|---|
| | 335 | * |
|---|
| | 336 | * @param resource $parser |
|---|
| | 337 | * @param string $tag |
|---|
| | 338 | * @param array $attrs |
|---|
| | 339 | * @return void |
|---|
| | 340 | */ |
|---|
function starttag($parser, $tag, $attrs) {
    # start element handler: delegate to handle_tag() with $start = true
    $is_start = true;
    $this->handle_tag($tag, $attrs, $is_start);
}
|---|
| | 344 | /** |
|---|
| | 345 | * end tags (e.g. </p>) |
|---|
| | 346 | * |
|---|
| | 347 | * @param resource $parser |
|---|
| | 348 | * @param string $tag |
|---|
| | 349 | * @return void |
|---|
| | 350 | */ |
|---|
function endtag($parser, $tag) {
    # end element handler: closing tags carry no attributes, so pass null
    # and $start = false to handle_tag()
    $no_attrs = null;
    $this->handle_tag($tag, $no_attrs, false);
}
|---|
| | 354 | /** |
|---|
| | 355 | * force html output of all children |
|---|
| | 356 | * |
|---|
| | 357 | * @param string $tag name of the tag that starts the forced html section |
|---|
| | 358 | * @return void |
|---|
| | 359 | */ |
|---|
function force_html($tag){
    # Switch to forced html output and remember which tag started it,
    # together with the current length of its $this->parents entry
    # (0 when the tag has no entry yet).
    $depth = 0;
    if(isset($this->parents[$tag])){
        $depth = strlen($this->parents[$tag]);
    }
    $this->force_html_start = array(
        'tag'     => $tag,
        'parents' => $depth
    );
    $this->force_html = true;
}
|---|
| | 367 | /** |
|---|
| | 368 | * parsing logic based on tag name |
|---|
| | 369 | * |
|---|
| | 370 | * @param string $tag |
|---|
| | 371 | * @param array $attrs |
|---|
| | 372 | * @param bool $start |
|---|
| | 373 | * @return void |
|---|
| | 374 | */ |
|---|
function handle_tag($tag, $attrs, $start) {
    # Central dispatcher for start and end tags.
    #   $tag   - tag name
    #   $attrs - attribute array for start tags, null for end tags
    #   $start - true for an opening tag, false for a closing tag
    # Dropped tags are buffered away, ignored tags are skipped, tags that must be
    # kept as html go to keep_tag(), everything else is converted by the switch
    # below. Tags handled by the switch are tracked in $this->parents
    # (one '1' per open occurrence).
    if(in_array($tag,$this->drop)){ # drop tags with content
        if($start){
            $this->buffer();
        } else {
            $this->unbuffer();
        }
        return;
    }
    if(in_array($tag,$this->ignore)){ # drop tags but keep content
        return;
    }
    # keeping the original html
    if($this->KEEP_HTML){
        if($start){
            # is the force html attr set?
            if(!$this->force_html && isset($attrs['forcehtml'])){
                $this->force_html($tag);
            }
            # we'll have to keep this tag
            if($this->force_html) {
                $this->keep_tag($tag,$attrs,$start,true);
                return;
            } else {
                # tag has attrs which can't be converted
                if(!empty($attrs) && $this->keep_tag($tag,$attrs,$start)){
                    return;
                }
            }
        } else {
            if($this->force_html){
                $this->keep_tag($tag,$attrs,$start,true);
                # same fallback as force_html(): a tag without a parents entry has depth 0
                $depth = isset($this->parents[$tag]) ? strlen($this->parents[$tag]) : 0;
                if($tag == $this->force_html_start['tag'] && $depth == $this->force_html_start['parents']){
                    $this->force_html = false;
                }
                return;
            } elseif($this->parent($tag,'kept') && $this->keep_tag($tag,$attrs,$start)) {
                return;
            }
        }
    }
    switch ($tag) {
        case 'h1' :
        case 'h2' :
        case 'h3' :
        case 'h4' :
        case 'h5' :
        case 'h6' :
            $this->p();
            if ($start) {
                # the digit of the tag name is the number of '#'
                $this->o(str_repeat('#', intval($tag[1])) . ' ');
                if(!empty($attrs['id'])){
                    $this->append = ' {#'.$attrs['id'].'}';
                }
            } else {
                $this->out($this->append);
                $this->append = '';
            }
            break;
        case 'div' :
            $this->p();
            break;
        case 'p' :
            $this->p();
            break;
        case 'br' :
            if ($start) {
                # a Markdown hard line break needs two trailing spaces
                $this->o("  \n");
            }
            break;
        case 'hr' :
            if ($start) {
                $this->p();
                $this->o('* * *');
                $this->p();
            }
            break;
        case 'blockquote' :
            $this->indent('> ',$start);
            if ($start) {
                $this->start = true;
                $this->out("\n\n".$this->indent);
            }
            break;
        case 'em' :
        case 'i' :
        case 'u' :
            $this->o('_');
            break;
        case 'strong' :
        case 'b' :
            $this->o('**');
            break;
        # footnotes
        case 'sup':
            if($start){
                if(count($attrs) != 1 || !isset($attrs['id']) || !preg_match('#^fnref:(\d+)$#',$attrs['id'],$matches)){
                    # keep tag
                    $this->keep_tag($tag,$attrs,$start,true);
                    return;
                }
                # parse footnote
                $this->out('[^'.$matches[1].']');
                # omit output of link (<a href="#fn:1" rel="footnote">1</a>)
                $this->buffer();
            } else {
                # last sup was not parsed -> keep tag
                if(!$this->parent('sup')){
                    $this->keep_tag($tag,$attrs,$start);
                    return;
                }
                # sup was parsed -> reset buffer
                $this->unbuffer();
            }
            break;
        case 'footnotes':
            $this->p();
            break;
        case 'footnote':
            # footnote bodies are indented by four spaces
            if($start){
                $this->o('[^'.$attrs['nr']."]:\n".$this->indent.'    ');
                $this->start = true;
            }
            $this->indent('    ',$start);
            break;
        case 'a':
            if($start) {
                # buffer to check for inline links like <foo@bar.com> and the like
                if (isset ($attrs['href'])) {
                    $this->buffer();
                    array_push($this->astack, $attrs);
                } else {
                    # keep the stack balanced for anchors without href
                    array_push($this->astack, null);
                }
            } else {
                if($this->astack) {
                    $a = array_pop($this->astack);
                    if ($a) {
                        # for emails
                        $a['href'] = $this->decode($a['href']);
                        $buffer = $this->unbuffer();
                        $buffer_check = $this->decode(trim($buffer));
                        if((substr($a['href'],0,7) == 'mailto:' && 'mailto:'.$buffer_check == $a['href']) || $a['href'] == $buffer_check){
                            # inline link
                            $this->out('<'.$buffer_check.'>',true);
                        } else {
                            # block link
                            $this->previousIndex($a);
                            $this->out('['.$buffer.']['.$a['count'].']',true);
                        }
                    }
                }
            }
            break;
        # abbreviations
        case 'abbr':
        case 'acronym':
            if($start){
                $this->buffer();
                array_push($this->abbrs,isset($attrs['title'])?$attrs['title']:'');
            } else {
                $abbr = $this->unbuffer();
                $def = array_pop($this->abbrs);
                # only add abbr if its not already defined
                if(!isset($this->abbrs[$abbr])){
                    $this->abbrs[$abbr] = $def;
                }
                $this->o($abbr);
            }
            break;
        case 'img' :
            if ($start) {
                if (isset ($attrs['src'])) {
                    # images share the link list, so src doubles as href
                    $attrs['href'] = $attrs['src'];
                    $alt = '';
                    if (isset ($attrs['alt'])) {
                        $alt = $attrs['alt'];
                    } elseif(isset($attrs['title'])){
                        $alt = $attrs['title'];
                    }
                    $this->previousIndex($attrs);
                    $this->o('!['.$alt.'][' . $attrs['count'] . ']');
                }
            }
            break;
        case 'code':
            # do we have to keep this tag?
            # or is a parent <pre> element existing?
            if($this->keep_tag($tag,$attrs,$start) || $this->parent('pre')){
                return;
            }
            # convert to `code` and handle backticks inside code block
            # <code>foo`bar</code> has to get ``foo`bar`` and so forth
            if($start){
                $this->buffer();
            } else {
                $str = $this->unbuffer();
                preg_match_all('#`+#',$str,$matches);
                if(!empty($matches[0])){
                    # longest backtick run first; the fence must be one longer
                    rsort($matches[0]);
                    $len = strlen($matches[0][0])+1;
                } else {
                    $len = 1;
                }
                $ticks = str_repeat('`',$len);
                $this->out($ticks.$str.$ticks);
            }
            break;
        case 'dl' :
            # note: if <dl> gets parsed, its direct children (<dd> and <dt>) will be parsed as well
            if ($start) {
                $this->p();
            }
            break;
        case 'dd' :
            # is the parent dl parsed?
            if(!$this->parent('dl')){
                $this->keep_tag($tag,$attrs,$start,true);
                return;
            }
            if ($start) {
                $this->o(': ');
                $this->start = true;
            } else {
                $this->outtext .= "\n";
                $this->pbr();
            }
            # continuation lines of a definition are indented by four spaces
            $this->indent('    ',$start);
            break;
        case 'dt' :
            # is the parent dl parsed?
            if(!$this->parent('dl')){
                $this->keep_tag($tag,$attrs,$start,true);
                return;
            }
            if (!$start) {
                $this->pbr();
            }
            break;
        case 'ol' :
        case 'ul' :
            # note: if this element gets parsed, its direct children <li>s will be parsed as well
            if ($start) {
                array_push($this->list, array (
                    'name' => $tag,
                    'num' => 0
                ));
            } else {
                array_pop($this->list);
                $this->pbr();
            }
            break;
        case 'li' :
            if ($this->list) {
                $li = &$this->list[count($this->list) - 1];
            }
            # not inside a list or the list tag was not parsed
            if(!isset($li) || !$this->parent($li['name'])){
                $this->keep_tag($tag,$attrs,$start,true);
                return;
            }
            if ($start) {
                $this->pbr();
                if($li['name'] == 'ul'){
                    $this->o('* ');
                } else {
                    $li['num']++;
                    /**
                     * @todo line up <ol><li>s > 9 correctly.
                     */
                    $this->o($li['num'].'. ');
                }
                $this->start = true;
            }
            # nested content of a list item needs a four space indent in Markdown
            $this->indent('    ',$start);
            break;
        case 'table':
            # NOTE: if the <table> tag gets parsed, all its children will be as well!
            # finally: parse the whole table
            if(!$start){
                $this->outtext .= "\n\n";
                $separator = array();
                # separator with correct alignment markers
                foreach($this->cols as $col => $arr){
                    $this->max_len[$col] = max($arr);
                    $left = $right = '';
                    # columns without any alignment info have no entry in $this->align
                    $align = isset($this->align[$col]) ? $this->align[$col] : null;
                    switch($align){
                        case 'center':
                            $right = ':';
                            # intentional fall through: centered columns get both colons
                        case 'left':
                            $left = ':';
                            break;
                        case 'right':
                            $right = ':';
                            break;
                    }
                    array_push($separator,$left.str_repeat('-',$this->max_len[$col]).$right);
                }
                $separator = '| '.implode(' | ',$separator).' |';
                # set equal width
                array_walk($this->rows,array($this,'fill_td'));
                $rows = $this->rows;
                foreach($rows as $row => $cols){
                    $this->pbr();
                    $this->o('| '.implode(' | ',$cols).' |');
                    if(in_array($row,$this->header)){
                        $this->pbr();
                        $this->o($separator);
                    }
                }
                $this->cols = array();
                $this->rows = array();
                $this->align = array();
                $this->pbr();
            }
            break;
        case 'tr':
            # not inside a table or the parent table was not parsed
            if(!$this->parent('table')){
                $this->keep_tag($tag,$attrs,$start,true);
                return;
            }
            if($start){
                $this->row++;
            } else {
                $this->col = 0;
            }
            break;
        case 'th':
            # not inside a table or the parent table was not parsed
            if(!$this->parent('table')){
                $this->keep_tag($tag,$attrs,$start,true);
                return;
            }
            if($start){
                if(!in_array($this->row,$this->header)){
                    array_push($this->header,$this->row);
                }
                $this->col++;
                $this->align[$this->col] = !empty($attrs['align']) ? $attrs['align'] : null;
            }
            break;
        case 'td':
            # not inside a table or the parent table was not parsed
            if(!$this->parent('table')){
                $this->keep_tag($tag,$attrs,$start,true);
                return;
            }
            if($start){
                $this->col++;
                # !isset() is true for a missing entry as well as for a null one
                if(!empty($attrs['align']) && !isset($this->align[$this->col])){
                    $this->align[$this->col] = $attrs['align'];
                    if($attrs['align'] == 'center'){
                        if(!isset($this->max_len[$this->col])){
                            $this->max_len[$this->col] = 0;
                        }
                        $this->max_len[$this->col] +=2;
                    }
                }
            }
            break;
        case 'pre':
            # Markdown code blocks are indented by four spaces
            $this->indent('    ',$start,true);
            if ($start) {
                $this->pbr();
            }
            break;
        default:
            $this->keep_tag($tag,$attrs,$start,true);
            return;
    }
    # if we want to keep all non convertible html this function has to know if some parent elements
    # were parsed or not (some elements need to know that as well)
    if($start){
        if(!isset($this->parents[$tag])){
            $this->parents[$tag] = '1';
        } else {
            $this->parents[$tag] .= '1';
        }
    } else {
        if($this->LINKS_EACH_PARAGRAPH && in_array($tag,array('p','ul','blockquote','ol','dl','table','h1','h2','h3','h4','h5','h6'))){
            $this->links();
        }
        # a closing tag without a tracked opening tag has nothing to pop
        $this->parents[$tag] = isset($this->parents[$tag]) ? substr($this->parents[$tag],0,-1) : '';
    }
    return;
}
|---|
| 752 | | /** |
|---|
| 753 | | * adds a string to the output ($this->outtext) |
|---|
| 754 | | * also copes with tables |
|---|
| 755 | | * |
|---|
| 756 | | * @param string $str |
|---|
| 757 | | * @return void |
|---|
| 758 | | */ |
|---|
function out($str) {
    # Lowest level output routine. $str goes, in this order of precedence,
    # to the active buffer, to the current table cell, or to $this->outtext.
    if($this->buffer_lvl){
        $this->buffer[$this->buffer_lvl] .= $str;
        return;
    }
    # plain output unless we are inside a table cell
    if(!$this->parent('th') && !$this->parent('td')){
        $this->outtext .= $str;
        return;
    }
    # this is for tables (see php markdown extra by michel fortin):
    # collect the trimmed cell text per row/column and its length per column/row
    $str = trim($str);
    $row = $this->row;
    $col = $this->col;
    $len = strlen($str);
    if(isset($this->rows[$row][$col])){
        $this->rows[$row][$col] .= $str;
    } else {
        $this->rows[$row][$col] = $str;
    }
    if(isset($this->cols[$col][$row])){
        $this->cols[$col][$row] += $len;
    } else {
        $this->cols[$col][$row] = $len;
    }
}
|---|
| 782 | | /** |
|---|
| 783 | | * further parse the output and add newlines, remove whitespaces and such |
|---|
| 784 | | * |
|---|
| 785 | | * @param string $data |
|---|
| 786 | | * @param bool $puredata |
|---|
| 787 | | * @param string $force |
|---|
| 788 | | * @return void |
|---|
| 789 | | */ |
|---|
function o($data, $puredata = false, $force = false) {
    # Output $data with paragraph handling applied.
    #   $puredata - true for character data; whitespace runs are collapsed to a
    #               single space unless a code/pre parent is open
    #   $force    - output even if $data is empty; 'end' marks the final call
    # Pending newlines ($this->p_p) are emitted, prefixed with the current
    # indent, before the data itself.
    if($this->parent('table') && trim($data) == ''){ # drop whitespaces inside tables
        return;
    } elseif ($puredata && !$this->parent('code','both') && !$this->parent('pre','both')) { # keep whitespace for code
        $data = preg_replace('#\s+#', ' ', $data);
    }
    # test for the empty string explicitly: "0" is falsy in PHP but is real text
    if (strlen((string)$data) == 0 && !$force) {
        return;
    }
    if (!empty($this->indent)) {
        $data = str_replace("\n", "\n".$this->indent, $data);
    }
    if ($this->start) {
        # no leading single space at the start of a block
        if($data == ' '){
            return;
        }
        $this->p_p = 0;
        $this->start = 0;
    }
    if ($force == 'end') {
        # It's the end.
        $this->p_p = 0;
        $this->out("\n");
    }
    if ($this->p_p) {
        if($data == ' '){
            return;
        }
        $data = ltrim($data);
        $this->out(str_repeat("\n".$this->indent, $this->p_p));
    }
    $this->p_p = 0;
    $this->out($data);
    if(strlen((string)$data) > 0){
        $this->lastWasNL = substr($data, -1) == "\n";
    }
}
|---|
| 827 | | /** |
|---|
| 828 | | * display block links after paragraph etc. |
|---|
| 829 | | * also handle abbrs |
|---|
| 830 | | * |
|---|
| 831 | | * @param void |
|---|
| 832 | | * @return void |
|---|
| 833 | | */ |
|---|
function links(){
    # Print the abbreviation list, then all collected block links as
    # " [count]: href (title)" lines, and reset the link store.
    $this->abbrs();
    if(empty($this->a)){
        return; # no links stored
    }
    $pre = '';
    $this->out("\n\n");
    /**
     * @todo base href
     */
    foreach($this->a as $links){
        foreach($links as $link){
            $title = isset ($link['title']) ? ' (' . $link['title'] . ')' : '';
            $this->out($pre.' [' . $link['count'] . ']: ' . $link['href'].$title."\n");
        }
    }
    $this->a = array();
    $this->out("\n");
    $this->lastWasNL = true;
}
|---|
| 857 | | /** |
|---|
| 858 | | * display abbr list |
|---|
| 859 | | * |
|---|
| 860 | | * @param void |
|---|
| 861 | | * @return void |
|---|
| 862 | | */ |
|---|
function abbrs(){
    # Print every stored abbreviation as "*[abbr]: definition" and reset the store.
    if(empty($this->abbrs)){
        return; # no abbrs stored
    }
    $lines = "\n\n";
    foreach($this->abbrs as $abbr => $def){
        $lines .= '*['.$abbr.']: '.$def."\n";
    }
    $this->abbrs = array();
    # one trailing newline closes the list
    $this->out($lines."\n");
    $this->lastWasNL = true;
}
|---|
| 875 | | /** |
|---|
| 876 | | * if the link is already set use its count, else increase acount |
|---|
| 877 | | * |
|---|
| 878 | | * @param array &$attrs link attributes |
|---|
| 879 | | * @return void |
|---|
| 880 | | */ |
|---|
function previousIndex(&$attrs) {
    # Look up $attrs['href'] in the link store $this->a. If a link with the
    # same href and the same title already exists, $attrs is replaced by that
    # stored entry (including its 'count'). Otherwise the link gets the next
    # running number in $attrs['count'] and is stored.
    $href = $attrs['href'];
    # a missing title counts as an empty one
    $title = isset($attrs['title']) ? (string)$attrs['title'] : '';
    # check for existing link
    if(isset($this->a[$href])){
        foreach($this->a[$href] as $known){
            $known_title = isset($known['title']) ? (string)$known['title'] : '';
            # strict comparison: numeric looking titles such as "1" and "1.0" must not collide
            if($known_title === $title){
                $attrs = $known;
                return;
            }
        }
    }
    # if we come here, no matching link was found
    $this->acount++;
    $attrs['count'] = $this->acount;
    # [] creates the per-href list on first use
    $this->a[$href][] = $attrs;
}
|---|
| 905 | | /** |
|---|
| 906 | | * handles bad html to avoid xml parse errors |
|---|
| 907 | | * |
|---|
| 908 | | * @param string $html |
|---|
| 909 | | * @return string |
|---|
| 910 | | */ |
|---|
function handle_bad_html($html){
    # run replace_bad_html() on every <tag ...>...</tag> pair; (?R) lets the
    # pattern recurse into itself for the tag content
    $pattern = '#<([a-z1-6]+)( [^>]*)?>(.*(?R).*)</\\1>#Us';
    $callback = array($this,'replace_bad_html');
    return preg_replace_callback($pattern,$callback,$html);
}
|---|
| 914 | | /** |
|---|
| 915 | | * callback function which is used in handle_bad_html() |
|---|
| 916 | | * |
|---|
| 917 | | * @param array $matches |
|---|
| 918 | | * @return string |
|---|
| 919 | | */ |
|---|
function replace_bad_html($matches){
    # $matches: 1 = tag name, 2 = attribute string, 3 = inner html
    $open = '<'.$matches[1].$matches[2].'>';
    $close = '</'.$matches[1].'>';
    # recursion: treat the content of the tag the same way
    $inner = $this->handle_bad_html($matches[3]);
    return $open.$inner.$close;
}
|---|
| 925 | | /** |
|---|
| 926 | | * if the option BODY_WIDTH is set, this option will wrap text to the |
|---|
| 927 | | * provided width |
|---|
| 928 | | * |
|---|
| 929 | | * @param string $text |
|---|
| 930 | | * @return string |
|---|
| 931 | | * |
|---|
| 932 | | * @todo wrapping of code (also kept code blocks) |
|---|
| 933 | | */ |
|---|
function optwrap($text) {
    # Wrap $text to $this->BODY_WIDTH columns, line by line. Widths below 30
    # disable wrapping. Continuation lines repeat the indent (or quote prefix)
    # of the line they belong to; tables and block links are never wrapped.
    # NOTE(review): the one-space continuation strings below may have lost
    # whitespace (the definition list branch subtracts 4 columns) - confirm upstream.
    $width = $this->BODY_WIDTH;
    if ($width < 30) {
        return $text;
    }
    $result = '';
    foreach (explode("\n", $text) as $para) {
        if (strlen($para) == 0) {
            $result .= "\n";
            continue;
        }
        if (preg_match('#^(\s*): #',$para,$m)) { # definition lists
            $lead = isset($m[1]) ? $m[1] : '';
            $wrapped = wordwrap($para, $width - strlen($lead) - 4, "\n".$lead.' ');
        } elseif(preg_match('#^(\s*>+)#',$para,$m)){ # blockquote
            $wrapped = wordwrap($para,$width - (strlen($m[0])+1),"\n".$m[0].' ');
        } elseif(preg_match('#^\s*\|#',$para)){ # table
            $wrapped = $para; # dont wrap
        } elseif(preg_match('#^(\s*)\*#',$para,$m)) { # list item @todo: ol
            $lead = (isset($m[1]) ? $m[1] : '').' ';
            $wrapped = wordwrap($para,$width - strlen($lead),"\n".$lead);
        } elseif(preg_match('#^ \[[^\]]+\]:#',$para)){ # block links
            # don't wrap at the moment
            $wrapped = $para;
        } else { # something else
            preg_match('#^\s+#',$para,$m);
            $lead = isset($m[0]) ? $m[0] : '';
            $wrapped = wordwrap($para,$width - strlen($lead),"\n".$lead);
        }
        $result .= $wrapped."\n";
    }
    return $result;
}
|---|
| 968 | | /** |
|---|
| 969 | | * handles html tags which are not represented by the parser logic |
|---|
| 970 | | * if $this->KEEP_HTML is set to true, the tag will be appended to the |
|---|
| 971 | | * output and `markdown="1"` added to its attributes |
|---|
| 972 | | * |
|---|
| 973 | | * @param string $tag |
|---|
| 974 | | * @param array $attrs |
|---|
| 975 | | * @param bool $start |
|---|
| 976 | | * @param array $known_attrs these attrs can be handled by markdown |
|---|
| 977 | | * @return bool |
|---|
| 978 | | */ |
|---|
| 979 | | function keep_tag($tag,$attrs,$start,$force = false){ |
|---|
| 980 | | if(!$force && !$this->KEEP_HTML){ |
|---|
| 981 | | return false; |
|---|
| 982 | | } |
|---|
| | 656 | # finally: parse the whole table |
|---|
| | 657 | if(!$start){ |
|---|
| | 658 | $this->outtext .= "\n\n"; |
|---|
| | 659 | $separator = array(); |
|---|
| | 660 | # seperator with correct align identifikators |
|---|
| | 661 | foreach($this->cols as $col => $arr){ |
|---|
| | 662 | $this->max_len[$col] = max($arr); |
|---|
| | 663 | $left = $right = ''; |
|---|
| | 664 | switch($this->align[$col]){ |
|---|
| | 665 | case 'center': |
|---|
| | 666 | $right = ':'; |
|---|
| | 667 | case 'left': |
|---|
| | 668 | $left = ':'; |
|---|
| | 669 | break; |
|---|
| | 670 | case 'right': |
|---|
| | 671 | $right = ':'; |
|---|
| | 672 | break; |
|---|
| | 673 | } |
|---|
| | 674 | array_push($separator,$left.str_repeat('-',$this->max_len[$col]).$right); |
|---|
| | 675 | } |
|---|
| | 676 | $separator = '| '.implode(' | ',$separator).' |'; |
|---|
| | 677 | # set equal width |
|---|
| | 678 | array_walk($this->rows,array(&$this,'fill_td')); |
|---|
| | 679 | $rows = $this->rows; |
|---|
| | 680 | foreach($rows as $row => $cols){ |
|---|
| | 681 | $this->pbr(); |
|---|
| | 682 | $this->o('| '.implode(' | ',$cols).' |'); |
|---|
| | 683 | if(in_array($row,$this->header)){ |
|---|
| | 684 | $this->pbr(); |
|---|
| | 685 | $this->o($separator); |
|---|
| | 686 | } |
|---|
| | 687 | } |
|---|
| | 688 | $this->cols = array(); |
|---|
| | 689 | $this->rows = array(); |
|---|
| | 690 | $this->align = array(); |
|---|
| | 691 | $this->pbr(); |
|---|
| | 692 | } |
|---|
| | 693 | break; |
|---|
| | 694 | case 'tr': |
|---|
| | 695 | # not inside a table or the parent table was not parsed |
|---|
| | 696 | if(!$this->parent('table')){ |
|---|
| | 697 | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| | 698 | return; |
|---|
| | 699 | } |
|---|
| | 700 | if($start){ |
|---|
| | 701 | $this->row++; |
|---|
| | 702 | } else { |
|---|
| | 703 | $this->col = 0; |
|---|
| | 704 | } |
|---|
| | 705 | break; |
|---|
| | 706 | case 'th': |
|---|
| | 707 | # not inside a table or the parent table was not parsed |
|---|
| | 708 | if(!$this->parent('table')){ |
|---|
| | 709 | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| | 710 | return; |
|---|
| | 711 | } |
|---|
| | 712 | if($start){ |
|---|
| | 713 | if(!in_array($this->row,$this->header)){ |
|---|
| | 714 | array_push($this->header,$this->row); |
|---|
| | 715 | } |
|---|
| | 716 | $this->col++; |
|---|
| | 717 | $this->align[$this->col] = !empty($attrs['align']) ? $attrs['align'] : null; |
|---|
| | 718 | } |
|---|
| | 719 | break; |
|---|
| | 720 | case 'td': |
|---|
| | 721 | # not inside a table or the parent table was not parsed |
|---|
| | 722 | if(!$this->parent('table')){ |
|---|
| | 723 | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| | 724 | return; |
|---|
| | 725 | } |
|---|
| | 726 | if($start){ |
|---|
| | 727 | $this->col++; |
|---|
| | 728 | if(!empty($attrs['align']) && is_null($this->align[$this->col])){ |
|---|
| | 729 | $this->align[$this->col] = $attrs['align']; |
|---|
| | 730 | if($attrs['align'] == 'center'){ |
|---|
| | 731 | $this->max_len[$this->col] +=2; |
|---|
| | 732 | } |
|---|
| | 733 | } |
|---|
| | 734 | } |
|---|
| | 735 | break; |
|---|
| | 736 | case 'pre': |
|---|
| | 737 | $this->indent(' ',$start,true); |
|---|
| | 738 | if ($start) { |
|---|
| | 739 | $this->pbr(); |
|---|
| | 740 | } |
|---|
| | 741 | break; |
|---|
| | 742 | default: |
|---|
| | 743 | $this->keep_tag($tag,$attrs,$start,true); |
|---|
| | 744 | return; |
|---|
| | 745 | } |
|---|
| | 746 | # if we want to keep all non convertible html this function has to know if some parent elemts |
|---|
| | 747 | # were parsed or not (also some elements need to know if) |
|---|
| | 748 | if($start){ |
|---|
| | 749 | if(!isset($this->parents[$tag])){ |
|---|
| | 750 | $this->parents[$tag] = '1'; |
|---|
| | 751 | } else { |
|---|
| | 752 | $this->parents[$tag] .= '1'; |
|---|
| | 753 | } |
|---|
| | 754 | } else { |
|---|
| | 755 | if($this->LINKS_EACH_PARAGRAPH && in_array($tag,array('p','ul','blockquote','ol','dl','table','h1','h2','h3','h4','h5','h6'))){ |
|---|
| | 756 | $this->links(); |
|---|
| | 757 | } |
|---|
| | 758 | $this->parents[$tag] = substr($this->parents[$tag],0,-1); |
|---|
| | 759 | } |
|---|
| | 760 | return; |
|---|
| | 761 | } |
|---|
/**
 * appends a string to the output ($this->outtext)
 *
 * While a buffer is open the string goes into the current buffer level.
 * Inside a parsed th/td the trimmed string is collected per row/column and
 * its length recorded per column/row instead of being written out.
 *
 * @param string $str
 * @return void
 */
function out($str) {
    # buffering
    $lvl = $this->buffer_lvl;
    if ($lvl) {
        $this->buffer[$lvl] .= $str;
        return;
    }
    # plain output outside of table cells
    if (!$this->parent('th') && !$this->parent('td')) {
        $this->outtext .= $str;
        return;
    }
    # this is for tables (see php markdown extra by michel fortin)
    $cell = trim($str);
    $r = $this->row;
    $c = $this->col;
    if (isset($this->rows[$r][$c])) {
        $this->rows[$r][$c] .= $cell;
    } else {
        $this->rows[$r][$c] = $cell;
    }
    $length = strlen($cell);
    if (isset($this->cols[$c][$r])) {
        $this->cols[$c][$r] += $length;
    } else {
        $this->cols[$c][$r] = $length;
    }
}
|---|
/**
 * further parse the output and add newlines, remove whitespaces and such
 *
 * Line breaks queued in $this->p_p are written in front of $data before the
 * data itself is handed to out().
 *
 * @param string $data text to append
 * @param bool $puredata collapse whitespace runs to one space, unless inside code/pre
 * @param string $force when set, processing continues even for empty $data;
 *                      'end' additionally writes a newline first
 * @return void
 */
function o($data, $puredata = false, $force = false) {
    if ($this->parent('table') && trim($data) == '') {
        # drop whitespaces inside tables
        return;
    }
    if ($puredata && !$this->parent('code','both') && !$this->parent('pre','both')) {
        # keep whitespace for code
        $data = preg_replace('#\s+#', ' ', $data);
    }
    # only a truly empty string means "nothing to print"; a literal "0" is
    # real content and must not be discarded by a truthiness check
    if ((string) $data === '' && !$force) {
        return;
    }
    if (!empty($this->indent)) {
        $data = str_replace("\n", "\n".$this->indent, $data);
    }
    if ($this->start) {
        # nothing was written yet: skip a lone leading space
        if ($data == ' ') {
            return;
        }
        $this->p_p = 0;
        $this->start = 0;
    }
    # NOTE(review): loose comparison, a boolean true $force also takes this branch - confirm intended
    if ($force == 'end') {
        # It's the end.
        $this->p_p = 0;
        $this->out("\n");
    }
    if ($this->p_p) {
        if ($data == ' ') {
            return;
        }
        # flush the queued line breaks, each followed by the current indent
        $data = ltrim($data);
        $this->out(str_repeat("\n".$this->indent, $this->p_p));
    }
    $this->p_p = 0;
    $this->out($data);
    if ((string) $data !== '') {
        $this->lastWasNL = substr($data, -1) == "\n";
    }
}
|---|
/**
 * writes the collected block links to the output and clears them
 * pending abbrs are written first via abbrs()
 *
 * @param void
 * @return void
 */
function links(){
    $this->abbrs();
    if (empty($this->a)) {
        # no links stored
        return;
    }
    $this->out("\n\n");
    /**
     * @todo base href
     */
    foreach ($this->a as $group) {
        foreach ($group as $link) {
            $line = ' [' . $link['count'] . ']: ' . $link['href'];
            # the title is optional
            $line .= isset($link['title']) ? ' (' . $link['title'] . ')' : '';
            $this->out($line."\n");
        }
    }
    $this->a = array();
    $this->out("\n");
    $this->lastWasNL = true;
}
|---|
/**
 * writes the collected abbr definitions to the output and clears them
 *
 * @param void
 * @return void
 */
function abbrs(){
    if (!empty($this->abbrs)) {
        $this->out("\n\n");
        # one "*[abbr]: definition" line per stored entry
        foreach ($this->abbrs as $short => $meaning) {
            $this->out('*['.$short.']: '.$meaning."\n");
        }
        $this->abbrs = array();
        $this->out("\n");
        $this->lastWasNL = true;
    }
}
|---|
/**
 * if the link is already set use its count, else increase acount
 *
 * Stored links are grouped by href. A stored link matches when neither it
 * nor $attrs carries a title, or when both titles are equal; on a match
 * $attrs is replaced by the stored entry. Otherwise $attrs receives a new
 * 'count' and is stored.
 *
 * @param array &$attrs link attributes, must contain 'href'
 * @return void
 */
function previousIndex(&$attrs) {
    $href = $attrs['href'];
    # a missing title is treated as null so the comparison below never
    # reads an undefined index
    $title = isset($attrs['title']) ? $attrs['title'] : null;
    # check for existing link
    if (isset($this->a[$href])) {
        foreach ($this->a[$href] as $known) {
            $knownTitle = isset($known['title']) ? $known['title'] : null;
            if ((empty($title) && empty($knownTitle)) || $knownTitle == $title) {
                $attrs = $known;
                return;
            }
        }
    }
    # if we come here, no matching link was found
    $this->acount++;
    $attrs['count'] = $this->acount;
    # appending creates the per-href list when it does not exist yet
    $this->a[$href][] = $attrs;
}
|---|
/**
 * handles bad html to avoid xml parse errors
 *
 * Every matching <tag ...>...</tag> element is passed to
 * replace_bad_html(), which calls back into this method for the element's
 * inner html.
 *
 * @param string $html
 * @return string the processed html, or $html unchanged if the regex engine failed
 */
function handle_bad_html($html){
    $fixed = preg_replace_callback('#<([a-z1-6]+)( [^>]*)?>(.*(?R).*)</\\1>#Us',array(&$this,'replace_bad_html'),$html);
    # preg_replace_callback() returns null on a PCRE error (e.g. backtrack or
    # recursion limit); keep the input rather than losing the whole document
    return $fixed === null ? $html : $fixed;
}
|---|
/**
 * callback used by handle_bad_html(): rebuilds one matched element
 *
 * @param array $matches [1] tag name, [2] raw attribute string, [3] inner html
 * @return string the element with its inner html processed by handle_bad_html()
 */
function replace_bad_html($matches){
    $tag = $matches[1];
    # recursion: run the inner html through the same clean-up pass
    $inner = $this->handle_bad_html($matches[3]);
    $open = '<'.$tag.$matches[2].'>';
    $close = '</'.$tag.'>';
    return $open.$inner.$close;
}
|---|
/**
 * wraps every line of $text to BODY_WIDTH columns
 *
 * The text is returned untouched when BODY_WIDTH is below 30. Tables and
 * block link definitions are never wrapped; all other lines are wrapped
 * with a continuation prefix derived from their leading indent or marker.
 *
 * @param string $text
 * @return string
 *
 * @todo wrapping of code (also kept code blocks)
 */
function optwrap($text) {
    $width = $this->BODY_WIDTH;
    if ($width < 30) {
        return $text;
    }
    $wrapped = '';
    foreach (explode("\n", $text) as $line) {
        if (strlen($line) == 0) {
            # empty lines are passed through
            $wrapped .= "\n";
            continue;
        }
        if (preg_match('#^(\s*): #', $line, $m)) {
            # definition lists
            # NOTE(review): 4 columns are reserved although the continuation prefix
            # shown here is a single space - confirm the literal against the pristine file
            $lead = isset($m[1]) ? $m[1] : '';
            $line = wordwrap($line, $width - strlen($lead) - 4, "\n".$lead.' ');
        } elseif (preg_match('#^(\s*>+)#', $line, $m)) {
            # blockquote: continuation lines repeat the quote marker
            $line = wordwrap($line, $width - (strlen($m[0]) + 1), "\n".$m[0].' ');
        } elseif (preg_match('#^\s*\|#', $line) || preg_match('#^ \[[^\]]+\]:#', $line)) {
            # tables and block links: don't wrap
        } elseif (preg_match('#^(\s*)\*#', $line, $m)) {
            # list item @todo: ol
            $lead = (isset($m[1]) ? $m[1] : '').' ';
            $line = wordwrap($line, $width - strlen($lead), "\n".$lead);
        } else {
            # something else: keep the leading whitespace on continuation lines
            $lead = preg_match('#^\s+#', $line, $m) ? $m[0] : '';
            $line = wordwrap($line, $width - strlen($lead), "\n".$lead);
        }
        $wrapped .= $line."\n";
    }
    return $wrapped;
}
|---|
/**
 * handles html tags which are not represented by the parser logic
 * if $this->KEEP_HTML is set to true (or $force is given), the tag is
 * appended to the output; div, center, li, dt and dd additionally get
 * `markdown="1"` added to their attributes when not forced
 *
 * @param string $tag
 * @param array $attrs attributes of the tag (only used for start tags)
 * @param bool $start true for the opening tag, false for the closing tag
 * @param bool $force keep the tag regardless of KEEP_HTML and of whether
 *                    its attributes could be expressed in markdown
 * @return bool true if the tag was written to the output, false if it was not kept
 */
function keep_tag($tag,$attrs,$start,$force = false){
    if(!$force && !$this->KEEP_HTML){
        return false;
    }
    # start tag
    if($start){
        # count()/array_keys() below need a real array
        if(!is_array($attrs)){
            $attrs = array();
        }
        # if there is a attr which cannot be handled by markdown
        # this tag will be kept.
        $known_attrs = isset($this->has_attrs[$tag]) ? $this->has_attrs[$tag] : array();
        if(!$force && count($known_attrs) >= count($attrs)){
            if(empty($attrs) || count(array_diff(array_keys($attrs),$known_attrs)) == 0){
                # tag can be handled by markdown!
                return false;
            }
        }
        # rebuild the attribute string, leaving out the forcehtml marker
        $attr = '';
        foreach($attrs as $key => $value){
            if($key == 'forcehtml'){
                continue;
            }
            $attr.=' '.$key.'="'.$value.'"';
        }
        if(!$force && in_array($tag,array('div','center','li','dt','dd'))){
            $attr.= ' markdown="1"';
        } elseif(!$this->force_html) {
            $this->force_html($tag);
        }
        $this->o('<'.$tag.$attr.'>',true);
        # add to list of parents ('2' marks a kept element):
        if(isset($this->parents[$tag])){
            $this->parents[$tag] .= '2';
        } else {
            $this->parents[$tag] = '2';
        }
    # close tag
    } else {
        if(!$force && !$this->parent($tag,'kept')){
            # the start tag of this element was not parsed
            return false;
        }
        $this->o('</'.$tag.'>');
        # a forced close tag may arrive without a recorded start tag
        if(isset($this->parents[$tag])){
            $this->parents[$tag] = substr($this->parents[$tag],0,-1);
        }
        # newlines after </tag>
        if(in_array($tag,array('th','td','dt','dd','li','p'))){
            $this->o("\n");
        }
    }
    # newlines after <tag> and </tag>
    if(in_array($tag,array('div','center','table','tr','ul','ol','dl','pre'))){
        $this->o("\n");
    }
    return true;
}
|---|
/**
 * pads every cell of a table row to the width of its column
 *
 * The width is $this->max_len[$col], plus 2 for centered and plus 1 for
 * left/right aligned columns. Centered cells are padded on both sides,
 * right aligned cells on the left, all others on the right.
 *
 * @param array &$row cells of one row, keyed by column
 * @return void
 */
function fill_td(&$row){
    foreach(array_keys($row) as $col){
        $width = $this->max_len[$col];
        # columns without an alignment entry are padded like the default case
        $align = isset($this->align[$col]) ? $this->align[$col] : null;
        $pad = STR_PAD_RIGHT;
        switch($align){
            case 'center':
                $width += 2;
                $pad = STR_PAD_BOTH;
                break;
            case 'left':
                $width++;
                break;
            case 'right':
                $width++;
                $pad = STR_PAD_LEFT;
                break;
        }
        $row[$col] = str_pad($row[$col],$width,' ',$pad);
    }
}
|---|
/**
 * some sort of <br />: queues a single line break unless breaks are
 * already queued in $this->p_p
 *
 * @param void
 * @return void
 */
function pbr() {
    if ($this->p_p != 0) {
        return;
    }
    $this->p_p = 1;
}
|---|
/**
 * text <p>: queues two line breaks in $this->p_p, except inside a parsed table
 *
 * @param void
 * @return void
 */
function p() {
    if (!$this->parent('table')) {
        $this->p_p = 2;
    }
}
|---|
/**
 * add $indent before each line
 *
 * On an opening tag $indent is appended to $this->indent; on a closing tag
 * strlen($indent) characters are removed from its end again.
 *
 * @param string $indent
 * @param bool $start whether it's an opening tag or a closing one
 * @param bool $output shall $indent be outputted? (only if $start is true)
 * @return void
 */
function indent($indent,$start,$output=false){
    if (!$start) {
        $cut = strlen($indent);
        # never cut more than is there
        $this->indent = ($cut >= strlen($this->indent)) ? '' : substr($this->indent,0,-$cut);
        return;
    }
    if ($output) {
        $this->o($indent);
    }
    $this->indent .= $indent;
}
|---|
/**
 * checks if a parent element exists
 * $this->parents[$parent] holds one character per open element of that
 * name: '1' for a parsed one, '2' for a kept one; only the last character
 * is looked at unless $type is 'both'
 * @param string $parent name of the parent tag
 * @param string $type either 'parsed' or 'kept' or 'both'
 * @return bool
 */
function parent($parent,$type = 'parsed'){
    if (!isset($this->parents[$parent])) {
        return false;
    }
    $stack = $this->parents[$parent];
    if ($type == 'both') {
        return !empty($stack);
    }
    $marker = ($type == 'parsed') ? '1' : '2';
    return substr($stack,-1) === $marker;
}
|---|
/**
 * start buffer: opens a new, empty buffer level
 * line breaks still queued in $this->p_p are written out first
 *
 * @param void
 * @return void
 */
function buffer(){
    $pending = $this->p_p;
    if ($pending) {
        $this->out(str_repeat("\n".$this->indent, $pending));
        $this->p_p = 0;
    }
    $lvl = ++$this->buffer_lvl;
    $this->buffer[$lvl] = '';
}
|---|
/**
 * end buffer: closes the current buffer level and returns what it collected
 *
 * @param void
 * @return string
 */
function unbuffer(){
    $lvl = $this->buffer_lvl;
    $captured = $this->buffer[$lvl];
    unset($this->buffer[$lvl]);
    $this->buffer_lvl--;
    return $captured;
}
|---|
/**
 * decode email: turns html entities in $text back into characters
 *
 * Named entities are decoded with html_entity_decode() (or the translation
 * table where that function is missing). Numeric entities that are still
 * present afterwards are replaced by chr() of their value.
 *
 * @author derernst@gmx.ch <http://www.php.net/manual/en/function.html-entity-decode.php#68536>
 *
 * @param string $text
 * @param int $quote_style one of the ENT_* quote flags
 * @return string
 */
function decode($text,$quote_style = ENT_NOQUOTES){
    if (function_exists('html_entity_decode')) {
        $text = html_entity_decode($text, $quote_style, 'ISO-8859-1'); // NOTE: UTF-8 does not work!
    }
    else {
        $trans_tbl = get_html_translation_table(HTML_ENTITIES, $quote_style);
        $trans_tbl = array_flip($trans_tbl);
        $text = strtr($text, $trans_tbl);
    }
    # The former preg_replace() calls used the /e (eval) modifier, which no
    # longer exists in PHP 7+. The matches are collected and replaced through
    # strtr() instead, so no matched text is ever evaluated as code.
    if (preg_match_all('~&#x([0-9a-f]+);~i', $text, $hits, PREG_SET_ORDER)) {
        $map = array();
        foreach ($hits as $hit) {
            # chr() works modulo 256, so the last two hex digits are sufficient
            $map[$hit[0]] = chr(hexdec(substr($hit[1], -2)));
        }
        $text = strtr($text, $map);
    }
    if (preg_match_all('~&#([0-9]+);~', $text, $hits, PREG_SET_ORDER)) {
        $map = array();
        foreach ($hits as $hit) {
            $map[$hit[0]] = chr((int) $hit[1]);
        }
        $text = strtr($text, $map);
    }
    return $text;
}
|---|
| | 994 | # start tag |
|---|
| | 995 | if($start){ |
|---|
| | 996 | # if there is a attr which cannot be handled by markdown |
|---|
| | 997 | # this tag will be kept. |
|---|
| | 998 | if(isset($this->has_attrs[$tag])){ |
|---|
| | 999 | $known_attrs = $this->has_attrs[$tag]; |
|---|
| | 1000 | } else { |
|---|
| | 1001 | $known_attrs = array(); |
|---|
| | 1002 | } |
|---|
| | 1003 | if(!$force && count($known_attrs) >= count($attrs)){ |
|---|
| | 1004 | if(empty($attrs) || count(array_diff(array_keys($attrs),$known_attrs)) == 0){ |
|---|
| | 1005 | # tag can be handled by markdown! |
|---|
| | 1006 | return false; |
|---|
| | 1007 | } |
|---|
| | 1008 | } |
|---|
| | 1009 | $attr = ''; |
|---|
| | 1010 | if(!empty($attrs)){ |
|---|
| | 1011 | foreach($attrs as $key => $value){ |
|---|
| | 1012 | if($key == 'forcehtml'){ |
|---|
| | 1013 | continue; |
|---|
| | 1014 | } |
|---|
| | 1015 | $attr.=' '.$key.'="'.$value.'"'; |
|---|
| | 1016 | } |
|---|
| | 1017 | } |
|---|
| | 1018 | if(!$force && in_array($tag,array('div','center','li','dt','dd'))){ |
|---|
| | 1019 | $attr.= ' markdown="1"'; |
|---|
| | 1020 | } elseif(!$this->force_html) { |
|---|
| | 1021 | $this->force_html($tag); |
|---|
| | 1022 | } |
|---|
| | 1023 | $this->o('<'.$tag.$attr.'>',true); |
|---|
| | 1024 | # add to list of parents: |
|---|
| | 1025 | if(isset($this->parents[$tag])){ |
|---|
| | 1026 | $this->parents[$tag] .= '2'; |
|---|
| | 1027 | } else { |
|---|
| | 1028 | $this->parents[$tag] = '2'; |
|---|
| | 1029 | } |
|---|
| | 1030 | # close tag |
|---|
| | 1031 | } else { |
|---|
| | 1032 | if(!$force && !$this->parent($tag,'kept')){ |
|---|
| | 1033 | # the start tag of this element was not parsed |
|---|
| | 1034 | return false; |
|---|
| | 1035 | } |
|---|
| | 1036 | $this->o('</'.$tag.'>'); |
|---|
| | 1037 | $this->parents[$tag] = substr($this->parents[$tag],0,-1); |
|---|
| | 1038 | # newlines after </tag> |
|---|
| | 1039 | if(in_array($tag,array('th','td','dt','dd','li','p'))){ |
|---|
| | 1040 | $this->o("\n"); |
|---|
| | 1041 | } |
|---|
| | 1042 | } |
|---|
| | 1043 | # newlines after <tag> and </tag> |
|---|
| | 1044 | if(in_array($tag,array('div','center','table','tr','ul','ol','dl','pre'))){ |
|---|
| | 1045 | $this->o("\n"); |
|---|
| | 1046 | } |
|---|
| | 1047 | return true; |
|---|
| | 1048 | } |
|---|
/**
 * pads every cell of a table row to the width of its column
 *
 * The width is $this->max_len[$col], plus 2 for centered and plus 1 for
 * left/right aligned columns. Centered cells are padded on both sides,
 * right aligned cells on the left, all others on the right.
 *
 * @param array &$row cells of one row, keyed by column
 * @return void
 */
function fill_td(&$row){
    foreach(array_keys($row) as $col){
        $width = $this->max_len[$col];
        # columns without an alignment entry are padded like the default case
        $align = isset($this->align[$col]) ? $this->align[$col] : null;
        $pad = STR_PAD_RIGHT;
        switch($align){
            case 'center':
                $width += 2;
                $pad = STR_PAD_BOTH;
                break;
            case 'left':
                $width++;
                break;
            case 'right':
                $width++;
                $pad = STR_PAD_LEFT;
                break;
        }
        $row[$col] = str_pad($row[$col],$width,' ',$pad);
    }
}
|---|
/**
 * some sort of <br />: queues a single line break unless breaks are
 * already queued in $this->p_p
 *
 * @param void
 * @return void
 */
function pbr() {
    if ($this->p_p != 0) {
        return;
    }
    $this->p_p = 1;
}
|---|
/**
 * text <p>: queues two line breaks in $this->p_p, except inside a parsed table
 *
 * @param void
 * @return void
 */
function p() {
    if (!$this->parent('table')) {
        $this->p_p = 2;
    }
}
|---|
/**
 * add $indent before each line
 *
 * On an opening tag $indent is appended to $this->indent; on a closing tag
 * strlen($indent) characters are removed from its end again.
 *
 * @param string $indent
 * @param bool $start whether it's an opening tag or a closing one
 * @param bool $output shall $indent be outputted? (only if $start is true)
 * @return void
 */
function indent($indent,$start,$output=false){
    if (!$start) {
        $cut = strlen($indent);
        # never cut more than is there
        $this->indent = ($cut >= strlen($this->indent)) ? '' : substr($this->indent,0,-$cut);
        return;
    }
    if ($output) {
        $this->o($indent);
    }
    $this->indent .= $indent;
}
|---|
/**
 * checks if a parent element exists
 * $this->parents[$parent] holds one character per open element of that
 * name: '1' for a parsed one, '2' for a kept one; only the last character
 * is looked at unless $type is 'both'
 * @param string $parent name of the parent tag
 * @param string $type either 'parsed' or 'kept' or 'both'
 * @return bool
 */
function parent($parent,$type = 'parsed'){
    if (!isset($this->parents[$parent])) {
        return false;
    }
    $stack = $this->parents[$parent];
    if ($type == 'both') {
        return !empty($stack);
    }
    $marker = ($type == 'parsed') ? '1' : '2';
    return substr($stack,-1) === $marker;
}
|---|
/**
 * start buffer: opens a new, empty buffer level
 * line breaks still queued in $this->p_p are written out first
 *
 * @param void
 * @return void
 */
function buffer(){
    $pending = $this->p_p;
    if ($pending) {
        $this->out(str_repeat("\n".$this->indent, $pending));
        $this->p_p = 0;
    }
    $lvl = ++$this->buffer_lvl;
    $this->buffer[$lvl] = '';
}
|---|
/**
 * end buffer: closes the current buffer level and returns what it collected
 *
 * @param void
 * @return string
 */
function unbuffer(){
    $lvl = $this->buffer_lvl;
    $captured = $this->buffer[$lvl];
    unset($this->buffer[$lvl]);
    $this->buffer_lvl--;
    return $captured;
}
|---|
/**
 * decode email: turns html entities in $text back into characters
 *
 * Named entities are decoded with html_entity_decode() (or the translation
 * table where that function is missing). Numeric entities that are still
 * present afterwards are replaced by chr() of their value.
 *
 * @author derernst@gmx.ch <http://www.php.net/manual/en/function.html-entity-decode.php#68536>
 *
 * @param string $text
 * @param int $quote_style one of the ENT_* quote flags
 * @return string
 */
function decode($text,$quote_style = ENT_NOQUOTES){
    if (function_exists('html_entity_decode')) {
        $text = html_entity_decode($text, $quote_style, 'ISO-8859-1'); // NOTE: UTF-8 does not work!
    }
    else {
        $trans_tbl = get_html_translation_table(HTML_ENTITIES, $quote_style);
        $trans_tbl = array_flip($trans_tbl);
        $text = strtr($text, $trans_tbl);
    }
    # The former preg_replace() calls used the /e (eval) modifier, which no
    # longer exists in PHP 7+. The matches are collected and replaced through
    # strtr() instead, so no matched text is ever evaluated as code.
    if (preg_match_all('~&#x([0-9a-f]+);~i', $text, $hits, PREG_SET_ORDER)) {
        $map = array();
        foreach ($hits as $hit) {
            # chr() works modulo 256, so the last two hex digits are sufficient
            $map[$hit[0]] = chr(hexdec(substr($hit[1], -2)));
        }
        $text = strtr($text, $map);
    }
    if (preg_match_all('~&#([0-9]+);~', $text, $hits, PREG_SET_ORDER)) {
        $map = array();
        foreach ($hits as $hit) {
            $map[$hit[0]] = chr((int) $hit[1]);
        }
        $text = strtr($text, $map);
    }
    return $text;
}
|---|