You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

rcube_text2html.php 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. <?php
  2. /**
  3. +-----------------------------------------------------------------------+
  4. | This file is part of the Roundcube Webmail client |
  5. | Copyright (C) 2008-2014, The Roundcube Dev Team |
  6. | |
  7. | Licensed under the GNU General Public License version 3 or |
  8. | any later version with exceptions for skins & plugins. |
  9. | See the README file for a full license statement. |
  10. | |
  11. | PURPOSE: |
  12. | Converts plain text to HTML |
  13. +-----------------------------------------------------------------------+
  14. | Author: Aleksander Machniak <alec@alec.pl> |
  15. +-----------------------------------------------------------------------+
  16. */
  /**
   * Converts plain text to HTML.
   *
   * Typical use: construct with the plain text (or call set_text()) and then
   * call get_html(). The conversion result is cached until set_text() is
   * called again.
   *
   * @package Framework
   * @subpackage Utils
   */
  class rcube_text2html
  {
      /**
       * Contains the HTML content after conversion.
       *
       * @var string $html
       */
      protected $html;

      /**
       * Contains the plain text.
       *
       * @var string $text
       */
      protected $text;

      /**
       * True when $html holds the conversion result for the current $text.
       *
       * Declared explicitly so that get_html() never reads an undefined
       * property and no dynamic property gets created in set_text().
       *
       * @var boolean $_converted
       */
      protected $_converted = false;

      /**
       * Configuration
       *
       * @var array $config
       */
      protected $config = array(
          // non-breaking space
          'space' => "\xC2\xA0",
          // enables format=flowed parser
          'flowed' => false,
          // enables delsp=yes parser
          'delsp' => false,
          // enables wrapping for non-flowed text
          'wrap' => true,
          // line-break tag
          'break' => "<br>\n",
          // prefix and suffix (wrapper element)
          'begin' => '<div class="pre">',
          'end' => '</div>',
          // enables links replacement
          'links' => true,
          // string replacer class
          'replacer' => 'rcube_string_replacer',
          // prefix and suffix of unwrappable line
          'nobr_start' => '<span style="white-space:nowrap">',
          'nobr_end' => '</span>',
      );

      /**
       * Constructor.
       *
       * If the plain text source string (or file) is supplied, the class
       * will instantiate with that source propagated, all that has
       * to be done is to call get_html().
       *
       * @param string  $source    Plain text
       * @param boolean $from_file Indicates $source is a file to pull content from
       * @param array   $config    Class configuration (merged over the defaults)
       */
      public function __construct($source = '', $from_file = false, $config = array())
      {
          // empty() alone would also reject the perfectly valid text "0"
          if (!empty($source) || $source === '0') {
              $this->set_text($source, $from_file);
          }

          if (!empty($config) && is_array($config)) {
              $this->config = array_merge($this->config, $config);
          }
      }

      /**
       * Loads source text into memory, either from $source string or a file.
       *
       * When $from_file is set but the file does not exist, $source itself
       * is used as the text. Any cached conversion result is invalidated.
       *
       * @param string  $source    Plain text
       * @param boolean $from_file Indicates $source is a file to pull content from
       */
      public function set_text($source, $from_file = false)
      {
          if ($from_file && file_exists($source)) {
              $this->text = file_get_contents($source);
          }
          else {
              $this->text = $source;
          }

          $this->_converted = false;
      }

      /**
       * Returns the HTML content, converting the text first if needed.
       *
       * @return string HTML content
       */
      public function get_html()
      {
          if (!$this->_converted) {
              $this->_convert();
          }

          return $this->html;
      }

      /**
       * Prints the HTML.
       */
      public function print_html()
      {
          print $this->get_html();
      }

      /**
       * Runs the conversion (calls _converter() method) and caches the result.
       */
      protected function _convert()
      {
          // Convert TXT to HTML; the cast keeps null (no text set) and
          // false (failed file read) away from the string functions below
          $this->html       = $this->_converter((string) $this->text);
          $this->_converted = true;
      }

      /**
       * Workhorse function that does actual conversion.
       *
       * Steps: optional format=flowed unfolding, replacing links with tokens,
       * wrapping quoted lines in <blockquote>, marking the signature block,
       * resolving the tokens and finally replacing line breaks.
       *
       * @param string $text Plain text
       *
       * @return string HTML wrapped in the configured 'begin'/'end' strings
       */
      protected function _converter($text)
      {
          // make links and email-addresses clickable
          $attribs  = array('link_attribs' => array('rel' => 'noreferrer', 'target' => '_blank'));
          $class    = $this->config['replacer'];
          $replacer = new $class($attribs);

          // marker character handed to unfold_flowed(); lines that start with
          // it afterwards are handled as flowed lines in the loop below
          $flowed_char = 0x01;

          if ($this->config['flowed']) {
              $delsp = $this->config['delsp'];
              $text  = rcube_mime::unfold_flowed($text, chr($flowed_char), $delsp);
          }

          // search for patterns like links and e-mail addresses and replace with tokens
          if ($this->config['links']) {
              $text = $replacer->replace($text);
          }

          // split body into single lines
          $text        = preg_split('/\r?\n/', $text);
          $quote_level = 0;
          $last        = null;
          // number of non-space bytes in the previously handled line
          $length      = 0;

          // wrap quoted lines with <blockquote>
          for ($n = 0, $cnt = count($text); $n < $cnt; $n++) {
              $flowed = false;

              // isset() guards against reading offset 0 of an empty line
              if ($this->config['flowed'] && isset($text[$n][0]) && ord($text[$n][0]) == $flowed_char) {
                  $flowed   = true;
                  $text[$n] = substr($text[$n], 1);
              }

              $wrap = $flowed || $this->config['wrap'];

              if (isset($text[$n][0]) && $text[$n][0] == '>' && preg_match('/^(>+ {0,1})+/', $text[$n], $regs)) {
                  // quoting depth is the number of '>' in the line prefix
                  $q        = substr_count($regs[0], '>');
                  $text[$n] = substr($text[$n], strlen($regs[0]));
                  $text[$n] = $this->_convert_line($text[$n], $wrap);
                  $_length  = strlen(str_replace(' ', '', $text[$n]));

                  if ($q > $quote_level) {
                      $token = $this->_quote_token($replacer, '<blockquote>', $q - $quote_level);

                      if ($last !== null) {
                          // glue the opening tag(s) and this line onto the previous kept line
                          $text[$last] .= (!$length ? "\n" : '') . $token . $text[$n];
                          unset($text[$n]);
                      }
                      else {
                          $text[$n] = $token . $text[$n];
                          $last     = $n;
                      }
                  }
                  else if ($q < $quote_level) {
                      $token = $this->_quote_token($replacer, '</blockquote>', $quote_level - $q);

                      $text[$last] .= (!$length ? "\n" : '') . $token . $text[$n];
                      unset($text[$n]);
                  }
                  else {
                      $last = $n;
                  }
              }
              else {
                  $text[$n] = $this->_convert_line($text[$n], $wrap);
                  $q        = 0;
                  $_length  = strlen(str_replace(' ', '', $text[$n]));

                  if ($quote_level > 0) {
                      // leaving a quoted block: close every open level
                      $token = $this->_quote_token($replacer, '</blockquote>', $quote_level);

                      $text[$last] .= (!$length ? "\n" : '') . $token . $text[$n];
                      unset($text[$n]);
                  }
                  else {
                      $last = $n;
                  }
              }

              $quote_level = $q;
              $length      = $_length;
          }

          // text ended inside a quoted block
          if ($quote_level > 0) {
              $text[$last] .= $this->_quote_token($replacer, '</blockquote>', $quote_level);
          }

          $text = join("\n", $text);

          // colorize signature (up to <sig_max_lines> lines)
          $len           = strlen($text);
          $sig_sep       = "--" . $this->config['space'] . "\n";
          $sig_max_lines = rcube::get_instance()->config->get('sig_max_lines', 15);
          // 0 means "no candidate found yet", so the first search uses offset 0
          $sp            = 0;

          // look for the last separator that sits at the start of a line;
          // a negative offset continues the search before the previous hit
          while (($sp = strrpos($text, $sig_sep, $sp ? -$len + $sp - 1 : 0)) !== false) {
              if ($sp == 0 || $text[$sp-1] == "\n") {
                  // do not touch blocks with more than X lines
                  if (substr_count($text, "\n", $sp) < $sig_max_lines) {
                      $text = substr($text, 0, max(0, $sp))
                          . '<span class="sig">' . substr($text, $sp) . '</span>';
                  }

                  break;
              }
          }

          // insert url/mailto links and citation tags
          $text = $replacer->resolve($text);

          // replace line breaks
          $text = str_replace("\n", $this->config['break'], $text);

          return $this->config['begin'] . $text . $this->config['end'];
      }

      /**
       * Registers $count repetitions of $tag with the replacer and returns
       * the replacement token for them.
       *
       * @param object $replacer String replacer instance used by _converter()
       * @param string $tag      Tag to repeat, e.g. '<blockquote>'
       * @param int    $count    Number of repetitions
       *
       * @return string Replacement token
       */
      protected function _quote_token($replacer, $tag, $count)
      {
          return $replacer->get_replacement($replacer->add(str_repeat($tag, $count)));
      }

      /**
       * Converts HTML special characters and spaces in a single line of text.
       *
       * @param string  $text      Line of text
       * @param boolean $is_flowed When true only leading/repeated spaces become
       *                           non-breaking; otherwise the whole line is made
       *                           non-breakable (if it contains a non-word character)
       *
       * @return string Converted line
       */
      protected function _convert_line($text, $is_flowed)
      {
          static $table;

          if (empty($table)) {
              $table = get_html_translation_table(HTML_SPECIALCHARS);
              // make sure '?' is never translated
              unset($table['?']);

              // replace some whitespace characters
              $table["\r"] = '';
              $table["\t"] = ' ';
          }

          // skip signature separator
          if ($text === '-- ') {
              return '--' . $this->config['space'];
          }

          // replace HTML special and whitespace characters
          $text = strtr($text, $table);

          $nbsp = $this->config['space'];

          // replace spaces with non-breaking spaces
          if ($is_flowed) {
              $pos  = 0;
              // byte growth of $copy relative to $text caused by replacements so far
              $diff = 0;
              $len  = strlen($nbsp);
              $copy = $text;

              while (($pos = strpos($text, ' ', $pos)) !== false) {
                  // only a leading space or a space following another space is replaced
                  if ($pos == 0 || $text[$pos-1] == ' ') {
                      $copy  = substr_replace($copy, $nbsp, $pos + $diff, 1);
                      $diff += $len - 1;
                  }

                  $pos++;
              }

              $text = $copy;
          }
          // make the whole line non-breakable if needed
          else if ($text !== '' && preg_match('/[^a-zA-Z0-9_]/', $text)) {
              // use non-breakable spaces to correctly display
              // trailing/leading spaces and multi-space inside
              $text = str_replace(' ', $nbsp, $text);

              // wrap in nobr element, so it's not wrapped on e.g. - or /
              $text = $this->config['nobr_start'] . $text . $this->config['nobr_end'];
          }

          return $text;
      }
  }