You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rcube_text2html.php 9.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. <?php
  2. /**
  3. +-----------------------------------------------------------------------+
  4. | This file is part of the Roundcube Webmail client |
  5. | Copyright (C) 2008-2014, The Roundcube Dev Team |
  6. | |
  7. | Licensed under the GNU General Public License version 3 or |
  8. | any later version with exceptions for skins & plugins. |
  9. | See the README file for a full license statement. |
  10. | |
  11. | PURPOSE: |
  12. | Converts plain text to HTML |
  13. +-----------------------------------------------------------------------+
  14. | Author: Aleksander Machniak <alec@alec.pl> |
  15. +-----------------------------------------------------------------------+
  16. */
  /**
   * Converts plain text to HTML
   *
   * Load the text through the constructor or set_text() and read the result
   * with get_html() / print_html(). The conversion runs on first read and its
   * result is reused until set_text() is called again.
   *
   * @package Framework
   * @subpackage Utils
   */
  class rcube_text2html
  {
      /**
       * Contains the HTML content after conversion.
       *
       * @var string $html
       */
      protected $html;

      /**
       * Contains the plain text.
       *
       * @var string $text
       */
      protected $text;

      /**
       * True when $html has been produced from the current $text.
       *
       * Declared explicitly so that get_html() can be called before set_text()
       * without reading an undefined property, and so that no dynamic property
       * is created at runtime.
       *
       * @var boolean $_converted
       */
      protected $_converted = false;

      /**
       * Configuration
       *
       * @var array $config
       */
      protected $config = array(
          // non-breaking space
          'space' => "\xC2\xA0",
          // enables format=flowed parser
          'flowed' => false,
          // enables wrapping for non-flowed text
          'wrap' => true,
          // line-break tag
          'break' => "<br>\n",
          // prefix and suffix (wrapper element)
          'begin' => '<div class="pre">',
          'end' => '</div>',
          // enables links replacement
          'links' => true,
          // string replacer class
          'replacer' => 'rcube_string_replacer',
          // prefix and suffix of unwrappable line
          'nobr_start' => '<span style="white-space:nowrap">',
          'nobr_end' => '</span>',
      );

      /**
       * Constructor.
       *
       * If the plain text source string (or file) is supplied, the class
       * will instantiate with that source propagated, all that has
       * to be done is to call get_html().
       *
       * @param string  $source    Plain text
       * @param boolean $from_file Indicates $source is a file to pull content from
       * @param array   $config    Class configuration (merged over the defaults)
       */
      public function __construct($source = '', $from_file = false, $config = array())
      {
          if (!empty($source)) {
              $this->set_text($source, $from_file);
          }

          if (!empty($config) && is_array($config)) {
              $this->config = array_merge($this->config, $config);
          }
      }

      /**
       * Loads source text into memory, either from $source string or a file.
       *
       * When $from_file is set but the file does not exist, $source itself
       * is used as the text. Any previously converted HTML is invalidated.
       *
       * @param string  $source    Plain text
       * @param boolean $from_file Indicates $source is a file to pull content from
       */
      public function set_text($source, $from_file = false)
      {
          if ($from_file && file_exists($source)) {
              $this->text = file_get_contents($source);
          }
          else {
              $this->text = $source;
          }

          $this->_converted = false;
      }

      /**
       * Returns the HTML content.
       *
       * @return string HTML content
       */
      public function get_html()
      {
          if (!$this->_converted) {
              $this->_convert();
          }

          return $this->html;
      }

      /**
       * Prints the HTML.
       */
      public function print_html()
      {
          print $this->get_html();
      }

      /**
       * Workhorse function that does actual conversion (calls _converter() method).
       */
      protected function _convert()
      {
          // Convert TXT to HTML
          $this->html       = $this->_converter($this->text);
          $this->_converted = true;
      }

      /**
       * Workhorse function that does actual conversion.
       *
       * @param string $text Plain text
       *
       * @return string HTML wrapped in the configured 'begin'/'end' element
       */
      protected function _converter($text)
      {
          // make links and email-addresses clickable
          $attribs  = array('link_attribs' => array('rel' => 'noreferrer', 'target' => '_blank'));
          $class    = $this->config['replacer'];
          $replacer = new $class($attribs);

          // Character handed to rcube_mime::unfold_flowed(); a line that starts
          // with it afterwards is treated as flowed and the character is removed.
          $flowed_char = chr(0x01);

          if ($this->config['flowed']) {
              $text = rcube_mime::unfold_flowed($text, $flowed_char);
          }

          // search for patterns like links and e-mail addresses and replace with tokens
          if ($this->config['links']) {
              $text = $replacer->replace($text);
          }

          // split body into single lines
          $text        = preg_split('/\r?\n/', $text);
          $quote_level = 0;
          $last        = null;
          $length      = 0;

          // wrap quoted lines with <blockquote>
          for ($n = 0, $cnt = count($text); $n < $cnt; $n++) {
              $flowed = false;
              // first character of the line; empty lines have none, so never
              // index into them directly
              $first = $text[$n] !== '' ? $text[$n][0] : '';

              if ($this->config['flowed'] && $first === $flowed_char) {
                  $flowed   = true;
                  $text[$n] = (string) substr($text[$n], 1);
                  $first    = $text[$n] !== '' ? $text[$n][0] : '';
              }

              if ($first === '>' && preg_match('/^(>+ {0,1})+/', $text[$n], $regs)) {
                  $q        = substr_count($regs[0], '>');
                  // cast: substr() returns false on older PHP when nothing is left
                  $text[$n] = (string) substr($text[$n], strlen($regs[0]));
                  $text[$n] = $this->_convert_line($text[$n], $flowed || $this->config['wrap']);
                  $_length  = strlen(str_replace(' ', '', $text[$n]));

                  if ($q > $quote_level) {
                      // The citation tags are registered in the replacer and only
                      // their tokens are put into the text; resolve() inserts
                      // the real tags at the end.
                      if ($last !== null) {
                          $text[$last] .= (!$length ? "\n" : '')
                              . $replacer->get_replacement($replacer->add(
                                  str_repeat('<blockquote>', $q - $quote_level)))
                              . $text[$n];

                          unset($text[$n]);
                      }
                      else {
                          $text[$n] = $replacer->get_replacement($replacer->add(
                              str_repeat('<blockquote>', $q - $quote_level))) . $text[$n];

                          $last = $n;
                      }
                  }
                  else if ($q < $quote_level) {
                      $text[$last] .= (!$length ? "\n" : '')
                          . $replacer->get_replacement($replacer->add(
                              str_repeat('</blockquote>', $quote_level - $q)))
                          . $text[$n];

                      unset($text[$n]);
                  }
                  else {
                      $last = $n;
                  }
              }
              else {
                  $text[$n] = $this->_convert_line($text[$n], $flowed || $this->config['wrap']);
                  $q        = 0;
                  $_length  = strlen(str_replace(' ', '', $text[$n]));

                  if ($quote_level > 0) {
                      $text[$last] .= (!$length ? "\n" : '')
                          . $replacer->get_replacement($replacer->add(
                              str_repeat('</blockquote>', $quote_level)))
                          . $text[$n];

                      unset($text[$n]);
                  }
                  else {
                      $last = $n;
                  }
              }

              $quote_level = $q;
              $length      = $_length;
          }

          // close citations still open at the end of the text
          if ($quote_level > 0) {
              $text[$last] .= $replacer->get_replacement($replacer->add(
                  str_repeat('</blockquote>', $quote_level)));
          }

          $text = join("\n", $text);

          // colorize signature (up to <sig_max_lines> lines)
          $len           = strlen($text);
          $sig_sep       = "--" . $this->config['space'] . "\n";
          $sig_max_lines = rcube::get_instance()->config->get('sig_max_lines', 15);
          $sp            = 0;

          // Search backwards for the last separator that starts a line. On the
          // first pass the whole text is searched; afterwards the negative offset
          // limits the search to matches starting before the previous hit.
          while (($sp = strrpos($text, $sig_sep, $sp ? -$len + $sp - 1 : 0)) !== false) {
              if ($sp == 0 || $text[$sp-1] == "\n") {
                  // do not touch blocks with more than X lines
                  if (substr_count($text, "\n", $sp) < $sig_max_lines) {
                      $text = substr($text, 0, max(0, $sp))
                          .'<span class="sig">'.substr($text, $sp).'</span>';
                  }

                  break;
              }
          }

          // insert url/mailto links and citation tags
          $text = $replacer->resolve($text);

          // replace line breaks
          $text = str_replace("\n", $this->config['break'], $text);

          return $this->config['begin'] . $text . $this->config['end'];
      }

      /**
       * Converts spaces in line of text
       *
       * Escapes HTML special characters, then either replaces the leading space
       * and every space that follows another space with the configured
       * non-breaking space (wrappable line), or replaces all spaces and wraps
       * the line in the 'nobr_start'/'nobr_end' element (unwrappable line).
       *
       * @param string  $text      Single line of plain text
       * @param boolean $is_flowed True if the line is allowed to wrap
       *
       * @return string HTML for the line
       */
      protected function _convert_line($text, $is_flowed)
      {
          static $table;

          if (empty($table)) {
              $table = get_html_translation_table(HTML_SPECIALCHARS);
              unset($table['?']);
          }

          // skip signature separator
          if ($text === '-- ') {
              return '--' . $this->config['space'];
          }

          // replace HTML special characters
          $text = strtr($text, $table);

          // replace some whitespace characters
          $text = str_replace(array("\r", "\t"), array('', ' '), $text);

          $nbsp = $this->config['space'];

          // replace spaces with non-breaking spaces
          if ($is_flowed) {
              $pos  = 0;
              // $copy grows as multi-byte replacements are inserted;
              // $diff maps a position in $text to the same place in $copy
              $diff = 0;
              $len  = strlen($nbsp);
              $copy = $text;

              while (($pos = strpos($text, ' ', $pos)) !== false) {
                  if ($pos == 0 || $text[$pos-1] == ' ') {
                      $copy = substr_replace($copy, $nbsp, $pos + $diff, 1);
                      $diff += $len - 1;
                  }

                  $pos++;
              }

              $text = $copy;
          }
          // make the whole line non-breakable if needed
          else if ($text !== '' && preg_match('/[^a-zA-Z0-9_]/', $text)) {
              // use non-breakable spaces to correctly display
              // trailing/leading spaces and multi-space inside
              $text = str_replace(' ', $nbsp, $text);
              // wrap in nobr element, so it's not wrapped on e.g. - or /
              $text = $this->config['nobr_start'] . $text . $this->config['nobr_end'];
          }

          return $text;
      }
  }