You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rcube_mime_decode.php 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. <?php
  2. /**
  3. +-----------------------------------------------------------------------+
  4. | This file is part of the Roundcube Webmail client |
  5. | Copyright (C) 2005-2015, The Roundcube Dev Team |
  6. | Copyright (C) 2011-2015, Kolab Systems AG |
  7. | |
  8. | Licensed under the GNU General Public License version 3 or |
  9. | any later version with exceptions for skins & plugins. |
  10. | See the README file for a full license statement. |
  11. | |
  12. | PURPOSE: |
  13. | MIME message parsing utilities derived from Mail_mimeDecode |
  14. +-----------------------------------------------------------------------+
  15. | Author: Thomas Bruederli <roundcube@gmail.com> |
  16. | Author: Aleksander Machniak <alec@alec.pl> |
  17. | Author: Richard Heyes <richard@phpguru.org> |
  18. +-----------------------------------------------------------------------+
  19. */
  20. /**
  21. * Class for parsing MIME messages
  22. *
  23. * @package Framework
  24. * @subpackage Storage
  25. * @author Aleksander Machniak <alec@alec.pl>
  26. */
  27. class rcube_mime_decode
  28. {
  29. /**
  30. * Class configuration parameters.
  31. *
  32. * @var array
  33. */
  34. protected $params = array(
  35. 'include_bodies' => true,
  36. 'decode_bodies' => true,
  37. 'decode_headers' => true,
  38. 'crlf' => "\r\n",
  39. 'default_charset' => RCUBE_CHARSET,
  40. );
  41. /**
  42. * Constructor.
  43. *
  44. * Sets up the object, initialise the variables, and splits and
  45. * stores the header and body of the input.
  46. *
  47. * @param array $params An array of various parameters that determine
  48. * various things:
  49. * include_bodies - Whether to include the body in the returned
  50. * object.
  51. * decode_bodies - Whether to decode the bodies
  52. * of the parts. (Transfer encoding)
  53. * decode_headers - Whether to decode headers
  54. * crlf - CRLF type to use (CRLF/LF/CR)
  55. */
  56. public function __construct($params = array())
  57. {
  58. if (!empty($params)) {
  59. $this->params = array_merge($this->params, (array) $params);
  60. }
  61. }
  62. /**
  63. * Performs the decoding process.
  64. *
  65. * @param string $input The input to decode
  66. * @param bool $convert Convert result to rcube_message_part structure
  67. *
  68. * @return object|bool Decoded results or False on failure
  69. */
  70. public function decode($input, $convert = true)
  71. {
  72. list($header, $body) = $this->splitBodyHeader($input);
  73. $struct = $this->do_decode($header, $body);
  74. if ($struct && $convert) {
  75. $struct = $this->structure_part($struct);
  76. }
  77. return $struct;
  78. }
  79. /**
  80. * Performs the decoding. Decodes the body string passed to it
  81. * If it finds certain content-types it will call itself in a
  82. * recursive fashion
  83. *
  84. * @param string $headers Header section
  85. * @param string $body Body section
  86. * @param string $default_ctype Default content type
  87. *
  88. * @return object|bool Decoded results or False on error
  89. */
  90. protected function do_decode($headers, $body, $default_ctype = 'text/plain')
  91. {
  92. $return = new stdClass;
  93. $headers = $this->parseHeaders($headers);
  94. while (list($key, $value) = each($headers)) {
  95. $header_name = strtolower($value['name']);
  96. if (isset($return->headers[$header_name]) && !is_array($return->headers[$header_name])) {
  97. $return->headers[$header_name] = array($return->headers[$header_name]);
  98. $return->headers[$header_name][] = $value['value'];
  99. }
  100. else if (isset($return->headers[$header_name])) {
  101. $return->headers[$header_name][] = $value['value'];
  102. }
  103. else {
  104. $return->headers[$header_name] = $value['value'];
  105. }
  106. switch ($header_name) {
  107. case 'content-type':
  108. $content_type = $this->parseHeaderValue($value['value']);
  109. if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
  110. $return->ctype_primary = $regs[1];
  111. $return->ctype_secondary = $regs[2];
  112. }
  113. if (isset($content_type['other'])) {
  114. while (list($p_name, $p_value) = each($content_type['other'])) {
  115. $return->ctype_parameters[$p_name] = $p_value;
  116. }
  117. }
  118. break;
  119. case 'content-disposition';
  120. $content_disposition = $this->parseHeaderValue($value['value']);
  121. $return->disposition = $content_disposition['value'];
  122. if (isset($content_disposition['other'])) {
  123. while (list($p_name, $p_value) = each($content_disposition['other'])) {
  124. $return->d_parameters[$p_name] = $p_value;
  125. }
  126. }
  127. break;
  128. case 'content-transfer-encoding':
  129. $content_transfer_encoding = $this->parseHeaderValue($value['value']);
  130. break;
  131. }
  132. }
  133. if (isset($content_type)) {
  134. $ctype = strtolower($content_type['value']);
  135. switch ($ctype) {
  136. case 'text/plain':
  137. $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
  138. if ($this->params['include_bodies']) {
  139. $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body;
  140. }
  141. break;
  142. case 'text/html':
  143. $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
  144. if ($this->params['include_bodies']) {
  145. $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body;
  146. }
  147. break;
  148. case 'multipart/digest':
  149. case 'multipart/alternative':
  150. case 'multipart/related':
  151. case 'multipart/mixed':
  152. case 'multipart/signed':
  153. case 'multipart/encrypted':
  154. if (!isset($content_type['other']['boundary'])) {
  155. return false;
  156. }
  157. $default_ctype = $ctype === 'multipart/digest' ? 'message/rfc822' : 'text/plain';
  158. $parts = $this->boundarySplit($body, $content_type['other']['boundary']);
  159. for ($i = 0; $i < count($parts); $i++) {
  160. list($part_header, $part_body) = $this->splitBodyHeader($parts[$i]);
  161. $return->parts[] = $this->do_decode($part_header, $part_body, $default_ctype);
  162. }
  163. break;
  164. case 'message/rfc822':
  165. $obj = new rcube_mime_decode($this->params);
  166. $return->parts[] = $obj->decode($body, false);
  167. unset($obj);
  168. break;
  169. default:
  170. if ($this->params['include_bodies']) {
  171. $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $content_transfer_encoding['value']) : $body;
  172. }
  173. break;
  174. }
  175. }
  176. else {
  177. $ctype = explode('/', $default_ctype);
  178. $return->ctype_primary = $ctype[0];
  179. $return->ctype_secondary = $ctype[1];
  180. if ($this->params['include_bodies']) {
  181. $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body) : $body;
  182. }
  183. }
  184. return $return;
  185. }
  186. /**
  187. * Given a string containing a header and body
  188. * section, this function will split them (at the first
  189. * blank line) and return them.
  190. *
  191. * @param string $input Input to split apart
  192. *
  193. * @return array Contains header and body section
  194. */
  195. protected function splitBodyHeader($input)
  196. {
  197. $pos = strpos($input, $this->params['crlf'] . $this->params['crlf']);
  198. if ($pos === false) {
  199. return false;
  200. }
  201. $crlf_len = strlen($this->params['crlf']);
  202. $header = substr($input, 0, $pos);
  203. $body = substr($input, $pos + 2 * $crlf_len);
  204. if (substr_compare($body, $this->params['crlf'], -$crlf_len) === 0) {
  205. $body = substr($body, 0, -$crlf_len);
  206. }
  207. return array($header, $body);
  208. }
  209. /**
  210. * Parse headers given in $input and return as assoc array.
  211. *
  212. * @param string $input Headers to parse
  213. *
  214. * @return array Contains parsed headers
  215. */
  216. protected function parseHeaders($input)
  217. {
  218. if ($input !== '') {
  219. // Unfold the input
  220. $input = preg_replace('/' . $this->params['crlf'] . "(\t| )/", ' ', $input);
  221. $headers = explode($this->params['crlf'], trim($input));
  222. foreach ($headers as $value) {
  223. $hdr_name = substr($value, 0, $pos = strpos($value, ':'));
  224. $hdr_value = substr($value, $pos+1);
  225. if ($hdr_value[0] == ' ') {
  226. $hdr_value = substr($hdr_value, 1);
  227. }
  228. $return[] = array(
  229. 'name' => $hdr_name,
  230. 'value' => $this->params['decode_headers'] ? $this->decodeHeader($hdr_value) : $hdr_value,
  231. );
  232. }
  233. }
  234. else {
  235. $return = array();
  236. }
  237. return $return;
  238. }
  239. /**
  240. * Function to parse a header value, extract first part, and any secondary
  241. * parts (after ;) This function is not as robust as it could be.
  242. * Eg. header comments in the wrong place will probably break it.
  243. *
  244. * @param string $input Header value to parse
  245. *
  246. * @return array Contains parsed result
  247. */
  248. protected function parseHeaderValue($input)
  249. {
  250. $parts = preg_split('/;\s*/', $input);
  251. if (!empty($parts)) {
  252. $return['value'] = trim($parts[0]);
  253. for ($n = 1; $n < count($parts); $n++) {
  254. if (preg_match_all('/(([[:alnum:]]+)="?([^"]*)"?\s?;?)+/i', $parts[$n], $matches)) {
  255. for ($i = 0; $i < count($matches[2]); $i++) {
  256. $return['other'][strtolower($matches[2][$i])] = $matches[3][$i];
  257. }
  258. }
  259. }
  260. }
  261. else {
  262. $return['value'] = trim($input);
  263. }
  264. return $return;
  265. }
  266. /**
  267. * This function splits the input based on the given boundary
  268. *
  269. * @param string $input Input to parse
  270. * @param string $boundary Boundary
  271. *
  272. * @return array Contains array of resulting mime parts
  273. */
  274. protected function boundarySplit($input, $boundary)
  275. {
  276. $tmp = explode('--' . $boundary, $input);
  277. for ($i = 1; $i < count($tmp)-1; $i++) {
  278. $parts[] = $tmp[$i];
  279. }
  280. return $parts;
  281. }
  282. /**
  283. * Given a header, this function will decode it according to RFC2047.
  284. * Probably not *exactly* conformant, but it does pass all the given
  285. * examples (in RFC2047).
  286. *
  287. * @param string $input Input header value to decode
  288. *
  289. * @return string Decoded header value
  290. */
  291. protected function decodeHeader($input)
  292. {
  293. return rcube_mime::decode_mime_string($input, $this->params['default_charset']);
  294. }
  295. /**
  296. * Recursive method to convert a rcube_mime_decode structure
  297. * into a rcube_message_part object.
  298. *
  299. * @param object $part A message part struct
  300. * @param int $count Part count
  301. * @param string $parent Parent MIME ID
  302. *
  303. * @return object rcube_message_part
  304. * @see self::decode()
  305. */
  306. protected function structure_part($part, $count = 0, $parent = '')
  307. {
  308. $struct = new rcube_message_part;
  309. $struct->mime_id = $part->mime_id ?: (empty($parent) ? (string)$count : "$parent.$count");
  310. $struct->headers = $part->headers;
  311. $struct->mimetype = $part->ctype_primary . '/' . $part->ctype_secondary;
  312. $struct->ctype_primary = $part->ctype_primary;
  313. $struct->ctype_secondary = $part->ctype_secondary;
  314. $struct->ctype_parameters = $part->ctype_parameters;
  315. if ($part->headers['content-transfer-encoding']) {
  316. $struct->encoding = $part->headers['content-transfer-encoding'];
  317. }
  318. if ($part->ctype_parameters['charset']) {
  319. $struct->charset = $part->ctype_parameters['charset'];
  320. }
  321. $part_charset = $struct->charset ?: $this->params['default_charset'];
  322. // determine filename
  323. if (($filename = $part->d_parameters['filename']) || ($filename = $part->ctype_parameters['name'])) {
  324. if (!$this->params['decode_headers']) {
  325. $filename = $this->decodeHeader($filename);
  326. }
  327. $struct->filename = $filename;
  328. }
  329. $struct->body = $part->body;
  330. $struct->size = strlen($part->body);
  331. $struct->disposition = $part->disposition;
  332. $count = 0;
  333. foreach ((array)$part->parts as $child_part) {
  334. $struct->parts[] = $this->structure_part($child_part, ++$count, $struct->mime_id);
  335. }
  336. return $struct;
  337. }
  338. }