123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399 |
- <?php
-
/**
 +-----------------------------------------------------------------------+
 | This file is part of the Roundcube Webmail client                     |
 | Copyright (C) 2005-2015, The Roundcube Dev Team                       |
 | Copyright (C) 2011-2015, Kolab Systems AG                             |
 |                                                                       |
 | Licensed under the GNU General Public License version 3 or            |
 | any later version with exceptions for skins & plugins.                |
 | See the README file for a full license statement.                     |
 |                                                                       |
 | PURPOSE:                                                              |
 |   MIME message parsing utilities derived from Mail_mimeDecode         |
 +-----------------------------------------------------------------------+
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 | Author: Aleksander Machniak <alec@alec.pl>                            |
 | Author: Richard Heyes <richard@phpguru.org>                           |
 +-----------------------------------------------------------------------+
*/
-
/**
 * Class for parsing MIME messages
 *
 * Splits a raw message into a header section and a body, parses the
 * headers and recursively decodes a fixed set of multipart/* types and
 * message/rfc822 into a tree of parts.
 *
 * @package    Framework
 * @subpackage Storage
 * @author     Aleksander Machniak <alec@alec.pl>
 */
class rcube_mime_decode
{
    /**
     * Class configuration parameters.
     *
     * @var array
     */
    protected $params = array(
        'include_bodies'  => true,
        'decode_bodies'   => true,
        'decode_headers'  => true,
        'crlf'            => "\r\n",
        'default_charset' => RCUBE_CHARSET,
    );


    /**
     * Constructor.
     *
     * Merges the given parameters over the built-in defaults.
     *
     * @param array $params An array of various parameters that determine
     *                      various things:
     *              include_bodies  - Whether to include the body in the returned
     *                                object.
     *              decode_bodies   - Whether to decode the bodies
     *                                of the parts. (Transfer encoding)
     *              decode_headers  - Whether to decode headers
     *              crlf            - CRLF type to use (CRLF/LF/CR)
     *              default_charset - Charset passed to the header decoder
     */
    public function __construct($params = array())
    {
        if (!empty($params)) {
            $this->params = array_merge($this->params, (array) $params);
        }
    }

    /**
     * Performs the decoding process.
     *
     * @param string $input   The input to decode
     * @param bool   $convert Convert result to rcube_message_part structure
     *
     * @return object|bool Decoded results or False on failure
     */
    public function decode($input, $convert = true)
    {
        list($header, $body) = $this->split_message($input);

        $struct = $this->do_decode($header, $body);

        if ($struct && $convert) {
            $struct = $this->structure_part($struct);
        }

        return $struct;
    }

    /**
     * Performs the decoding. Decodes the body string passed to it.
     * If it finds certain content-types it will call itself in a
     * recursive fashion.
     *
     * Note: a nested part that fails to decode is stored as False
     * in the parent's parts list.
     *
     * @param string $headers       Header section
     * @param string $body          Body section
     * @param string $default_ctype Default content type
     *
     * @return object|bool Decoded results or False on error
     *                     (multipart without a boundary parameter)
     */
    protected function do_decode($headers, $body, $default_ctype = 'text/plain')
    {
        $return                    = new stdClass;
        $content_type              = null;
        $content_transfer_encoding = null;

        foreach ((array) $this->parseHeaders($headers) as $header) {
            $header_name  = strtolower($header['name']);
            $header_value = $header['value'];

            // A repeated header is collected into a list of values
            if (!isset($return->headers[$header_name])) {
                $return->headers[$header_name] = $header_value;
            }
            else {
                if (!is_array($return->headers[$header_name])) {
                    $return->headers[$header_name] = array($return->headers[$header_name]);
                }

                $return->headers[$header_name][] = $header_value;
            }

            switch ($header_name) {
            case 'content-type':
                $content_type = $this->parseHeaderValue($header_value);

                if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
                    $return->ctype_primary   = $regs[1];
                    $return->ctype_secondary = $regs[2];
                }

                if (isset($content_type['other'])) {
                    foreach ($content_type['other'] as $p_name => $p_value) {
                        $return->ctype_parameters[$p_name] = $p_value;
                    }
                }

                break;

            case 'content-disposition':
                $content_disposition = $this->parseHeaderValue($header_value);
                $return->disposition = $content_disposition['value'];

                if (isset($content_disposition['other'])) {
                    foreach ($content_disposition['other'] as $p_name => $p_value) {
                        $return->d_parameters[$p_name] = $p_value;
                    }
                }

                break;

            case 'content-transfer-encoding':
                $content_transfer_encoding = $this->parseHeaderValue($header_value);
                break;
            }
        }

        // Resolved once so that every leaf part (with or without a Content-Type
        // header) honours the declared transfer encoding and falls back to 7bit
        $encoding = $content_transfer_encoding !== null ? $content_transfer_encoding['value'] : '7bit';
        $is_leaf  = true;

        if ($content_type === null) {
            $ctype = explode('/', $default_ctype);
            $return->ctype_primary   = $ctype[0];
            $return->ctype_secondary = $ctype[1];
        }
        else {
            $ctype = strtolower($content_type['value']);

            switch ($ctype) {
            case 'multipart/digest':
            case 'multipart/alternative':
            case 'multipart/related':
            case 'multipart/mixed':
            case 'multipart/signed':
            case 'multipart/encrypted':
                if (!isset($content_type['other']['boundary'])) {
                    return false;
                }

                $is_leaf    = false;
                $part_ctype = $ctype === 'multipart/digest' ? 'message/rfc822' : 'text/plain';
                $raw_parts  = (array) $this->boundarySplit($body, $content_type['other']['boundary']);

                foreach ($raw_parts as $raw_part) {
                    list($part_header, $part_body) = $this->split_message($raw_part);
                    $return->parts[] = $this->do_decode($part_header, $part_body, $part_ctype);
                }

                break;

            case 'message/rfc822':
                $is_leaf = false;
                $obj     = new rcube_mime_decode($this->params);
                $return->parts[] = $obj->decode($body, false);
                break;
            }
        }

        // Any other type (text/plain, text/html, attachments, ...) is a leaf
        if ($is_leaf && $this->params['include_bodies']) {
            $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body;
        }

        return $return;
    }

    /**
     * Splits the input with splitBodyHeader() and falls back to treating
     * the whole input as a header section with an empty body when no
     * blank line is found (the body of a message is optional).
     *
     * @param string $input Input to split apart
     *
     * @return array Contains header and body section
     */
    private function split_message($input)
    {
        $split = $this->splitBodyHeader($input);

        if (!is_array($split)) {
            return array((string) $input, '');
        }

        return $split;
    }

    /**
     * Given a string containing a header and body
     * section, this function will split them (at the first
     * blank line) and return them. One trailing line break
     * is removed from the body.
     *
     * @param string $input Input to split apart
     *
     * @return array|bool Contains header and body section,
     *                    False if there is no blank line in the input
     */
    protected function splitBodyHeader($input)
    {
        $input = (string) $input;
        $crlf  = $this->params['crlf'];
        $pos   = strpos($input, $crlf . $crlf);

        if ($pos === false) {
            return false;
        }

        $crlf_len = strlen($crlf);
        $header   = substr($input, 0, $pos);
        $body     = (string) substr($input, $pos + 2 * $crlf_len);

        // Plain comparison instead of substr_compare(), which may
        // emit a warning for an empty body on older PHP versions
        if (substr($body, -$crlf_len) === $crlf) {
            $body = (string) substr($body, 0, -$crlf_len);
        }

        return array($header, $body);
    }

    /**
     * Parse headers given in $input and return as a list of
     * name/value pairs. Lines that do not contain a colon are skipped.
     *
     * @param string $input Headers to parse
     *
     * @return array List of arrays with 'name' and 'value' keys
     */
    protected function parseHeaders($input)
    {
        $return = array();
        $input  = (string) $input;

        if ($input === '') {
            return $return;
        }

        $crlf = $this->params['crlf'];

        // Unfold the input
        $input = preg_replace('/' . preg_quote($crlf, '/') . "(\t| )/", ' ', $input);

        foreach (explode($crlf, trim($input)) as $line) {
            $pos = strpos($line, ':');

            // Not a "name: value" line
            if ($pos === false) {
                continue;
            }

            $hdr_name  = substr($line, 0, $pos);
            $hdr_value = (string) substr($line, $pos + 1);

            // Drop the single space that conventionally follows the colon
            if (strncmp($hdr_value, ' ', 1) === 0) {
                $hdr_value = (string) substr($hdr_value, 1);
            }

            $return[] = array(
                'name'  => $hdr_name,
                'value' => $this->params['decode_headers'] ? $this->decodeHeader($hdr_value) : $hdr_value,
            );
        }

        return $return;
    }

    /**
     * Function to parse a header value, extract first part, and any secondary
     * parts (after ;) This function is not as robust as it could be.
     * Eg. header comments in the wrong place will probably break it.
     *
     * @param string $input Header value to parse
     *
     * @return array Contains parsed result: 'value' with the first part and,
     *               if any parameters were found, 'other' with the parameters
     *               indexed by lower-cased name
     */
    protected function parseHeaderValue($input)
    {
        $return = array();
        $parts  = preg_split('/;\s*/', $input);

        if (empty($parts)) {
            $return['value'] = trim($input);

            return $return;
        }

        $return['value'] = trim($parts[0]);

        foreach (array_slice($parts, 1) as $param) {
            if (preg_match_all('/(([[:alnum:]]+)="?([^"]*)"?\s?;?)+/i', $param, $matches)) {
                foreach ($matches[2] as $i => $name) {
                    $return['other'][strtolower($name)] = $matches[3][$i];
                }
            }
        }

        return $return;
    }

    /**
     * This function splits the input based on the given boundary.
     *
     * The text before the first delimiter and the text after the last
     * delimiter are discarded.
     *
     * @param string $input    Input to parse
     * @param string $boundary Boundary
     *
     * @return array Contains array of resulting mime parts (may be empty)
     */
    protected function boundarySplit($input, $boundary)
    {
        $segments = explode('--' . $boundary, (string) $input);

        return array_slice($segments, 1, -1);
    }

    /**
     * Given a header, this function will decode it according to RFC2047.
     * Probably not *exactly* conformant, but it does pass all the given
     * examples (in RFC2047).
     *
     * @param string $input Input header value to decode
     *
     * @return string Decoded header value
     */
    protected function decodeHeader($input)
    {
        return rcube_mime::decode_mime_string($input, $this->params['default_charset']);
    }

    /**
     * Recursive method to convert a rcube_mime_decode structure
     * into a rcube_message_part object.
     *
     * @param object $part   A message part struct
     * @param int    $count  Part count
     * @param string $parent Parent MIME ID
     *
     * @return object rcube_message_part
     * @see self::decode()
     */
    protected function structure_part($part, $count = 0, $parent = '')
    {
        $headers          = self::prop($part, 'headers');
        $ctype_parameters = self::prop($part, 'ctype_parameters');
        $d_parameters     = self::prop($part, 'd_parameters');
        $ctype_primary    = self::prop($part, 'ctype_primary');
        $ctype_secondary  = self::prop($part, 'ctype_secondary');
        $mime_id          = self::prop($part, 'mime_id');
        $body             = self::prop($part, 'body');

        $struct = new rcube_message_part;

        if (!empty($mime_id)) {
            $struct->mime_id = $mime_id;
        }
        else {
            $struct->mime_id = empty($parent) ? (string) $count : "$parent.$count";
        }

        $struct->headers          = $headers;
        $struct->mimetype         = $ctype_primary . '/' . $ctype_secondary;
        $struct->ctype_primary    = $ctype_primary;
        $struct->ctype_secondary  = $ctype_secondary;
        $struct->ctype_parameters = $ctype_parameters;

        if (!empty($headers['content-transfer-encoding'])) {
            $struct->encoding = $headers['content-transfer-encoding'];
        }

        if (!empty($ctype_parameters['charset'])) {
            $struct->charset = $ctype_parameters['charset'];
        }

        // determine filename (Content-Disposition wins over Content-Type)
        $filename = null;

        if (!empty($d_parameters['filename'])) {
            $filename = $d_parameters['filename'];
        }
        else if (!empty($ctype_parameters['name'])) {
            $filename = $ctype_parameters['name'];
        }

        if ($filename !== null) {
            // Headers were left undecoded while parsing, so decode the name now
            if (!$this->params['decode_headers']) {
                $filename = $this->decodeHeader($filename);
            }

            $struct->filename = $filename;
        }

        $struct->body        = $body;
        $struct->size        = strlen((string) $body);
        $struct->disposition = self::prop($part, 'disposition');

        $count = 0;
        foreach ((array) self::prop($part, 'parts') as $child_part) {
            $struct->parts[] = $this->structure_part($child_part, ++$count, $struct->mime_id);
        }

        return $struct;
    }

    /**
     * Reads a property that may not be set on a decoded part.
     *
     * @param mixed  $part Decoded part (or False for a part that failed to decode)
     * @param string $name Property name
     *
     * @return mixed Property value, Null if it is not set
     */
    private static function prop($part, $name)
    {
        return isset($part->$name) ? $part->$name : null;
    }
}
|