You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rcube_spellchecker.php 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. <?php
  2. /**
  3. +-----------------------------------------------------------------------+
  4. | This file is part of the Roundcube Webmail client |
  5. | Copyright (C) 2011-2013, Kolab Systems AG |
  6. | Copyright (C) 2008-2013, The Roundcube Dev Team |
  7. | |
  8. | Licensed under the GNU General Public License version 3 or |
  9. | any later version with exceptions for skins & plugins. |
  10. | See the README file for a full license statement. |
  11. | |
  12. | PURPOSE: |
  13. | Spellchecking using different backends |
  14. +-----------------------------------------------------------------------+
  15. | Author: Aleksander Machniak <machniak@kolabsys.com> |
  16. | Author: Thomas Bruederli <roundcube@gmail.com> |
  17. +-----------------------------------------------------------------------+
  18. */
  /**
   * Helper class for spellchecking with Googiespell and PSpell support.
   *
   * The checking itself is delegated to a backend object whose class name is
   * 'rcube_spellcheck_' followed by the configured 'spellcheck_engine' value.
   * This class keeps the result of the last check, converts it to the
   * supported output formats and maintains the exceptions dictionary stored
   * in the 'dictionary' database table.
   *
   * @package    Framework
   * @subpackage Utils
   */
  class rcube_spellchecker
  {
      /** @var array|mixed Result of the last backend check() call */
      private $matches = array();

      /** @var string Engine name from the 'spellcheck_engine' option */
      private $engine;

      /** @var object|null Backend instance, unset when the engine class does not exist */
      private $backend;

      /** @var string Language code used for checking and for dictionary rows */
      private $lang;

      /** @var object Application instance returned by rcube::get_instance() */
      private $rc;

      /** @var string|null Error message set by the constructor */
      private $error;

      /** @var array Spellcheck options read from configuration */
      private $options = array();

      /** @var array|null Exceptions dictionary; null until load_dict() has run */
      private $dict;

      /** @var bool True when a dictionary row is known to exist in the database */
      private $have_dict;

      /** @var string|null Plain-text content passed to the last check() call */
      private $content;


      /**
       * Constructor
       *
       * Reads the spellcheck options from configuration and instantiates the
       * backend. When the backend class does not exist an error message is
       * stored (see error()) and all checking methods return empty results.
       *
       * @param string $lang Language code; an empty value falls back to 'en'
       */
      public function __construct($lang = 'en')
      {
          $this->rc     = rcube::get_instance();
          $this->engine = $this->rc->config->get('spellcheck_engine', 'googie');
          $this->lang   = $lang ?: 'en';

          $this->options = array(
              'ignore_syms' => $this->rc->config->get('spellcheck_ignore_syms'),
              'ignore_nums' => $this->rc->config->get('spellcheck_ignore_nums'),
              'ignore_caps' => $this->rc->config->get('spellcheck_ignore_caps'),
              'dictionary'  => $this->rc->config->get('spellcheck_dictionary'),
          );

          $cls = 'rcube_spellcheck_' . $this->engine;
          if (class_exists($cls)) {
              $this->backend = new $cls($this, $this->lang);
              $this->backend->options = $this->options;
          }
          else {
              $this->error = "Unknown spellcheck engine '$this->engine'";
          }
      }

      /**
       * Return a list of supported languages
       *
       * A configured 'spellcheck_languages' array without a (non-empty) element
       * at index 0 is treated as a ready code => label map and returned as is.
       * Otherwise the configured codes, or the codes reported by the backend,
       * are labelled using the localization index.
       *
       * @return array Language labels indexed by language code, sorted by label
       */
      public function languages()
      {
          // trust configuration
          $configured = $this->rc->config->get('spellcheck_languages');
          $langs      = array();

          if (!empty($configured) && is_array($configured) && empty($configured[0])) {
              return $configured;
          }
          else if (!empty($configured)) {
              $langs = (array) $configured;
          }
          else if ($this->backend) {
              $langs = (array) $this->backend->languages();
          }

          // load index; the included file is presumably what defines
          // $rcube_languages and $rcube_language_aliases. Both are pre-set
          // so that a missing file does not leave them undefined.
          $rcube_languages        = array();
          $rcube_language_aliases = array();
          @include(RCUBE_LOCALIZATION_DIR . 'index.inc');

          // add correct labels
          $languages = array();
          foreach ($langs as $lang) {
              $langc = strtolower(substr($lang, 0, 2));

              // without a known alias build one of the form 'xx_XX'
              if (!empty($rcube_language_aliases[$langc])) {
                  $alias = $rcube_language_aliases[$langc];
              }
              else {
                  $alias = $langc . '_' . strtoupper($langc);
              }

              if (!empty($rcube_languages[$lang])) {
                  $languages[$lang] = $rcube_languages[$lang];
              }
              else if (!empty($rcube_languages[$alias])) {
                  $languages[$lang] = $rcube_languages[$alias];
              }
              else {
                  $languages[$lang] = ucfirst($lang);
              }
          }

          // remove possible duplicates (#1489395)
          $languages = array_unique($languages);

          asort($languages);

          return $languages;
      }

      /**
       * Set content and check spelling
       *
       * @param string $text    Text content for spellchecking
       * @param bool   $is_html Enables HTML-to-Text conversion
       *
       * @return bool True when no misspelling found, otherwise false
       */
      public function check($text, $is_html = false)
      {
          // convert to plain text
          if ($is_html) {
              $this->content = $this->html2text($text);
          }
          else {
              $this->content = $text;
          }

          if ($this->backend) {
              $this->matches = $this->backend->check($this->content);
          }

          return $this->found() == 0;
      }

      /**
       * Number of misspellings found (after check)
       *
       * @return int Number of misspellings
       */
      public function found()
      {
          // the backend result is cast like in get()/get_xml(), so that
          // a non-array result counts as "nothing found"
          return count((array) $this->matches);
      }

      /**
       * Returns suggestions for the specified word
       *
       * @param string $word The word
       *
       * @return array Suggestions list (empty when no backend is available)
       */
      public function get_suggestions($word)
      {
          if ($this->backend) {
              return $this->backend->get_suggestions($word);
          }

          return array();
      }

      /**
       * Returns misspelled words
       *
       * @param string $text    The content for spellchecking. It is passed to
       *                        the backend as is; presumably the backend falls
       *                        back to the content used for check() when it is
       *                        empty - confirm against the backend in use.
       * @param bool   $is_html Enables HTML-to-Text conversion of $text
       *
       * @return array List of misspelled words (empty when no backend is available)
       */
      public function get_words($text = null, $is_html = false)
      {
          if ($is_html) {
              $text = $this->html2text($text);
          }

          if ($this->backend) {
              return $this->backend->get_words($text);
          }

          return array();
      }

      /**
       * Returns checking result in XML (Googiespell) format
       *
       * Every match becomes a <c> element with element 1 of the match as 'o'
       * and element 2 as 'l' attribute, and the tab-separated suggestions
       * (element 4) as its content.
       *
       * NOTE(review): values are written without XML escaping; verify that
       * every backend returns XML-safe data.
       *
       * @return string XML content
       */
      public function get_xml()
      {
          // send output
          $out = '<?xml version="1.0" encoding="'.RCUBE_CHARSET.'"?><spellresult charschecked="'.mb_strlen((string) $this->content).'">';

          foreach ((array) $this->matches as $item) {
              $out .= '<c o="'.$item[1].'" l="'.$item[2].'">';
              $out .= is_array($item[4]) ? implode("\t", $item[4]) : $item[4];
              $out .= '</c>';
          }

          $out .= '</spellresult>';

          return $out;
      }

      /**
       * Returns checking result (misspelled words with suggestions)
       *
       * @return array Spellchecking result. An array indexed by word.
       */
      public function get()
      {
          $result = array();

          foreach ((array) $this->matches as $item) {
              if ($this->engine == 'pspell') {
                  $word = $item[0];
              }
              else {
                  // other engines: cut the word out of the checked content
                  // using element 1 as start and element 2 as length
                  $word = mb_substr($this->content, $item[1], $item[2], RCUBE_CHARSET);
              }

              // suggestions are either a list or a tab-separated string
              if (is_array($item[4])) {
                  $suggestions = $item[4];
              }
              else if (empty($item[4])) {
                  $suggestions = array();
              }
              else {
                  $suggestions = explode("\t", $item[4]);
              }

              $result[$word] = $suggestions;
          }

          return $result;
      }

      /**
       * Returns error message
       *
       * @return string|bool Error set by the constructor, otherwise the
       *                     backend's error; false when no backend exists
       */
      public function error()
      {
          return $this->error ?: ($this->backend ? $this->backend->error() : false);
      }

      /**
       * Converts HTML content to plain text using rcube_html2text
       *
       * @param string $text HTML content
       *
       * @return string Plain text
       */
      private function html2text($text)
      {
          $h2t = new rcube_html2text($text, false, false, 0);
          return $h2t->get_text();
      }

      /**
       * Check if the specified word is an exception according to
       * spellcheck options.
       *
       * @param string $word The word
       *
       * @return bool True if the word is an exception, False otherwise
       */
      public function is_exception($word)
      {
          // Contain only symbols (e.g. "+9,0", "2:2")
          if (!$word || preg_match('/^[0-9@#$%^&_+~*<>=:;?!,.-]+$/', $word)) {
              return true;
          }

          // Contain symbols (e.g. "g@@gle"), all symbols excluding separators
          if (!empty($this->options['ignore_syms']) && preg_match('/[@#$%^&_+~*=-]/', $word)) {
              return true;
          }

          // Contain numbers (e.g. "g00g13")
          if (!empty($this->options['ignore_nums']) && preg_match('/[0-9]/', $word)) {
              return true;
          }

          // Blocked caps (e.g. "GOOGLE"). Strict comparison is required here:
          // loosely compared numeric strings such as "1e3" and "1E3" are equal
          if (!empty($this->options['ignore_caps']) && $word === mb_strtoupper($word)) {
              return true;
          }

          // Use exceptions from dictionary
          if (!empty($this->options['dictionary'])) {
              $this->load_dict();

              // @TODO: should dictionary be case-insensitive?
              // strict lookup, so that e.g. "1e1" does not match an entry "10"
              if (!empty($this->dict) && in_array($word, $this->dict, true)) {
                  return true;
              }
          }

          return false;
      }

      /**
       * Add a word to dictionary
       *
       * The argument is split on spaces and every part is added as
       * a separate word. The dictionary is saved only when at least one
       * word has been accepted.
       *
       * @param string $word The word (or space-separated words) to add
       */
      public function add_word($word)
      {
          $this->load_dict();

          $valid = false;

          foreach (explode(' ', $word) as $item) {
              // sanity check: skip empty parts (repeated spaces) and overlong words
              if ($item !== '' && strlen($item) < 512) {
                  $this->dict[] = $item;
                  $valid = true;
              }
          }

          if ($valid) {
              $this->dict = array_unique($this->dict);
              $this->update_dict();
          }
      }

      /**
       * Remove a word from dictionary
       *
       * Nothing is saved when the word is not in the dictionary.
       *
       * @param string $word The word to remove
       */
      public function remove_word($word)
      {
          $this->load_dict();

          if (($key = array_search($word, $this->dict, true)) !== false) {
              unset($this->dict[$key]);
              $this->update_dict();
          }
      }

      /**
       * Update dictionary row in DB
       *
       * Runs the 'spell_dictionary_save' hook first; a hook result with
       * non-empty 'abort' skips the database write. An existing row is updated,
       * or deleted when the dictionary became empty; otherwise a new row is
       * inserted for a non-empty dictionary.
       */
      private function update_dict()
      {
          // 'shared' dictionary is stored with NULL user ID
          $userid = null;
          if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
              $userid = $this->rc->get_user_id();
          }

          $plugin = $this->rc->plugins->exec_hook('spell_dictionary_save', array(
              'userid' => $userid, 'language' => $this->lang, 'dictionary' => $this->dict));

          if (!empty($plugin['abort'])) {
              return;
          }

          if ($this->have_dict) {
              if (!empty($this->dict)) {
                  $this->rc->db->query(
                      "UPDATE " . $this->rc->db->table_name('dictionary', true)
                      ." SET `data` = ?"
                      ." WHERE `user_id` " . ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL")
                          ." AND `language` = ?",
                      implode(' ', $plugin['dictionary']), $plugin['language']);
              }
              // don't store empty dict
              else {
                  $this->rc->db->query(
                      "DELETE FROM " . $this->rc->db->table_name('dictionary', true)
                      ." WHERE `user_id` " . ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL")
                          ." AND `language` = ?",
                      $plugin['language']);

                  // the row is gone, the next save has to insert it again
                  $this->have_dict = false;
              }
          }
          else if (!empty($this->dict)) {
              $this->rc->db->query(
                  "INSERT INTO " . $this->rc->db->table_name('dictionary', true)
                  ." (`user_id`, `language`, `data`) VALUES (?, ?, ?)",
                  $plugin['userid'], $plugin['language'], implode(' ', $plugin['dictionary']));

              // assuming the insert succeeded the row exists now, so that
              // further saves on this instance update it instead of inserting twice
              $this->have_dict = true;
          }
      }

      /**
       * Get dictionary from DB
       *
       * The result is cached in $this->dict, the database is queried only once
       * per instance. Words provided by the 'spell_dictionary_get' hook are
       * merged with the words stored in the database, unless the hook result
       * has a non-empty 'abort' entry, in which case only the hook's words are used.
       *
       * @return array List of dictionary words
       */
      private function load_dict()
      {
          if (is_array($this->dict)) {
              return $this->dict;
          }

          // 'shared' dictionary is stored with NULL user ID
          $userid = null;
          if (strcasecmp((string) $this->options['dictionary'], 'shared') != 0) {
              $userid = $this->rc->get_user_id();
          }

          $plugin = $this->rc->plugins->exec_hook('spell_dictionary_get', array(
              'userid' => $userid, 'language' => $this->lang, 'dictionary' => array()));

          if (empty($plugin['abort'])) {
              $dict = array();
              $sql_result = $this->rc->db->query(
                  "SELECT `data` FROM " . $this->rc->db->table_name('dictionary', true)
                  ." WHERE `user_id` ". ($plugin['userid'] ? "= ".$this->rc->db->quote($plugin['userid']) : "IS NULL")
                      ." AND `language` = ?",
                  $plugin['language']);

              if ($sql_arr = $this->rc->db->fetch_assoc($sql_result)) {
                  // remember that the row exists, update_dict() depends on it
                  $this->have_dict = true;
                  if (!empty($sql_arr['data'])) {
                      $dict = explode(' ', $sql_arr['data']);
                  }
              }

              $plugin['dictionary'] = array_merge((array) $plugin['dictionary'], $dict);
          }

          if (!empty($plugin['dictionary']) && is_array($plugin['dictionary'])) {
              $this->dict = $plugin['dictionary'];
          }
          else {
              $this->dict = array();
          }

          return $this->dict;
      }
  }