MDL-11082 Improved groups upgrade performance 1.8x -> 1.9; thanks Eloy for telling...
[moodle-pu.git] / lib / editor / htmlarea / plugins / SpellChecker / spell-check-logic.cgi
blob3b767b464ecfba1c88f77f0a5900201281a6d22e
1 #! /usr/bin/perl -w
3 # Spell Checker Plugin for HTMLArea-3.0
4 # Implementation by Mihai Bazon. Sponsored by www.americanbible.org
6 # htmlArea v3.0 - Copyright (c) 2002 interactivetools.com, inc.
7 # This notice MUST stay intact for use (see license.txt).
9 # A free WYSIWYG editor replacement for <textarea> fields.
10 # For full source code and docs, visit http://www.interactivetools.com/
12 # Version 3.0 developed by Mihai Bazon for InteractiveTools.
13 # http://students.infoiasi.ro/~mishoo
15 # $Id$
17 use strict;
18 use utf8;
19 use Encode;
20 use Text::Aspell;
21 use HTML::Parser;
22 use HTML::Entities;
23 use CGI;
25 my $debug = 0;
27 open (DEBUG, '>:encoding(UTF-8)', '> /tmp/spell-check-debug.log') if $debug;
29 # use Data::Dumper; # for debug only
31 my $speller = new Text::Aspell;
32 my $cgi = new CGI;
34 # FIXME: report a nice error...
35 die "Can't create speller!" unless $speller;
37 # add configurable option for this
38 my $dict = $cgi->param('dictionary') || 'en_US';
39 $speller->set_option('lang', $dict);
41 # ultra, fast, normal, bad-spellers
42 # bad-spellers seems to cause segmentation fault
43 $speller->set_option('sug-mode', 'ultra');
45 my @replacements = ();
47 sub text_handler {
48 my ($offset, $length, $text, $is_cdata) = @_;
49 if ($is_cdata or $text =~ /^\s*$/) {
50 return 0;
52 # print STDERR "*** OFFSET: $offset, LENGTH: $length, $text\n";
53 $text = decode_entities($text);
54 $text =~ s/&#([0-9]+);/chr($1)/eg;
55 $text =~ s/&#x([0-9a-fA-F]+);/chr(hex $1)/eg;
56 my $repl = spellcheck($text);
57 if ($repl) {
58 push(@replacements, [ $offset, $length, $repl ]);
62 my $p = HTML::Parser->new
63 (api_version => 3,
64 handlers => { start => [ sub {
65 my ($self, $tagname, $attrs) = @_;
66 # print STDERR "\033[1;31m parsing tag: $tagname\033[0m\n";
67 # following we skip words that have already been marked as "fixed".
68 if ($tagname eq "span" and $attrs->{class} =~ /HA-spellcheck-fixed/) {
69 $self->handler(text => undef);
71 }, "self, tagname, attr"
73 end => [ sub {
74 my ($self, $tagname) = @_;
75 # print STDERR "\033[1;32m END tag: $tagname\033[0m\n";
76 $self->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
77 }, "self, tagname"
81 $p->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
82 $p->case_sensitive(1);
83 my $file_content = $cgi->param('content');
85 if ($debug) {
86 open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-before');
87 print FOO $file_content, "\n";
88 close(FOO);
91 $p->parse($file_content);
92 $p->eof();
94 foreach (reverse @replacements) {
95 substr($file_content, $_->[0], $_->[1], $_->[2]);
98 # we output UTF-8
99 binmode(STDOUT, ':encoding(UTF-8)'); # apparently, this sucks.
100 print "Content-type: text/html; charset: utf-8\n\n";
101 print qq^
102 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
103 <html>
104 <head>
105 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
106 <link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" />
107 </head>
108 <body onload="window.parent.finishedSpellChecking();">^;
110 print $file_content;
111 if ($cgi->param('init') eq '1') {
112 my @dicts = $speller->dictionary_info();
113 my $dictionaries = '';
114 foreach my $i (@dicts) {
115 $dictionaries .= ',' . $i->{name} unless $i->{jargon};
117 $dictionaries =~ s/^,//;
118 print qq^
119 <div id="HA-spellcheck-dictionaries"
120 >$dictionaries</div>
124 if ($debug) {
125 open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-after');
126 print FOO $file_content, "\n";
127 close(FOO);
130 print '</body></html>';
132 # Perl is beautiful.
133 sub spellcheck {
134 my $text = shift;
135 sub check { # called for each word in the text
136 # input is in UTF-8
137 my $U_word = shift;
138 my $word = encode($speller->get_option('encoding'), $U_word);
139 print DEBUG "*$U_word* ----> |$word|\n" if $debug;
140 if ($speller->check($word)) {
141 return $U_word; # we return the word in UTF-8
142 } else {
143 # we should have suggestions; give them back to browser in UTF-8
144 my $suggestions = decode($speller->get_option('encoding'), join(',', $speller->suggest($word)));
145 my $ret = '<span class="HA-spellcheck-error">'.$U_word.'</span><span class="HA-spellcheck-suggestions">'.$suggestions.'</span>';
146 return $ret;
149 $text =~ s/([[:word:]']+)/check($1)/egs;
150 # $text =~ s/(\w+)/check($1)/egs;
152 # the following is definitely what we want to use; too bad it sucks most.
153 # $text =~ s/(\p{IsWord}+)/check($1)/egs;
154 return $text;