lib/editor/htmlarea/plugins/SpellChecker/spell-check-logic.cgi

   1 #! /usr/bin/perl -w
   2
   3 # Spell Checker Plugin for HTMLArea-3.0
   4 # Implementation by Mihai Bazon.  Sponsored by www.americanbible.org
   5 #
   6 # htmlArea v3.0 - Copyright (c) 2002 interactivetools.com, inc.
   7 # This notice MUST stay intact for use (see license.txt).
   8 #
   9 # A free WYSIWYG editor replacement for <textarea> fields.
  10 # For full source code and docs, visit http://www.interactivetools.com/
  11 #
  12 # Version 3.0 developed by Mihai Bazon for InteractiveTools.
  13 #            http://students.infoiasi.ro/~mishoo
  14 #
  15 # $Id$
  16
  17 use strict;
  18 use utf8;
  19 use Encode;
  20 use Text::Aspell;
  21 use HTML::Parser;
  22 use HTML::Entities;
  23 use CGI;
  24
  25 my $debug = 0;
  26
  27 open (DEBUG, '>:encoding(UTF-8)', '> /tmp/spell-check-debug.log') if $debug;
  28
  29 # use Data::Dumper; # for debug only
  30
  31 my $speller = new Text::Aspell;
  32 my $cgi = new CGI;
  33
  34 # FIXME: report a nice error...
  35 die "Can't create speller!" unless $speller;
  36
  37 # add configurable option for this
  38 my $dict = $cgi->param('dictionary') || 'en_US';
  39 $speller->set_option('lang', $dict);
  40
  41 # ultra, fast, normal, bad-spellers
  42 # bad-spellers seems to cause segmentation fault
  43 $speller->set_option('sug-mode', 'ultra');
  44
  45 my @replacements = ();
  46
  47 sub text_handler {
  48     my ($offset, $length, $text, $is_cdata) = @_;
  49     if ($is_cdata or $text =~ /^\s*$/) {
  50         return 0;
  51     }
  52     # print STDERR "*** OFFSET: $offset, LENGTH: $length, $text\n";
  53     $text = decode_entities($text);
  54     $text =~ s/&#([0-9]+);/chr($1)/eg;
  55     $text =~ s/&#x([0-9a-fA-F]+);/chr(hex $1)/eg;
  56     my $repl = spellcheck($text);
  57     if ($repl) {
  58         push(@replacements, [ $offset, $length, $repl ]);
  59     }
  60 }
  61
  62 my $p = HTML::Parser->new
  63   (api_version => 3,
  64    handlers => { start => [ sub {
  65                                 my ($self, $tagname, $attrs) = @_;
  66                                 # print STDERR "\033[1;31m parsing tag: $tagname\033[0m\n";
  67                                 # following we skip words that have already been marked as "fixed".
  68                                 if ($tagname eq "span" and $attrs->{class} =~ /HA-spellcheck-fixed/) {
  69                                     $self->handler(text => undef);
  70                                 }
  71                             }, "self, tagname, attr"
  72                           ],
  73                  end => [ sub {
  74                               my ($self, $tagname) = @_;
  75                               # print STDERR "\033[1;32m END tag: $tagname\033[0m\n";
  76                               $self->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
  77                           }, "self, tagname"
  78                         ]
  79                }
  80   );
  81 $p->handler(text => \&text_handler, 'offset, length, dtext, is_cdata');
  82 $p->case_sensitive(1);
  83 my $file_content = $cgi->param('content');
  84
  85 if ($debug) {
  86     open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-before');
  87     print FOO $file_content, "\n";
  88     close(FOO);
  89 }
  90
  91 $p->parse($file_content);
  92 $p->eof();
  93
  94 foreach (reverse @replacements) {
  95     substr($file_content, $_->[0], $_->[1], $_->[2]);
  96 }
  97
  98 # we output UTF-8
  99 binmode(STDOUT, ':encoding(UTF-8)'); # apparently, this sucks.
 100 print "Content-type: text/html; charset: utf-8\n\n";
 101 print qq^
 102 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
 103 <html>
 104 <head>
 105 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
 106 <link rel="stylesheet" type="text/css" media="all" href="spell-check-style.css" />
 107 </head>
 108 <body onload="window.parent.finishedSpellChecking();">^;
 109
 110 print $file_content;
 111 if ($cgi->param('init') eq '1') {
 112     my @dicts = $speller->dictionary_info();
 113     my $dictionaries = '';
 114     foreach my $i (@dicts) {
 115         $dictionaries .= ',' . $i->{name} unless $i->{jargon};
 116     }
 117     $dictionaries =~ s/^,//;
 118     print qq^
 119 <div id="HA-spellcheck-dictionaries"
 120 >$dictionaries</div>
 121 ^;
 122 }
 123
 124 if ($debug) {
 125     open (FOO, '>:encoding(UTF-8)', '/tmp/spell-check-after');
 126     print FOO $file_content, "\n";
 127     close(FOO);
 128 }
 129
 130 print '</body></html>';
 131
 132 # Perl is beautiful.
 133 sub spellcheck {
 134     my $text = shift;
 135     sub check {                 # called for each word in the text
 136         # input is in UTF-8
 137         my $U_word = shift;
 138         my $word = encode($speller->get_option('encoding'), $U_word);
 139         print DEBUG "*$U_word* ----> |$word|\n" if $debug;
 140         if ($speller->check($word)) {
 141             return $U_word;      # we return the word in UTF-8
 142         } else {
 143             # we should have suggestions; give them back to browser in UTF-8
 144             my $suggestions = decode($speller->get_option('encoding'), join(',', $speller->suggest($word)));
 145             my $ret = '<span class="HA-spellcheck-error">'.$U_word.'</span><span class="HA-spellcheck-suggestions">'.$suggestions.'</span>';
 146             return $ret;
 147         }
 148     }
 149     $text =~ s/([[:word:]']+)/check($1)/egs;
 150     # $text =~ s/(\w+)/check($1)/egs;
 151
 152     # the following is definitely what we want to use; too bad it sucks most.
 153     # $text =~ s/(\p{IsWord}+)/check($1)/egs;
 154     return $text;
 155 }