3 # aristoteles - a regex-based, just-for-fun Italian-to-Portuguese translator
4 # Copyright (C) 2009 Davide Mancusi <arekfu@gmail.com>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
# Put the :utf8 layer on both STDOUT and the script's embedded DATA handle,
# so that text written to the former and read from the latter is treated as
# UTF-8 characters rather than as raw bytes.
23 binmode(STDOUT
, ":utf8");
24 binmode(DATA
, ":utf8");
26 # Check the number of arguments
# $#ARGV is the index of the last command-line argument, so it equals 0 only
# when exactly one argument (the file name) was given. In any other case the
# script dies with a usage message built from its own name ($0).
27 die "Usage: ", $0, " <filename>\n" unless $#ARGV == 0;
29 # Read the pattern dictionary
# %patdic maps a search pattern to its replacement text. It is tied to
# Tie::IxHash -- presumably so that keys() later returns the patterns in the
# order they were inserted (confirm against the Tie::IxHash documentation).
30 tie
my %patdic, 'Tie::IxHash';
# NOTE(review): the loop header that supplies $_ to the split below is not
# visible in this excerpt.
# split has no explicit operand, so it works on $_: the first field is the
# search pattern and the second the replacement, separated by one or more tabs.
34 my ($search, $subst) = split /\t+/;
35 $patdic{$search} = $subst;
# Open the file to translate (first command-line argument) for reading and
# decode its contents as UTF-8, matching the ":utf8" layer applied to STDOUT
# and DATA. The layer name must be introduced by a colon: a mode such as
# "<utf8" is rejected by perl with "Unknown open() mode".
# Dies with the system error message ($!) if the file cannot be opened.
open( my $infile, "<:utf8", $ARGV[0] ) or die "Can't open input file: $!\n";
# Apply every dictionary entry, in the order keys() returns them, to the text
# currently held in $_.
# NOTE(review): the enclosing input loop and the closing braces of this block
# are not visible in this excerpt.
43 foreach my $search (keys %patdic) {
# The substitution is built as a string and eval'ed so that both the pattern
# and the replacement text are interpolated into the s///g source code; this
# is what lets capture references such as $1 and $2 stored in a replacement
# take effect. NOTE(review): $@ is not checked after the eval, so an entry
# that does not compile appears to be skipped silently.
44 eval "s/$search/$patdic{$search}/g";
# A pattern that starts with \b followed by a lower-case letter is applied a
# second time with its first letter and the first character of its
# replacement upper-cased (ucfirst).
45 if( $search =~ /^\\b([[:lower:]].*)/ )
# $1 is the pattern without its leading \b; the \b is re-attached after
# ucfirst has capitalised the first letter.
47 my $search2 = '\b'.ucfirst($1);
48 my $subst2 = ucfirst($patdic{$search});
49 eval "s/$search2/$subst2/g";
71 \bperch
(é
|e
')\b porque
86 \bl'([[:alpha
:]]+)(a
|i
|u
)\b a
$1$2
87 \bl
'([[:alpha:]]+)(o|e)\b o $1$2
92 \b(di|a|da|in|con|su|per|fra|tra)\Wme\b $1 mim
93 \b(di|a|da|in|con|su|per|fra|tra)\Wte\b $1 ti
98 \ball'([[:alpha
:]]+)(a
|i
)\b à
$1$2
99 \ball
'([[:alpha:]]+)(o|e)\b ao $1$2
105 \bdall'([[:alpha
:]]+)(a
|i
)\b da
$1$2
106 \bdall
'([[:alpha:]]+)(o|e|u)\b do $1$2
109 \bsu(i|gli)\b sobre os
112 \bdell'([[:alpha
:]]+)(a
|i
)\b da
$1$2
113 \bdell
'([[:alpha:]]+)(o|e|u)\b do $1$2
119 \bdell'([[:alpha
:]]+)(a
|i
)\b da
$1$2
120 \bdell
'([[:alpha:]]+)(o|e|u)\b do $1$2
125 \bnell'([[:alpha
:]]+)(a
|i
)\b à
$1$2
126 \bnell
'([[:alpha:]]+)(o|e)\b ao $1$2
145 \bquattordici
\b catorze
148 \bdiciassette
\b dezassete
150 \bdiciannove
\b dezanove
170 (b|c|d|f|g|l|m|n|p|q|t|v|z)\1 $1
171 ([^sl[:^alpha:]])ce\b $1z
177 \bs(t|p|d|f|c|b) es$1
178 (a|e|i|o)(r|l)e\b $1$2