Cleanup comments etc. in src
[gimp-lqr-plugin.git] / help / html2wiki
blob8431ceb24e2be19bcb391c00f3caf56d59b7251f
#!/bin/bash

# html to wiki script
# Copyright (C) 2007-2010 Carlo Baldassi (the "Author") <carlobaldassi@gmail.com>.
# All Rights Reserved.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
21 ### FUNCTIONS AND FILTERS ###
22 #{{{
24 # REGEX RELATED FUNCTIONS
25 #{{{
# anti-regex
#
# Usage: anti STRING...
# Prints an ERE alternation of the form  prefix[^chars]|prefix[^chars]|...
# built from the given literal strings. The strings are walked one character
# position at a time as a prefix tree: every tree node ("branch") emits the
# prefix accumulated so far followed by a negated bracket expression listing
# the characters with which any of its strings continues. Repeating the
# resulting expression therefore consumes text without running through any
# of the given strings completely.
#
# Example: anti "<a>" "</a>"  ->  [^<]|<[^a/]|<a[^>]|</[^a]|</a[^>]
#
# Characters are copied verbatim into the bracket expressions, so strings
# containing "]" or "^" are not supported. With no arguments (or only empty
# ones) an empty line is printed.
function anti
{ #{{{
  local -a str=("$@")      # input strings, 0-based
  local -a br=() oldbr=()  # branch id each string belongs to (now / at step start)
  local -a ch=()           # character of each string at the current position
  local -a prefix=()       # text accumulated so far, per branch id
  local -a kid_branch=()   # per distinct next char of a branch: target branch id
  local -a kid_char=()     # per distinct next char of a branch: the character
  local out="" c
  local -i count=$# pos=0 active=1 branches=1 new_branches=1
  local -i i b k n_kids hit

  for (( i = 0; i < count; i++ )); do
    br[i]=1
  done
  prefix[1]=""

  while (( active )); do
    # Collect the current character of every string that is still running.
    active=0
    for (( i = 0; i < count; i++ )); do
      (( pos < ${#str[i]} )) || continue
      active=1
      ch[i]=${str[i]:pos:1}
      oldbr[i]=${br[i]}
    done
    (( active )) || break

    new_branches=$branches
    for (( b = 1; b <= branches; b++ )); do
      kid_branch=()
      kid_char=()
      n_kids=0
      for (( i = 0; i < count; i++ )); do
        (( pos < ${#str[i]} )) || continue
        (( oldbr[i] == b )) || continue
        c=${ch[i]}
        # Has another string of this branch already continued with $c?
        hit=0
        for (( k = 1; k <= n_kids; k++ )); do
          if [[ "$c" == "${kid_char[k]}" ]]; then
            hit=$k
            break
          fi
        done
        if (( hit == 0 )); then
          n_kids=$(( n_kids + 1 ))
          if (( n_kids == 1 )); then
            # The first distinct character keeps the current branch id.
            kid_branch[1]=$b
          else
            # Every further distinct character forks a new branch that
            # inherits the prefix accumulated so far.
            new_branches=$(( new_branches + 1 ))
            kid_branch[n_kids]=$new_branches
            prefix[new_branches]=${prefix[b]}
          fi
          kid_char[n_kids]=$c
          hit=$n_kids
        fi
        # Always follow the branch that owns this character, including the
        # case where the character joins a branch forked by an earlier string.
        br[i]=${kid_branch[hit]}
      done
      (( n_kids > 0 )) || continue

      if [[ -n "$out" ]]; then
        out+="|"
      fi
      out+="${prefix[b]}[^"
      for (( k = 1; k <= n_kids; k++ )); do
        out+=${kid_char[k]}
        prefix[${kid_branch[k]}]+=${kid_char[k]}
      done
      out+="]"
    done

    branches=$new_branches
    pos=$(( pos + 1 ))
  done
  printf '%s\n' "$out"
} #}}}
# anti-opentag regex
#
# Usage: anti_open TAG
# Prints the output of `anti` for the opening tag "<TAG>".
function anti_open
{ #{{{
  anti "<$1>"
} #}}}
# anti-closetag regex
#
# Usage: anti_close TAG
# Prints the output of `anti` for the closing tag "</TAG>".
function anti_close
{ #{{{
  anti "</$1>"
} #}}}
# anti-tag regex
#
# Usage: anti_both TAG
# Prints the output of `anti` for both the opening tag "<TAG>" and the
# closing tag "</TAG>" (passed in that order).
function anti_both
{ #{{{
  anti "<$1>" "</$1>"
} #}}}
# skip whole tag-delimited parts
#
# Usage: skipper TAG...
# Prints an ERE alternation with one branch per TAG of the form
#   <TAG>(anti_both TAG)*</TAG>
# followed by a final branch `anti "<TAG1>" "<TAG2>" ...`.
# The result is printed with printf and the tag list is passed as an array,
# so the "*" and "[" characters of the generated expression are never
# subject to word splitting or pathname expansion.
function skipper
{ #{{{
  local tag out=""
  local -a open_tags=()
  for tag in "$@"; do
    out+="<${tag}>($(anti_both "$tag"))*</${tag}>|"
    open_tags+=("<${tag}>")
  done
  out+=$(anti "${open_tags[@]}")
  printf '%s\n' "$out"
} #}}}
157 #}}}
159 # SUBSTITUTION FILTERS (FLAT)
160 #{{{
# greedy remove
#
# Usage: rmtags TAG   (filter: stdin -> stdout)
# Deletes every opening and closing TAG element marker ("<TAG ...>" and
# "</TAG ...>"), attributes included; the text between them is kept.
# "\>" (GNU sed end-of-word) keeps e.g. TAG "tr" from matching "<track>".
# The match is case-sensitive and TAG is inserted into the regex verbatim.
function rmtags
{ #{{{
  sed -r "s@</?$1\>[^>]*>@@g"
} #}}}
# substitute tags delimiters
#
# Usage: subtags TAG BEFORE AFTER   (filter: stdin -> stdout)
# Replaces "<TAG ...>body</TAG>" with "BEFORE" + body + "AFTER". The body is
# matched as a repetition of `skipper TAG`.
# BEFORE and AFTER are inserted verbatim into the sed replacement, so they
# must not contain "@", "&" or backslashes.
function subtags
{ #{{{
  local tag=$1 before=$2 after=$3 body
  body="($(skipper "$tag"))"
  # group 1: optional attributes, group 2: the whole body
  sed -r "s@<${tag}([[:space:]]+[^>]+)?>(${body}*)</${tag}>@${before}\2${after}@g"
} #}}}
# images
#
# Usage: subimgs   (filter: stdin -> stdout)
# Rewrites "<img ... src="PATH" ...>" as "[[image PATH]]".
# The capture stops at the closing quote of the src attribute, so attributes
# that follow src (alt, width, ...) do not leak into PATH.
function subimgs
{ #{{{
  sed -r 's/<img\>[^>]*src="([^">]*)"[^>]*>/[[image \1]]/g'
} #}}}
# links (external and internal)
#
# Usage: sublinks   (filter: stdin -> stdout)
# Rewrites
#   <a href="#ANCHOR">text</a>  ->  [#ANCHOR text]
#   <a name="NAME">text</a>     ->  [[# NAME]]text
# Only href values starting with "#" are matched; other links are left as
# they are. The link text is matched as a repetition of `anti_both a`.
function sublinks
{ #{{{
  local body
  body="($(anti_both "a"))"
  sed -r \
    -e "s@<a href=\"(#[^\"]*)\">(${body}*)</a>@[\1 \2]@g" \
    -e "s@<a name=\"([^\"]*)\">(${body}*)</a>@[[# \1]]\2@g"
} #}}}
188 #}}}
190 # SUBSTITUTION FILTERS (WITH NESTING)
191 #{{{
# associate a depth to the given tags
#
# Usage: indent LEVEL TAG...   (filter: stdin -> stdout)
# Replaces every "<TAG>body</TAG>" (TAG being any of the given tags, the
# closing tag matching the opening one) with
#   <<I LEVEL TAG>>body<</I LEVEL TAG>>
# The body is matched as a repetition of `skipper TAG...`.
function indent
{ #{{{
  local level=$1
  shift
  local body tag alt="" sep=""
  body="($(skipper "$@"))"
  # Build the alternation TAG1|TAG2|... of all tag names.
  for tag in "$@"; do
    alt+="${sep}${tag}"
    sep="|"
  done
  sed -r "s@<(${alt})>(${body}*)</\1>@<<I ${level} \1>>\2<</I ${level} \1>>@g"
} #}}}
# invoke indent for all levels
# up to the given one (in increasing order)
#
# Usage: indent_alllevels LEVELS TAG...   (filter: stdin -> stdout)
# Pipes stdin through `indent 1 TAG...`, `indent 2 TAG...`, ... up to
# `indent LEVELS TAG...`. The function recurses once per level; on the
# recursive calls the original LEVELS is carried along as a first argument
# of the form "@@N".
function indent_alllevels
{ #{{{
  local remaining=$1
  shift
  local top=$remaining
  if [[ "$1" =~ ^@@[[:digit:]]+$ ]]; then
    top=${1#@@}
    shift
  fi
  if (( remaining > 0 )); then
    indent "$(( top - remaining + 1 ))" "$@" \
      | indent_alllevels "$(( remaining - 1 ))" "@@${top}" "$@"
  else
    # No level left: pass the stream through unchanged.
    cat
  fi
} #}}}
# substitute tags delimiters
# at a specified level
#
# Usage: subtags_atlevel LEVEL TAG BEFORE AFTER   (filter: stdin -> stdout)
# Works in four stages:
#   1. every "<<I LEVEL name>>...<</I LEVEL name>>" region is put on a line
#      of its own, with the region content prefixed by "<<MARKED>>";
#   2. on marked lines only, "<TAG>body</TAG>" becomes BEFORE + body + AFTER
#      (body matched as a repetition of `skipper TAG`);
#   3. the "<<MARKED>>" prefixes are removed;
#   4. all newlines are turned back into spaces.
# BEFORE and AFTER are inserted verbatim into the sed replacement.
function subtags_atlevel
{ #{{{
  local level=$1
  shift
  local tag=$1 before=$2 after=$3
  local level_body tag_body
  level_body="($(anti "<</I ${level} "))"
  #level_body="(<<I [[:digit:]]+ [[:alpha:]]+>>($(anti "<<I " "<</I "))*<</I [[:digit:]]+ [[:alpha:]]+>>|$(anti "<</I ${level} "))"
  tag_body="($(skipper "$tag"))"
  sed -r "s@<<I ${level} ([[:alpha:]]+)>>(${level_body}*)<</I ${level} \1>>@<<I ${level} \1>>\n<<MARKED>>\2\n<</I ${level} \1>>@g" \
    | sed -r "/^<<MARKED>>/s@<${tag}>(${tag_body}*)</${tag}>@${before}\1${after}@g" \
    | sed -r "s@^<<MARKED>>@@" \
    | tr '\n' ' '
} #}}}
# invoke subtags_atlevel for all levels
# up to the given one (in decreasing order)
# (delimiters are passed through array names)
#
# Usage: subtags_multilevel LEVELS TAG BEGIN_ARRAY END_ARRAY
#        (filter: stdin -> stdout)
# BEGIN_ARRAY and END_ARRAY are the *names* of indexed arrays; for level n
# the delimiters are taken from element n - 1 of each array. The stream is
# piped through `subtags_atlevel` for LEVELS, LEVELS - 1, ..., 1.
function subtags_multilevel
{ #{{{
  local level=$1 tag=$2 begin_array=$3 end_array=$4
  if (( level > 0 )); then
    # Indirect expansion reads NAME[index] without resorting to eval.
    local begin_ref="${begin_array}[$(( level - 1 ))]"
    local end_ref="${end_array}[$(( level - 1 ))]"
    subtags_atlevel "$level" "$tag" "${!begin_ref}" "${!end_ref}" \
      | subtags_multilevel "$(( level - 1 ))" "$tag" "$begin_array" "$end_array"
  else
    # No level left: pass the stream through unchanged.
    cat
  fi
} #}}}
# substitute indented tags
#
# Usage: subindent LEVEL TAG BEFORE AFTER   (filter: stdin -> stdout)
# Replaces "<<I LEVEL TAG>>body<</I LEVEL TAG>>" with BEFORE + body + AFTER.
# The body is matched as a repetition of `anti "<</I LEVEL TAG>>"`.
# BEFORE and AFTER are inserted verbatim into the sed replacement.
function subindent
{ #{{{
  local level=$1
  shift
  local tag=$1 before=$2 after=$3 body
  body="($(anti "<</I ${level} ${tag}>>"))"
  sed -r "s@<<I ${level} ${tag}>>(${body}*)<</I ${level} ${tag}>>@${before}\1${after}@g"
} #}}}
# invoke subindent for all levels
# up to the given one (in increasing order)
#
# Usage: subindent_alllevels LEVELS TAG BEFORE AFTER
#        (filter: stdin -> stdout)
# Pipes stdin through `subindent 1 ...`, `subindent 2 ...`, ... up to
# `subindent LEVELS ...`, forwarding the remaining arguments unchanged. The
# function recurses once per level; on the recursive calls the original
# LEVELS is carried along as a first argument of the form "@@N".
function subindent_alllevels
{ #{{{
  local remaining=$1
  shift
  local top=$remaining
  if [[ "$1" =~ ^@@[[:digit:]]+$ ]]; then
    top=${1#@@}
    shift
  fi
  if (( remaining > 0 )); then
    subindent "$(( top - remaining + 1 ))" "$@" \
      | subindent_alllevels "$(( remaining - 1 ))" "@@${top}" "$@"
  else
    # No level left: pass the stream through unchanged.
    cat
  fi
} #}}}
291 #}}}
293 # FORMAT AND CLEANUP FILTERS
294 #{{{
# remove unnecessary spaces
#
# Usage: rmspace   (filter: stdin -> stdout)
# Collapses every run of whitespace within a line into a single space.
function rmspace
{ #{{{
  sed -r 's/[[:space:]]+/ /g'
} #}}}
# remove spaces at the beginning
# of the lines
#
# Usage: rmspace_start   (filter: stdin -> stdout)
function rmspace_start
{ #{{{
  sed -r 's/^[[:space:]]*//'
} #}}}
# collapse newlines marked as weak
# if next to non-weak newlines
#
# Usage: rmweaknewlines   (filter: stdin -> stdout)
# A run of "<<WNL>>" markers (optionally separated by whitespace) that is
# immediately followed by "<<NL>>" is dropped in favour of that "<<NL>>";
# every remaining "<<WNL>>" is then promoted to "<<NL>>".
function rmweaknewlines
{ #{{{
  sed -r \
    -e 's/(<<WNL>>[[:space:]]*)+<<NL>>/<<NL>>/g' \
    -e 's/<<WNL>>/<<NL>>/g'
} #}}}
# apply space formattings
#
# Usage: subspace   (filter: stdin -> stdout)
# Turns "<<NL>>" markers (with any whitespace around them) into real
# newlines and expands the "<<SP>>", "<<SP2>>" ... "<<SP9>>" markers into
# leading spaces.
#
# NOTE(review): in the text this function was restored from, every marker
# was replaced by the same single space, which makes the nine markers
# indistinguishable. The widths below assume "<<SPn>>" stands for n spaces
# ("<<SP>>" for one) -- confirm against the upstream script.
function subspace
{ #{{{
  local -a script=( -e 's/[[:space:]]*<<NL>>[[:space:]]*/\n/g' )
  local pad
  local -i width
  for (( width = 9; width >= 2; width-- )); do
    printf -v pad '%*s' "$width" ''
    script+=( -e "s/<<SP${width}>>/${pad}/g" )
  done
  script+=( -e 's/<<SP>>/ /g' )
  sed -r "${script[@]}"
} #}}}
332 #}}}
334 # CUSTOM FILTERS
335 #{{{
# Usage: custom_filters   (filter: stdin -> stdout)
# Drops the first directory component from the path of every
# "[[image PATH]]" tag (e.g. "[[image img/a.png]]" -> "[[image a.png]]").
# The directory part may not contain "]", so the match cannot run past the
# end of one image tag into a later one on the same line.
function custom_filters
{ #{{{
  sed -r 's@(\[\[image[[:space:]]+)([^]/]*/)?([^]]*\]\])@\1\3@g'
} #}}}
342 #}}}
344 #}}}
346 ### CONSTANTS AND SUBSTITUTION TABLES
347 #{{{
# Deepest list nesting level processed by the main filter.
MAXLEV=4

# Delimiter tables, looked up by name in subtags_multilevel: element n - 1
# holds the text that replaces the opening (_B) / closing (_E) tag at
# nesting level n. Every table has ten entries so that the _B and _E tables
# stay aligned for any level the <<SP>>..<<SP9>> markers can express.

DARRAY_DT_B=( '* //' '<<SP>>* //' '<<SP2>>* //' '<<SP3>>* //' '<<SP4>>* //' '<<SP5>>* //' '<<SP6>>* //' '<<SP7>>* //' '<<SP8>>* //' '<<SP9>>* //' )
DARRAY_DT_E=( './/' './/' './/' './/' './/' './/' './/' './/' './/' './/' )

DARRAY_DD_B=( '' '' '' '' '' '' '' '' '' '' )
DARRAY_DD_E=( '<<NL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' )

DARRAY_LI_B=( '* ' '<<SP>>* ' '<<SP2>>* ' '<<SP3>>* ' '<<SP4>>* ' '<<SP5>>* ' '<<SP6>>* ' '<<SP7>>* ' '<<SP8>>* ' '<<SP9>>* ' )
DARRAY_LI_E=( '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' )

DARRAY_DIV_B=( '' '' '' '' '' '' '' '' '' '' )
DARRAY_DIV_E=( ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' )

DARRAY_P_B=( '' '' '' '' '' '' '' '' '' '' )
DARRAY_P_E=( ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' )
366 #}}}
368 ### MAIN FILTER ###
369 #{{{
# Reads an HTML document on stdin and writes the wiki markup to stdout.

# Join the whole document into one line and normalise whitespace, so that
# the sed-based filters below can match across the original line breaks.
tr "\n" " " |
  tr "\t" " " |
  rmspace |
  # Drop structural and presentational tags; their content is kept.
  rmtags "!doctype" |
  rmtags "html" |
  rmtags "META" |
  rmtags "head" |
  rmtags "link" |
  rmtags "body" |
  rmtags "table" |
  rmtags "tr" |
  rmtags "td" |
  rmtags "center" |
  rmtags "font" |
  # Images and anchors.
  subimgs |
  sublinks |
  # Headings and inline formatting.
  subtags "title" '<<NL>>+ ' '<<NL>>' |
  subtags "h1" '<<NL>>+ ' '<<NL>>' |
  subtags "h2" '<<NL>>++ ' '<<NL>>' |
  subtags "h3" '<<NL>>+++ ' '<<NL>>' |
  subtags "h4" '<<NL>>++++ ' '<<NL>>' |
  subtags "b" '**' '**' |
  subtags "i" '//' '//' |
  subtags "u" '__' '__' |
  subtags "code" '<<NL>>[[code]]<<NL>>' '<<NL>>[[/code]]<<NL>>' |
  # Tag lists with their nesting depth, then convert the items per depth.
  indent_alllevels $MAXLEV "dl" "ul" "ol" |
  subtags_multilevel $MAXLEV "dt" DARRAY_DT_B DARRAY_DT_E |
  subtags_multilevel $MAXLEV "dd" DARRAY_DD_B DARRAY_DD_E |
  subtags_multilevel $MAXLEV "li" DARRAY_LI_B DARRAY_LI_E |
  subtags_multilevel $MAXLEV "div" DARRAY_DIV_B DARRAY_DIV_E |
  subtags_multilevel $MAXLEV "p" DARRAY_P_B DARRAY_P_E |
  # Remaining div/p elements outside lists. "div" is run twice on purpose:
  # presumably so that a div nested inside another div is converted by the
  # second pass -- TODO confirm.
  subtags "div" '<<NL>>' '<<NL>>' |
  subtags "div" '<<NL>>' '<<NL>>' |
  subtags "p" '<<NL>>' '<<NL>>' |
  # Replace the depth-tagged list delimiters themselves.
  subindent_alllevels $MAXLEV "dl" '<<NL>>' '' |
  subindent_alllevels $MAXLEV "ul" '<<NL>>' '<<NL>>' |
  subindent_alllevels $MAXLEV "ol" '<<NL>>' '<<NL>>' |
  # Final whitespace cleanup and expansion of the layout markers.
  rmspace_start |
  rmspace |
  rmweaknewlines |
  custom_filters |
  subspace |
  cat -s
echo
417 #}}}
419 ### END ###
421 # OBSOLETE / NONWORKING STUFF
422 #{{{
424 #function anti_open
425 #{#{{{
426 # local ind
427 # local STR="${1}>"
428 # local N=$(echo -n "$STR" | wc -c)
429 # local OUTSTR="[^<]"
430 # local T="<"
431 # local C
432 # for ind in $(seq 1 $N)
433 # do
434 # C="$(echo "$STR" | cut -c${ind})"
435 # OUTSTR="${OUTSTR}|${T}[^$C]"
436 # T="${T}${C}"
437 # done
439 # echo "$OUTSTR"
440 #}#}}}
442 #function anti_close
443 #{#{{{
444 # echo "$(anti_open "/$1")"
445 #}#}}}
447 #function anti_both
448 #{#{{{
449 # local ind
450 # local STR1="${1}>"
451 # local STR2="/${1}>"
452 # local N=$(echo -n "$STR2" | wc -c)
453 # local C1 C2
454 # C1="$(echo "$STR1" | cut -c1)"
455 # local OUTSTR="[^<]|<[^${C1}/]"
456 # local T1="<${C1}"
457 # local T2="</"
458 # for ind in $(seq 2 $((N - 1)) )
459 # do
460 # C1="$(echo "$STR1" | cut -c${ind})"
461 # C2="$(echo "$STR2" | cut -c${ind})"
462 # OUTSTR="${OUTSTR}|${T1}[^${C1}]|${T2}[^${C2}]"
463 # T1="${T1}${C1}"
464 # T2="${T2}${C2}"
465 # done
466 # C2="$(echo "$STR2" | cut -c${N})"
467 # OUTSTR="${OUTSTR}|${T2}[^${C2}]"
469 # echo "$OUTSTR"
470 #}#}}}
472 #function skipper
473 #{#{{{
474 # echo "<$1>($(anti_both "$1"))*</$1>|$(anti_open "$1")"
475 #}#}}}
477 #function rmtagsbody
478 #{#{{{
479 # sed -r "s@<$1\>[^>]*>[^<]*</$1>@@g";
480 #}#}}}
482 #function subdesclist
483 #{#{{{
484 # subtags "dt" '* ' '.' | \
485 # subtags "dd" '' '<<NL>>' | \
486 # subtags "dl" '<<NL>>' ''
487 #}#}}}
489 #function worknest
490 #{#{{{
491 # local BODY_EL="($(anti_both "$1"))"
492 # sed -r "s@<$1>(${BODY_EL}*)</$1>@<$1{{$2}}>\1</$1{{$2}}>@g"
493 #}#}}}
495 #function subtags_upperlev
496 #{#{{{
497 # local BODY_EL="($(skipper "$1"))"
498 # sed -r "s@<$1>(${BODY_EL}*)</$1>@$2\1$3>@g"
499 #}#}}}
501 #function subtags_skip
502 #{#{{{
503 # local BODY_EL="((<$1>($(anti_both "$1"))*</$1>)|($(anti "<$1>" "<$2>")))"
504 # #local BODY_EL="($(skipper "$1")|$(skipper "$2"))"
505 # sed -r "s@<$2>(${BODY_EL}*)</$2>@$3\1$4@g"
506 #}#}}}
508 #function subtags_without
509 #{#{{{
510 # local BODY_EL="($(anti "<$1>" "</$1>" "<$2>" "</$2>"))"
511 # sed -r "s@<$1>(${BODY_EL}*)</$1>@$3\1$4@g"
512 #}#}}}
514 #function subtags_within_
515 #{#{{{
516 # #local BODY_EL1="($(skipper "$1"))"
517 # #local BODY_EL2="($(skipper "$1"))"
518 # local BODY_EL1="(<$1>($(anti_both "$1"))*</$1>|$(anti "<$1>" "<$2>"))"
519 # local BODY_EL2="($(skipper "$1" "$2"))"
520 # #local BODY_EL2="($(anti "$2"))"
521 # local BODY_EL3="($(skipper "$1"))"
522 # sed -r "s@<$1>(${BODY_EL1}*)<$2>(${BODY_EL2}*)</$2>(${BODY_EL3}*)</$1>@<$1>\1$3\4$4\8</$1>@g"
523 #}#}}}
525 #function subtags_within
526 #{#{{{
527 # local N="$1"
528 # shift;
529 # if [ $N -gt 0 ]
530 # then
531 # subtags_within_ "$@" | subtags_within $(( N - 1 )) "$@"
532 # else
533 # cat
534 # fi
535 #}#}}}
537 #function subtags_atlevel__
538 #{#{{{
539 # local LEV="$1"
540 # shift
541 # local NONINDTAG="(<<I [[:digit:]]+ [[:alpha:]]+>>($(anti "<<I " "<</I "))*<</I [[:digit:]]+ [[:alpha:]]+>>|$(anti "<<I " "<</I " "<$1>"))"
542 # local BODY_EL="($(skipper "$1"))"
543 # local NONIND="(<<I [[:digit:]]+ [[:alpha:]]+>>($(anti "<<I " "<</I "))*<</I [[:digit:]]+ [[:alpha:]]+>>|$(anti "<</I $LEV "))"
544 # sed -r "s@<<I $LEV ([[:alpha:]]+)>>(${NONINDTAG}*)<$1>(${BODY_EL}*)</$1>(${NONIND}*)<</I $LEV \1>>@<<I $LEV \1>>\2$2\5$3\8<</I $LEV \1>>@g"
545 #}#}}}
547 #function subtags_atlevel_
548 #{#{{{
549 # local N="$1"
550 # shift;
551 # if [ $N -gt 0 ]
552 # then
553 # subtags_atlevel__ "$@" | subtags_atlevel_ $(( N - 1 )) "$@"
554 # else
555 # cat
556 # fi
557 #}#}}}
559 #function subtags_atlevel
560 #{#{{{
561 # subtags_atlevel_ 10 "$@"
562 #}#}}}
564 #}}}