4 # Copyright (C) 2007-2010 Carlo Baldassi (the "Author") <carlobaldassi@gmail.com>.
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, see <http://www.gnu.org/licenses/>.
21 ### FUNCTIONS AND FILTERS ###
24 # REGEX RELATED FUNCTIONS
30 local id ind brid chbrid
34 local BRANCHES NEWBRANCHES
37 local go c0 found chbrN
42 S
="$(eval "echo \"\$
${id}\"")"
44 N
[$id]=$
(echo -n "$S" |
wc -c)
58 [ $ind -le ${N[$id]} ] ||
continue;
60 C
[$id]="$(echo "${STR[$id]}" | cut -c${ind})"
63 [ $go -eq 0 ] && continue;
65 for brid
in $
(seq 1 $BRANCHES)
71 [ $ind -le ${N[$id]} ] ||
continue;
72 [ ${OLDBR[$id]} -eq $brid ] ||
continue;
81 for chbrid
in $
(seq 1 $chbrN )
83 if [ "$c0" == "${CC[$chbrid]}" ]
93 CHBR
[$chbrN]="$NEWBRANCHES"
96 T
[$NEWBRANCHES]=${T[$brid]}
100 [ $chbrN -eq 0 ] && continue;
101 [ -n "$OUTSTR" ] && OUTSTR
="${OUTSTR}|"
102 OUTSTR
="${OUTSTR}${T[$brid]}[^"
103 for chbrid
in $
(seq 1 $chbrN )
105 OUTSTR
="${OUTSTR}${CC[$chbrid]}"
106 T
[${CHBR[$chbrid]}]="${T[${CHBR[$chbrid]}]}${CC[$chbrid]}"
111 BRANCHES=$NEWBRANCHES
123 # anti-closetag regex
135 # skip whole tag-delimited parts
142 for id in $(seq 1 $L)
144 S="$
(eval "echo \"\$${id}\"")"
145 OUTSTR="${OUTSTR}<$S>($
(anti_both
"$S"))*</$S>|
"
148 for id in $(seq 1 $L)
150 S="$
(eval "echo \"\$${id}\"")"
153 OUTSTR="${OUTSTR}$
(anti
$TL)"
159 # SUBSTITUTION FILTERS (FLAT)
165 sed -r "s@
</?
$1\
>[^
>]*>@@g
";
168 # substitute tag delimiters
171 local BODY_EL="($
(skipper
"$1"))"
172 sed -r "s@
<$1([[:space
:]]+[^
>]+)?
>(${BODY_EL}*)</$1>@
$2\
2$3@g
"
178 sed -r "s
/<img\
>[^
>]*src
=\"([^
>]*)\"[^
>]*>/[[image \
1]]/g
"
181 # links (external and internal)
184 sed -r "s@
<a href
=\"(#[^\"]*)\">(($(anti_both "a"))*)</a>@[\1 \2]@g" | \
185 sed -r "s@<a name=\"([^\"]*)\">(($(anti_both "a
"))*)</a>@[[# \1]]\2@g"
190 # SUBSTITUTION FILTERS (WITH NESTING)
193 # associate a depth to the given tags
198 local BODY_EL
="($(skipper "$@
"))"
202 for id
in $
(seq 1 $L)
204 S
="$(eval "echo \"\$
${id}\"")"
205 [ $id -gt 1 ] && TAG
="${TAG}|"
209 sed -r "s@<${TAG}>(${BODY_EL}*)</\1>@<<I $LEV \1>>\2<</I $LEV \1>>@g"
212 # invoke indent for all levels
213 # up to the given one (in increasing order)
214 function indent_alllevels
# Recursive stdin-to-stdout filter that chains one `indent` call per level.
# NOTE(review): the lines that set LEV (presumably from the first argument),
# give LMAX its initial value and stop the recursion are not visible here --
# confirm against the full file.
# An argument of the form "@@<digits>" carries the maximum level through
# the recursion: the "@@" prefix is stripped into LMAX and the argument is
# shifted away, so "$@" then holds only the remaining arguments.
219 [[ "$1" =~ ^@@
[[:digit
:]]+$
]] && { LMAX
=${1#@@}; shift; }
# Run `indent` for level LMAX - LEV + 1, then pipe the result into a
# recursive call with LEV - 1; LMAX is passed on as "@@${LMAX}" so the
# level handed to `indent` grows as LEV shrinks.
222 indent $
((LMAX
- LEV
+ 1)) "$@" |\
223 indent_alllevels $
((LEV
- 1)) "@@${LMAX}" "$@"
229 # substitute tag delimiters
230 # at a specified level
231 function subtags_atlevel
# Filter (stdin to stdout) that rewrites <$1>...</$1> as $2...$3, but only
# inside regions delimited by "<<I LEV tag>>" ... "<</I LEV tag>>".
# NOTE(review): LEV is set on lines not visible here; since callers pass
# (level, tag, begin, end) while the body uses $1/$2/$3 for tag/begin/end,
# the level is presumably taken from the first argument and shifted off --
# confirm.
# BODY_EL1: one regex element that cannot begin a "<</I LEV " closing marker.
235 local BODY_EL1
="($(anti "<</I
$LEV "))"
236 #local BODY_EL1="(<<I [[:digit:]]+ [[:alpha:]]+>>($(anti "<<I " "<</I "))*<</I [[:digit:]]+ [[:alpha:]]+>>|$(anti "<</I $LEV "))"
# BODY_EL2: one body element for tag $1, as produced by `skipper`.
237 local BODY_EL2
="($(skipper "$1"))"
# Stage 1: put the body of every level-LEV region on a line of its own,
#          prefixed with <<MARKED>>.
# Stage 2: on marked lines only, replace <$1>body</$1> with $2 body $3.
# Stage 3: strip the <<MARKED>> prefix again.
# The pipeline continues on a line that is not visible in this view.
238 sed -r "s@<<I $LEV ([[:alpha:]]+)>>(${BODY_EL1}*)<</I $LEV \1>>@<<I $LEV \1>>\n<<MARKED>>\2\n<</I $LEV \1>>@g" | \
239 sed -r "/^<<MARKED>>/s@<$1>(${BODY_EL2}*)</$1>@$2\1$3@g" | \
240 sed -r "s@^<<MARKED>>@@" | \
244 # invoke subtags_atlevel for all levels
245 # up to the given one (in decreasing order)
246 # (delimiters are passed through array names)
247 function subtags_multilevel
# Recursive filter: applies subtags_atlevel at level L, then recurses with
# L - 1, using per-level delimiters taken from two arrays passed by name.
# NOTE(review): the assignments of L, TAG, ARRAY_B and ARRAY_E and the
# recursion stop condition are on lines not visible here; callers pass
# (level, tag, begin-array name, end-array name) -- confirm the mapping.
# Indirect lookup: eval expands to ${<array name>[L - 1]}, i.e. the
# begin/end delimiter stored at index L - 1 of the named array.
256 local BS
="$(eval echo \"\${${ARRAY_B}[L - 1]}\")"
257 local ES
="$(eval echo \"\${${ARRAY_E}[L - 1]}\")"
# Substitute at this level, then hand the stream to the next lower level.
258 subtags_atlevel
$L "$TAG" "$BS" "$ES" | \
259 subtags_multilevel $
((L
- 1)) "$TAG" "$ARRAY_B" "$ARRAY_E"
265 # substitute indented tags
270 local BODY_EL
="($(anti "<</I
$LEV $1>>"))"
271 sed -r "s@<<I $LEV $1>>(${BODY_EL}*)<</I $LEV $1>>@$2\1$3@g"
274 # invoke subindent for all levels
275 # up to the given one (in increasing order)
276 function subindent_alllevels
# Recursive stdin-to-stdout filter that chains one `subindent` call per level.
# NOTE(review): the lines that set LEV (presumably from the first argument),
# give LMAX its initial value and stop the recursion are not visible here --
# confirm against the full file.
# An argument of the form "@@<digits>" carries the maximum level through
# the recursion: the "@@" prefix is stripped into LMAX and the argument is
# shifted away, so "$@" then holds only the remaining arguments.
281 [[ "$1" =~ ^@@
[[:digit
:]]+$
]] && { LMAX
=${1#@@}; shift; }
# Run `subindent` for level LMAX - LEV + 1, then pipe the result into a
# recursive call with LEV - 1; LMAX is passed on as "@@${LMAX}" so the
# level handed to `subindent` grows as LEV shrinks.
284 subindent $
((LMAX
- LEV
+ 1)) "$@" |\
285 subindent_alllevels $
((LEV
- 1)) "@@${LMAX}" "$@"
293 # FORMAT AND CLEANUP FILTERS
296 # remove unnecessary spaces
299 sed -r "s/[[:space:]]+/ /g"
302 # remove spaces at the beginning
304 function rmspace_start
# Filter (stdin to stdout): delete the leading run of whitespace
# ([[:space:]]*) from every input line; lines without leading whitespace
# pass through unchanged.
306 sed -r "s/^[[:space:]]*//"
309 # collapse newlines marked as weak
310 # if next to non-weak newlines
311 function rmweaknewlines
# Filter (stdin to stdout) working on the <<WNL>> (weak) and <<NL>>
# (non-weak) newline markers.
# Pass 1: one or more <<WNL>> markers, each optionally followed by
#         whitespace, directly before a <<NL>> are absorbed into that <<NL>>.
# Pass 2: every <<WNL>> still left becomes a plain <<NL>>.
# Only weak newlines that PRECEDE a <<NL>> are collapsed; a <<WNL>> that
# follows a <<NL>> is not matched by pass 1 and is converted by pass 2.
313 sed -r "s/(<<WNL>>[[:space:]]*)+<<NL>>/<<NL>>/g" | \
314 sed -r "s/<<WNL>>/<<NL>>/g"
317 # apply space formatting
320 sed -r "s/[[:space:]]*<<NL>>[[:space:]]*/\n/g" | \
321 sed -r "s/<<SP9>>/ /g" | \
322 sed -r "s/<<SP8>>/ /g" | \
323 sed -r "s/<<SP7>>/ /g" | \
324 sed -r "s/<<SP6>>/ /g" | \
325 sed -r "s/<<SP5>>/ /g" | \
326 sed -r "s/<<SP4>>/ /g" | \
327 sed -r "s/<<SP3>>/ /g" | \
328 sed -r "s/<<SP2>>/ /g" | \
329 sed -r "s/<<SP>>/ /g"
337 function custom_filters
# Filter (stdin to stdout): inside "[[image <target>]]" markup, drop the
# optional leading part of <target> up to and including its first "/"
# (group 2), keeping the "[[image " prefix (group 1) and the rest of the
# target with the closing "]]" (group 3).
339 sed -r "s@(\[\[image[[:space:]]+)([^/]*/)?([^]]*\]\])@\1\3@g"
346 ### CONSTANTS AND SUBSTITUTION TABLES
# Begin (_B) / end (_E) delimiter tables, one pair per tag. Each table holds
# one entry per nesting level; subtags_multilevel receives the table names
# and reads element L - 1 for level L.
# NOTE(review): DARRAY_DT_B and DARRAY_LI_B have 10 entries while every
# other table has 9 -- confirm against the maximum level in use, since an
# index past the end of a table expands to an empty delimiter.
# <dt>: bullet plus opening "//", indented with <<SP>>..<<SP9>> markers.
351 DARRAY_DT_B
=( '* //' '<<SP>>* //' '<<SP2>>* //' '<<SP3>>* //' '<<SP4>>* //' '<<SP5>>* //' '<<SP6>>* //' '<<SP7>>* //' '<<SP8>>* //' '<<SP9>>* //' )
352 DARRAY_DT_E
=( './/' './/' './/' './/' './/' './/' './/' './/' './/' )
# <dd>: no opening delimiter; closes with <<NL>> at the first level and
# with the weak <<WNL>> marker at the others.
354 DARRAY_DD_B
=( '' '' '' '' '' '' '' '' '' )
355 DARRAY_DD_E
=( '<<NL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' '<<WNL>>' )
# <li>: bullet indented with <<SP>>..<<SP9>> markers, closed by <<NL>>.
357 DARRAY_LI_B
=( '* ' '<<SP>>* ' '<<SP2>>* ' '<<SP3>>* ' '<<SP4>>* ' '<<SP5>>* ' '<<SP6>>* ' '<<SP7>>* ' '<<SP8>>* ' '<<SP9>>* ' )
358 DARRAY_LI_E
=( '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' '<<NL>>' )
# <div> inside a marked region: no opening delimiter, a single space as
# closing delimiter.
360 DARRAY_DIV_B
=( '' '' '' '' '' '' '' '' '' )
361 DARRAY_DIV_E
=( ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' )
# <p> inside a marked region: same delimiters as <div>.
363 DARRAY_P_B
=( '' '' '' '' '' '' '' '' '' )
364 DARRAY_P_E
=( ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' )
375 rmtags
"!doctype" | \
388 subtags
"title" '<<NL>>+ ' '<<NL>>' | \
389 subtags
"h1" '<<NL>>+ ' '<<NL>>' | \
390 subtags
"h2" '<<NL>>++ ' '<<NL>>' | \
391 subtags
"h3" '<<NL>>+++ ' '<<NL>>' | \
392 subtags
"h4" '<<NL>>++++ ' '<<NL>>' | \
393 subtags
"b" '**' '**' | \
394 subtags
"i" '//' '//' | \
395 subtags
"u" '__' '__' | \
396 subtags
"code" '<<NL>>[[code]]<<NL>>' '<<NL>>[[/code]]<<NL>>' | \
397 indent_alllevels
$MAXLEV "dl" "ul" "ol" | \
398 subtags_multilevel
$MAXLEV "dt" DARRAY_DT_B DARRAY_DT_E | \
399 subtags_multilevel
$MAXLEV "dd" DARRAY_DD_B DARRAY_DD_E | \
400 subtags_multilevel
$MAXLEV "li" DARRAY_LI_B DARRAY_LI_E | \
401 subtags_multilevel
$MAXLEV "div" DARRAY_DIV_B DARRAY_DIV_E | \
402 subtags_multilevel
$MAXLEV "p" DARRAY_P_B DARRAY_P_E | \
403 subtags
"div" '<<NL>>' '<<NL>>' | \
404 subtags
"div" '<<NL>>' '<<NL>>' | \
405 subtags
"p" '<<NL>>' '<<NL>>' | \
406 subindent_alllevels
$MAXLEV "dl" '<<NL>>' '' | \
407 subindent_alllevels
$MAXLEV "ul" '<<NL>>' '<<NL>>' | \
408 subindent_alllevels
$MAXLEV "ol" '<<NL>>' '<<NL>>' | \
421 # OBSOLETE / NONWORKING STUFF
428 # local N=$(echo -n "$STR" | wc -c)
429 # local OUTSTR="[^<]"
432 # for ind in $(seq 1 $N)
434 # C="$(echo "$STR" | cut -c${ind})"
435 # OUTSTR="${OUTSTR}|${T}[^$C]"
444 # echo "$(anti_open "/$1")"
451 # local STR2="/${1}>"
452 # local N=$(echo -n "$STR2" | wc -c)
454 # C1="$(echo "$STR1" | cut -c1)"
455 # local OUTSTR="[^<]|<[^${C1}/]"
458 # for ind in $(seq 2 $((N - 1)) )
460 # C1="$(echo "$STR1" | cut -c${ind})"
461 # C2="$(echo "$STR2" | cut -c${ind})"
462 # OUTSTR="${OUTSTR}|${T1}[^${C1}]|${T2}[^${C2}]"
466 # C2="$(echo "$STR2" | cut -c${N})"
467 # OUTSTR="${OUTSTR}|${T2}[^${C2}]"
474 # echo "<$1>($(anti_both "$1"))*</$1>|$(anti_open "$1")"
479 # sed -r "s@<$1\>[^>]*>[^<]*</$1>@@g";
482 #function subdesclist
484 # subtags "dt" '* ' '.' | \
485 # subtags "dd" '' '<<NL>>' | \
486 # subtags "dl" '<<NL>>' ''
491 # local BODY_EL="($(anti_both "$1"))"
492 # sed -r "s@<$1>(${BODY_EL}*)</$1>@<$1{{$2}}>\1</$1{{$2}}>@g"
495 #function subtags_upperlev
497 # local BODY_EL="($(skipper "$1"))"
498 # sed -r "s@<$1>(${BODY_EL}*)</$1>@$2\1$3>@g"
501 #function subtags_skip
503 # local BODY_EL="((<$1>($(anti_both "$1"))*</$1>)|($(anti "<$1>" "<$2>")))"
504 # #local BODY_EL="($(skipper "$1")|$(skipper "$2"))"
505 # sed -r "s@<$2>(${BODY_EL}*)</$2>@$3\1$4@g"
508 #function subtags_without
510 # local BODY_EL="($(anti "<$1>" "</$1>" "<$2>" "</$2>"))"
511 # sed -r "s@<$1>(${BODY_EL}*)</$1>@$3\1$4@g"
514 #function subtags_within_
516 # #local BODY_EL1="($(skipper "$1"))"
517 # #local BODY_EL2="($(skipper "$1"))"
518 # local BODY_EL1="(<$1>($(anti_both "$1"))*</$1>|$(anti "<$1>" "<$2>"))"
519 # local BODY_EL2="($(skipper "$1" "$2"))"
520 # #local BODY_EL2="($(anti "$2"))"
521 # local BODY_EL3="($(skipper "$1"))"
522 # sed -r "s@<$1>(${BODY_EL1}*)<$2>(${BODY_EL2}*)</$2>(${BODY_EL3}*)</$1>@<$1>\1$3\4$4\8</$1>@g"
525 #function subtags_within
531 # subtags_within_ "$@" | subtags_within $(( N - 1 )) "$@"
537 #function subtags_atlevel__
541 # local NONINDTAG="(<<I [[:digit:]]+ [[:alpha:]]+>>($(anti "<<I " "<</I "))*<</I [[:digit:]]+ [[:alpha:]]+>>|$(anti "<<I " "<</I " "<$1>"))"
542 # local BODY_EL="($(skipper "$1"))"
543 # local NONIND="(<<I [[:digit:]]+ [[:alpha:]]+>>($(anti "<<I " "<</I "))*<</I [[:digit:]]+ [[:alpha:]]+>>|$(anti "<</I $LEV "))"
544 # sed -r "s@<<I $LEV ([[:alpha:]]+)>>(${NONINDTAG}*)<$1>(${BODY_EL}*)</$1>(${NONIND}*)<</I $LEV \1>>@<<I $LEV \1>>\2$2\5$3\8<</I $LEV \1>>@g"
547 #function subtags_atlevel_
553 # subtags_atlevel__ "$@" | subtags_atlevel_ $(( N - 1 )) "$@"
559 #function subtags_atlevel
561 # subtags_atlevel_ 10 "$@"