# Root URL of the site every index and document is downloaded from.
BASE_SITE="http://www.citadel.org"
# Retrieves an index document from the citadel.org website, and filters it
# down to one wiki link per line.
# Globals:
#   BASE_SITE (read): root URL the document is fetched from
# Arguments:
#   1: document to fetch, relative to BASE_SITE
#   2: outfile where to put the filtered content at (relative to /tmp)
# Returns:
#   non-zero when /tmp cannot be entered, the download fails, or no link
#   survives the filter.
# NOTE(review): the damaged source only preserved the `cd`, the `wget` and
# two of the sed expressions. The sed input file, the expression that cuts
# everything after the closing quote and the two grep filters are
# reconstructed from how the callers consume the result - confirm against
# the upstream script.
GetIndex()
{
  cd /tmp/ || return 1
  # wget stores the page in the current directory under the last URL
  # component, i.e. /tmp/$1.
  wget -q "${BASE_SITE}/${1}" || return 1
  grep 'doku\.php' "/tmp/${1}" \
    | sed -e 's;.*href=";;' \
          -e 's;".*;;' \
          -e 's;doku.php/;doku.php?id=;' \
    | grep '^/doku' > "/tmp/${2}"
}
# Driver: download the FAQ start page, then for every section linked from it
# fetch each document of that section and glue them into one HTML file,
# /tmp/<section name with ':' replaced by '_'>.html.

# Drop the index and the downloaded pages left behind by an earlier run.
rm -f /tmp/mainindex /tmp/doku.php*
GetIndex "doku.php?id=faq:start" mainindex || exit 1

# Read the link lists line by line: the links contain '?', which an unquoted
# `for i in $(cat ...)` would subject to pathname expansion.
while IFS= read -r i; do
  # Section name = everything after the last '=' of the link.
  TMPNAME=$(printf '%s\n' "$i" | sed "s;.*=;;")
  # GetIndex writes its result to /tmp/$TMPNAME/$TMPNAME, so the directory
  # has to exist first.
  # NOTE(review): this mkdir and the cd below were missing from the damaged
  # source; they are implied by the paths used further down - confirm.
  mkdir -p "/tmp/$TMPNAME" || exit 1
  # Skip sections whose index could not be fetched or contains no links.
  GetIndex "$i" "$TMPNAME/$TMPNAME" || continue
  # GetIndex leaves us in /tmp; the downloads and the collect_* files below
  # are relative to the section directory.
  cd "/tmp/$TMPNAME/" || exit 1

  while IFS= read -r j; do
    echo "-----------$j----------------"
    # Document id without the leading "/doku.php?id=<namespace>:" part.
    DOCUMENT_NAME=$(printf '%s\n' "$j" | sed -e "s;/doku.php?id=.*:;;")
    # Human-readable title, scraped from the section index page that
    # GetIndex downloaded to /tmp/doku*$TMPNAME.
    PLAIN_NAME=$(grep "$DOCUMENT_NAME" /tmp/doku*"$TMPNAME" | head -n1 \
      | sed -e "s;','/doku.*;;" -e "s;.*';;")

    echo "********** retrieving $DOCUMENT_NAME ************"
    wget -q "${BASE_SITE}/${j}&do=export_xhtmlbody"
    mv "/tmp/$TMPNAME/${j}&do=export_xhtmlbody" "/tmp/$TMPNAME/$DOCUMENT_NAME"

    echo "<li><a href=\"#$DOCUMENT_NAME\">$PLAIN_NAME</a></li>" >>collect_index
    echo "<a name=\"$DOCUMENT_NAME\"></a>" >>collect_bodies
    cat "$DOCUMENT_NAME" >>collect_bodies
  done < "/tmp/$TMPNAME/$TMPNAME"

  # Table of contents first, then all document bodies.
  # NOTE(review): the closing tags were missing from the damaged source and
  # are inferred from the opening ones - confirm.
  (
    echo "<html><head>$TMPNAME</head><body><ul>"
    cat "/tmp/$TMPNAME/collect_index"
    echo "</ul>"
    cat "/tmp/$TMPNAME/collect_bodies"
    echo "</body></html>"
  ) >"/tmp/$(printf '%s\n' "$TMPNAME" | sed "s;:;_;g").html"
done < /tmp/mainindex