security/nss/cmd/signtool/javascript.c

   1 /* ***** BEGIN LICENSE BLOCK *****
   2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   3  *
   4  * The contents of this file are subject to the Mozilla Public License Version
   5  * 1.1 (the "License"); you may not use this file except in compliance with
   6  * the License. You may obtain a copy of the License at
   7  * http://www.mozilla.org/MPL/
   8  *
   9  * Software distributed under the License is distributed on an "AS IS" basis,
  10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  11  * for the specific language governing rights and limitations under the
  12  * License.
  13  *
  14  * The Original Code is the Netscape security libraries.
  15  *
  16  * The Initial Developer of the Original Code is
  17  * Netscape Communications Corporation.
  18  * Portions created by the Initial Developer are Copyright (C) 1994-2000
  19  * the Initial Developer. All Rights Reserved.
  20  *
  21  * Contributor(s):
  22  *
  23  * Alternatively, the contents of this file may be used under the terms of
  24  * either the GNU General Public License Version 2 or later (the "GPL"), or
  25  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  26  * in which case the provisions of the GPL or the LGPL are applicable instead
  27  * of those above. If you wish to allow use of your version of this file only
  28  * under the terms of either the GPL or the LGPL, and not to allow others to
  29  * use your version of this file under the terms of the MPL, indicate your
  30  * decision by deleting the provisions above and replace them with the notice
  31  * and other provisions required by the GPL or the LGPL. If you do not delete
  32  * the provisions above, a recipient may use your version of this file under
  33  * the terms of any one of the MPL, the GPL or the LGPL.
  34  *
  35  * ***** END LICENSE BLOCK ***** */
  36
  37 #include "signtool.h"
  38 #include <prmem.h>
  39 #include <prio.h>
  40 #include <prenv.h>
  41
  42 static int      javascript_fn(char *relpath, char *basedir, char *reldir,
  43 char *filename, void *arg);
  44 static int      extract_js (char *filename);
  45 static int      copyinto (char *from, char *to);
  46 static PRStatus ensureExists (char *base, char *path);
  47 static int      make_dirs(char *path, PRInt32 file_perms);
  48
  49 static char     *jartree = NULL;
  50 static int      idOrdinal;
  51 static PRBool dumpParse = PR_FALSE;
  52
  53 static char     *event_handlers[] = {
  54     "onAbort",
  55     "onBlur",
  56     "onChange",
  57     "onClick",
  58     "onDblClick",
  59     "onDragDrop",
  60     "onError",
  61     "onFocus",
  62     "onKeyDown",
  63     "onKeyPress",
  64     "onKeyUp",
  65     "onLoad",
  66     "onMouseDown",
  67     "onMouseMove",
  68     "onMouseOut",
  69     "onMouseOver",
  70     "onMouseUp",
  71     "onMove",
  72     "onReset",
  73     "onResize",
  74     "onSelect",
  75     "onSubmit",
  76     "onUnload"
  77 };
  78
  79
  80 static int      num_handlers = 23;
  81
  82 /*
  83  *  I n l i n e J a v a S c r i p t
  84  *
  85  *  Javascript signing. Instead of passing an archive to signtool,
  86  *  a directory containing html files is given. Archives are created
  87  *  from the archive= and src= tag attributes inside the html,
  88  *  as appropriate. Then the archives are signed.
  89  *
  90  */
  91 int
  92 InlineJavaScript(char *dir, PRBool recurse)
  93 {
  94     jartree = dir;
  95     if (verbosity >= 0) {
  96         PR_fprintf(outputFD, "\nGenerating inline signatures from HTML files in: %s\n",
  97              dir);
  98     }
  99     if (PR_GetEnv("SIGNTOOL_DUMP_PARSE")) {
 100         dumpParse = PR_TRUE;
 101     }
 102
 103     return foreach(dir, "", javascript_fn, recurse, PR_FALSE /*include dirs*/,
 104                         (void * )NULL);
 105
 106 }
 107
 108
 109 /************************************************************************
 110  *
 111  * j a v a s c r i p t _ f n
 112  */
 113 static int      javascript_fn
 114 (char *relpath, char *basedir, char *reldir, char *filename, void *arg)
 115 {
 116     char        fullname [FNSIZE];
 117
 118     /* only process inline scripts from .htm, .html, and .shtml*/
 119
 120     if (!(PL_strcaserstr(filename, ".htm") == filename + strlen(filename) -
 121         4) &&
 122         !(PL_strcaserstr(filename, ".html") == filename + strlen(filename) -
 123         5) &&
 124         !(PL_strcaserstr(filename, ".shtml") == filename + strlen(filename)
 125         -6)) {
 126         return 0;
 127     }
 128
 129     /* don't process scripts that signtool has already
 130      extracted (those that are inside .arc directories) */
 131
 132     if (PL_strcaserstr(filename, ".arc") == filename + strlen(filename) - 4)
 133         return 0;
 134
 135     if (verbosity >= 0) {
 136         PR_fprintf(outputFD, "Processing HTML file: %s\n", relpath);
 137     }
 138
 139     /* reset firstArchive at top of each HTML file */
 140
 141     /* skip directories that contain extracted scripts */
 142
 143     if (PL_strcaserstr(reldir, ".arc") == reldir + strlen(reldir) - 4)
 144         return 0;
 145
 146     sprintf (fullname, "%s/%s", basedir, relpath);
 147     return extract_js (fullname);
 148 }
 149
 150
 151 /*===========================================================================
 152  =
 153  = D A T A   S T R U C T U R E S
 154  =
 155 */
 156 typedef enum {
 157     TEXT_HTML_STATE = 0,
 158     SCRIPT_HTML_STATE
 159 }
 160
 161
 162 HTML_STATE ;
 163
 164 typedef enum {
 165     /* we start in the start state */
 166     START_STATE,
 167
 168     /* We are looking for or reading in an attribute */
 169     GET_ATT_STATE,
 170
 171     /* We're burning ws before finding an attribute */
 172     PRE_ATT_WS_STATE,
 173
 174     /* We're burning ws after an attribute.  Looking for an '='. */
 175     POST_ATT_WS_STATE,
 176
 177     /* We're burning ws after an '=', waiting for a value */
 178     PRE_VAL_WS_STATE,
 179
 180     /* We're reading in a value */
 181     GET_VALUE_STATE,
 182
 183     /* We're reading in a value that's inside quotes */
 184     GET_QUOTED_VAL_STATE,
 185
 186     /* We've encountered the closing '>' */
 187     DONE_STATE,
 188
 189     /* Error state */
 190     ERR_STATE
 191 }
 192
 193
 194 TAG_STATE ;
 195
 196 typedef struct AVPair_Str {
 197     char        *attribute;
 198     char        *value;
 199     unsigned int        valueLine; /* the line that the value ends on */
 200     struct AVPair_Str *next;
 201 } AVPair;
 202
 203 typedef enum {
 204     APPLET_TAG,
 205     SCRIPT_TAG,
 206     LINK_TAG,
 207     STYLE_TAG,
 208     COMMENT_TAG,
 209     OTHER_TAG
 210 }
 211
 212
 213 TAG_TYPE ;
 214
 215 typedef struct {
 216     TAG_TYPE type;
 217     AVPair * attList;
 218     AVPair * attListTail;
 219     char        *text;
 220 } TagItem;
 221
 222 typedef enum {
 223     TAG_ITEM,
 224     TEXT_ITEM
 225 }
 226
 227
 228 ITEM_TYPE ;
 229
 230 typedef struct HTMLItem_Str {
 231     unsigned int        startLine;
 232     unsigned int        endLine;
 233     ITEM_TYPE type;
 234     union {
 235         TagItem *tag;
 236         char    *text;
 237     } item;
 238     struct HTMLItem_Str *next;
 239 } HTMLItem;
 240
 241 typedef struct {
 242     PRFileDesc *fd;
 243     PRInt32 curIndex;
 244     PRBool IsEOF;
 245 #define FILE_BUFFER_BUFSIZE 512
 246     char        buf[FILE_BUFFER_BUFSIZE];
 247     PRInt32 startOffset;
 248     PRInt32 maxIndex;
 249     unsigned int        lineNum;
 250 } FileBuffer;
 251
 252 /*===========================================================================
 253  =
 254  = F U N C T I O N S
 255  =
 256 */
 257 static HTMLItem*CreateTextItem(char *text, unsigned int startline,
 258 unsigned int endline);
 259 static HTMLItem*CreateTagItem(TagItem*ti, unsigned int startline,
 260 unsigned int endline);
 261 static TagItem*ProcessTag(FileBuffer*fb, char **errStr);
 262 static void     DestroyHTMLItem(HTMLItem *item);
 263 static void     DestroyTagItem(TagItem*ti);
 264 static TAG_TYPE GetTagType(char *att);
 265 static FileBuffer*FB_Create(PRFileDesc*fd);
 266 static int      FB_GetChar(FileBuffer *fb);
 267 static PRInt32 FB_GetPointer(FileBuffer *fb);
 268 static PRInt32 FB_GetRange(FileBuffer *fb, PRInt32 start, PRInt32 end,
 269 char **buf);
 270 static unsigned int     FB_GetLineNum(FileBuffer *fb);
 271 static void     FB_Destroy(FileBuffer *fb);
 272 static void     PrintTagItem(PRFileDesc *fd, TagItem *ti);
 273 static void     PrintHTMLStream(PRFileDesc *fd, HTMLItem *head);
 274
 275 /************************************************************************
 276  *
 277  * C r e a t e T e x t I t e m
 278  */
 279 static HTMLItem*
 280 CreateTextItem(char *text, unsigned int startline, unsigned int endline)
 281 {
 282     HTMLItem * item;
 283
 284     item = PR_Malloc(sizeof(HTMLItem));
 285     if (!item) {
 286         return NULL;
 287     }
 288
 289     item->type = TEXT_ITEM;
 290     item->item.text = text;
 291     item->next = NULL;
 292     item->startLine = startline;
 293     item->endLine = endline;
 294
 295     return item;
 296 }
 297
 298
 299 /************************************************************************
 300  *
 301  * C r e a t e T a g I t e m
 302  */
 303 static HTMLItem*
 304 CreateTagItem(TagItem*ti, unsigned int startline, unsigned int endline)
 305 {
 306     HTMLItem * item;
 307
 308     item = PR_Malloc(sizeof(HTMLItem));
 309     if (!item) {
 310         return NULL;
 311     }
 312
 313     item->type = TAG_ITEM;
 314     item->item.tag = ti;
 315     item->next = NULL;
 316     item->startLine = startline;
 317     item->endLine = endline;
 318
 319     return item;
 320 }
 321
 322
 323 static PRBool
 324 isAttChar(int c)
 325 {
 326     return (isalnum(c) || c == '/' || c == '-');
 327 }
 328
 329
 330 /************************************************************************
 331  *
 332  * P r o c e s s T a g
 333  */
 334 static TagItem*
 335 ProcessTag(FileBuffer*fb, char **errStr)
 336 {
 337     TAG_STATE state;
 338     PRInt32 startText, startID, curPos;
 339     PRBool firstAtt;
 340     int curchar;
 341     TagItem * ti = NULL;
 342     AVPair * curPair = NULL;
 343     char        quotechar = '\0';
 344     unsigned int        linenum;
 345     unsigned int        startline;
 346
 347     state = START_STATE;
 348
 349     startID = FB_GetPointer(fb);
 350     startText = startID;
 351     firstAtt = PR_TRUE;
 352
 353     ti = (TagItem * ) PR_Malloc(sizeof(TagItem));
 354     if (!ti)
 355         out_of_memory();
 356     ti->type = OTHER_TAG;
 357     ti->attList = NULL;
 358     ti->attListTail = NULL;
 359     ti->text = NULL;
 360
 361     startline = FB_GetLineNum(fb);
 362
 363     while (state != DONE_STATE && state != ERR_STATE) {
 364         linenum = FB_GetLineNum(fb);
 365         curchar = FB_GetChar(fb);
 366         if (curchar == EOF) {
 367             *errStr = PR_smprintf(
 368                 "line %d: Unexpected end-of-file while parsing tag starting at line %d.\n",
 369                  linenum, startline);
 370             state = ERR_STATE;
 371             continue;
 372         }
 373
 374         switch (state) {
 375         case START_STATE:
 376             if (curchar == '!') {
 377                 /*
 378                  * SGML tag or comment
 379                  * Here's the general rule for SGML tags.  Everything from
 380                  * <! to > is the tag.  Inside the tag, comments are
 381                  * delimited with --.  So we are looking for the first '>'
 382                  * that is not commented out, that is, not inside a pair
 383                  * of --: <!DOCTYPE --this is a comment >(psyche!)   -->
 384                  */
 385
 386                 PRBool inComment = PR_FALSE;
 387                 short   hyphenCount = 0; /* number of consecutive hyphens */
 388
 389                 while (1) {
 390                     linenum = FB_GetLineNum(fb);
 391                     curchar = FB_GetChar(fb);
 392                     if (curchar == EOF) {
 393                         /* Uh oh, EOF inside comment */
 394                         *errStr = PR_smprintf(
 395     "line %d: Unexpected end-of-file inside comment starting at line %d.\n",
 396                                                 linenum, startline);
 397                         state = ERR_STATE;
 398                         break;
 399                     }
 400                     if (curchar == '-') {
 401                         if (hyphenCount == 1) {
 402                             /* This is a comment delimiter */
 403                             inComment = !inComment;
 404                             hyphenCount = 0;
 405                         } else {
 406                             /* beginning of a comment delimiter? */
 407                             hyphenCount = 1;
 408                         }
 409                     } else if (curchar == '>') {
 410                         if (!inComment) {
 411                             /* This is the end of the tag */
 412                             state = DONE_STATE;
 413                             break;
 414                         } else {
 415                             /* The > is inside a comment, so it's not
 416                                                          * really the end of the tag */
 417                             hyphenCount = 0;
 418                         }
 419                     } else {
 420                         hyphenCount = 0;
 421                     }
 422                 }
 423                 ti->type = COMMENT_TAG;
 424                 break;
 425             }
 426             /* fall through */
 427         case GET_ATT_STATE:
 428             if (isspace(curchar) || curchar == '=' || curchar
 429                 == '>') {
 430                 /* end of the current attribute */
 431                 curPos = FB_GetPointer(fb) - 2;
 432                 if (curPos >= startID) {
 433                     /* We have an attribute */
 434                     curPair = (AVPair * )PR_Malloc(sizeof(AVPair));
 435                     if (!curPair)
 436                         out_of_memory();
 437                     curPair->value = NULL;
 438                     curPair->next = NULL;
 439                     FB_GetRange(fb, startID, curPos,
 440                         &curPair->attribute);
 441
 442                     /* Stick this attribute on the list */
 443                     if (ti->attListTail) {
 444                         ti->attListTail->next = curPair;
 445                         ti->attListTail = curPair;
 446                     } else {
 447                         ti->attList = ti->attListTail =
 448                             curPair;
 449                     }
 450
 451                     /* If this is the first attribute, find the type of tag
 452                      * based on it. Also, start saving the text of the tag. */
 453                     if (firstAtt) {
 454                         ti->type = GetTagType(curPair->attribute);
 455                         startText = FB_GetPointer(fb)
 456                             -1;
 457                         firstAtt = PR_FALSE;
 458                     }
 459                 } else {
 460                     if (curchar == '=') {
 461                         /* If we don't have any attribute but we do have an
 462                          * equal sign, that's an error */
 463                         *errStr = PR_smprintf("line %d: Malformed tag starting at line %d.\n",
 464                              linenum, startline);
 465                         state = ERR_STATE;
 466                         break;
 467                     }
 468                 }
 469
 470                 /* Compute next state */
 471                 if (curchar == '=') {
 472                     startID = FB_GetPointer(fb);
 473                     state = PRE_VAL_WS_STATE;
 474                 } else if (curchar == '>') {
 475                     state = DONE_STATE;
 476                 } else if (curPair) {
 477                     state = POST_ATT_WS_STATE;
 478                 } else {
 479                     state = PRE_ATT_WS_STATE;
 480                 }
 481             } else if (isAttChar(curchar)) {
 482                 /* Just another char in the attribute. Do nothing */
 483                 state = GET_ATT_STATE;
 484             } else {
 485                 /* bogus char */
 486                 *errStr = PR_smprintf("line %d: Bogus chararacter '%c' in tag.\n",
 487                                         linenum, curchar);
 488                 state = ERR_STATE;
 489                 break;
 490             }
 491             break;
 492         case PRE_ATT_WS_STATE:
 493             if (curchar == '>') {
 494                 state = DONE_STATE;
 495             } else if (isspace(curchar)) {
 496                 /* more whitespace, do nothing */
 497             } else if (isAttChar(curchar)) {
 498                 /* starting another attribute */
 499                 startID = FB_GetPointer(fb) - 1;
 500                 state = GET_ATT_STATE;
 501             } else {
 502                 /* bogus char */
 503                 *errStr = PR_smprintf("line %d: Bogus character '%c' in tag.\n",
 504                                         linenum, curchar);
 505                 state = ERR_STATE;
 506                 break;
 507             }
 508             break;
 509         case POST_ATT_WS_STATE:
 510             if (curchar == '>') {
 511                 state = DONE_STATE;
 512             } else if (isspace(curchar)) {
 513                 /* more whitespace, do nothing */
 514             } else if (isAttChar(curchar)) {
 515                 /* starting another attribute */
 516                 startID = FB_GetPointer(fb) - 1;
 517                 state = GET_ATT_STATE;
 518             } else if (curchar == '=') {
 519                 /* there was whitespace between the attribute and its equal
 520                  * sign, which means there's a value coming up */
 521                 state = PRE_VAL_WS_STATE;
 522             } else {
 523                 /* bogus char */
 524                 *errStr = PR_smprintf("line %d: Bogus character '%c' in tag.\n",
 525                                                         linenum, curchar);
 526                 state = ERR_STATE;
 527                 break;
 528             }
 529             break;
 530         case PRE_VAL_WS_STATE:
 531             if (curchar == '>') {
 532                 /* premature end-of-tag (sounds like a personal problem). */
 533                 *errStr = PR_smprintf(
 534                     "line %d: End of tag while waiting for value.\n",
 535                      linenum);
 536                 state = ERR_STATE;
 537                 break;
 538             } else if (isspace(curchar)) {
 539                 /* more whitespace, do nothing */
 540                 break;
 541             } else {
 542                 /* this must be some sort of value. Fall through
 543                                  * to GET_VALUE_STATE */
 544                 startID = FB_GetPointer(fb) - 1;
 545                 state = GET_VALUE_STATE;
 546             }
 547             /* Fall through if we didn't break on '>' or whitespace */
 548         case GET_VALUE_STATE:
 549             if (isspace(curchar) || curchar == '>') {
 550                 /* end of value */
 551                 curPos = FB_GetPointer(fb) - 2;
 552                 if (curPos >= startID) {
 553                     /* Grab the value */
 554                     FB_GetRange(fb, startID, curPos,
 555                         &curPair->value);
 556                     curPair->valueLine = linenum;
 557                 } else {
 558                     /* empty value, leave as NULL */
 559                 }
 560                 if (isspace(curchar)) {
 561                     state = PRE_ATT_WS_STATE;
 562                 } else {
 563                     state = DONE_STATE;
 564                 }
 565             } else if (curchar == '\"' || curchar == '\'') {
 566                 /* quoted value.  Start recording the value inside the quote*/
 567                 startID = FB_GetPointer(fb);
 568                 state = GET_QUOTED_VAL_STATE;
 569                 PORT_Assert(quotechar == '\0');
 570                 quotechar = curchar; /* look for matching quote type */
 571             } else {
 572                 /* just more value */
 573             }
 574             break;
 575         case GET_QUOTED_VAL_STATE:
 576             PORT_Assert(quotechar != '\0');
 577             if (curchar == quotechar) {
 578                 /* end of quoted value */
 579                 curPos = FB_GetPointer(fb) - 2;
 580                 if (curPos >= startID) {
 581                     /* Grab the value */
 582                     FB_GetRange(fb, startID, curPos,
 583                         &curPair->value);
 584                     curPair->valueLine = linenum;
 585                 } else {
 586                     /* empty value, leave it as NULL */
 587                 }
 588                 state = GET_ATT_STATE;
 589                 quotechar = '\0';
 590                 startID = FB_GetPointer(fb);
 591             } else {
 592                 /* more quoted value, continue */
 593             }
 594             break;
 595         case DONE_STATE:
 596         case ERR_STATE:
 597         default:
 598             ; /* should never get here */
 599         }
 600     }
 601
 602     if (state == DONE_STATE) {
 603         /* Get the text of the tag */
 604         curPos = FB_GetPointer(fb) - 1;
 605         FB_GetRange(fb, startText, curPos, &ti->text);
 606
 607         /* Return the tag */
 608         return ti;
 609     }
 610
 611     /* Uh oh, an error.  Kill the tag item*/
 612     DestroyTagItem(ti);
 613     return NULL;
 614 }
 615
 616
 617 /************************************************************************
 618  *
 619  * D e s t r o y H T M L I t e m
 620  */
 621 static void
 622 DestroyHTMLItem(HTMLItem *item)
 623 {
 624     if (item->type == TAG_ITEM) {
 625         DestroyTagItem(item->item.tag);
 626     } else {
 627         if (item->item.text) {
 628             PR_Free(item->item.text);
 629         }
 630     }
 631 }
 632
 633
 634 /************************************************************************
 635  *
 636  * D e s t r o y T a g I t e m
 637  */
 638 static void
 639 DestroyTagItem(TagItem*ti)
 640 {
 641     AVPair * temp;
 642
 643     if (ti->text) {
 644         PR_Free(ti->text);
 645         ti->text = NULL;
 646     }
 647
 648     while (ti->attList) {
 649         temp = ti->attList;
 650         ti->attList = ti->attList->next;
 651
 652         if (temp->attribute) {
 653             PR_Free(temp->attribute);
 654             temp->attribute = NULL;
 655         }
 656         if (temp->value) {
 657             PR_Free(temp->value);
 658             temp->value = NULL;
 659         }
 660         PR_Free(temp);
 661     }
 662
 663     PR_Free(ti);
 664 }
 665
 666
 667 /************************************************************************
 668  *
 669  * G e t T a g T y p e
 670  */
 671 static TAG_TYPE
 672 GetTagType(char *att)
 673 {
 674     if (!PORT_Strcasecmp(att, "APPLET")) {
 675         return APPLET_TAG;
 676     }
 677     if (!PORT_Strcasecmp(att, "SCRIPT")) {
 678         return SCRIPT_TAG;
 679     }
 680     if (!PORT_Strcasecmp(att, "LINK")) {
 681         return LINK_TAG;
 682     }
 683     if (!PORT_Strcasecmp(att, "STYLE")) {
 684         return STYLE_TAG;
 685     }
 686     return OTHER_TAG;
 687 }
 688
 689
 690 /************************************************************************
 691  *
 692  * F B _ C r e a t e
 693  */
 694 static FileBuffer*
 695 FB_Create(PRFileDesc*fd)
 696 {
 697     FileBuffer * fb;
 698     PRInt32 amountRead;
 699     PRInt32 storedOffset;
 700
 701     fb = (FileBuffer * ) PR_Malloc(sizeof(FileBuffer));
 702     fb->fd = fd;
 703     storedOffset = PR_Seek(fd, 0, PR_SEEK_CUR);
 704     PR_Seek(fd, 0, PR_SEEK_SET);
 705     fb->startOffset = 0;
 706     amountRead = PR_Read(fd, fb->buf, FILE_BUFFER_BUFSIZE);
 707     if (amountRead == -1)
 708         goto loser;
 709     fb->maxIndex = amountRead - 1;
 710     fb->curIndex = 0;
 711     fb->IsEOF = (fb->curIndex > fb->maxIndex) ? PR_TRUE : PR_FALSE;
 712     fb->lineNum = 1;
 713
 714     PR_Seek(fd, storedOffset, PR_SEEK_SET);
 715     return fb;
 716 loser:
 717     PR_Seek(fd, storedOffset, PR_SEEK_SET);
 718     PR_Free(fb);
 719     return NULL;
 720 }
 721
 722
 723 /************************************************************************
 724  *
 725  * F B _ G e t C h a r
 726  */
 727 static int
 728 FB_GetChar(FileBuffer *fb)
 729 {
 730     PRInt32 storedOffset;
 731     PRInt32 amountRead;
 732     int retval = -1;
 733
 734     if (fb->IsEOF) {
 735         return EOF;
 736     }
 737
 738     storedOffset = PR_Seek(fb->fd, 0, PR_SEEK_CUR);
 739
 740     retval = (unsigned char) fb->buf[fb->curIndex++];
 741     if (retval == '\n')
 742         fb->lineNum++;
 743
 744     if (fb->curIndex > fb->maxIndex) {
 745         /* We're at the end of the buffer. Try to get some new data from the
 746                  * file */
 747         fb->startOffset += fb->maxIndex + 1;
 748         PR_Seek(fb->fd, fb->startOffset, PR_SEEK_SET);
 749         amountRead = PR_Read(fb->fd, fb->buf, FILE_BUFFER_BUFSIZE);
 750         if (amountRead == -1)
 751             goto loser;
 752         fb->maxIndex = amountRead - 1;
 753         fb->curIndex = 0;
 754     }
 755
 756     fb->IsEOF = (fb->curIndex > fb->maxIndex) ? PR_TRUE : PR_FALSE;
 757
 758 loser:
 759     PR_Seek(fb->fd, storedOffset, PR_SEEK_SET);
 760     return retval;
 761 }
 762
 763
 764 /************************************************************************
 765  *
 766  * F B _ G e t L i n e N u m
 767  *
 768  */
 769 static unsigned int
 770 FB_GetLineNum(FileBuffer *fb)
 771 {
 772     return fb->lineNum;
 773 }
 774
 775
 776 /************************************************************************
 777  *
 778  * F B _ G e t P o i n t e r
 779  *
 780  */
 781 static PRInt32
 782 FB_GetPointer(FileBuffer *fb)
 783 {
 784     return fb->startOffset + fb->curIndex;
 785 }
 786
 787
 788 /************************************************************************
 789  *
 790  * F B _ G e t R a n g e
 791  *
 792  */
 793 static PRInt32
 794 FB_GetRange(FileBuffer *fb, PRInt32 start, PRInt32 end, char **buf)
 795 {
 796     PRInt32 amountRead;
 797     PRInt32 storedOffset;
 798
 799     *buf = PR_Malloc(end - start + 2);
 800     if (*buf == NULL) {
 801         return 0;
 802     }
 803
 804     storedOffset = PR_Seek(fb->fd, 0, PR_SEEK_CUR);
 805     PR_Seek(fb->fd, start, PR_SEEK_SET);
 806     amountRead = PR_Read(fb->fd, *buf, end - start + 1);
 807     PR_Seek(fb->fd, storedOffset, PR_SEEK_SET);
 808     if (amountRead == -1) {
 809         PR_Free(*buf);
 810         *buf = NULL;
 811         return 0;
 812     }
 813
 814     (*buf)[end-start+1] = '\0';
 815     return amountRead;
 816 }
 817
 818
 819 /************************************************************************
 820  *
 821  * F B _ D e s t r o y
 822  *
 823  */
 824 static void
 825 FB_Destroy(FileBuffer *fb)
 826 {
 827     if (fb) {
 828         PR_Free(fb);
 829     }
 830 }
 831
 832
 833 /************************************************************************
 834  *
 835  * P r i n t T a g I t e m
 836  *
 837  */
 838 static void
 839 PrintTagItem(PRFileDesc *fd, TagItem *ti)
 840 {
 841     AVPair * pair;
 842
 843     PR_fprintf(fd, "TAG:\n----\nType: ");
 844     switch (ti->type) {
 845     case APPLET_TAG:
 846         PR_fprintf(fd, "applet\n");
 847         break;
 848     case SCRIPT_TAG:
 849         PR_fprintf(fd, "script\n");
 850         break;
 851     case LINK_TAG:
 852         PR_fprintf(fd, "link\n");
 853         break;
 854     case STYLE_TAG:
 855         PR_fprintf(fd, "style\n");
 856         break;
 857     case COMMENT_TAG:
 858         PR_fprintf(fd, "comment\n");
 859         break;
 860     case OTHER_TAG:
 861     default:
 862         PR_fprintf(fd, "other\n");
 863         break;
 864     }
 865
 866     PR_fprintf(fd, "Attributes:\n");
 867     for (pair = ti->attList; pair; pair = pair->next) {
 868         PR_fprintf(fd, "\t%s=%s\n", pair->attribute,
 869             pair->value ? pair->value : "");
 870     }
 871     PR_fprintf(fd, "Text:%s\n", ti->text ? ti->text : "");
 872
 873     PR_fprintf(fd, "---End of tag---\n");
 874 }
 875
 876
 877 /************************************************************************
 878  *
 879  * P r i n t H T M L S t r e a m
 880  *
 881  */
 882 static void
 883 PrintHTMLStream(PRFileDesc *fd, HTMLItem *head)
 884 {
 885     while (head) {
 886         if (head->type == TAG_ITEM) {
 887             PrintTagItem(fd, head->item.tag);
 888         } else {
 889             PR_fprintf(fd, "\nTEXT:\n-----\n%s\n-----\n\n", head->item.text);
 890         }
 891         head = head->next;
 892     }
 893 }
 894
 895
 896 /************************************************************************
 897  *
 898  * S a v e I n l i n e S c r i p t
 899  *
 900  */
 901 static int
 902 SaveInlineScript(char *text, char *id, char *basedir, char *archiveDir)
 903 {
 904     char        *filename = NULL;
 905     PRFileDesc * fd = NULL;
 906     int retval = -1;
 907     PRInt32 writeLen;
 908     char        *ilDir = NULL;
 909
 910     if (!text || !id || !archiveDir) {
 911         return - 1;
 912     }
 913
 914     if (dumpParse) {
 915         PR_fprintf(outputFD, "SaveInlineScript: text=%s, id=%s, \n"
 916             "basedir=%s, archiveDir=%s\n",
 917             text, id, basedir, archiveDir);
 918     }
 919
 920     /* Make sure the archive directory is around */
 921     if (ensureExists(basedir, archiveDir) != PR_SUCCESS) {
 922         PR_fprintf(errorFD,
 923             "ERROR: Unable to create archive directory %s.\n", archiveDir);
 924         errorCount++;
 925         return - 1;
 926     }
 927
 928     /* Make sure the inline script directory is around */
 929     ilDir = PR_smprintf("%s/inlineScripts", archiveDir);
 930     scriptdir = "inlineScripts";
 931     if (ensureExists(basedir, ilDir) != PR_SUCCESS) {
 932         PR_fprintf(errorFD,
 933             "ERROR: Unable to create directory %s.\n", ilDir);
 934         errorCount++;
 935         return - 1;
 936     }
 937
 938     filename = PR_smprintf("%s/%s/%s", basedir, ilDir, id);
 939
 940     /* If the file already exists, give a warning, then blow it away */
 941     if (PR_Access(filename, PR_ACCESS_EXISTS) == PR_SUCCESS) {
 942         PR_fprintf(errorFD,
 943             "warning: file \"%s\" already exists--will overwrite.\n",
 944                                 filename);
 945         warningCount++;
 946         if (rm_dash_r(filename)) {
 947             PR_fprintf(errorFD, "ERROR: Unable to delete %s.\n", filename);
 948             errorCount++;
 949             goto finish;
 950         }
 951     }
 952
 953     /* Write text into file with name id */
 954     fd = PR_Open(filename, PR_WRONLY | PR_CREATE_FILE | PR_TRUNCATE, 0777);
 955     if (!fd) {
 956         PR_fprintf(errorFD, "ERROR: Unable to create file \"%s\".\n",
 957                                 filename);
 958         errorCount++;
 959         goto finish;
 960     }
 961     writeLen = strlen(text);
 962     if ( PR_Write(fd, text, writeLen) != writeLen) {
 963         PR_fprintf(errorFD, "ERROR: Unable to write to file \"%s\".\n",
 964                                 filename);
 965         errorCount++;
 966         goto finish;
 967     }
 968
 969     retval = 0;
 970 finish:
 971     if (filename) {
 972         PR_smprintf_free(filename);
 973     }
 974     if (ilDir) {
 975         PR_smprintf_free(ilDir);
 976     }
 977     if (fd) {
 978         PR_Close(fd);
 979     }
 980     return retval;
 981 }
 982
 983
 984 /************************************************************************
 985  *
 986  * S a v e U n n a m a b l e S c r i p t
 987  *
 988  */
 989 static int
 990 SaveUnnamableScript(char *text, char *basedir, char *archiveDir,
 991 char *HTMLfilename)
 992 {
 993     char        *id = NULL;
 994     char        *ext = NULL;
 995     char        *start = NULL;
 996     int retval = -1;
 997
 998     if (!text || !archiveDir || !HTMLfilename) {
 999         return - 1;
1000     }
1001
1002     if (dumpParse) {
1003         PR_fprintf(outputFD, "SaveUnnamableScript: text=%s, basedir=%s,\n"
1004             "archiveDir=%s, filename=%s\n", text, basedir, archiveDir,
1005                                 HTMLfilename);
1006     }
1007
1008     /* Construct the filename */
1009     ext = PL_strrchr(HTMLfilename, '.');
1010     if (ext) {
1011         *ext = '\0';
1012     }
1013     for (start = HTMLfilename; strpbrk(start, "/\\");
1014          start = strpbrk(start, "/\\") + 1)
1015         /* do nothing */;
1016     if (*start == '\0')
1017         start = HTMLfilename;
1018     id = PR_smprintf("_%s%d", start, idOrdinal++);
1019     if (ext) {
1020         *ext = '.';
1021     }
1022
1023     /* Now call SaveInlineScript to do the work */
1024     retval = SaveInlineScript(text, id, basedir, archiveDir);
1025
1026     PR_Free(id);
1027
1028     return retval;
1029 }
1030
1031
1032 /************************************************************************
1033  *
1034  * S a v e S o u r c e
1035  *
1036  */
1037 static int
1038 SaveSource(char *src, char *codebase, char *basedir, char *archiveDir)
1039 {
1040     char        *from = NULL, *to = NULL;
1041     int retval = -1;
1042     char        *arcDir = NULL;
1043
1044     if (!src || !archiveDir) {
1045         return - 1;
1046     }
1047
1048     if (dumpParse) {
1049         PR_fprintf(outputFD, "SaveSource: src=%s, codebase=%s, basedir=%s,\n"
1050             "archiveDir=%s\n", src, codebase, basedir, archiveDir);
1051     }
1052
1053     if (codebase) {
1054         arcDir = PR_smprintf("%s/%s/%s/", basedir, codebase, archiveDir);
1055     } else {
1056         arcDir = PR_smprintf("%s/%s/", basedir, archiveDir);
1057     }
1058
1059     if (codebase) {
1060         from = PR_smprintf("%s/%s/%s", basedir, codebase, src);
1061         to = PR_smprintf("%s%s", arcDir, src);
1062     } else {
1063         from = PR_smprintf("%s/%s", basedir, src);
1064         to = PR_smprintf("%s%s", arcDir, src);
1065     }
1066
1067     if (make_dirs(to, 0777)) {
1068         PR_fprintf(errorFD,
1069             "ERROR: Unable to create archive directory %s.\n", archiveDir);
1070         errorCount++;
1071         goto finish;
1072     }
1073
1074     retval = copyinto(from, to);
1075 finish:
1076     if (from)
1077         PR_Free(from);
1078     if (to)
1079         PR_Free(to);
1080     if (arcDir)
1081         PR_Free(arcDir);
1082     return retval;
1083 }
1084
1085
1086 /************************************************************************
1087  *
1088  * T a g T y p e T o S t r i n g
1089  *
1090  */
1091 char    *
1092 TagTypeToString(TAG_TYPE type)
1093 {
1094     switch (type) {
1095     case APPLET_TAG:
1096         return "APPLET";
1097     case SCRIPT_TAG:
1098         return "SCRIPT";
1099     case LINK_TAG:
1100         return "LINK";
1101     case STYLE_TAG:
1102         return "STYLE";
1103     default:
1104         break;
1105     }
1106     return "unknown";
1107 }
1108
1109
1110 /************************************************************************
1111  *
1112  * e x t r a c t _ j s
1113  *
1114  */
1115 static int
1116 extract_js(char *filename)
1117 {
1118     PRFileDesc * fd = NULL;
1119     FileBuffer * fb = NULL;
1120     HTMLItem * head = NULL;
1121     HTMLItem * tail = NULL;
1122     HTMLItem * curitem = NULL;
1123     HTMLItem * styleList        = NULL;
1124     HTMLItem * styleListTail    = NULL;
1125     HTMLItem * entityList       = NULL;
1126     HTMLItem * entityListTail   = NULL;
1127     TagItem * tagp = NULL;
1128     char        *text = NULL;
1129     char        *tagerr = NULL;
1130     char        *archiveDir = NULL;
1131     char        *firstArchiveDir = NULL;
1132     char        *basedir = NULL;
1133     PRInt32    textStart;
1134     PRInt32    curOffset;
1135     HTML_STATE state;
1136     int        curchar;
1137     int        retval = -1;
1138     unsigned int linenum, startLine;
1139
1140     /* Initialize the implicit ID counter for each file */
1141     idOrdinal = 0;
1142
1143     /*
1144      * First, parse the HTML into a stream of tags and text.
1145      */
1146
1147     fd = PR_Open(filename, PR_RDONLY, 0);
1148     if (!fd) {
1149         PR_fprintf(errorFD, "Unable to open %s for reading.\n", filename);
1150         errorCount++;
1151         return - 1;
1152     }
1153
1154     /* Construct base directory of filename. */
1155      {
1156         char    *cp;
1157
1158         basedir = PL_strdup(filename);
1159
1160         /* Remove trailing slashes */
1161         while ( (cp = PL_strprbrk(basedir, "/\\")) ==
1162             (basedir + strlen(basedir) - 1)) {
1163             *cp = '\0';
1164         }
1165
1166         /* Now remove everything from the last slash (which will be followed
1167          * by a filename) to the end */
1168         cp = PL_strprbrk(basedir, "/\\");
1169         if (cp) {
1170             *cp = '\0';
1171         }
1172     }
1173
1174     state = TEXT_HTML_STATE;
1175
1176     fb = FB_Create(fd);
1177
1178     textStart = 0;
1179     startLine = 0;
1180     while (linenum = FB_GetLineNum(fb), (curchar = FB_GetChar(fb)) !=
1181         EOF) {
1182         switch (state) {
1183         case TEXT_HTML_STATE:
1184             if (curchar == '<') {
1185                 /*
1186                  * Found a tag
1187                  */
1188                 /* Save the text so far to a new text item */
1189                 curOffset = FB_GetPointer(fb) - 2;
1190                 if (curOffset >= textStart) {
1191                     if (FB_GetRange(fb, textStart, curOffset,
1192                          &text) !=
1193                         curOffset - textStart + 1)  {
1194                         PR_fprintf(errorFD,
1195                             "Unable to read from %s.\n",
1196                              filename);
1197                         errorCount++;
1198                         goto loser;
1199                     }
1200                     /* little fudge here.  If the first character on a line
1201                      * is '<', meaning a new tag, the preceding text item
1202                      * actually ends on the previous line.  In this case
1203                      * we will be saying that the text segment ends on the
1204                      * next line. I don't think this matters for text items. */
1205                     curitem = CreateTextItem(text, startLine,
1206                          linenum);
1207                     text = NULL;
1208                     if (tail == NULL) {
1209                         head = tail = curitem;
1210                     } else {
1211                         tail->next = curitem;
1212                         tail = curitem;
1213                     }
1214                 }
1215
1216                 /* Process the tag */
1217                 tagp = ProcessTag(fb, &tagerr);
1218                 if (!tagp) {
1219                     if (tagerr) {
1220                         PR_fprintf(errorFD, "Error in file %s: %s\n",
1221                                                   filename, tagerr);
1222                         errorCount++;
1223                     } else {
1224                         PR_fprintf(errorFD,
1225                             "Error in file %s, in tag starting at line %d\n",
1226                                                   filename, linenum);
1227                         errorCount++;
1228                     }
1229                     goto loser;
1230                 }
1231                 /* Add the tag to the list */
1232                 curitem = CreateTagItem(tagp, linenum, FB_GetLineNum(fb));
1233                 if (tail == NULL) {
1234                     head = tail = curitem;
1235                 } else {
1236                     tail->next = curitem;
1237                     tail = curitem;
1238                 }
1239
1240                 /* What's the next state */
1241                 if (tagp->type == SCRIPT_TAG) {
1242                     state = SCRIPT_HTML_STATE;
1243                 }
1244
1245                 /* Start recording text from the new offset */
1246                 textStart = FB_GetPointer(fb);
1247                 startLine = FB_GetLineNum(fb);
1248             } else {
1249                 /* regular character.  Next! */
1250             }
1251             break;
1252         case SCRIPT_HTML_STATE:
1253             if (curchar == '<') {
1254                 char    *cp;
1255                 /*
1256                  * If this is a </script> tag, then we're at the end of the
1257                  * script.  Otherwise, ignore
1258                  */
1259                 curOffset = FB_GetPointer(fb) - 1;
1260                 cp = NULL;
1261                 if (FB_GetRange(fb, curOffset, curOffset + 8, &cp) != 9) {
1262                     if (cp) {
1263                         PR_Free(cp);
1264                         cp = NULL;
1265                     }
1266                 } else {
1267                     /* compare the strings */
1268                     if ( !PORT_Strncasecmp(cp, "</script>", 9) ) {
1269                         /* This is the end of the script. Record the text. */
1270                         curOffset--;
1271                         if (curOffset >= textStart) {
1272                             if (FB_GetRange(fb, textStart, curOffset, &text) !=
1273                                 curOffset - textStart + 1) {
1274                                 PR_fprintf(errorFD, "Unable to read from %s.\n",
1275                                      filename);
1276                                 errorCount++;
1277                                 goto loser;
1278                             }
1279                             curitem = CreateTextItem(text, startLine, linenum);
1280                             text = NULL;
1281                             if (tail == NULL) {
1282                                 head = tail = curitem;
1283                             } else {
1284                                 tail->next = curitem;
1285                                 tail = curitem;
1286                             }
1287                         }
1288
1289                         /* Now parse the /script tag and put it on the list */
1290                         tagp = ProcessTag(fb, &tagerr);
1291                         if (!tagp) {
1292                             if (tagerr) {
1293                                 PR_fprintf(errorFD, "Error in file %s: %s\n",
1294                                      filename, tagerr);
1295                             } else {
1296                                 PR_fprintf(errorFD,
1297                                     "Error in file %s, in tag starting at"
1298                                     " line %d\n", filename, linenum);
1299                             }
1300                             errorCount++;
1301                             goto loser;
1302                         }
1303                         curitem = CreateTagItem(tagp, linenum,
1304                                                 FB_GetLineNum(fb));
1305                         if (tail == NULL) {
1306                             head = tail = curitem;
1307                         } else {
1308                             tail->next = curitem;
1309                             tail = curitem;
1310                         }
1311
1312                         /* go back to text state */
1313                         state = TEXT_HTML_STATE;
1314
1315                         textStart = FB_GetPointer(fb);
1316                         startLine = FB_GetLineNum(fb);
1317                     }
1318                 }
1319             }
1320             break;
1321         }
1322     }
1323
1324     /* End of the file.  Wrap up any remaining text */
1325     if (state == SCRIPT_HTML_STATE) {
1326         if (tail && tail->type == TAG_ITEM) {
1327             PR_fprintf(errorFD, "ERROR: <SCRIPT> tag at %s:%d is not followed "
1328                 "by a </SCRIPT> tag.\n", filename, tail->startLine);
1329         } else {
1330             PR_fprintf(errorFD, "ERROR: <SCRIPT> tag in file %s is not followed"
1331                 " by a </SCRIPT tag.\n", filename);
1332         }
1333         errorCount++;
1334         goto loser;
1335     }
1336     curOffset = FB_GetPointer(fb) - 1;
1337     if (curOffset >= textStart) {
1338         text = NULL;
1339         if ( FB_GetRange(fb, textStart, curOffset, &text) !=
1340             curOffset - textStart + 1) {
1341             PR_fprintf(errorFD, "Unable to read from %s.\n", filename);
1342             errorCount++;
1343             goto loser;
1344         }
1345         curitem = CreateTextItem(text, startLine, linenum);
1346         text = NULL;
1347         if (tail == NULL) {
1348             head = tail = curitem;
1349         } else {
1350             tail->next = curitem;
1351             tail = curitem;
1352         }
1353     }
1354
1355     if (dumpParse) {
1356         PrintHTMLStream(outputFD, head);
1357     }
1358
1359     /*
1360      * Now we have a stream of tags and text.  Go through and deal with each.
1361      */
1362     for (curitem = head; curitem; curitem = curitem->next) {
1363         TagItem * tagp = NULL;
1364         AVPair * pairp = NULL;
1365         char    *src = NULL, *id = NULL, *codebase = NULL;
1366         PRBool hasEventHandler = PR_FALSE;
1367         int     i;
1368
1369         /* Reset archive directory for each tag */
1370         if (archiveDir) {
1371             PR_Free(archiveDir);
1372             archiveDir = NULL;
1373         }
1374
1375         /* We only analyze tags */
1376         if (curitem->type != TAG_ITEM) {
1377             continue;
1378         }
1379
1380         tagp = curitem->item.tag;
1381
1382         /* go through the attributes to get information */
1383         for (pairp = tagp->attList; pairp; pairp = pairp->next) {
1384
1385             /* ARCHIVE= */
1386             if ( !PL_strcasecmp(pairp->attribute, "archive")) {
1387                 if (archiveDir) {
1388                     /* Duplicate attribute.  Print warning */
1389                     PR_fprintf(errorFD,
1390                         "warning: \"%s\" attribute overwrites previous attribute"
1391                         " in tag starting at %s:%d.\n",
1392                         pairp->attribute, filename, curitem->startLine);
1393                     warningCount++;
1394                     PR_Free(archiveDir);
1395                 }
1396                 archiveDir = PL_strdup(pairp->value);
1397
1398                 /* Substiture ".arc" for ".jar" */
1399                 if ( (PL_strlen(archiveDir) < 4) ||
1400                     PL_strcasecmp((archiveDir + strlen(archiveDir) -4),
1401                         ".jar")) {
1402                     PR_fprintf(errorFD,
1403                         "warning: ARCHIVE attribute should end in \".jar\" in tag"
1404                         " starting on %s:%d.\n", filename, curitem->startLine);
1405                     warningCount++;
1406                     PR_Free(archiveDir);
1407                     archiveDir = PR_smprintf("%s.arc", archiveDir);
1408                 } else {
1409                     PL_strcpy(archiveDir + strlen(archiveDir) -4, ".arc");
1410                 }
1411
1412                 /* Record the first archive.  This will be used later if
1413                  * the archive is not specified */
1414                 if (firstArchiveDir == NULL) {
1415                     firstArchiveDir = PL_strdup(archiveDir);
1416                 }
1417             }
1418             /* CODEBASE= */
1419             else if ( !PL_strcasecmp(pairp->attribute, "codebase")) {
1420                 if (codebase) {
1421                     /* Duplicate attribute.  Print warning */
1422                     PR_fprintf(errorFD,
1423                         "warning: \"%s\" attribute overwrites previous attribute"
1424                         " in tag staring at %s:%d.\n",
1425                         pairp->attribute, filename, curitem->startLine);
1426                     warningCount++;
1427                 }
1428                 codebase = pairp->value;
1429             }
1430             /* SRC= and HREF= */
1431             else if ( !PORT_Strcasecmp(pairp->attribute, "src") ||
1432                 !PORT_Strcasecmp(pairp->attribute, "href") ) {
1433                 if (src) {
1434                     /* Duplicate attribute.  Print warning */
1435                     PR_fprintf(errorFD,
1436                         "warning: \"%s\" attribute overwrites previous attribute"
1437                         " in tag staring at %s:%d.\n",
1438                         pairp->attribute, filename, curitem->startLine);
1439                     warningCount++;
1440                 }
1441                 src = pairp->value;
1442             }
1443             /* CODE= */
1444             else if (!PORT_Strcasecmp(pairp->attribute, "code") ) {
1445                 /*!!!XXX Change PORT to PL all over this code !!! */
1446                 if (src) {
1447                     /* Duplicate attribute.  Print warning */
1448                     PR_fprintf(errorFD,
1449                         "warning: \"%s\" attribute overwrites previous attribute"
1450                         " ,in tag staring at %s:%d.\n",
1451                         pairp->attribute, filename, curitem->startLine);
1452                     warningCount++;
1453                 }
1454                 src = pairp->value;
1455
1456                 /* Append a .class if one is not already present */
1457                 if ( (PL_strlen(src) < 6) ||
1458                     PL_strcasecmp( (src + PL_strlen(src) - 6), ".class") ) {
1459                     src = PR_smprintf("%s.class", src);
1460                     /* Put this string back into the data structure so it
1461                      * will be deallocated properly */
1462                     PR_Free(pairp->value);
1463                     pairp->value = src;
1464                 }
1465             }
1466             /* ID= */
1467             else if (!PL_strcasecmp(pairp->attribute, "id") ) {
1468                 if (id) {
1469                     /* Duplicate attribute.  Print warning */
1470                     PR_fprintf(errorFD,
1471                         "warning: \"%s\" attribute overwrites previous attribute"
1472                         " in tag staring at %s:%d.\n",
1473                         pairp->attribute, filename, curitem->startLine);
1474                     warningCount++;
1475                 }
1476                 id = pairp->value;
1477             }
1478
1479             /* STYLE= */
1480             /* style= attributes, along with JS entities, are stored into
1481              * files with dynamically generated names. The filenames are
1482              * based on the order in which the text is found in the file.
1483              * All JS entities on all lines up to and including the line
1484              * containing the end of the tag that has this style= attribute
1485              * will be processed before this style=attribute.  So we need
1486              * to record the line that this _tag_ (not the attribute) ends on.
1487              */
1488             else if (!PL_strcasecmp(pairp->attribute, "style") && pairp->value)
1489             {
1490                 HTMLItem * styleItem;
1491                 /* Put this item on the style list */
1492                 styleItem = CreateTextItem(PL_strdup(pairp->value),
1493                     curitem->startLine, curitem->endLine);
1494                 if (styleListTail == NULL) {
1495                     styleList = styleListTail = styleItem;
1496                 } else {
1497                     styleListTail->next = styleItem;
1498                     styleListTail = styleItem;
1499                 }
1500             }
1501             /* Event handlers */
1502             else {
1503                 for (i = 0; i < num_handlers; i++) {
1504                     if (!PL_strcasecmp(event_handlers[i], pairp->attribute)) {
1505                         hasEventHandler = PR_TRUE;
1506                         break;
1507                     }
1508                 }
1509             }
1510
1511
1512             /* JS Entity */
1513             {
1514                 char    *entityStart, *entityEnd;
1515                 HTMLItem * entityItem;
1516
1517                 /* go through each JavaScript entity ( &{...}; ) and store it
1518                  * in the entityList.  The important thing is to record what
1519                  * line number it's on, so we can get it in the right order
1520                  * in relation to style= attributes.
1521                  * Apparently, these can't flow across lines, so the start and
1522                  * end line will be the same.  That helps matters.
1523                  */
1524                 entityEnd = pairp->value;
1525                 while ( entityEnd &&
1526                     (entityStart = PL_strstr(entityEnd, "&{")) /*}*/ != NULL) {
1527                     entityStart += 2; /* point at beginning of actual entity */
1528                     entityEnd = PL_strstr(entityStart, /*{*/ "}");
1529                     if (entityEnd) {
1530                         /* Put this item on the entity list */
1531                         *entityEnd = '\0';
1532                         entityItem = CreateTextItem(PL_strdup(entityStart),
1533                                             pairp->valueLine, pairp->valueLine);
1534                         *entityEnd = /* { */ '}';
1535                         if (entityListTail) {
1536                             entityListTail->next = entityItem;
1537                             entityListTail = entityItem;
1538                         } else {
1539                             entityList = entityListTail = entityItem;
1540                         }
1541                     }
1542                 }
1543             }
1544         }
1545
1546         /* If no archive was supplied, we use the first one of the file */
1547         if (!archiveDir && firstArchiveDir) {
1548             archiveDir = PL_strdup(firstArchiveDir);
1549         }
1550
1551         /* If we have an event handler, we need to archive this tag */
1552         if (hasEventHandler) {
1553             if (!id) {
1554                 PR_fprintf(errorFD,
1555                     "warning: tag starting at %s:%d has event handler but"
1556                     " no ID attribute.  The tag will not be signed.\n",
1557                                         filename, curitem->startLine);
1558                 warningCount++;
1559             } else if (!archiveDir) {
1560                 PR_fprintf(errorFD,
1561                     "warning: tag starting at %s:%d has event handler but"
1562                     " no ARCHIVE attribute.  The tag will not be signed.\n",
1563                                             filename, curitem->startLine);
1564                 warningCount++;
1565             } else {
1566                 if (SaveInlineScript(tagp->text, id, basedir, archiveDir)) {
1567                     goto loser;
1568                 }
1569             }
1570         }
1571
1572         switch (tagp->type) {
1573         case APPLET_TAG:
1574             if (!src) {
1575                 PR_fprintf(errorFD,
1576                     "error: APPLET tag starting on %s:%d has no CODE "
1577                     "attribute.\n", filename, curitem->startLine);
1578                 errorCount++;
1579                 goto loser;
1580             } else if (!archiveDir) {
1581                 PR_fprintf(errorFD,
1582                     "error: APPLET tag starting on %s:%d has no ARCHIVE "
1583                     "attribute.\n", filename, curitem->startLine);
1584                 errorCount++;
1585                 goto loser;
1586             } else {
1587                 if (SaveSource(src, codebase, basedir, archiveDir)) {
1588                     goto loser;
1589                 }
1590             }
1591             break;
1592         case SCRIPT_TAG:
1593         case LINK_TAG:
1594         case STYLE_TAG:
1595             if (!archiveDir) {
1596                 PR_fprintf(errorFD,
1597                     "error: %s tag starting on %s:%d has no ARCHIVE "
1598                     "attribute.\n", TagTypeToString(tagp->type),
1599                                             filename, curitem->startLine);
1600                 errorCount++;
1601                 goto loser;
1602             } else if (src) {
1603                 if (SaveSource(src, codebase, basedir, archiveDir)) {
1604                     goto loser;
1605                 }
1606             } else if (id) {
1607                 /* Save the next text item */
1608                 if (!curitem->next || (curitem->next->type !=
1609                     TEXT_ITEM)) {
1610                     PR_fprintf(errorFD,
1611                         "warning: %s tag starting on %s:%d is not followed"
1612                         " by script text.\n", TagTypeToString(tagp->type),
1613                                             filename, curitem->startLine);
1614                     warningCount++;
1615                     /* just create empty file */
1616                     if (SaveInlineScript("", id, basedir, archiveDir)) {
1617                         goto loser;
1618                     }
1619                 } else {
1620                     curitem = curitem->next;
1621                     if (SaveInlineScript(curitem->item.text,
1622                          id, basedir,
1623                         archiveDir)) {
1624                         goto loser;
1625                     }
1626                 }
1627             } else {
1628                 /* No src or id tag--warning */
1629                 PR_fprintf(errorFD,
1630                     "warning: %s tag starting on %s:%d has no SRC or"
1631                     " ID attributes.  Will not sign.\n",
1632                     TagTypeToString(tagp->type), filename, curitem->startLine);
1633                 warningCount++;
1634             }
1635             break;
1636         default:
1637             /* do nothing for other tags */
1638             break;
1639         }
1640
1641     }
1642
1643     /* Now deal with all the unnamable scripts */
1644     if (firstArchiveDir) {
1645         HTMLItem * style, *entity;
1646
1647         /* Go through the lists of JS entities and style attributes.  Do them
1648          * in chronological order within a list.  Pick the list with the lower
1649          * endLine. In case of a tie, entities come first.
1650          */
1651         style = styleList;
1652         entity = entityList;
1653         while (style || entity) {
1654             if (!entity || (style && (style->endLine < entity->endLine))) {
1655                 /* Process style */
1656                 SaveUnnamableScript(style->item.text, basedir, firstArchiveDir,
1657                                     filename);
1658                 style = style->next;
1659             } else {
1660                 /* Process entity */
1661                 SaveUnnamableScript(entity->item.text, basedir, firstArchiveDir,
1662                                     filename);
1663                 entity = entity->next;
1664             }
1665         }
1666     }
1667
1668
1669     retval = 0;
1670 loser:
1671     /* Blow away the stream */
1672     while (head) {
1673         curitem = head;
1674         head = head->next;
1675         DestroyHTMLItem(curitem);
1676     }
1677     while (styleList) {
1678         curitem = styleList;
1679         styleList = styleList->next;
1680         DestroyHTMLItem(curitem);
1681     }
1682     while (entityList) {
1683         curitem = entityList;
1684         entityList = entityList->next;
1685         DestroyHTMLItem(curitem);
1686     }
1687     if (text) {
1688         PR_Free(text);
1689         text = NULL;
1690     }
1691     if (fb) {
1692         FB_Destroy(fb);
1693         fb = NULL;
1694     }
1695     if (fd) {
1696         PR_Close(fd);
1697     }
1698     if (tagerr) {
1699         PR_smprintf_free(tagerr);
1700         tagerr = NULL;
1701     }
1702     if (archiveDir) {
1703         PR_Free(archiveDir);
1704         archiveDir = NULL;
1705     }
1706     if (firstArchiveDir) {
1707         PR_Free(firstArchiveDir);
1708         firstArchiveDir = NULL;
1709     }
1710     return retval;
1711 }
1712
1713
1714 /**********************************************************************
1715  *
1716  * e n s u r e E x i s t s
1717  *
1718  * Check for existence of indicated directory.  If it doesn't exist,
1719  * it will be created.
1720  * Returns PR_SUCCESS if the directory is present, PR_FAILURE otherwise.
1721  */
1722 static PRStatus
1723 ensureExists (char *base, char *path)
1724 {
1725     char        fn [FNSIZE];
1726     PRDir * dir;
1727     sprintf (fn, "%s/%s", base, path);
1728
1729     /*PR_fprintf(outputFD, "Trying to open directory %s.\n", fn);*/
1730
1731     if ( (dir = PR_OpenDir(fn)) ) {
1732         PR_CloseDir(dir);
1733         return PR_SUCCESS;
1734     }
1735     return PR_MkDir(fn, 0777);
1736 }
1737
1738
1739 /***************************************************************************
1740  *
1741  * m a k e _ d i r s
1742  *
1743  * Ensure that the directory portion of the path exists.  This may require
1744  * making the directory, and its parent, and its parent's parent, etc.
1745  */
1746 static int
1747 make_dirs(char *path, int file_perms)
1748 {
1749     char        *Path;
1750     char        *start;
1751     char        *sep;
1752     int ret = 0;
1753     PRFileInfo info;
1754
1755     if (!path) {
1756         return 0;
1757     }
1758
1759     Path = PL_strdup(path);
1760     start = strpbrk(Path, "/\\");
1761     if (!start) {
1762         return 0;
1763     }
1764     start++; /* start right after first slash */
1765
1766     /* Each time through the loop add one more directory. */
1767     while ( (sep = strpbrk(start, "/\\")) ) {
1768         *sep = '\0';
1769
1770         if ( PR_GetFileInfo(Path, &info) != PR_SUCCESS) {
1771             /* No such dir, we have to create it */
1772             if ( PR_MkDir(Path, file_perms) != PR_SUCCESS) {
1773                 PR_fprintf(errorFD, "ERROR: Unable to create directory %s.\n",
1774                                                         Path);
1775                 errorCount++;
1776                 ret = -1;
1777                 goto loser;
1778             }
1779         } else {
1780             /* something exists by this name, make sure it's a directory */
1781             if ( info.type != PR_FILE_DIRECTORY ) {
1782                 PR_fprintf(errorFD, "ERROR: Unable to create directory %s.\n",
1783                                                         Path);
1784                 errorCount++;
1785                 ret = -1;
1786                 goto loser;
1787             }
1788         }
1789
1790         start = sep + 1; /* start after the next slash */
1791         *sep = '/';
1792     }
1793
1794 loser:
1795     PR_Free(Path);
1796     return ret;
1797 }
1798
1799
1800 /*
1801  *  c o p y i n t o
1802  *
1803  *  Function to copy file "from" to path "to".
1804  *
1805  */
1806 static int
1807 copyinto (char *from, char *to)
1808 {
1809     PRInt32 num;
1810     char        buf [BUFSIZ];
1811     PRFileDesc * infp = NULL, *outfp = NULL;
1812     int retval = -1;
1813
1814     if ((infp = PR_Open(from, PR_RDONLY, 0777)) == NULL) {
1815         PR_fprintf(errorFD, "ERROR: Unable to open \"%s\" for reading.\n",
1816                                 from);
1817         errorCount++;
1818         goto finish;
1819     }
1820
1821     /* If to already exists, print a warning before deleting it */
1822     if (PR_Access(to, PR_ACCESS_EXISTS) == PR_SUCCESS) {
1823         PR_fprintf(errorFD, "warning: %s already exists--will overwrite\n", to);
1824         warningCount++;
1825         if (rm_dash_r(to)) {
1826             PR_fprintf(errorFD,
1827                 "ERROR: Unable to remove %s.\n", to);
1828             errorCount++;
1829             goto finish;
1830         }
1831     }
1832
1833     if ((outfp = PR_Open(to, PR_WRONLY | PR_CREATE_FILE | PR_TRUNCATE, 0777))
1834          == NULL) {
1835         char    *errBuf = NULL;
1836
1837         errBuf = PR_Malloc(PR_GetErrorTextLength());
1838         PR_fprintf(errorFD, "ERROR: Unable to open \"%s\" for writing.\n", to);
1839         if (PR_GetErrorText(errBuf)) {
1840             PR_fprintf(errorFD, "Cause: %s\n", errBuf);
1841         }
1842         if (errBuf) {
1843             PR_Free(errBuf);
1844         }
1845         errorCount++;
1846         goto finish;
1847     }
1848
1849     while ( (num = PR_Read(infp, buf, BUFSIZ)) > 0) {
1850         if (PR_Write(outfp, buf, num) != num) {
1851             PR_fprintf(errorFD, "ERROR: Error writing to %s.\n", to);
1852             errorCount++;
1853             goto finish;
1854         }
1855     }
1856
1857     retval = 0;
1858 finish:
1859     if (infp)
1860         PR_Close(infp);
1861     if (outfp)
1862         PR_Close(outfp);
1863
1864     return retval;
1865 }
1866
1867