apps/webbrowser/htmlparser.c

   1 /*
   2  * Copyright (c) 2002, Adam Dunkels.
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above
  11  *    copyright notice, this list of conditions and the following
  12  *    disclaimer in the documentation and/or other materials provided
  13  *    with the distribution.
  14  * 3. The name of the author may not be used to endorse or promote
  15  *    products derived from this software without specific prior
  16  *    written permission.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  19  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  24  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  27  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29  *
  30  * This file is part of the Contiki desktop environment
  31  *
  32  * $Id: htmlparser.c,v 1.8 2007/11/30 21:53:50 oliverschmidt Exp $
  33  *
  34  */
  35
  36 /* htmlparser.c:
  37  *
  38  * Implements a very simplistic HTML parser. It recognizes HTML links
  39  * (<a href>-tags), HTML img alt tags, a few text flow break tags
  40  * (<br>, <p>, <h>), the <li> tag (but does not even try to
  41  * distinguish between <ol> or <ul>) as well as HTML comment tags
  42  * (<!-- -->).
  43  *
  44  * To save memory, the HTML parser is state machine driven, which
  45  * means that it will shave off one character from the HTML page,
  46  * process that character, and return to the next. Another way of
  47  * doing it would be to buffer a number of characters and process them
  48  * together.
  49  *
  50  * The main function in this file is the htmlparser_parse() function
  51  * which takes a part of an HTML file and its length as arguments;
  52  * the parser state is kept in a file-local htmlparser_state
  53  * structure. htmlparser_parse() calls the helper parse_word(), which
  54  * in turn calls parse_tag(). Those functions call the callbacks
  55  * htmlparser_word(), htmlparser_newline() and htmlparser_link(), which
  56  * must be implemented by the using module (e.g., a web browser program).
  57  *
  58  * htmlparser_word() will be called for each word of plain (non-link) text.
  59  *
  60  * htmlparser_link() will be called whenever a full link has been found.
  61  *
  62  */
  63
  64 #include <string.h>
  65
  66 #include "contiki.h"
  67 #include "html-strings.h"
  68 #include "www.h"
  69
  70 #include "htmlparser.h"
  71
/* Debug tracing switch. With "#if 1" PRINTF() expands to nothing, so
   every trace call compiles away. Change the condition to 0 to route
   the traces to printf(). Call sites pass the complete printf argument
   list wrapped in an extra pair of parentheses: PRINTF(("x=%d\n", x)). */
#if 1
#define PRINTF(x)
#else
#include <stdio.h>
#define PRINTF(x) printf x
#endif
  78
  79
  80 /*-----------------------------------------------------------------------------------*/
/* Numeric character codes used by the parser, so that character
   comparisons do not depend on the compiler's character set. */

/* Upper-case letters. */
#define ISO_A     0x41
#define ISO_B     0x42
#define ISO_E     0x45
#define ISO_F     0x46
#define ISO_G     0x47
#define ISO_H     0x48
#define ISO_I     0x49
#define ISO_L     0x4c
#define ISO_M     0x4d
#define ISO_P     0x50
#define ISO_R     0x52
#define ISO_T     0x54

/* Lower-case letters: the upper-case code with bit 0x20 set. */
#define ISO_a     (ISO_A | 0x20)
#define ISO_b     (ISO_B | 0x20)
#define ISO_e     (ISO_E | 0x20)
#define ISO_f     (ISO_F | 0x20)
#define ISO_g     (ISO_G | 0x20)
#define ISO_h     (ISO_H | 0x20)
#define ISO_i     (ISO_I | 0x20)
#define ISO_l     (ISO_L | 0x20)
#define ISO_m     (ISO_M | 0x20)
#define ISO_p     (ISO_P | 0x20)
#define ISO_r     (ISO_R | 0x20)
#define ISO_t     (ISO_T | 0x20)

/* Whitespace, punctuation and markup delimiters. */
#define ISO_ht    0x09          /* horizontal tab */
#define ISO_nl    0x0a          /* line feed */
#define ISO_cr    0x0d          /* carriage return */
#define ISO_space 0x20
#define ISO_bang  0x21          /* '!' */
#define ISO_citation 0x22       /* double quote */
#define ISO_ampersand 0x26
#define ISO_citation2 0x27      /* single quote */
#define ISO_asterisk 0x2a
#define ISO_dash  0x2d
#define ISO_slash 0x2f
#define ISO_semicolon  0x3b
#define ISO_lt    0x3c          /* '<' */
#define ISO_eq    0x3d          /* '=' */
#define ISO_gt    0x3e          /* '>' */

/* NOTE: 0x5b is '[' and 0x5d is ']', i.e. the "rbrack"/"lbrack" names
   are the reverse of the usual left/right naming. parse_tag() emits
   ISO_rbrack before and ISO_lbrack after a frame link, which produces
   "[...]" on screen. */
#define ISO_rbrack 0x5b
#define ISO_lbrack 0x5d
 125
/* Minor states: where inside the character stream the parser currently
   is. parse_word() dispatches on this value. */
#define MINORSTATE_NONE           0
#define MINORSTATE_TEXT           1 /* Parse normal text */
#define MINORSTATE_EXTCHAR        2 /* Check for semi-colon */
#define MINORSTATE_TAG            3 /* Check for name of tag. */
#define MINORSTATE_TAGEND         4 /* Scan for end of tag. */
#define MINORSTATE_TAGATTR        5 /* Parse tag attr. */
#define MINORSTATE_TAGATTRSPACE   6 /* Parse optional space after tag
                                       attr. */
#define MINORSTATE_TAGATTRPARAM   7 /* Parse tag attr parameter. */
#define MINORSTATE_TAGATTRPARAMNQ 8 /* Parse tag attr parameter without
                                  quotation marks. */
#define MINORSTATE_HTMLCOMMENT    9 /* Scan for HTML comment end */

/* Major states: what kind of document region the parser is in. This
   decides what do_word() does with collected text. */
#define MAJORSTATE_NONE       0
#define MAJORSTATE_BODY       1 /* Text is passed on word by word. */
#define MAJORSTATE_LINK       2 /* Text is collected as the link text. */
#define MAJORSTATE_FORM       3 /* Inside <form>; <input> tags are handled. */
#define MAJORSTATE_DISCARD    4 /* Text is dropped (initial state and
                                   inside script/style/select). */
 144
 145
/* Complete state of the HTML parser. A single file-local instance (s)
   is used, so only one document can be parsed at a time. */
struct htmlparser_state {

  unsigned char minorstate;      /* Current MINORSTATE_* value. */
  char tag[20];                  /* Lower-cased name of the tag being parsed. */
  unsigned char tagptr;          /* Write index into tag[]; also reused as
                                    the dash counter while inside a comment. */
  char tagattr[20];              /* Lower-cased name of the current attribute. */
  unsigned char tagattrptr;      /* Write index into tagattr[]. */
  char tagattrparam[WWW_CONF_MAX_URLLEN]; /* Value of the current attribute. */
  unsigned char tagattrparamptr; /* Write index into tagattrparam[]. */
  unsigned char lastchar, quotechar; /* lastchar is only reset by
                                    htmlparser_init() in this file; quotechar
                                    is the quote character that opened the
                                    current attribute value. */
  unsigned char majorstate, lastmajorstate; /* Current and previous
                                    MAJORSTATE_* value. */
  char linkurl[WWW_CONF_MAX_URLLEN]; /* href of the <a> tag currently open. */

  char word[WWW_CONF_WEBPAGE_WIDTH]; /* Text collected by add_char(). */
  unsigned char wordlen;         /* Number of characters in word[]. */

#if WWW_CONF_FORMS
  char formaction[WWW_CONF_MAX_FORMACTIONLEN]; /* action= of current form. */
  char formname[WWW_CONF_MAX_FORMNAMELEN];     /* name= of current form. */
  unsigned char inputtype;       /* HTMLPARSER_INPUTTYPE_* of current <input>. */
  char inputname[WWW_CONF_MAX_INPUTNAMELEN];   /* name= of current <input>. */
  char inputvalue[WWW_CONF_MAX_INPUTVALUELEN]; /* value= (or alt=) of
                                    current <input>. */
  unsigned char inputvaluesize;  /* size= of current <input>, clamped below
                                    WWW_CONF_MAX_INPUTVALUELEN. */
#endif /* WWW_CONF_FORMS */
};

/* The one and only parser instance. */
static struct htmlparser_state s;
 173
 174 /*-----------------------------------------------------------------------------------*/
/* Sentinel that terminates the tag table: a single 0xff byte without a
   NUL terminator. */
static char last[1] = {(char)0xff};

/* Table of recognised tag names. Each TAG_* macro is the index of the
   entry that follows it; TAG_LAST is the index of the sentinel and is
   also what find_tag() returns for an unknown tag.

   find_tag() walks the table front to back, comparing one character
   position at a time, so the entries have to stay in ascending order.
   The html_* strings themselves are defined outside this file
   (presumably in html-strings.c -- check their contents when adding a
   tag). */
static const char *tags[] = {
#define TAG_FIRST       0
#define TAG_SLASHA      0
  html_slasha,
#define TAG_SLASHCENTER 1
  html_slashcenter,
#define TAG_SLASHFORM   2
  html_slashform,
#define TAG_SLASHH      3
  html_slashh,
#define TAG_SLASHSCRIPT 4
  html_slashscript,
#define TAG_SLASHSELECT 5
  html_slashselect,
#define TAG_SLASHSTYLE  6
  html_slashstyle,
#define TAG_A           7
  html_a,
#define TAG_BODY        8
  html_body,
#define TAG_BR          9
  html_br,
#define TAG_CENTER     10
  html_center,
#define TAG_FORM       11
  html_form,
#define TAG_FRAME      12
  html_frame,
#define TAG_H1         13
  html_h1,
#define TAG_H2         14
  html_h2,
#define TAG_H3         15
  html_h3,
#define TAG_H4         16
  html_h4,
#define TAG_IMG        17
  html_img,
#define TAG_INPUT      18
  html_input,
#define TAG_LI         19
  html_li,
#define TAG_P          20
  html_p,
#define TAG_SCRIPT     21
  html_script,
#define TAG_SELECT     22
  html_select,
#define TAG_STYLE      23
  html_style,
#define TAG_TR         24
  html_tr,
#define TAG_LAST       25
  last,
};
 232
 233 /*-----------------------------------------------------------------------------------*/
 234 static unsigned char CC_FASTCALL
 235 iswhitespace(char c)
 236 {
 237   return (c == ISO_space ||
 238           c == ISO_nl ||
 239           c == ISO_cr ||
 240           c == ISO_ht);
 241 }
 242 /*-----------------------------------------------------------------------------------*/
 243 void
 244 htmlparser_init(void)
 245 {
 246   s.majorstate = s.lastmajorstate = MAJORSTATE_DISCARD;
 247   s.minorstate = MINORSTATE_TEXT;
 248   s.lastchar = 0;
 249 }
 250 /*-----------------------------------------------------------------------------------*/
 251 static char CC_FASTCALL
 252 lowercase(char c)
 253 {
 254   /* XXX: This is a *brute force* approach to lower-case
 255      converting and should *not* be used anywhere else! It
 256      works for our purposes, however (i.e., HTML tags). */
 257   if(c > 0x40) {
 258     return (c & 0x1f) | 0x60;
 259   } else {
 260     return c;
 261   }
 262 }
 263 /*-----------------------------------------------------------------------------------*/
 264 static void
 265 endtagfound(void)
 266 {
 267   s.tag[s.tagptr] = 0;
 268   s.tagattr[s.tagattrptr] = 0;
 269   s.tagattrparam[s.tagattrparamptr] = 0;
 270 }
 271 /*-----------------------------------------------------------------------------------*/
 272 static void CC_FASTCALL
 273 switch_majorstate(unsigned char newstate)
 274 {
 275   if(s.majorstate != newstate) {
 276     PRINTF(("Switching state from %d to %d (%d)\n", s.majorstate, newstate, s.lastmajorstate));
 277     s.lastmajorstate = s.majorstate;
 278     s.majorstate = newstate;
 279   }
 280 }
 281 /*-----------------------------------------------------------------------------------*/
 282 static void CC_FASTCALL
 283 add_char(unsigned char c)
 284 {
 285   if(s.wordlen < WWW_CONF_WEBPAGE_WIDTH - 1 && c < 0x80) {
 286     s.word[s.wordlen] = c;
 287     ++s.wordlen;
 288   }
 289 }
 290 /*-----------------------------------------------------------------------------------*/
 291 static void
 292 do_word(void)
 293 {
 294   if(s.wordlen > 0) {
 295     if(s.majorstate == MAJORSTATE_LINK) {
 296       if(s.word[s.wordlen] != ISO_space) {
 297         add_char(ISO_space);
 298       }
 299     } else if(s.majorstate == MAJORSTATE_DISCARD) {
 300       s.wordlen = 0;
 301     } else {
 302       s.word[s.wordlen] = '\0';
 303       htmlparser_word(s.word, s.wordlen);
 304       s.wordlen = 0;
 305     }
 306   }
 307 }
 308 /*-----------------------------------------------------------------------------------*/
 309 static void
 310 newline(void)
 311 {
 312   do_word();
 313   htmlparser_newline();
 314 }
 315 /*-----------------------------------------------------------------------------------*/
 316 static unsigned char CC_FASTCALL
 317 find_tag(char *tag)
 318 {
 319   static unsigned char first, last, i, tabi;
 320   static char tagc;
 321
 322   first = TAG_FIRST;
 323   last = TAG_LAST;
 324   i = 0;
 325
 326   do {
 327     tagc = tag[i];
 328
 329     if(tagc == 0 &&
 330        tags[first][i] == 0) {
 331       return first;
 332     }
 333
 334     tabi = first;
 335
 336     /* First, find first matching tag from table. */
 337     while(tagc > (tags[tabi])[i] &&
 338           tabi < last) {
 339       ++tabi;
 340     }
 341     first = tabi;
 342
 343     /* Second, find last matching tag from table. */
 344     while(tagc == (tags[tabi])[i] &&
 345           tabi < last) {
 346       ++tabi;
 347     }
 348     last = tabi;
 349
 350     /* If first and last matching tags are equal, we have a non-match
 351        and return. Else we continue with the next character. */
 352     ++i;
 353
 354   } while(last != first);
 355   return TAG_LAST;
 356 }
 357 /*-----------------------------------------------------------------------------------*/
 358 static void
 359 parse_tag(void)
 360 {
 361   static char *tagattrparam;
 362   static unsigned char size;
 363
 364   static char dummy;
 365
 366   PRINTF(("Parsing tag '%s' '%s' '%s'\n",
 367           s.tag, s.tagattr, s.tagattrparam));
 368
 369   switch(find_tag(s.tag)) {
 370   case TAG_P:
 371   case TAG_H1:
 372   case TAG_H2:
 373   case TAG_H3:
 374   case TAG_H4:
 375     /*    parse_char(ISO_nl);*/
 376     newline();
 377     /* FALLTHROUGH */
 378   case TAG_BR:
 379   case TAG_TR:
 380   case TAG_SLASHH:
 381     /*    parse_char(ISO_nl);*/
 382     dummy = 0;
 383     newline();
 384     break;
 385   case TAG_LI:
 386     newline();
 387     add_char(ISO_asterisk);
 388     add_char(ISO_space);
 389     break;
 390   case TAG_SCRIPT:
 391   case TAG_STYLE:
 392   case TAG_SELECT:
 393     switch_majorstate(MAJORSTATE_DISCARD);
 394     break;
 395   case TAG_SLASHSCRIPT:
 396   case TAG_SLASHSTYLE:
 397   case TAG_SLASHSELECT:
 398     do_word();
 399     switch_majorstate(s.lastmajorstate);
 400     break;
 401   case TAG_BODY:
 402     s.majorstate = s.lastmajorstate = MAJORSTATE_BODY;
 403     break;
 404   case TAG_FRAME:
 405     if(strncmp(s.tagattr, html_src, sizeof(html_src)) == 0 &&
 406        s.tagattrparam[0] != 0) {
 407       switch_majorstate(MAJORSTATE_BODY);
 408       newline();
 409       add_char(ISO_rbrack);
 410       do_word();
 411       htmlparser_link((char *)html_frame, (unsigned char)strlen(html_frame), s.tagattrparam);
 412       PRINTF(("Frame [%s]\n", s.tagattrparam));
 413       add_char(ISO_lbrack);
 414       newline();
 415     }
 416     break;
 417   case TAG_IMG:
 418     if(strncmp(s.tagattr, html_alt, sizeof(html_alt)) == 0 &&
 419        s.tagattrparam[0] != 0) {
 420       /*      parse_char(ISO_lt);*/
 421       add_char(ISO_lt);
 422       tagattrparam = &s.tagattrparam[0];
 423       while(*tagattrparam) {
 424         /*      parse_char(*tagattrparam);*/
 425         add_char(*tagattrparam);
 426         ++tagattrparam;
 427       }
 428       /*      parse_char(ISO_gt);*/
 429       add_char(ISO_gt);
 430       do_word();
 431     }
 432     break;
 433   case TAG_A:
 434     PRINTF(("A %s %s\n", s.tagattr, s.tagattrparam));
 435     if(strncmp(s.tagattr, html_href, sizeof(html_href)) == 0 &&
 436        s.tagattrparam[0] != 0) {
 437       strcpy(s.linkurl, s.tagattrparam);
 438       do_word();
 439       switch_majorstate(MAJORSTATE_LINK);
 440     }
 441     break;
 442   case TAG_SLASHA:
 443     if(s.majorstate == MAJORSTATE_LINK) {
 444       switch_majorstate(s.lastmajorstate);
 445       s.word[s.wordlen] = 0;
 446       htmlparser_link(s.word, s.wordlen, s.linkurl);
 447       s.wordlen = 0;
 448     }
 449     break;
 450 #if WWW_CONF_FORMS
 451   case TAG_FORM:
 452     PRINTF(("Form tag\n"));
 453     switch_majorstate(MAJORSTATE_FORM);
 454     if(strncmp(s.tagattr, html_action, sizeof(html_action)) == 0) {
 455       PRINTF(("Form action '%s'\n", s.tagattrparam));
 456       strncpy(s.formaction, s.tagattrparam, WWW_CONF_MAX_FORMACTIONLEN - 1);
 457     } else if(strncmp(s.tagattr, html_name, sizeof(html_name)) == 0) {
 458       PRINTF(("Form name '%s'\n", s.tagattrparam));
 459       strncpy(s.formname, s.tagattrparam, WWW_CONF_MAX_FORMNAMELEN - 1);
 460     }
 461     s.inputname[0] = s.inputvalue[0] = 0;
 462     break;
 463   case TAG_SLASHFORM:
 464     switch_majorstate(MAJORSTATE_BODY);
 465     s.formaction[0] = s.formname[0] = 0;
 466     break;
 467   case TAG_INPUT:
 468     if(s.majorstate == MAJORSTATE_FORM) {
 469       /* First check if we are called at the end of an input tag. If
 470          so, we should render the input widget. */
 471       if(s.tagattr[0] == 0 &&
 472          s.inputname[0] != 0) {
 473         PRINTF(("Render input type %d\n", s.inputtype));
 474         switch(s.inputtype) {
 475         case HTMLPARSER_INPUTTYPE_NONE:
 476         case HTMLPARSER_INPUTTYPE_TEXT:
 477           s.inputvalue[s.inputvaluesize] = 0;
 478           htmlparser_inputfield(s.inputvaluesize, s.inputvalue, s.inputname,
 479                                 s.formname, s.formaction);
 480           break;
 481         case HTMLPARSER_INPUTTYPE_SUBMIT:
 482         case HTMLPARSER_INPUTTYPE_IMAGE:
 483           htmlparser_submitbutton(s.inputvalue, s.inputname,
 484                                   s.formname, s.formaction);
 485           break;
 486         }
 487         s.inputtype = HTMLPARSER_INPUTTYPE_NONE;
 488       } else {
 489         PRINTF(("Input '%s' '%s'\n", s.tagattr, s.tagattrparam));
 490         if(strncmp(s.tagattr, html_type, sizeof(html_type)) == 0) {
 491           if(strncmp(s.tagattrparam, html_submit,
 492                      sizeof(html_submit)) == 0) {
 493             s.inputtype = HTMLPARSER_INPUTTYPE_SUBMIT;
 494           } else if(strncmp(s.tagattrparam, html_image,
 495                             sizeof(html_image)) == 0) {
 496             s.inputtype = HTMLPARSER_INPUTTYPE_IMAGE;
 497           } else if(strncmp(s.tagattrparam, html_text,
 498                             sizeof(html_text)) == 0) {
 499             s.inputtype = HTMLPARSER_INPUTTYPE_TEXT;
 500           } else {
 501             s.inputtype = HTMLPARSER_INPUTTYPE_OTHER;
 502           }
 503         } else if(strncmp(s.tagattr, html_name,
 504                           sizeof(html_name)) == 0) {
 505           strncpy(s.inputname, s.tagattrparam,
 506                   WWW_CONF_MAX_INPUTNAMELEN);
 507         } else if(strncmp(s.tagattr, html_alt,
 508                           sizeof(html_alt)) == 0 &&
 509                   s.inputtype == HTMLPARSER_INPUTTYPE_IMAGE) {
 510           strncpy(s.inputvalue, s.tagattrparam,
 511                   WWW_CONF_MAX_INPUTVALUELEN);
 512         } else if(strncmp(s.tagattr, html_value,
 513                           sizeof(html_value)) == 0) {
 514           strncpy(s.inputvalue, s.tagattrparam,
 515                   WWW_CONF_MAX_INPUTVALUELEN);
 516         } else if(strncmp(s.tagattr, html_size,
 517                           sizeof(html_size)) == 0) {
 518           size = 0;
 519           if(s.tagattrparam[0] >= '0' &&
 520              s.tagattrparam[0] <= '9') {
 521             size = s.tagattrparam[0] - '0';
 522             if(s.tagattrparam[1] >= '0' &&
 523                s.tagattrparam[1] <= '9') {
 524               size = size * 10 + (s.tagattrparam[1] - '0');
 525             }
 526           }
 527           if(size >= WWW_CONF_MAX_INPUTVALUELEN) {
 528             size = WWW_CONF_MAX_INPUTVALUELEN - 1;
 529           }
 530           s.inputvaluesize = size;
 531           /*      strncpy(s.inputvalue, s.tagattrparam,
 532                   WWW_CONF_MAX_INPUTVALUELEN);*/
 533         }
 534       }
 535
 536     }
 537     break;
 538 #endif /* WWW_CONF_FORMS */
 539 #if WWW_CONF_RENDERSTATE
 540   case TAG_CENTER:
 541     /*    parse_char(ISO_nl);    */
 542     newline();
 543     htmlparser_renderstate(HTMLPARSER_RENDERSTATE_BEGIN |
 544                            HTMLPARSER_RENDERSTATE_CENTER);
 545     break;
 546   case TAG_SLASHCENTER:
 547     /*    parse_char(ISO_nl);*/
 548     newline();
 549     htmlparser_renderstate(HTMLPARSER_RENDERSTATE_END |
 550                            HTMLPARSER_RENDERSTATE_CENTER);
 551     break;
 552 #endif /* WWW_CONF_RENDERSTATE */
 553   }
 554 }
 555 /*-----------------------------------------------------------------------------------*/
 556 static u16_t
 557 parse_word(char *data, u8_t dlen)
 558 {
 559   static u8_t i;
 560   static u8_t len;
 561   unsigned char c;
 562
 563   len = dlen;
 564
 565   switch(s.minorstate) {
 566   case MINORSTATE_TEXT:
 567     for(i = 0; i < len; ++i) {
 568       c = data[i];
 569       if(iswhitespace(c)) {
 570         do_word();
 571       } else if(c == ISO_lt) {
 572         s.minorstate = MINORSTATE_TAG;
 573         s.tagptr = 0;
 574         /*      do_word();*/
 575         break;
 576       } else if(c == ISO_ampersand) {
 577         s.minorstate = MINORSTATE_EXTCHAR;
 578         break;
 579       } else {
 580         add_char(c);
 581       }
 582     }
 583     break;
 584   case MINORSTATE_EXTCHAR:
 585     for(i = 0; i < len; ++i) {
 586       c = data[i];
 587       if(c == ISO_semicolon) {
 588         s.minorstate = MINORSTATE_TEXT;
 589         add_char(' ');
 590         break;
 591       } else if(iswhitespace(c)) {
 592         s.minorstate = MINORSTATE_TEXT;
 593         add_char('&');
 594         add_char(' ');
 595         break;
 596       }
 597     }
 598     break;
 599   case MINORSTATE_TAG:
 600     /* We are currently parsing within the name of a tag. We check
 601        for the end of a tag (the '>' character) or whitespace (which
 602        indicates that we should parse a tag attr argument
 603        instead). */
 604     for(i = 0; i < len; ++i) {
 605       c = data[i];
 606       if(c == ISO_gt) {
 607         /* Full tag found. We continue parsing regular text. */
 608         s.minorstate = MINORSTATE_TEXT;
 609         s.tagattrptr = s.tagattrparamptr = 0;
 610         endtagfound();
 611         parse_tag();
 612         break;
 613       } else if(iswhitespace(c)) {
 614         /* The name of the tag found. We continue parsing the tag
 615            attr.*/
 616         s.minorstate = MINORSTATE_TAGATTR;
 617         s.tagattrptr = 0;
 618         endtagfound();
 619         break;
 620       } else {
 621         /* Keep track of the name of the tag, but convert it to
 622            lower case. */
 623
 624         s.tag[s.tagptr] = lowercase(c);
 625         ++s.tagptr;
 626         /* Check if the ->tag field is full. If so, we just eat up
 627            any data left in the tag. */
 628         if(s.tagptr == sizeof(s.tag)) {
 629           s.minorstate = MINORSTATE_TAGEND;
 630           break;
 631         }
 632       }
 633
 634       /* Check for HTML comment, indicated by <!-- */
 635       if(s.tagptr == 3 &&
 636          s.tag[0] == ISO_bang &&
 637          s.tag[1] == ISO_dash &&
 638          s.tag[2] == ISO_dash) {
 639         PRINTF(("Starting comment...\n"));
 640         s.minorstate = MINORSTATE_HTMLCOMMENT;
 641         s.tagptr = 0;
 642         endtagfound();
 643         break;
 644       }
 645     }
 646     break;
 647   case MINORSTATE_TAGATTR:
 648     /* We parse the "tag attr", i.e., the "href" in <a
 649        href="...">. */
 650     for(i = 0; i < len; ++i) {
 651       c = data[i];
 652       if(c == ISO_gt) {
 653         /* Full tag found. */
 654         s.minorstate = MINORSTATE_TEXT;
 655         s.tagattrparamptr = 0;
 656         s.tagattrptr = 0;
 657         endtagfound();
 658         parse_tag();
 659         s.tagptr = 0;
 660         endtagfound();
 661         break;
 662       } else if(iswhitespace(c)) {
 663         if(s.tagattrptr == 0) {
 664           /* Discard leading spaces. */
 665         } else {
 666           /* A non-leading space is the end of the attribute. */
 667           s.tagattrparamptr = 0;
 668           endtagfound();
 669           parse_tag();
 670           s.minorstate = MINORSTATE_TAGATTRSPACE;
 671           break;
 672           /*        s.tagattrptr = 0;
 673                     endtagfound();*/
 674         }
 675       } else if(c == ISO_eq) {
 676         s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
 677         s.tagattrparamptr = 0;
 678         endtagfound();
 679         break;
 680       } else {
 681         s.tagattr[s.tagattrptr] = lowercase(c);
 682         ++s.tagattrptr;
 683         /* Check if the "tagattr" field is full. If so, we just eat
 684            up any data left in the tag. */
 685         if(s.tagattrptr == sizeof(s.tagattr)) {
 686           s.minorstate = MINORSTATE_TAGEND;
 687           break;
 688         }
 689       }
 690     }
 691     break;
 692   case MINORSTATE_TAGATTRSPACE:
 693     for(i = 0; i < len; ++i) {
 694       c = data[i];
 695       if(iswhitespace(c)) {
 696         /* Discard spaces. */
 697       } else if(c == ISO_eq) {
 698         s.minorstate = MINORSTATE_TAGATTRPARAMNQ;
 699         s.tagattrparamptr = 0;
 700         endtagfound();
 701         parse_tag();
 702         break;
 703       } else {
 704         s.tagattr[0] = lowercase(c);
 705         s.tagattrptr = 1;
 706         s.minorstate = MINORSTATE_TAGATTR;
 707         break;
 708       }
 709     }
 710     break;
 711   case MINORSTATE_TAGATTRPARAMNQ:
 712     /* We are parsing the "tag attr parameter", i.e., the link part
 713        in <a href="link">. */
 714     for(i = 0; i < len; ++i) {
 715       c = data[i];
 716       if(c == ISO_gt) {
 717         /* Full tag found. */
 718         endtagfound();
 719         parse_tag();
 720         s.minorstate = MINORSTATE_TEXT;
 721         s.tagattrptr = 0;
 722         endtagfound();
 723         parse_tag();
 724         s.tagptr = 0;
 725         endtagfound();
 726         break;
 727       } else if(iswhitespace(c) &&
 728                 s.tagattrparamptr == 0) {
 729         /* Discard leading spaces. */
 730       } else if((c == ISO_citation ||
 731                  c == ISO_citation2) &&
 732                 s.tagattrparamptr == 0) {
 733         s.minorstate = MINORSTATE_TAGATTRPARAM;
 734         s.quotechar = c;
 735         PRINTF(("tag attr param q found\n"));
 736         break;
 737       } else if(iswhitespace(c)) {
 738         PRINTF(("Non-leading space found at %d\n",
 739                 s.tagattrparamptr));
 740         /* Stop parsing if a non-leading space was found */
 741         endtagfound();
 742         parse_tag();
 743
 744         s.minorstate = MINORSTATE_TAGATTR;
 745         s.tagattrptr = 0;
 746         endtagfound();
 747         break;
 748       } else {
 749         s.tagattrparam[s.tagattrparamptr] = c;
 750         ++s.tagattrparamptr;
 751         /* Check if the "tagattr" field is full. If so, we just eat
 752            up any data left in the tag. */
 753         if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
 754           s.minorstate = MINORSTATE_TAGEND;
 755           break;
 756         }
 757       }
 758     }
 759     break;
 760   case MINORSTATE_TAGATTRPARAM:
 761     /* We are parsing the "tag attr parameter", i.e., the link
 762        part in <a href="link">. */
 763     for(i = 0; i < len; ++i) {
 764       c = data[i];
 765       if(c == s.quotechar) {
 766         /* Found end of tag attr parameter. */
 767         endtagfound();
 768         parse_tag();
 769
 770         s.minorstate = MINORSTATE_TAGATTR;
 771         s.tagattrptr = 0;
 772         endtagfound();
 773         break;
 774       } else {
 775         if(iswhitespace(c)) {
 776           s.tagattrparam[s.tagattrparamptr] = ISO_space;
 777         } else {
 778           s.tagattrparam[s.tagattrparamptr] = c;
 779         }
 780
 781         ++s.tagattrparamptr;
 782         /* Check if the "tagattr" field is full. If so, we just eat
 783            up any data left in the tag. */
 784         if(s.tagattrparamptr >= sizeof(s.tagattrparam) - 1) {
 785           s.minorstate = MINORSTATE_TAGEND;
 786           break;
 787         }
 788       }
 789     }
 790     break;
 791   case MINORSTATE_HTMLCOMMENT:
 792     for(i = 0; i < len; ++i) {
 793       c = data[i];
 794       if(c == ISO_dash) {
 795         ++s.tagptr;
 796       } else if(c == ISO_gt && s.tagptr > 0) {
 797         PRINTF(("Comment done.\n"));
 798         s.minorstate = MINORSTATE_TEXT;
 799         break;
 800       } else {
 801         s.tagptr = 0;
 802       }
 803     }
 804     break;
 805   case MINORSTATE_TAGEND:
 806     /* Discard characters until a '>' is seen. */
 807     for(i = 0; i < len; ++i) {
 808       if(data[i] == ISO_gt) {
 809         s.minorstate = MINORSTATE_TEXT;
 810         s.tagattrptr = 0;
 811         endtagfound();
 812         parse_tag();
 813         break;
 814       }
 815     }
 816     break;
 817   default:
 818     i = 0;
 819     break;
 820   }
 821   if(i >= len) {
 822     return len;
 823   }
 824   return i + 1;
 825 }
 826 /*-----------------------------------------------------------------------------------*/
 827 void
 828 htmlparser_parse(char *data, u16_t datalen)
 829 {
 830   u16_t plen;
 831
 832   while(datalen > 0) {
 833     if(datalen > 255) {
 834       plen = parse_word(data, 255);
 835     } else {
 836       plen = parse_word(data, (u8_t)datalen);
 837     }
 838     datalen -= plen;
 839     data += plen;
 840   }
 841 }
 842 /*-----------------------------------------------------------------------------------*/