Merge pull request #11 from esorton/bugfix/add-constexpr-keyword-to-arduino-ctags
[arduino-ctags.git] / tex.c
blob0c6714ea3bce1cdd7de45a47297bf78bf5bad499
1 /*
2 * $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $
4 * Copyright (c) 2008, David Fishburn
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for generating tags for TeX language files.
11 * Tex language reference:
12 * http://en.wikibooks.org/wiki/TeX#The_Structure_of_TeX
16 * INCLUDE FILES
18 #include "general.h" /* must always come first */
19 #include <ctype.h> /* to define isalpha () */
20 #include <setjmp.h>
21 #ifdef DEBUG
22 #include <stdio.h>
23 #endif
25 #include "debug.h"
26 #include "entry.h"
27 #include "keyword.h"
28 #include "parse.h"
29 #include "read.h"
30 #include "routines.h"
31 #include "vstring.h"
34 * MACROS
/* Non-zero when the "type" member of *token equals token type t. */
#define isType(token,t)		((boolean) ((token)->type == (t)))

/* Non-zero when the "keyword" member of *token equals keyword id k. */
#define isKeyword(token,k)	((boolean) ((token)->keyword == (k)))
40 * DATA DECLARATIONS
/* Values passed through longjmp()/setjmp() on the Exception jump buffer. */
typedef enum eException {
	ExceptionNone,	/* value of a direct setjmp() return: keep scanning */
	ExceptionEOF	/* raised by readToken() when the input is exhausted */
} exception_t;
/*
 * Identifies which recognised TeX command a token represents.
 * KEYWORD_NONE marks a token that is not one of the known commands.
 */
typedef enum eKeywordId {
	KEYWORD_NONE = -1,

	/* sectioning commands */
	KEYWORD_chapter,
	KEYWORD_section,
	KEYWORD_subsection,
	KEYWORD_subsubsection,
	KEYWORD_part,
	KEYWORD_paragraph,
	KEYWORD_subparagraph,

	/* file inclusion */
	KEYWORD_include
} keywordId;
60 /* Used to determine whether keyword is valid for the token language and
61 * what its ID is.
63 typedef struct sKeywordDesc {
64 const char *name;
65 keywordId id;
66 } keywordDesc;
/* Lexical categories assigned by readToken(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,		/* initial state / anything not listed below */
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,		/* ) */
	TOKEN_COMMA,			/* , */
	TOKEN_KEYWORD,			/* backslash command found in the keyword table */
	TOKEN_OPEN_PAREN,		/* ( */
	TOKEN_IDENTIFIER,		/* word, or backslash command not in the table */
	TOKEN_STRING,			/* text between a pair of ' or " */
	TOKEN_OPEN_CURLY,		/* { */
	TOKEN_CLOSE_CURLY,		/* } */
	TOKEN_OPEN_SQUARE,		/* [ */
	TOKEN_CLOSE_SQUARE,		/* ] */
	TOKEN_QUESTION_MARK,
	TOKEN_STAR				/* * */
} tokenType;
85 typedef struct sTokenInfo {
86 tokenType type;
87 keywordId keyword;
88 vString * string;
89 vString * scope;
90 unsigned long lineNumber;
91 fpos_t filePosition;
92 } tokenInfo;
95 * DATA DEFINITIONS
98 static langType Lang_js;
100 static jmp_buf Exception;
/*
 * Tag kinds emitted by this parser.  Each value is used as an index into
 * TexKinds, so the two must list their entries in the same order;
 * TEXTAG_COUNT is the number of kinds.
 */
typedef enum {
	TEXTAG_CHAPTER,
	TEXTAG_SECTION,
	TEXTAG_SUBSECTION,
	TEXTAG_SUBSUBSECTION,
	TEXTAG_PART,
	TEXTAG_PARAGRAPH,
	TEXTAG_SUBPARAGRAPH,
	TEXTAG_INCLUDE,

	TEXTAG_COUNT
} texKind;
114 static kindOption TexKinds [] = {
115 { TRUE, 'c', "chapter", "chapters" },
116 { TRUE, 's', "section", "sections" },
117 { TRUE, 'u', "subsection", "subsections" },
118 { TRUE, 'b', "subsubsection", "subsubsections" },
119 { TRUE, 'p', "part", "parts" },
120 { TRUE, 'P', "paragraph", "paragraphs" },
121 { TRUE, 'G', "subparagraph", "subparagraphs" },
122 { TRUE, 'i', "include", "includes" }
125 static const keywordDesc TexKeywordTable [] = {
126 /* keyword keyword ID */
127 { "chapter", KEYWORD_chapter },
128 { "section", KEYWORD_section },
129 { "subsection", KEYWORD_subsection },
130 { "subsubsection", KEYWORD_subsubsection },
131 { "part", KEYWORD_part },
132 { "paragraph", KEYWORD_paragraph },
133 { "subparagraph", KEYWORD_subparagraph },
134 { "include", KEYWORD_include }
138 * FUNCTION DEFINITIONS
141 static boolean isIdentChar (const int c)
143 return (boolean)
144 (isalpha (c) || isdigit (c) || c == '$' ||
145 c == '_' || c == '#' || c == '-' || c == '.');
148 static void buildTexKeywordHash (void)
150 const size_t count = sizeof (TexKeywordTable) /
151 sizeof (TexKeywordTable [0]);
152 size_t i;
153 for (i = 0 ; i < count ; ++i)
155 const keywordDesc* const p = &TexKeywordTable [i];
156 addKeyword (p->name, Lang_js, (int) p->id);
160 static tokenInfo *newToken (void)
162 tokenInfo *const token = xMalloc (1, tokenInfo);
164 token->type = TOKEN_UNDEFINED;
165 token->keyword = KEYWORD_NONE;
166 token->string = vStringNew ();
167 token->scope = vStringNew ();
168 token->lineNumber = getSourceLineNumber ();
169 token->filePosition = getInputFilePosition ();
171 return token;
174 static void deleteToken (tokenInfo *const token)
176 vStringDelete (token->string);
177 vStringDelete (token->scope);
178 eFree (token);
182 * Tag generation functions
185 static void makeConstTag (tokenInfo *const token, const texKind kind)
187 if (TexKinds [kind].enabled )
189 const char *const name = vStringValue (token->string);
190 tagEntryInfo e;
191 initTagEntry (&e, name);
193 e.lineNumber = token->lineNumber;
194 e.filePosition = token->filePosition;
195 e.kindName = TexKinds [kind].name;
196 e.kind = TexKinds [kind].letter;
198 makeTagEntry (&e);
202 static void makeTexTag (tokenInfo *const token, texKind kind)
204 vString * fulltag;
206 if (TexKinds [kind].enabled)
209 * If a scope has been added to the token, change the token
210 * string to include the scope when making the tag.
212 if ( vStringLength (token->scope) > 0 )
214 fulltag = vStringNew ();
215 vStringCopy (fulltag, token->scope);
216 vStringCatS (fulltag, ".");
217 vStringCatS (fulltag, vStringValue (token->string));
218 vStringTerminate (fulltag);
219 vStringCopy (token->string, fulltag);
220 vStringDelete (fulltag);
222 makeConstTag (token, kind);
227 * Parsing functions
230 static void parseString (vString *const string, const int delimiter)
232 boolean end = FALSE;
233 while (! end)
235 int c = fileGetc ();
236 if (c == EOF)
237 end = TRUE;
238 else if (c == '\\')
240 c = fileGetc(); /* This maybe a ' or ". */
241 vStringPut (string, c);
243 else if (c == delimiter)
244 end = TRUE;
245 else
246 vStringPut (string, c);
248 vStringTerminate (string);
252 * Read a C identifier beginning with "firstChar" and places it into
253 * "name".
255 static void parseIdentifier (vString *const string, const int firstChar)
257 int c = firstChar;
258 Assert (isIdentChar (c));
261 vStringPut (string, c);
262 c = fileGetc ();
263 } while (isIdentChar (c));
265 vStringTerminate (string);
266 if (!isspace (c))
267 fileUngetc (c); /* unget non-identifier character */
270 static void readToken (tokenInfo *const token)
272 int c;
274 token->type = TOKEN_UNDEFINED;
275 token->keyword = KEYWORD_NONE;
276 vStringClear (token->string);
278 getNextChar:
281 c = fileGetc ();
282 token->lineNumber = getSourceLineNumber ();
283 token->filePosition = getInputFilePosition ();
285 while (c == '\t' || c == ' ' || c == '\n');
287 switch (c)
289 case EOF: longjmp (Exception, (int)ExceptionEOF); break;
290 case '(': token->type = TOKEN_OPEN_PAREN; break;
291 case ')': token->type = TOKEN_CLOSE_PAREN; break;
292 case ',': token->type = TOKEN_COMMA; break;
293 case '{': token->type = TOKEN_OPEN_CURLY; break;
294 case '}': token->type = TOKEN_CLOSE_CURLY; break;
295 case '[': token->type = TOKEN_OPEN_SQUARE; break;
296 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
297 case '*': token->type = TOKEN_STAR; break;
299 case '\'':
300 case '"':
301 token->type = TOKEN_STRING;
302 parseString (token->string, c);
303 token->lineNumber = getSourceLineNumber ();
304 token->filePosition = getInputFilePosition ();
305 break;
307 case '\\':
309 * All Tex tags start with a backslash.
310 * Check if the next character is an alpha character
311 * else it is not a potential tex tag.
313 c = fileGetc ();
314 if (! isalpha (c))
315 fileUngetc (c);
316 else
318 parseIdentifier (token->string, c);
319 token->lineNumber = getSourceLineNumber ();
320 token->filePosition = getInputFilePosition ();
321 token->keyword = analyzeToken (token->string, Lang_js);
322 if (isKeyword (token, KEYWORD_NONE))
323 token->type = TOKEN_IDENTIFIER;
324 else
325 token->type = TOKEN_KEYWORD;
327 break;
329 case '%':
330 fileSkipToCharacter ('\n'); /* % are single line comments */
331 goto getNextChar;
332 break;
334 default:
335 if (! isIdentChar (c))
336 token->type = TOKEN_UNDEFINED;
337 else
339 parseIdentifier (token->string, c);
340 token->lineNumber = getSourceLineNumber ();
341 token->filePosition = getInputFilePosition ();
342 token->type = TOKEN_IDENTIFIER;
344 break;
348 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
350 dest->lineNumber = src->lineNumber;
351 dest->filePosition = src->filePosition;
352 dest->type = src->type;
353 dest->keyword = src->keyword;
354 vStringCopy (dest->string, src->string);
355 vStringCopy (dest->scope, src->scope);
359 * Scanning functions
362 static boolean parseTag (tokenInfo *const token, texKind kind)
364 tokenInfo *const name = newToken ();
365 vString * fullname;
366 boolean useLongName = TRUE;
368 fullname = vStringNew ();
369 vStringClear (fullname);
372 * Tex tags are of these formats:
373 * \keyword{any number of words}
374 * \keyword[short desc]{any number of words}
375 * \keyword*[short desc]{any number of words}
377 * When a keyword is found, loop through all words within
378 * the curly braces for the tag name.
381 if (isType (token, TOKEN_KEYWORD))
383 copyToken (name, token);
384 readToken (token);
387 if (isType (token, TOKEN_OPEN_SQUARE))
389 useLongName = FALSE;
391 readToken (token);
392 while (! isType (token, TOKEN_CLOSE_SQUARE) )
394 if (isType (token, TOKEN_IDENTIFIER))
396 if (fullname->length > 0)
397 vStringCatS (fullname, " ");
398 vStringCatS (fullname, vStringValue (token->string));
400 readToken (token);
402 vStringTerminate (fullname);
403 vStringCopy (name->string, fullname);
404 makeTexTag (name, kind);
407 if (isType (token, TOKEN_STAR))
409 readToken (token);
412 if (isType (token, TOKEN_OPEN_CURLY))
414 readToken (token);
415 while (! isType (token, TOKEN_CLOSE_CURLY) )
417 /* if (isType (token, TOKEN_IDENTIFIER) && useLongName) */
418 if (useLongName)
420 if (fullname->length > 0)
421 vStringCatS (fullname, " ");
422 vStringCatS (fullname, vStringValue (token->string));
424 readToken (token);
426 if (useLongName)
428 vStringTerminate (fullname);
429 vStringCopy (name->string, fullname);
430 makeTexTag (name, kind);
434 deleteToken (name);
435 vStringDelete (fullname);
436 return TRUE;
439 static void parseTexFile (tokenInfo *const token)
443 readToken (token);
445 if (isType (token, TOKEN_KEYWORD))
447 switch (token->keyword)
449 case KEYWORD_chapter:
450 parseTag (token, TEXTAG_CHAPTER);
451 break;
452 case KEYWORD_section:
453 parseTag (token, TEXTAG_SECTION);
454 break;
455 case KEYWORD_subsection:
456 parseTag (token, TEXTAG_SUBSUBSECTION);
457 break;
458 case KEYWORD_subsubsection:
459 parseTag (token, TEXTAG_SUBSUBSECTION);
460 break;
461 case KEYWORD_part:
462 parseTag (token, TEXTAG_PART);
463 break;
464 case KEYWORD_paragraph:
465 parseTag (token, TEXTAG_PARAGRAPH);
466 break;
467 case KEYWORD_subparagraph:
468 parseTag (token, TEXTAG_SUBPARAGRAPH);
469 break;
470 case KEYWORD_include:
471 parseTag (token, TEXTAG_INCLUDE);
472 break;
473 default:
474 break;
477 } while (TRUE);
480 static void initialize (const langType language)
482 Assert (sizeof (TexKinds) / sizeof (TexKinds [0]) == TEXTAG_COUNT);
483 Lang_js = language;
484 buildTexKeywordHash ();
487 static void findTexTags (void)
489 tokenInfo *const token = newToken ();
490 exception_t exception;
492 exception = (exception_t) (setjmp (Exception));
493 while (exception == ExceptionNone)
494 parseTexFile (token);
496 deleteToken (token);
499 /* Create parser definition structure */
500 extern parserDefinition* TexParser (void)
502 static const char *const extensions [] = { "tex", NULL };
503 parserDefinition *const def = parserNew ("Tex");
504 def->extensions = extensions;
506 * New definitions for parsing instead of regex
508 def->kinds = TexKinds;
509 def->kindCount = KIND_COUNT (TexKinds);
510 def->parser = findTexTags;
511 def->initialize = initialize;
513 return def;
515 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */