2 * $Id: tex.c 666 2008-05-15 17:47:31Z dfishburn $
4 * Copyright (c) 2008, David Fishburn
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License.
9 * This module contains functions for generating tags for TeX language files.
11 * Tex language reference:
12 * http://en.wikibooks.org/wiki/TeX#The_Structure_of_TeX
18 #include "general.h" /* must always come first */
19 #include <ctype.h> /* to define isalpha () */
36 #define isType(token,t) (boolean) ((token)->type == (t))
37 #define isKeyword(token,k) (boolean) ((token)->keyword == (k))
/*
 * Reasons control can arrive at the setjmp() point in findTexTags().
 * ExceptionNone has to stay the first enumerator (value 0) because that is
 * what setjmp() yields on its initial, direct return; ExceptionEOF is the
 * value readToken() passes to longjmp() when it reads EOF.
 */
typedef enum eException {
	ExceptionNone,
	ExceptionEOF
} exception_t;
46 * Used to specify type of keyword.
48 typedef enum eKeywordId
{
53 KEYWORD_subsubsection
,
60 /* Used to determine whether keyword is valid for the token language and
63 typedef struct sKeywordDesc
{
68 typedef enum eTokenType
{
85 typedef struct sTokenInfo
{
90 unsigned long lineNumber
;
98 static langType Lang_js
;
100 static jmp_buf Exception
;
106 TEXTAG_SUBSUBSECTION
,
114 static kindOption TexKinds
[] = {
115 { TRUE
, 'c', "chapter", "chapters" },
116 { TRUE
, 's', "section", "sections" },
117 { TRUE
, 'u', "subsection", "subsections" },
118 { TRUE
, 'b', "subsubsection", "subsubsections" },
119 { TRUE
, 'p', "part", "parts" },
120 { TRUE
, 'P', "paragraph", "paragraphs" },
121 { TRUE
, 'G', "subparagraph", "subparagraphs" },
122 { TRUE
, 'i', "include", "includes" }
125 static const keywordDesc TexKeywordTable
[] = {
126 /* keyword keyword ID */
127 { "chapter", KEYWORD_chapter
},
128 { "section", KEYWORD_section
},
129 { "subsection", KEYWORD_subsection
},
130 { "subsubsection", KEYWORD_subsubsection
},
131 { "part", KEYWORD_part
},
132 { "paragraph", KEYWORD_paragraph
},
133 { "subparagraph", KEYWORD_subparagraph
},
134 { "include", KEYWORD_include
}
138 * FUNCTION DEFINITIONS
141 static boolean
isIdentChar (const int c
)
144 (isalpha (c
) || isdigit (c
) || c
== '$' ||
145 c
== '_' || c
== '#' || c
== '-' || c
== '.');
148 static void buildTexKeywordHash (void)
150 const size_t count
= sizeof (TexKeywordTable
) /
151 sizeof (TexKeywordTable
[0]);
153 for (i
= 0 ; i
< count
; ++i
)
155 const keywordDesc
* const p
= &TexKeywordTable
[i
];
156 addKeyword (p
->name
, Lang_js
, (int) p
->id
);
160 static tokenInfo
*newToken (void)
162 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
164 token
->type
= TOKEN_UNDEFINED
;
165 token
->keyword
= KEYWORD_NONE
;
166 token
->string
= vStringNew ();
167 token
->scope
= vStringNew ();
168 token
->lineNumber
= getSourceLineNumber ();
169 token
->filePosition
= getInputFilePosition ();
174 static void deleteToken (tokenInfo
*const token
)
176 vStringDelete (token
->string
);
177 vStringDelete (token
->scope
);
182 * Tag generation functions
185 static void makeConstTag (tokenInfo
*const token
, const texKind kind
)
187 if (TexKinds
[kind
].enabled
)
189 const char *const name
= vStringValue (token
->string
);
191 initTagEntry (&e
, name
);
193 e
.lineNumber
= token
->lineNumber
;
194 e
.filePosition
= token
->filePosition
;
195 e
.kindName
= TexKinds
[kind
].name
;
196 e
.kind
= TexKinds
[kind
].letter
;
202 static void makeTexTag (tokenInfo
*const token
, texKind kind
)
206 if (TexKinds
[kind
].enabled
)
209 * If a scope has been added to the token, change the token
210 * string to include the scope when making the tag.
212 if ( vStringLength (token
->scope
) > 0 )
214 fulltag
= vStringNew ();
215 vStringCopy (fulltag
, token
->scope
);
216 vStringCatS (fulltag
, ".");
217 vStringCatS (fulltag
, vStringValue (token
->string
));
218 vStringTerminate (fulltag
);
219 vStringCopy (token
->string
, fulltag
);
220 vStringDelete (fulltag
);
222 makeConstTag (token
, kind
);
230 static void parseString (vString
*const string
, const int delimiter
)
240 c
= fileGetc(); /* This maybe a ' or ". */
241 vStringPut (string
, c
);
243 else if (c
== delimiter
)
246 vStringPut (string
, c
);
248 vStringTerminate (string
);
252 * Reads an identifier beginning with "firstChar" and places it into
255 static void parseIdentifier (vString
*const string
, const int firstChar
)
258 Assert (isIdentChar (c
));
261 vStringPut (string
, c
);
263 } while (isIdentChar (c
));
265 vStringTerminate (string
);
267 fileUngetc (c
); /* unget non-identifier character */
270 static void readToken (tokenInfo
*const token
)
274 token
->type
= TOKEN_UNDEFINED
;
275 token
->keyword
= KEYWORD_NONE
;
276 vStringClear (token
->string
);
282 token
->lineNumber
= getSourceLineNumber ();
283 token
->filePosition
= getInputFilePosition ();
285 while (c
== '\t' || c
== ' ' || c
== '\n');
289 case EOF
: longjmp (Exception
, (int)ExceptionEOF
); break;
290 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
291 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
292 case ',': token
->type
= TOKEN_COMMA
; break;
293 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
294 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
295 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
296 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
297 case '*': token
->type
= TOKEN_STAR
; break;
301 token
->type
= TOKEN_STRING
;
302 parseString (token
->string
, c
);
303 token
->lineNumber
= getSourceLineNumber ();
304 token
->filePosition
= getInputFilePosition ();
309 * All Tex tags start with a backslash.
310 * Check if the next character is an alpha character
311 * else it is not a potential tex tag.
318 parseIdentifier (token
->string
, c
);
319 token
->lineNumber
= getSourceLineNumber ();
320 token
->filePosition
= getInputFilePosition ();
321 token
->keyword
= analyzeToken (token
->string
, Lang_js
);
322 if (isKeyword (token
, KEYWORD_NONE
))
323 token
->type
= TOKEN_IDENTIFIER
;
325 token
->type
= TOKEN_KEYWORD
;
330 fileSkipToCharacter ('\n'); /* % are single line comments */
335 if (! isIdentChar (c
))
336 token
->type
= TOKEN_UNDEFINED
;
339 parseIdentifier (token
->string
, c
);
340 token
->lineNumber
= getSourceLineNumber ();
341 token
->filePosition
= getInputFilePosition ();
342 token
->type
= TOKEN_IDENTIFIER
;
348 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
350 dest
->lineNumber
= src
->lineNumber
;
351 dest
->filePosition
= src
->filePosition
;
352 dest
->type
= src
->type
;
353 dest
->keyword
= src
->keyword
;
354 vStringCopy (dest
->string
, src
->string
);
355 vStringCopy (dest
->scope
, src
->scope
);
362 static boolean
parseTag (tokenInfo
*const token
, texKind kind
)
364 tokenInfo
*const name
= newToken ();
366 boolean useLongName
= TRUE
;
368 fullname
= vStringNew ();
369 vStringClear (fullname
);
372 * Tex tags are of these formats:
373 * \keyword{any number of words}
374 * \keyword[short desc]{any number of words}
375 * \keyword*[short desc]{any number of words}
377 * When a keyword is found, loop through all words within
378 * the curly braces for the tag name.
381 if (isType (token
, TOKEN_KEYWORD
))
383 copyToken (name
, token
);
387 if (isType (token
, TOKEN_OPEN_SQUARE
))
392 while (! isType (token
, TOKEN_CLOSE_SQUARE
) )
394 if (isType (token
, TOKEN_IDENTIFIER
))
396 if (fullname
->length
> 0)
397 vStringCatS (fullname
, " ");
398 vStringCatS (fullname
, vStringValue (token
->string
));
402 vStringTerminate (fullname
);
403 vStringCopy (name
->string
, fullname
);
404 makeTexTag (name
, kind
);
407 if (isType (token
, TOKEN_STAR
))
412 if (isType (token
, TOKEN_OPEN_CURLY
))
415 while (! isType (token
, TOKEN_CLOSE_CURLY
) )
417 /* if (isType (token, TOKEN_IDENTIFIER) && useLongName) */
420 if (fullname
->length
> 0)
421 vStringCatS (fullname
, " ");
422 vStringCatS (fullname
, vStringValue (token
->string
));
428 vStringTerminate (fullname
);
429 vStringCopy (name
->string
, fullname
);
430 makeTexTag (name
, kind
);
435 vStringDelete (fullname
);
439 static void parseTexFile (tokenInfo
*const token
)
445 if (isType (token
, TOKEN_KEYWORD
))
447 switch (token
->keyword
)
449 case KEYWORD_chapter
:
450 parseTag (token
, TEXTAG_CHAPTER
);
452 case KEYWORD_section
:
453 parseTag (token
, TEXTAG_SECTION
);
455 case KEYWORD_subsection
:
456 parseTag (token
, TEXTAG_SUBSUBSECTION
);
458 case KEYWORD_subsubsection
:
459 parseTag (token
, TEXTAG_SUBSUBSECTION
);
462 parseTag (token
, TEXTAG_PART
);
464 case KEYWORD_paragraph
:
465 parseTag (token
, TEXTAG_PARAGRAPH
);
467 case KEYWORD_subparagraph
:
468 parseTag (token
, TEXTAG_SUBPARAGRAPH
);
470 case KEYWORD_include
:
471 parseTag (token
, TEXTAG_INCLUDE
);
480 static void initialize (const langType language
)
482 Assert (sizeof (TexKinds
) / sizeof (TexKinds
[0]) == TEXTAG_COUNT
);
484 buildTexKeywordHash ();
487 static void findTexTags (void)
489 tokenInfo
*const token
= newToken ();
490 exception_t exception
;
492 exception
= (exception_t
) (setjmp (Exception
));
493 while (exception
== ExceptionNone
)
494 parseTexFile (token
);
499 /* Create parser definition stucture */
500 extern parserDefinition
* TexParser (void)
502 static const char *const extensions
[] = { "tex", NULL
};
503 parserDefinition
*const def
= parserNew ("Tex");
504 def
->extensions
= extensions
;
506 * New definitions for parsing instead of regex
508 def
->kinds
= TexKinds
;
509 def
->kindCount
= KIND_COUNT (TexKinds
);
510 def
->parser
= findTexTags
;
511 def
->initialize
= initialize
;
515 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */