1 #ifndef ANTLR3COLLECTIONS_H
2 #define ANTLR3COLLECTIONS_H
5 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
6 // http://www.temporal-wave.com
7 // http://www.linkedin.com/in/jimidle
9 // All rights reserved.
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 // 2. Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 // 3. The name of the author may not be used to endorse or promote products
20 // derived from this software without specific prior written permission.
22 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
27 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
31 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include <antlr3defs.h>
34 #include <antlr3bitset.h>
36 #define ANTLR3_HASH_TYPE_INT 0 /**< Indicates the hashed file has integer keys */
37 #define ANTLR3_HASH_TYPE_STR 1 /**< Indicates the hashed file has numeric keys */
43 typedef struct ANTLR3_HASH_KEY_struct
45 ANTLR3_UINT8 type
; /**< One of ##ANTLR3_HASH_TYPE_INT or ##ANTLR3_HASH_TYPE_STR */
49 pANTLR3_UINT8 sKey
; /**< Used if type is ANTLR3_HASH_TYPE_STR */
50 ANTLR3_INTKEY iKey
; /**< used if type is ANTLR3_HASH_TYPE_INT */
54 } ANTLR3_HASH_KEY
, *pANTLR3_HASH_KEY
;
56 /** Internal structure representing an element in a hash bucket.
57 * Stores the original key so that duplicate keys can be rejected
58 * if necessary, and contains function can be supported. If the hash key
59 * could be unique I would have invented the perfect compression algorithm ;-)
61 typedef struct ANTLR3_HASH_ENTRY_struct
63 /** Key that created this particular entry
65 ANTLR3_HASH_KEY keybase
;
67 /** Pointer to the data for this particular entry
71 /** Pointer to routine that knows how to release the memory
72 * structure pointed at by data. If this is NULL then we assume
73 * that the data pointer does not need to be freed when the entry
74 * is deleted from the table.
76 void (ANTLR3_CDECL
*free
)(void * data
);
78 /** Pointer to the next entry in this bucket if there
79 * is one. Sometimes different keys will hash to the same bucket (especially
80 * if the number of buckets is small). We could implement dual hashing algorithms
81 * to minimize this, but that seems over the top for what this is needed for.
83 struct ANTLR3_HASH_ENTRY_struct
* nextEntry
;
87 /** Internal structure of a hash table bucket, which tracks
88 * all keys that hash to the same bucket.
90 typedef struct ANTLR3_HASH_BUCKET_struct
92 /** Pointer to the first entry in the bucket (if any, it
93 * may be NULL). Duplicate entries are chained from
96 pANTLR3_HASH_ENTRY entries
;
101 /** Structure that tracks a hash table
103 typedef struct ANTLR3_HASH_TABLE_struct
105 /** Indicates whether the table allows duplicate keys
109 /** Number of buckets available in this table
111 ANTLR3_UINT32 modulo
;
113 /** Points to the memory where the array of buckets
116 pANTLR3_HASH_BUCKET buckets
;
118 /** How many elements currently exist in the table.
122 /** Whether the hash table should strdup the keys it is given or not.
124 ANTLR3_BOOLEAN doStrdup
;
126 /* String keyed hashtable functions */
127 void (*del
) (struct ANTLR3_HASH_TABLE_struct
* table
, void * key
);
128 pANTLR3_HASH_ENTRY (*remove
) (struct ANTLR3_HASH_TABLE_struct
* table
, void * key
);
130 /* Integer based hash functions */
131 void (*delI
) (struct ANTLR3_HASH_TABLE_struct
* table
, ANTLR3_INTKEY key
);
132 pANTLR3_HASH_ENTRY (*removeI
) (struct ANTLR3_HASH_TABLE_struct
* table
, ANTLR3_INTKEY key
);
137 ANTLR3_UINT32
hashSize(pANTLR3_HASH_TABLE table
);
138 void hashFree(pANTLR3_HASH_TABLE table
);
139 ANTLR3_INT32
hashPutI(pANTLR3_HASH_TABLE table
, ANTLR3_INTKEY key
, void * element
, void (ANTLR3_CDECL
*freeptr
)(void *));
140 ANTLR3_INT32
hashPut(pANTLR3_HASH_TABLE table
, void * key
, void * element
, void (ANTLR3_CDECL
*freeptr
)(void *));
141 void * hashGet(pANTLR3_HASH_TABLE table
, void * key
);
142 void * hashGetI(pANTLR3_HASH_TABLE table
, ANTLR3_INTKEY key
);
145 /** Internal structure representing an enumeration of a table.
146 * This is returned by antlr3Enumeration()
147 * Allows the programmer to traverse the table in hash order without
148 * knowing what is in the actual table.
150 * Note that it is up to the caller to ensure that the table
151 * structure does not change in the hash bucket that is currently being
152 * enumerated as this structure just tracks the next pointers in the
155 typedef struct ANTLR3_HASH_ENUM_struct
157 /* Pointer to the table we are enumerating
159 pANTLR3_HASH_TABLE table
;
161 /* Bucket we are currently enumerating (if NULL then we are done)
163 ANTLR3_UINT32 bucket
;
165 /* Next entry to return, if NULL, then move to next bucket if any
167 pANTLR3_HASH_ENTRY entry
;
171 int (*next
) (struct ANTLR3_HASH_ENUM_struct
* en
, pANTLR3_HASH_KEY
*key
, void ** data
);
172 void (*free
) (struct ANTLR3_HASH_ENUM_struct
* table
);
176 /** Structure that represents a LIST collection
178 typedef struct ANTLR3_LIST_struct
180 /** Hash table that is storing the list elements */
181 pANTLR3_HASH_TABLE table
;
185 void * listGet(pANTLR3_LIST list
, ANTLR3_INTKEY key
);
186 ANTLR3_INT32
listPut(pANTLR3_LIST list
, ANTLR3_INTKEY key
, void * element
, void (ANTLR3_CDECL
*freeptr
)(void *));
187 ANTLR3_INT32
listAdd(pANTLR3_LIST list
, void * element
, void (ANTLR3_CDECL
*freeptr
)(void *));
188 void * listRemove(pANTLR3_LIST list
, ANTLR3_INTKEY key
);
189 ANTLR3_UINT32
listSize(pANTLR3_LIST list
);
190 void listFree(pANTLR3_LIST list
);
191 void listDelete(pANTLR3_LIST list
, ANTLR3_INTKEY key
);
194 /** Structure that represents a Stack collection
196 typedef struct ANTLR3_STACK_struct
198 /** List that supports the stack structure */
199 pANTLR3_VECTOR vector
;
201 /** Used for quick access to the top of the stack */
207 void * stackGet(pANTLR3_STACK stack
, ANTLR3_INTKEY key
);
208 ANTLR3_BOOLEAN
stackPush(pANTLR3_STACK stack
, void * element
, void (ANTLR3_CDECL
*freeptr
)(void *));
209 void * stackPop(pANTLR3_STACK stack
);
210 void stackFree(pANTLR3_STACK stack
);
211 void * stackPeek(pANTLR3_STACK stack
);
214 /* Structure that represents a vector element
216 typedef struct ANTLR3_VECTOR_ELEMENT_struct
219 void (ANTLR3_CDECL
*freeptr
)(void *);
221 ANTLR3_VECTOR_ELEMENT
, *pANTLR3_VECTOR_ELEMENT
;
223 #define ANTLR3_VECTOR_INTERNAL_SIZE 16
224 /* Structure that represents a vector collection. A vector is a simple list
225 * that contains a pointer to the element and a pointer to a function that
226 * that can free the element if it is removed. It auto resizes but does not
227 * use hash techniques as it is referenced by a simple numeric index. It is not a
228 * sparse list, so if any element is deleted, then the ones following are moved
229 * down in memory and the count is adjusted.
231 typedef struct ANTLR3_VECTOR_struct
233 /** Array of pointers to vector elements */
234 pANTLR3_VECTOR_ELEMENT elements
;
236 /** Number of entries currently in the list; */
239 /** Many times, a vector holds just a few nodes in an AST and it
240 * is too much overhead to malloc the space for elements so
241 * at the expense of a few bytes of memory, we hold the first
242 * few elements internally. It means we must copy them when
243 * we grow beyond this initial size, but that is less overhead than
244 * the malloc/free callas we would otherwise require.
246 ANTLR3_VECTOR_ELEMENT internal
[ANTLR3_VECTOR_INTERNAL_SIZE
];
248 /** Indicates if the structure was made by a factory, in which
249 * case only the factory can free the memory for the actual vector,
250 * though the vector free function is called and will recurse through its
251 * entries calling any free pointers for each entry.
253 ANTLR3_BOOLEAN factoryMade
;
255 /** Total number of entries in elements at any point in time */
256 ANTLR3_UINT32 elementsSize
;
262 ANTLR3_UINT32
vectorAdd(pANTLR3_VECTOR vector
, void * element
, void (ANTLR3_CDECL
*freeptr
)(void *));
263 void vectorDel(pANTLR3_VECTOR vector
, ANTLR3_UINT32 entry
);
264 void ANTLR3_CDECL
vectorFree(pANTLR3_VECTOR vector
);
265 void * vectorGet(pANTLR3_VECTOR vector
, ANTLR3_UINT32 entry
);
266 void * vectorRemove(pANTLR3_VECTOR vector
, ANTLR3_UINT32 entry
);
267 void vectorClear(pANTLR3_VECTOR vector
);
268 ANTLR3_UINT32
vectorSet(pANTLR3_VECTOR vector
, ANTLR3_UINT32 entry
, void * element
,
269 void (ANTLR3_CDECL
*freeptr
)(void *), ANTLR3_BOOLEAN freeExisting
);
270 ANTLR3_BOOLEAN
vectorSwap(pANTLR3_VECTOR vector
, ANTLR3_UINT32 entry1
, ANTLR3_UINT32 entry2
);
272 /** Default vector pool size if otherwise unspecified
274 #define ANTLR3_FACTORY_VPOOL_SIZE 512
276 /** Structure that tracks vectors in a vector and auto deletes the vectors
277 * in the vector factory when closed.
279 typedef struct ANTLR3_VECTOR_FACTORY_struct
282 /** List of all vector pools allocated so far
284 pANTLR3_VECTOR
*pools
;
286 /** Count of the vector pools allocated so far (current active pool)
288 ANTLR3_INT32 thisPool
;
290 /** The next vector available in the pool
292 ANTLR3_UINT32 nextVector
;
294 /** Trick to quickly initialize a new vector via memcpy and not a function call
296 ANTLR3_VECTOR unTruc
;
298 /** Consumers from the factory can release a factory produced vector
299 * back to the factory so that it may be reused (and thus conserve memory)
300 * by another caller. The available vectors are stored here. Note that
301 * the only vectors avaible in the free chain are produced by this factory, so they
302 * need not be explicitly freed when the factory is closed.
304 pANTLR3_STACK freeStack
;
306 /** Function to close the vector factory
308 void (*close
) (struct ANTLR3_VECTOR_FACTORY_struct
* factory
);
310 /** Function to supply a new vector
312 pANTLR3_VECTOR (*newVector
) (struct ANTLR3_VECTOR_FACTORY_struct
* factory
);
314 /// Function to return a vector to the factory for reuse
316 void (*returnVector
) (struct ANTLR3_VECTOR_FACTORY_struct
* factory
, pANTLR3_VECTOR vector
);
319 ANTLR3_VECTOR_FACTORY
;
322 /* -------------- TRIE Interfaces ---------------- */
325 /** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE
327 typedef struct ANTLR3_TRIE_ENTRY_struct
330 void (ANTLR3_CDECL
*freeptr
)(void *);
333 ANTLR3_INTKEY intVal
;
337 struct ANTLR3_TRIE_ENTRY_struct
* next
; /* Allows duplicate entries for same key in insertion order */
339 ANTLR3_TRIE_ENTRY
, * pANTLR3_TRIE_ENTRY
;
342 /** Structure that defines an element/node in an ANTLR3_INT_TRIE
344 typedef struct ANTLR3_INT_TRIE_NODE_struct
346 ANTLR3_UINT32 bitNum
; /**< This is the left/right bit index for traversal along the nodes */
347 ANTLR3_INTKEY key
; /**< This is the actual key that the entry represents if it is a terminal node */
348 pANTLR3_TRIE_ENTRY buckets
; /**< This is the data bucket(s) that the key indexes, which may be NULL */
349 struct ANTLR3_INT_TRIE_NODE_struct
* leftN
; /**< Pointer to the left node from here when sKey & bitNum = 0 */
350 struct ANTLR3_INT_TRIE_NODE_struct
* rightN
; /**< Pointer to the right node from here when sKey & bitNum, = 1 */
352 ANTLR3_INT_TRIE_NODE
, * pANTLR3_INT_TRIE_NODE
;
354 /** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation,
355 * as you might expect, the key is turned into a "string" by looking at bit(key, depth)
356 * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63)
357 * and potentially a huge trie. This is the algorithm for a Patricia Trie.
358 * Note also that this trie [can] accept multiple entries for the same key and is
359 * therefore a kind of elastic bucket patricia trie.
361 * If you find this code useful, please feel free to 'steal' it for any purpose
362 * as covered by the BSD license under which ANTLR is issued. You can cut the code
363 * but as the ANTLR library is only about 50K (Windows Vista), you might find it
364 * easier to just link the library. Please keep all comments and licenses and so on
365 * in any version of this you create of course.
370 typedef struct ANTLR3_INT_TRIE_struct
372 pANTLR3_INT_TRIE_NODE root
; /* Root node of this integer trie */
373 pANTLR3_INT_TRIE_NODE current
; /* Used to traverse the TRIE with the next() method */
374 ANTLR3_UINT32 count
; /* Current entry count */
375 ANTLR3_BOOLEAN allowDups
; /* Whether this trie accepts duplicate keys */
378 pANTLR3_TRIE_ENTRY (*get
) (struct ANTLR3_INT_TRIE_struct
* trie
, ANTLR3_INTKEY key
);
379 ANTLR3_BOOLEAN (*del
) (struct ANTLR3_INT_TRIE_struct
* trie
, ANTLR3_INTKEY key
);
380 ANTLR3_BOOLEAN (*add
) (struct ANTLR3_INT_TRIE_struct
* trie
, ANTLR3_INTKEY key
, ANTLR3_UINT32 type
, ANTLR3_INTKEY intVal
, void * data
, void (ANTLR3_CDECL
*freeptr
)(void *));
381 void (*free
) (struct ANTLR3_INT_TRIE_struct
* trie
);
387 * A topological sort system that given a set of dependencies of a node m on node n,
388 * can sort them in dependency order. This is a generally useful utility object
389 * that does not care what the things are it is sorting. Generally the set
390 * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR.
391 * I have provided a sort method that given ANTLR3_VECTOR as an input will sort
392 * the vector entries in place, as well as a sort method that just returns an
393 * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but
394 * some set of your own device.
396 * Of the two main algorithms that could be used, I chose to use the depth first
397 * search for unvisited nodes as a) This runs in linear time, and b) it is what
398 * we used in the ANTLR Tool to perform a topological sort of the input grammar files
399 * based on their dependencies.
401 typedef struct ANTLR3_TOPO_struct
404 * A vector of vectors of edges, built by calling the addEdge method()
405 * to indicate that node number n depends on node number m. Each entry in the vector
406 * contains a bitset, which has a bit index set for each node upon which the
407 * entry node depends.
409 pANTLR3_BITSET
* edges
;
412 * A vector used to build up the sorted output order. Note that
413 * as the vector contains UINT32 then the maximum node index is
414 * 'limited' to 2^32, as nodes should be zero based.
416 pANTLR3_UINT32 sorted
;
419 * A vector used to detect cycles in the edge dependecies. It is used
420 * as a stack and each time we descend a node to one of its edges we
421 * add the node into this stack. If we find a node that we have already
422 * visited in the stack, then it means there wasa cycle such as 9->8->1->9
423 * as the only way a node can be on the stack is if we are currently
424 * descnding from it as we remove it from the stack as we exit from
425 * descending its dependencies
427 pANTLR3_UINT32 cycle
;
430 * A flag that indicates the algorithm found a cycle in the edges
432 * If this flag is set after you have called one of the sort routines
433 * then the detected cycle will be contained in the cycle array and
434 * cycleLimit will point to the one after the last entry in the cycle.
436 ANTLR3_BOOLEAN hasCycle
;
439 * A watermark used to accumulate potential cycles in the cycle array.
440 * This should be zero when we are done. Check hasCycle after calling one
441 * of the sort methods and if it is ANTLR3_TRUE then you can find the cycle
442 * in cycle[0]...cycle[cycleMark-1]
444 ANTLR3_UINT32 cycleMark
;
447 * One more than the largest node index that is contained in edges/sorted.
452 * The set of visited nodes as determined by a set entry in
455 pANTLR3_BITSET visited
;
458 * A method that adds an edge from one node to another. An edge
459 * of n -> m indicates that node n is dependent on node m. Note that
460 * while building these edges, it is perfectly OK to add nodes out of
461 * sequence. So, if you have edges:
467 * The you can add them in that order and so add node 3 before nodes 2 and 1
470 void (*addEdge
) (struct ANTLR3_TOPO_struct
* topo
, ANTLR3_UINT32 edge
, ANTLR3_UINT32 dependency
);
474 * A method that returns a pointer to an array of sorted node indexes.
475 * The array is sorted in topological sorted order. Note that the array
476 * is only as large as the largest node index you created an edge for. This means
477 * that if you had an input of 32 nodes, but that largest node with an edge
478 * was 16, then the returned array will be the sorted order of the first 16
479 * nodes and the last 16 nodes of your array are basically fine as they are
480 * as they had no dependencies and do not need any particular sort order.
482 * NB: If the structure that contains the array is freed, then the sorted
483 * array will be freed too so you should use the value of limit to
484 * make a long term copy of this array if you do not want to keep the topo
485 * structure around as well.
487 pANTLR3_UINT32 (*sortToArray
) (struct ANTLR3_TOPO_struct
* topo
);
490 * A method that sorts the supplied ANTLR3_VECTOR in place based
491 * on the previously supplied edge data.
493 void (*sortVector
) (struct ANTLR3_TOPO_struct
* topo
, pANTLR3_VECTOR v
);
496 * A method to free this structure and any associated memory.
498 void (*free
) (struct ANTLR3_TOPO_struct
* topo
);