Now also parse new diffs
[handlerosm.git] / osmparser.c
blobcd1e52574ea8b7d650a5f321e196ac2b5246a83a
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <sys/mman.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <fcntl.h>
7 #include <string.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <unistd.h>
/*
 * Expected element nesting of the input:
 *
 * <osm>
 *   <node>
 *     <tag k=".." v=".." />
 *   </node>
 *   <way>
 *     <nd>
 *     <tag>
 *   </way>
 *   <relation>
 *     <member>
 *     <tag>
 *   </relation>
 */
/* Names of the CSV files written by the parser, one per target table.
 * They are plain string literals so they can be pasted into format strings. */
#define file_nodes                     "nodes.csv"

#ifdef BENCHMARK
/* Alternative node encodings, only produced in benchmark builds. */
#define file_nodes_uint                "nodes_uint.csv"
#define file_nodes_gis                 "nodes_gis.csv"
#endif

#define file_node_tags                 "node_tags.csv"

#define file_ways                      "ways.csv"
#define file_way_tags                  "way_tags.csv"
#define file_way_nds                   "way_nds.csv"

#define file_relations                 "relations.csv"
#define file_relation_tags             "relation_tags.csv"
#define file_relation_member_node      "relation_member_node.csv"
#define file_relation_member_relation  "relation_member_relation.csv"
#define file_relation_member_way       "relation_member_way.csv"
/*
 * Convert a textual coordinate in degrees to a 32-bit fixed-point value in
 * which the full 360-degree circle spans the whole unsigned range
 * (2^32 / 360 units per degree).
 *
 * input: NUL-terminated decimal number as accepted by strtod(); text that
 *        does not parse as a number yields 0.
 *
 * Negative angles wrap modulo 2^32 (e.g. -90 degrees maps to 3 * 2^30).
 * Converting a negative double straight to an unsigned type is undefined
 * behaviour in C, so the value goes through a signed 64-bit integer first,
 * which makes the wrap-around well-defined.  NaN and products too large for
 * that intermediate type yield 0.
 */
unsigned int coordtouint(char *input) {
    const double units_per_degree = 4294967296.0 / 360.0;
    double scaled = strtod(input, NULL) * units_per_degree;

    /* Written as a negated conjunction so that NaN is rejected as well. */
    if (!(scaled > -9.0e18 && scaled < 9.0e18))
        return 0;

    return (unsigned int) (long long) scaled;
}
/*
 * Return a newly allocated copy of instr in which every backslash and every
 * single quote is preceded by a backslash.
 *
 * instr:   NUL-terminated input; NULL is tolerated and yields NULL.
 * returns: heap string owned by the caller (release with free()).
 *
 * Running out of memory is fatal: the error is reported and the process
 * exits, matching how the rest of this file treats unrecoverable failures.
 */
char * escape_string(char *instr)
{
    size_t len, extra = 0, i, j = 0;
    char *outstr;

    if (instr == NULL)
        return NULL;

    /* Measure once; the length does not change while we scan. */
    len = strlen(instr);

    for (i = 0; i < len; i++)
        if (instr[i] == '\\' || instr[i] == '\'') extra++;

    outstr = malloc(len + extra + 1);
    if (outstr == NULL) { perror("malloc"); exit(-1); }

    /* i runs up to and including len so the terminating NUL is copied too. */
    for (i = 0; i <= len; i++) {
        if (instr[i] == '\\' || instr[i] == '\'')
            outstr[j++] = '\\';
        outstr[j++] = instr[i];
    }

    return outstr;
}
71 static void parser(char *range, unsigned long int max) {
72 typedef enum { OSM = 0, NODE = 1, WAY = 2, RELATION = 3, TAG = 4, ND = 5, MEMBER = 6 } osm_state_t;
73 typedef enum { UNKNOWN = 0, ID, LAT, LON, USER, UID, TIMESTAMP, KEY, VALUE, TYPE, REF, ROLE} key_state_t;
74 char *attr_id = NULL, *attr_lat = NULL, *attr_lon = NULL, *attr_user = NULL, *attr_uid = NULL,
75 *attr_timestamp = NULL, *attr_key = NULL, *attr_value = NULL, *attr_type = NULL, *attr_ref = NULL,
76 *attr_role = NULL;
77 #ifdef BENCHMARK
78 unsigned int attr_lat_uint = 0;
79 unsigned int attr_lon_uint = 0;
80 #endif
82 FILE *fd_nodes = fopen(file_nodes, "w");
83 if (fd_nodes == NULL) { perror("Open:"); exit(-1); }
84 #ifdef BENCHMARK
85 FILE *fd_nodes_uint = fopen(file_nodes_uint, "w");
86 if (fd_nodes_uint == NULL) { perror("Open:"); exit(-1); }
87 FILE *fd_nodes_gis = fopen(file_nodes_gis, "w");
88 if (fd_nodes_gis == NULL) { perror("Open:"); exit(-1); }
89 #endif
90 FILE *fd_node_tags = fopen(file_node_tags, "w");
91 if (fd_node_tags == NULL) { perror("Open:"); exit(-1); }
92 FILE *fd_ways = fopen(file_ways, "w");
93 if (fd_ways == NULL) { perror("Open:"); exit(-1); }
94 FILE *fd_way_tags = fopen(file_way_tags, "w");
95 if (fd_way_tags == NULL) { perror("Open:"); exit(-1); }
96 FILE *fd_way_nds = fopen(file_way_nds, "w");
97 if (fd_way_nds == NULL) { perror("Open:"); exit(-1); }
98 FILE *fd_relations = fopen(file_relations, "w");
99 if (fd_relations == NULL) { perror("Open:"); exit(-1); }
100 FILE *fd_relation_tags = fopen(file_relation_tags, "w");
101 if (fd_relation_tags == NULL) { perror("Open:"); exit(-1); }
102 FILE *fd_members_node = fopen(file_relation_member_node, "w");
103 if (fd_members_node == NULL) { perror("Open:"); exit(-1); }
104 FILE *fd_members_relation = fopen(file_relation_member_relation, "w");
105 if (fd_members_relation == NULL) { perror("Open:"); exit(-1); }
106 FILE *fd_members_way = fopen(file_relation_member_way, "w");
107 if (fd_members_way == NULL) { perror("Open:"); exit(-1); }
109 unsigned long int count_nodes = 0, count_node_tags = 0,
110 count_ways = 0, count_way_tags = 0, count_way_nds = 0,
111 count_relations = 0, count_relation_tags = 0, count_members_node = 0, count_members_relation = 0, count_members_way = 0;
113 unsigned long int sequence = 0;
116 osm_state_t current_tag = OSM;
117 osm_state_t parent_tag = OSM;
119 char *start, *end, *nodename, *nodename_end;
121 start = range;
122 end = strchrnul((const char*) start, '\n');
124 if (strncmp(start, "<?xml", 5) != 0)
125 return;
127 start = end + 1;
128 end = strchrnul((const char*) start, '\n');
130 if (strncmp(start, "<osm", 4) != 0)
131 return;
133 start = end + 1;
135 do {
136 end = strchrnul((const char*) start, '\n');
138 nodename = strchrnul(start, '<') + 1;
139 nodename_end = strchrnul(nodename, ' ');
141 if (nodename[0] == '/') {
142 free(attr_id);
143 free(attr_lat);
144 free(attr_lon);
145 free(attr_timestamp);
146 free(attr_user);
147 free(attr_uid);
149 attr_id = attr_lat = attr_lon = attr_user = attr_uid = attr_timestamp = NULL;
151 sequence = 0;
153 start = end + 1;
154 continue;
157 switch (nodename_end - nodename) {
158 case 2:
159 current_tag = ND;
160 break;
161 case 3: {
162 switch (nodename[0]) {
163 case 'o':
164 current_tag = OSM;
165 break;
166 case 'w':
167 current_tag = WAY;
168 break;
169 case 't':
170 current_tag = TAG;
171 break;
172 default:
173 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
175 break;
177 case 4:
178 current_tag = NODE;
179 break;
180 case 5:
181 /* BOUND */
182 start = end + 1;
183 continue;
184 case 6:
185 current_tag = MEMBER;
186 break;
187 case 8:
188 current_tag = RELATION;
189 break;
190 default:
191 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
195 char *key, *key_end, *value_end;
196 key = nodename_end + 1;
198 do {
199 char *value;
200 key_state_t current_key = UNKNOWN;
201 key_end = strchrnul(key, '=');
203 if (key_end == NULL || key_end >= end)
204 break;
206 switch (key_end - key) {
207 case 1: {
208 switch (key[0]) {
209 case 'k':
210 current_key = KEY;
211 break;
212 case 'v':
213 current_key = VALUE;
214 break;
215 default:
216 current_key = UNKNOWN;
218 break;
220 case 2:
221 current_key = ID;
222 break;
223 case 3: {
224 switch (key[1]) {
225 case 'a':
226 current_key = LAT;
227 break;
228 case 'o':
229 current_key = LON;
230 break;
231 case 'e':
232 current_key = REF;
233 break;
234 case 'i':
235 current_key = UID;
236 break;
237 default:
238 current_key = UNKNOWN;
239 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
241 break;
243 case 4: {
244 switch (key[0]) {
245 case 'u':
246 current_key = USER;
247 break;
248 case 'r':
249 current_key = ROLE;
250 break;
251 case 't':
252 current_key = TYPE;
253 break;
254 default:
255 current_key = UNKNOWN;
256 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
258 break;
260 case 9:
261 current_key = TIMESTAMP;
262 break;
263 default: {
264 char *thingie = strndup(key, (key_end - key));
265 current_key = UNKNOWN;
267 fprintf(stderr, "UNKNOWN ATTR %s-> %c%c\n", thingie, key[0], key[1]);
268 free(thingie);
272 value = key_end + 2;
273 value_end = value;
274 value_end = strchr(value_end, '"');
276 if (value_end > end)
277 break;
279 switch (current_key) {
280 case ID:
281 if (attr_id) free(attr_id);
282 attr_id = strndup(value, (value_end - value));
283 break;
285 case LAT:
286 if (attr_lat) free(attr_lat);
287 attr_lat = strndup(value, (value_end - value));
288 #ifdef BENCHMARK
289 attr_lat_uint = coordtouint(attr_lat);
290 #endif
291 break;
293 case LON:
294 if (attr_lon) free(attr_lon);
295 attr_lon = strndup(value, (value_end - value));
296 #ifdef BENCHMARK
297 attr_lon_uint = coordtouint(attr_lon);
298 #endif
299 break;
301 case TIMESTAMP:
302 if (attr_timestamp) free(attr_timestamp);
303 attr_timestamp = strndup(value, (value_end - value));
304 // attr_timestamp[10] = ' '; /* Stupid timestamp fix */
305 break;
307 case USER: {
308 char *tmp;
309 if (attr_user) free(attr_user);
310 attr_user = strndup(value, (value_end - value));
311 tmp = escape_string(attr_user);
312 free(attr_user);
313 attr_user = tmp;
314 break;
317 case UID: {
318 if (attr_uid) free(attr_uid);
319 attr_uid = strndup(value, (value_end - value));
320 break;
323 case KEY: {
324 char *tmp;
325 if (attr_key) free(attr_key);
326 attr_key = strndup(value, (value_end - value));
327 tmp = escape_string(attr_key);
328 free(attr_key);
329 attr_key = tmp;
330 break;
333 case VALUE: {
334 char *tmp;
335 if (attr_value) free(attr_value);
336 attr_value = strndup(value, (value_end - value));
337 tmp = escape_string(attr_value);
338 free(attr_value);
339 attr_value = tmp;
340 break;
343 case TYPE:
344 if (attr_type) free(attr_type);
345 attr_type = strndup(value, (value_end - value));
346 break;
348 case REF:
349 if (attr_ref) free(attr_ref);
350 attr_ref = strndup(value, (value_end - value));
351 break;
353 case ROLE: {
354 char *tmp;
355 if (attr_role) free(attr_role);
356 attr_role = strndup(value, (value_end - value));
357 tmp = escape_string(attr_role);
358 free(attr_role);
359 attr_role = tmp;
360 break;
363 default:
364 fprintf(stderr, "--> %c%c\n", value[0], value[1]);
367 key = value_end + 2;
368 } while (key < end);
370 switch (current_tag) {
371 case NODE:
372 fprintf(fd_nodes, "%s, %s, %s, %s, '%s'\n", attr_id, attr_lat, attr_lon, (attr_uid != NULL ? attr_uid : 0), attr_timestamp);
373 #ifdef BENCHMARK
374 fprintf(fd_nodes_uint, "%s, %d, %d, %s, '%s'\n", attr_id, attr_lat_uint, attr_lon_uint, (attr_uid != NULL ? attr_uid : 0), attr_timestamp);
375 fprintf(fd_nodes_gis, "%s, 'POINT( %s %s )', %s, '%s'\n", attr_id, attr_lon, attr_lat, (attr_uid != NULL ? attr_uid : 0), attr_timestamp);
376 #endif
377 count_nodes++;
378 break;
379 case TAG: {
380 switch (parent_tag) {
381 case NODE:
382 fprintf(fd_node_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
383 count_node_tags++;
384 break;
385 case WAY:
386 fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
387 count_way_tags++;
388 break;
389 case RELATION:
390 fprintf(fd_relation_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
391 count_relation_tags++;
392 break;
393 default:
394 break;
396 break;
398 case WAY:
399 fprintf(fd_ways, "%s, %s, '%s'\n", attr_id, (attr_uid != NULL ? attr_uid : 0), attr_timestamp);
400 count_ways++;
401 // fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, "type", "way");
402 // count_way_tags++;
403 break;
404 case RELATION:
405 fprintf(fd_relations, "%s, %s, '%s'\n", attr_id, (attr_uid != NULL ? attr_uid : 0), attr_timestamp);
406 count_relations++;
407 break;
408 case MEMBER:
409 if (strcmp(attr_type, "node") == 0) {
410 fprintf(fd_members_node, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
411 count_members_node++;
412 } else if (strcmp(attr_type, "way") == 0) {
413 fprintf(fd_members_way, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
414 count_members_way++;
415 } else if (strcmp(attr_type, "relation") == 0) {
416 fprintf(fd_members_relation, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
417 count_members_relation++;
419 sequence++;
420 break;
421 case ND:
422 fprintf(fd_way_nds, "%s, %lu, %s\n", attr_id, sequence, attr_ref);
423 sequence++;
424 count_way_nds++;
425 break;
426 default:
427 break;
430 if (end[-2] == '/') {
431 switch (current_tag) {
432 case NODE:
433 free(attr_lat);
434 free(attr_lon);
435 attr_lat = NULL;
436 attr_lon = NULL;
437 #ifdef BENCHMARK
438 attr_lat_uint = 0;
439 attr_lon_uint = 0;
440 #endif
441 /* no break! */
443 case WAY:
444 case RELATION:
445 free(attr_id);
446 free(attr_timestamp);
447 free(attr_user);
448 free(attr_uid);
450 attr_id = attr_user = attr_uid = attr_timestamp = NULL;
452 sequence = 0;
453 break;
455 case TAG:
456 free(attr_key);
457 free(attr_value);
459 attr_key = NULL;
460 attr_value = NULL;
461 break;
463 case ND:
464 case MEMBER:
465 free(attr_type);
466 free(attr_ref);
467 free(attr_role);
469 attr_type = NULL;
470 attr_ref = NULL;
471 attr_role = NULL;
472 default:
473 break;
475 } else if (current_tag == NODE || current_tag == WAY || current_tag == RELATION) {
476 parent_tag = current_tag;
479 } while ((start = ++end) < (range + max));
481 free(attr_id);
482 free(attr_lat);
483 free(attr_lon);
484 free(attr_timestamp);
485 free(attr_user);
486 free(attr_uid);
488 free(attr_key);
489 free(attr_value);
491 fclose(fd_nodes);
492 #ifdef BENCHMARK
493 fclose(fd_nodes_uint);
494 fclose(fd_nodes_gis);
495 #endif
496 fclose(fd_node_tags);
497 fclose(fd_ways);
498 fclose(fd_way_tags);
499 fclose(fd_way_nds);
500 fclose(fd_relations);
501 fclose(fd_relation_tags);
502 fclose(fd_members_node);
503 fclose(fd_members_relation);
504 fclose(fd_members_way);
506 char *current = get_current_dir_name();
508 printf("START TRANSACTION;\n");
510 printf("CREATE TABLE nodes_legacy (id integer, long double, lat double, uid integer, timestamp timestamptz);\n");
511 #ifdef BENCHMARK
512 printf("CREATE TABLE nodes_legacy_uint (id integer, long integer, lat integer, uid integer, timestamp timestamptz);\n");
513 printf("CREATE TABLE nodes_legacy_gis (id integer, poi point, uid integer, timestamp timestamptz);\n");
514 #endif
515 printf("CREATE TABLE node_tags (node integer, k varchar(255), v varchar(1024));\n");
516 printf("CREATE TABLE ways (id integer,uid integer, timestamp timestamptz);\n");
517 printf("CREATE TABLE way_tags (way integer, k varchar(255), v varchar(1024));\n");
518 printf("CREATE TABLE way_nds (way integer, idx integer, to_node integer);\n");
519 printf("CREATE TABLE relations(id integer, uid integer, timestamp timestamptz);\n");
520 printf("CREATE TABLE relation_members_node (relation integer, idx integer, to_node integer, role varchar(255));\n");
521 printf("CREATE TABLE relation_members_relation (relation integer, idx integer, to_relation integer, role varchar(255));\n");
522 printf("CREATE TABLE relation_members_way (relation integer, idx integer, to_way integer, role varchar(255));\n");
523 printf("CREATE TABLE relation_tags (relation integer, k varchar(255), v varchar(1024));\n");
525 printf("COPY %lu RECORDS INTO nodes_legacy from '%s/" file_nodes "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
526 #ifdef BENCHMARK
527 printf("COPY %lu RECORDS INTO nodes_legacy_uint from '%s/" file_nodes_uint "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
528 printf("COPY %lu RECORDS INTO nodes_legacy_gis from '%s/" file_nodes_gis "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
529 #endif
530 printf("COPY %lu RECORDS INTO node_tags from '%s/" file_node_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_node_tags, current);
531 printf("COPY %lu RECORDS INTO ways from '%s/" file_ways "' USING DELIMITERS ',', '\\n', '''';\n", count_ways, current);
532 printf("COPY %lu RECORDS INTO way_tags from '%s/" file_way_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_way_tags, current);
533 printf("COPY %lu RECORDS INTO way_nds from '%s/" file_way_nds "' USING DELIMITERS ',', '\\n', '''';\n", count_way_nds, current);
534 printf("COPY %lu RECORDS INTO relations from '%s/" file_relations "' USING DELIMITERS ',', '\\n', '''';\n", count_relations, current);
535 printf("COPY %lu RECORDS INTO relation_tags from '%s/" file_relation_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_relation_tags, current);
536 printf("COPY %lu RECORDS INTO relation_members_node from '%s/" file_relation_member_node "' USING DELIMITERS ',', '\\n', '''';\n", count_members_node, current);
537 printf("COPY %lu RECORDS INTO relation_members_relation from '%s/" file_relation_member_relation "' USING DELIMITERS ',', '\\n', '''';\n", count_members_relation, current);
538 printf("COPY %lu RECORDS INTO relation_members_way from '%s/" file_relation_member_way "' USING DELIMITERS ',', '\\n', '''';\n", count_members_way, current);
540 printf("COMMIT;\n");
542 printf("START TRANSACTION;\n");
544 printf("CREATE SEQUENCE s_nodes AS INTEGER;\n");
545 printf("ALTER SEQUENCE s_nodes RESTART WITH (SELECT MAX(id) FROM nodes_legacy);\n");
546 printf("ALTER TABLE nodes_legacy ALTER COLUMN id SET NOT NULL;\n");
547 printf("ALTER TABLE nodes_legacy ALTER COLUMN id SET DEFAULT NEXT VALUE FOR \"sys\".\"s_nodes\";\n");
548 printf("ALTER TABLE nodes_legacy ADD CONSTRAINT pk_nodes_id PRIMARY KEY (id);\n");
550 printf("CREATE SEQUENCE s_ways AS INTEGER;\n");
551 printf("ALTER SEQUENCE s_ways RESTART WITH (SELECT MAX(id) FROM ways);\n");
552 printf("ALTER TABLE ways ALTER COLUMN id SET NOT NULL;\n");
553 printf("ALTER TABLE ways ALTER COLUMN id SET DEFAULT NEXT VALUE FOR \"sys\".\"s_ways\";\n");
554 printf("ALTER TABLE ways ADD CONSTRAINT pk_ways_id PRIMARY KEY (id);\n");
556 printf("CREATE SEQUENCE s_relations AS INTEGER;\n");
557 printf("ALTER SEQUENCE s_relations RESTART WITH (SELECT MAX(id) FROM relations);\n");
558 printf("ALTER TABLE relations ALTER COLUMN id SET NOT NULL;\n");
559 printf("ALTER TABLE relations ALTER COLUMN id SET DEFAULT NEXT VALUE FOR \"sys\".\"s_relations\";\n");
560 printf("ALTER TABLE relations ADD CONSTRAINT pk_relations_id PRIMARY KEY (id);\n");
562 printf("ALTER TABLE relation_members_node ADD CONSTRAINT pk_relation_members_node PRIMARY KEY (relation, idx);\n");
563 printf("ALTER TABLE relation_members_way ADD CONSTRAINT pk_relation_members_way PRIMARY KEY (relation,idx);\n");
564 printf("ALTER TABLE relation_members_relation ADD CONSTRAINT pk_relation_members_relation PRIMARY KEY (relation,idx);\n");
566 printf("COMMIT;\n");
569 printf("START TRANSACTION;\n");
571 printf("ALTER TABLE node_tags ADD CONSTRAINT pk_node_tags PRIMARY KEY (node, k);\n");
572 printf("ALTER TABLE node_tags ADD CONSTRAINT fk_node_tags_node FOREIGN KEY (node) REFERENCES nodes_legacy (id);\n");
574 printf("ALTER TABLE way_tags ADD CONSTRAINT pk_way_tags PRIMARY KEY (way, k);\n");
575 printf("ALTER TABLE way_tags ADD CONSTRAINT fk_way_tags_way FOREIGN KEY (way) REFERENCES ways (id);\n");
577 printf("ALTER TABLE way_nds ADD CONSTRAINT pk_way_nds PRIMARY KEY (way, idx);\n");
578 printf("ALTER TABLE way_nds ADD CONSTRAINT fk_way_nds_way FOREIGN KEY (way) REFERENCES ways (id);\n");
579 printf("ALTER TABLE way_nds ADD CONSTRAINT fk_way_nds_node FOREIGN KEY (to_node) REFERENCES nodes_legacy (id);\n");
581 printf("ALTER TABLE relation_tags ADD CONSTRAINT pk_relation_tags PRIMARY KEY (relation, k);\n");
582 printf("ALTER TABLE relation_tags ADD CONSTRAINT fk_relation_tags FOREIGN KEY (relation) REFERENCES relations (id);\n");
584 printf("ALTER TABLE relation_members_node ADD CONSTRAINT fk_relation_members_node FOREIGN KEY (relation) REFERENCES relations (id);\n");
585 printf("ALTER TABLE relation_members_node ADD CONSTRAINT fk_relation_members_tonode FOREIGN KEY (to_node) REFERENCES nodes_legacy (id);\n");
587 printf("ALTER TABLE relation_members_way ADD CONSTRAINT fk_relation_members_way FOREIGN KEY (relation) REFERENCES relations (id);\n");
588 printf("ALTER TABLE relation_members_way ADD CONSTRAINT fk_relation_members_toway FOREIGN KEY (to_way) REFERENCES ways (id);\n");
590 printf("ALTER TABLE relation_members_relation ADD CONSTRAINT fk_relation_members_relation FOREIGN KEY (relation) REFERENCES relations (id);\n");
591 printf("ALTER TABLE relation_members_relation ADD CONSTRAINT fk_relation_members_torelation FOREIGN KEY (to_relation) REFERENCES relations (id);\n");
593 printf("COMMIT;\n");
596 free(current);
/*
 * Entry point: map the file named by the single command-line argument into
 * memory and hand it to parser().
 *
 * Progress and error messages go to stderr; stdout is left to the SQL script
 * printed by parser().  An empty input file is accepted and produces no
 * output.  Exits with status 0 on success and via exit(-1) on any failure.
 */
int main(int argc, char *argv[]) {
    int fd;
    struct stat statbuf;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <osm-file>\n", argc > 0 ? argv[0] : "osmparser");
        exit(-1);
    }

    fprintf(stderr, "Analysing %s...\n", argv[1]);

    fd = open(argv[1], O_RDONLY);

    if (fd < 0) { perror("Open:"); exit(-1); }

    if (fstat (fd, &statbuf) == -1) { perror("fstat:"); exit(-1); }

    if (statbuf.st_size > 0) {
        char *range = NULL;
        /* NOTE(review): parser() scans with NUL-terminated string searches.
         * A read-only file mapping has no terminator of its own; this
         * presumably relies on the zero-filled remainder of the last mapped
         * page, which does not exist when the file size is an exact multiple
         * of the page size - confirm and handle that case if it matters. */
        range = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, (off_t) 0);
        if (range == MAP_FAILED) {
            perror("Mmap:");
            /* stderr, not stdout: stdout carries the generated SQL. */
            fprintf(stderr, "(did you compile PAE in the kernel?)\n");
            exit(-1);
        }
        parser(range, statbuf.st_size / sizeof(char));
        munmap(range, statbuf.st_size);
    }

    close(fd);
    exit(0);
}