Osmsucker is the streaming equivalent of osmparser. Pipe something in
[handlerosm.git] / osmsucker.c
blob57b592b207306d2ba0af60a5848956a845828dde
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <sys/mman.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <fcntl.h>
7 #include <string.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <unistd.h>
13 * <osm>
14 * <node>
15 * <tag k=".." v=".." />
16 * </node>
17 * <way>
18 * <nd>
19 * <tag>
20 * </way>
21 * <relation>
22 * <member>
23 * <tag>
24 * </relation>
/*
 * nextline: discard the current input line and read the next one from stdin
 * into `start` (getline allocates the buffer). When getline reports EOF or an
 * error the macro jumps to the caller's `exit` label.
 *
 * The expansion relies on the caller providing:
 *   char *start;  ssize_t tmp;  size_t tmplen;  and a label named `exit`.
 *
 * It is wrapped in do { } while (0) so that `nextline;` behaves as a single
 * statement, e.g. directly under an unbraced `if`.
 */
#define nextline \
	do { \
		free(start); \
		start = NULL; \
		tmp = getline(&start, &tmplen, stdin); \
		if (tmp == -1) { goto exit; } \
	} while (0)
//#define nextline start = end + 1

/* Names of the CSV files written into the current working directory. */
#define file_nodes "nodes.csv"

#ifdef BENCHMARK
/* Extra node encodings, only produced in BENCHMARK builds. */
#define file_nodes_uint "nodes_uint.csv"
#define file_nodes_gis "nodes_gis.csv"
#endif

#define file_node_tags "node_tags.csv"
#define file_ways "ways.csv"
#define file_way_tags "way_tags.csv"
#define file_way_nds "way_nds.csv"
#define file_relations "relations.csv"
#define file_relation_tags "relation_tags.csv"
#define file_relation_member_node "relation_member_node.csv"
#define file_relation_member_relation "relation_member_relation.csv"
#define file_relation_member_way "relation_member_way.csv"
/*
 * Convert a coordinate given as a decimal-degree string into an unsigned
 * fixed-point value in which a full 360 degree turn spans 2^32 units.
 *
 * input: NUL-terminated decimal number (parsed with strtod); NULL yields 0.
 * Returns the scaled value truncated toward zero and reduced modulo
 * UINT_MAX + 1, so negative coordinates wrap around (e.g. -90 degrees maps
 * to the same value as 270 degrees). Values that are not finite or are far
 * outside any coordinate range yield 0.
 */
unsigned int coordtouint(char *input) {
	const double units_per_degree = 4294967296.0 / 360.0;
	double scaled;

	if (input == NULL)
		return 0;

	scaled = strtod(input, NULL) * units_per_degree;

	/* Written so that NaN fails the test too; keeps the long long
	 * conversion below inside its representable range. */
	if (!(scaled > -9.0e18 && scaled < 9.0e18))
		return 0;

	/* Converting a negative double directly to an unsigned type is
	 * undefined behaviour; going through a signed wide integer makes the
	 * wrap-around well defined. */
	return (unsigned int) (long long) scaled;
}
/*
 * Return a newly allocated copy of instr in which every backslash and every
 * single quote is preceded by a backslash, so the text can be placed between
 * single quotes in the generated CSV files.
 *
 * instr: NUL-terminated string; NULL is passed through as NULL.
 * Returns a malloc'ed string the caller must free. If memory cannot be
 * allocated the error is reported and the program terminates.
 */
char * escape_string(char *instr)
{
	size_t len, extra = 0, i, j = 0;
	char *outstr;

	if (instr == NULL)
		return NULL;

	/* Measure once instead of calling strlen on every loop iteration. */
	len = strlen(instr);

	for (i = 0; i < len; i++) {
		if (instr[i] == '\\' || instr[i] == '\'')
			extra++;
	}

	outstr = malloc(len + extra + 1);
	if (outstr == NULL) {
		perror("malloc");
		exit(-1);
	}

	for (i = 0; i < len; i++) {
		if (instr[i] == '\\' || instr[i] == '\'')
			outstr[j++] = '\\';
		outstr[j++] = instr[i];
	}
	outstr[j] = '\0';

	return outstr;
}
75 static void parser() {
76 typedef enum { OSM = 0, NODE = 1, WAY = 2, RELATION = 3, TAG = 4, ND = 5, MEMBER = 6 } osm_state_t;
77 typedef enum { UNKNOWN = 0, ID, LAT, LON, USER, UID, TIMESTAMP, KEY, VALUE, TYPE, REF, ROLE} key_state_t;
78 char *attr_id = NULL, *attr_lat = NULL, *attr_lon = NULL, *attr_user = NULL, *attr_uid = NULL, *attr_timestamp = NULL, *attr_key = NULL, *attr_value = NULL,
79 *attr_type = NULL, *attr_ref = NULL, *attr_role = NULL;
81 unsigned int attr_lat_uint = 0;
82 unsigned int attr_lon_uint = 0;
84 FILE *fd_nodes = fopen(file_nodes, "w");
85 if (fd_nodes == NULL) { perror("Open:"); exit(-1); }
86 #ifdef BENCHMARK
87 FILE *fd_nodes_uint = fopen(file_nodes_uint, "w");
88 if (fd_nodes_uint == NULL) { perror("Open:"); exit(-1); }
89 FILE *fd_nodes_gis = fopen(file_nodes_gis, "w");
90 if (fd_nodes_gis == NULL) { perror("Open:"); exit(-1); }
91 #endif
92 FILE *fd_node_tags = fopen(file_node_tags, "w");
93 if (fd_node_tags == NULL) { perror("Open:"); exit(-1); }
94 FILE *fd_ways = fopen(file_ways, "w");
95 if (fd_ways == NULL) { perror("Open:"); exit(-1); }
96 FILE *fd_way_tags = fopen(file_way_tags, "w");
97 if (fd_way_tags == NULL) { perror("Open:"); exit(-1); }
98 FILE *fd_way_nds = fopen(file_way_nds, "w");
99 if (fd_way_nds == NULL) { perror("Open:"); exit(-1); }
100 FILE *fd_relations = fopen(file_relations, "w");
101 if (fd_relations == NULL) { perror("Open:"); exit(-1); }
102 FILE *fd_relation_tags = fopen(file_relation_tags, "w");
103 if (fd_relation_tags == NULL) { perror("Open:"); exit(-1); }
104 FILE *fd_members_node = fopen(file_relation_member_node, "w");
105 if (fd_members_node == NULL) { perror("Open:"); exit(-1); }
106 FILE *fd_members_relation = fopen(file_relation_member_relation, "w");
107 if (fd_members_relation == NULL) { perror("Open:"); exit(-1); }
108 FILE *fd_members_way = fopen(file_relation_member_way, "w");
109 if (fd_members_way == NULL) { perror("Open:"); exit(-1); }
111 unsigned long int count_nodes = 0, count_node_tags = 0,
112 count_ways = 0, count_way_tags = 0, count_way_nds = 0,
113 count_relations = 0, count_relation_tags = 0, count_members_node = 0, count_members_relation = 0, count_members_way = 0;
115 unsigned long int sequence = 0;
118 osm_state_t current_tag = OSM;
119 osm_state_t parent_tag = OSM;
121 char *start = NULL, *end, *nodename, *nodename_end;
122 ssize_t tmp;
123 size_t tmplen = 0;
125 nextline;
126 end = strchrnul((const char*) start, '\n');
128 if (strncmp(start, "<?xml", 5) != 0)
129 return;
131 nextline;
132 end = strchrnul((const char*) start, '\n');
134 if (strncmp(start, "<osm", 4) != 0)
135 return;
137 nextline;
139 do {
140 end = strchrnul((const char*) start, '\n');
142 nodename = strchrnul(start, '<') + 1;
143 nodename_end = strchrnul(nodename, ' ');
145 if (nodename[0] == '/') {
146 free(attr_id);
147 free(attr_lat);
148 free(attr_lon);
149 free(attr_timestamp);
150 free(attr_user);
151 free(attr_uid);
153 attr_id = attr_lat = attr_lon = attr_user = attr_uid = attr_timestamp = NULL;
155 sequence = 0;
157 nextline;
158 continue;
161 switch (nodename_end - nodename) {
162 case 2:
163 current_tag = ND;
164 break;
165 case 3: {
166 switch (nodename[0]) {
167 case 'o':
168 current_tag = OSM;
169 break;
170 case 'w':
171 current_tag = WAY;
172 break;
173 case 't':
174 current_tag = TAG;
175 break;
176 default:
177 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
179 break;
181 case 4:
182 current_tag = NODE;
183 break;
184 case 5:
185 nextline;
186 continue;
187 case 6:
188 current_tag = MEMBER;
189 break;
190 case 8:
191 current_tag = RELATION;
192 break;
193 default:
194 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
198 char *key, *key_end, *value_end;
199 key = nodename_end + 1;
201 do {
202 char *value;
203 key_state_t current_key = UNKNOWN;
204 key_end = strchrnul(key, '=');
206 if (key_end == NULL || key_end >= end)
207 break;
209 switch (key_end - key) {
210 case 1: {
211 switch (key[0]) {
212 case 'k':
213 current_key = KEY;
214 break;
215 case 'v':
216 current_key = VALUE;
217 break;
218 default:
219 current_key = UNKNOWN;
221 break;
223 case 2:
224 current_key = ID;
225 break;
226 case 3: {
227 switch (key[1]) {
228 case 'a':
229 current_key = LAT;
230 break;
231 case 'o':
232 current_key = LON;
233 break;
234 case 'e':
235 current_key = REF;
236 break;
237 case 'i':
238 current_key = UID;
239 break;
240 default:
241 current_key = UNKNOWN;
242 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
244 break;
246 case 4: {
247 switch (key[0]) {
248 case 'u':
249 current_key = USER;
250 break;
251 case 'r':
252 current_key = ROLE;
253 break;
254 case 't':
255 current_key = TYPE;
256 break;
257 default:
258 current_key = UNKNOWN;
259 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
261 break;
263 case 9:
264 current_key = TIMESTAMP;
265 break;
266 default: {
267 char *thingie = strndup(key, (key_end - key));
268 current_key = UNKNOWN;
270 fprintf(stderr, "UNKNOWN ATTR %s-> %c%c\n", thingie, key[0], key[1]);
271 free(thingie);
275 value = key_end + 2;
276 value_end = value;
277 value_end = strchr(value_end, '"');
279 if (value_end > end)
280 break;
282 switch (current_key) {
283 case ID:
284 if (attr_id) free(attr_id);
285 attr_id = strndup(value, (value_end - value));
286 break;
288 case LAT:
289 if (attr_lat) free(attr_lat);
290 attr_lat = strndup(value, (value_end - value));
291 attr_lat_uint = coordtouint(attr_lat);
292 break;
294 case LON:
295 if (attr_lon) free(attr_lon);
296 attr_lon = strndup(value, (value_end - value));
297 attr_lon_uint = coordtouint(attr_lon);
298 break;
300 case TIMESTAMP:
301 if (attr_timestamp) free(attr_timestamp);
302 // attr_timestamp = strndup(value, (value_end - value));
303 attr_timestamp = strndup(value, (value_end - (value + 1))); /* another stupid fix */
304 // attr_timestamp[10] = ' '; /* Stupid timestamp fix */
305 break;
307 case USER: {
308 char *tmp;
309 if (attr_user) free(attr_user);
310 attr_user = strndup(value, (value_end - value));
311 tmp = escape_string(attr_user);
312 free(attr_user);
313 attr_user = tmp;
314 break;
317 case UID: {
318 if (attr_uid) free(attr_uid);
319 attr_uid = strndup(value, (value_end - value));
320 break;
323 case KEY: {
324 char *tmp;
325 if (attr_key) free(attr_key);
326 attr_key = strndup(value, (value_end - value));
327 tmp = escape_string(attr_key);
328 free(attr_key);
329 attr_key = tmp;
330 break;
333 case VALUE: {
334 char *tmp;
335 if (attr_value) free(attr_value);
336 attr_value = strndup(value, (value_end - value));
337 tmp = escape_string(attr_value);
338 free(attr_value);
339 attr_value = tmp;
340 break;
343 case TYPE:
344 if (attr_type) free(attr_type);
345 attr_type = strndup(value, (value_end - value));
346 break;
348 case REF:
349 if (attr_ref) free(attr_ref);
350 attr_ref = strndup(value, (value_end - value));
351 break;
353 case ROLE: {
354 char *tmp;
355 if (attr_role) free(attr_role);
356 attr_role = strndup(value, (value_end - value));
357 tmp = escape_string(attr_role);
358 free(attr_role);
359 attr_role = tmp;
360 break;
363 default:
364 fprintf(stderr, "--> %c%c\n", value[0], value[1]);
367 key = value_end + 2;
368 } while (key < end);
370 switch (current_tag) {
371 case NODE:
372 fprintf(fd_nodes, "%s, %s, %s, '%s', '%s'\n", attr_id, attr_lat, attr_lon, attr_uid, attr_timestamp);
373 #ifdef BENCHMARK
374 fprintf(fd_nodes_uint, "%s, %d, %d, '%s', '%s'\n", attr_id, attr_lat_uint, attr_lon_uint, attr_uid, attr_timestamp);
375 fprintf(fd_nodes_gis, "%s, 'POINT( %s %s )', '%s', '%s'\n", attr_id, attr_lon, attr_lat, attr_uid, attr_timestamp);
376 #endif
377 count_nodes++;
378 break;
379 case TAG: {
380 switch (parent_tag) {
381 case NODE:
382 fprintf(fd_node_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
383 count_node_tags++;
384 break;
385 case WAY:
386 fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
387 count_way_tags++;
388 break;
389 case RELATION:
390 fprintf(fd_relation_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
391 count_relation_tags++;
392 break;
393 default:
394 break;
396 break;
398 case WAY:
399 fprintf(fd_ways, "%s, '%s', '%s'\n", attr_id, attr_uid, attr_timestamp);
400 count_ways++;
401 // fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, "type", "way");
402 // count_way_tags++;
403 break;
404 case RELATION:
405 fprintf(fd_relations, "%s, '%s', '%s'\n", attr_id, attr_uid, attr_timestamp);
406 count_relations++;
407 break;
408 case MEMBER:
409 if (strcmp(attr_type, "node") == 0) {
410 fprintf(fd_members_node, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
411 count_members_node++;
412 } else if (strcmp(attr_type, "way") == 0) {
413 fprintf(fd_members_way, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
414 count_members_way++;
415 } else if (strcmp(attr_type, "relation") == 0) {
416 fprintf(fd_members_relation, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
417 count_members_relation++;
419 sequence++;
420 break;
421 case ND:
422 fprintf(fd_way_nds, "%s, %lu, %s\n", attr_id, sequence, attr_ref);
423 sequence++;
424 count_way_nds++;
425 break;
426 default:
427 break;
430 if (end[-2] == '/') {
431 switch (current_tag) {
432 case NODE:
433 free(attr_lat);
434 free(attr_lon);
435 attr_lat = NULL;
436 attr_lon = NULL;
437 attr_lat_uint = 0;
438 attr_lon_uint = 0;
439 /* no break! */
441 case WAY:
442 case RELATION:
443 free(attr_id);
444 free(attr_timestamp);
445 free(attr_user);
446 free(attr_uid);
448 attr_id = attr_user = attr_uid = attr_timestamp = NULL;
450 sequence = 0;
451 break;
453 case TAG:
454 free(attr_key);
455 free(attr_value);
457 attr_key = NULL;
458 attr_value = NULL;
459 break;
461 case ND:
462 case MEMBER:
463 free(attr_type);
464 free(attr_ref);
465 free(attr_role);
467 attr_type = NULL;
468 attr_ref = NULL;
469 attr_role = NULL;
470 default:
471 break;
473 } else if (current_tag == NODE || current_tag == WAY || current_tag == RELATION) {
474 parent_tag = current_tag;
478 nextline;
479 // } while ((start = ++end) < (range + max));
480 } while (1);
481 exit:
483 free(attr_id);
484 free(attr_lat);
485 free(attr_lon);
486 free(attr_timestamp);
487 free(attr_user);
488 free(attr_uid);
490 free(attr_key);
491 free(attr_value);
493 fclose(fd_nodes);
494 #ifdef BENCHMARK
495 fclose(fd_nodes_uint);
496 fclose(fd_nodes_gis);
497 #endif
498 fclose(fd_node_tags);
499 fclose(fd_ways);
500 fclose(fd_way_tags);
501 fclose(fd_way_nds);
502 fclose(fd_relations);
503 fclose(fd_relation_tags);
504 fclose(fd_members_node);
505 fclose(fd_members_relation);
507 char *current = get_current_dir_name();
509 printf("START TRANSACTION;\n");
511 printf("CREATE TABLE nodes_legacy (id integer, long double, lat double, uid long, timestamp timestamptz);\n");
512 #ifdef BENCHMARK
513 printf("CREATE TABLE nodes_legacy_uint (id integer, long integer, lat integer, uid long, timestamp timestamptz);\n");
514 printf("CREATE TABLE nodes_legacy_gis (id integer, poi point, uid long, timestamp timestamptz);\n");
515 #endif
516 printf("CREATE TABLE node_tags (node integer, k varchar(255), v varchar(1024));\n");
517 printf("CREATE TABLE ways (id integer,uid long, timestamp timestamptz);\n");
518 printf("CREATE TABLE way_tags (way integer, k varchar(255), v varchar(1024));\n");
519 printf("CREATE TABLE way_nds (way integer, idx integer, to_node integer);\n");
520 printf("CREATE TABLE relations(id integer, uid long, timestamp timestamptz);\n");
521 printf("CREATE TABLE relation_members_node (relation integer, idx integer, to_node integer, role varchar(255));\n");
522 printf("CREATE TABLE relation_members_relation (relation integer, idx integer, to_relation integer, role varchar(255));\n");
523 printf("CREATE TABLE relation_members_way (relation integer, idx integer, to_way integer, role varchar(255));\n");
524 printf("CREATE TABLE relation_tags (relation integer, k varchar(255), v varchar(1024));\n");
526 printf("COPY %lu RECORDS INTO nodes_legacy from '%s/" file_nodes "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
527 #ifdef BENCHMARK
528 printf("COPY %lu RECORDS INTO nodes_legacy_uint from '%s/" file_nodes_uint "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
529 printf("COPY %lu RECORDS INTO nodes_legacy_gis from '%s/" file_nodes_gis "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
530 #endif
531 printf("COPY %lu RECORDS INTO node_tags from '%s/" file_node_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_node_tags, current);
532 printf("COPY %lu RECORDS INTO ways from '%s/" file_ways "' USING DELIMITERS ',', '\\n', '''';\n", count_ways, current);
533 printf("COPY %lu RECORDS INTO way_tags from '%s/" file_way_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_way_tags, current);
534 printf("COPY %lu RECORDS INTO way_nds from '%s/" file_way_nds "' USING DELIMITERS ',', '\\n', '''';\n", count_way_nds, current);
535 printf("COPY %lu RECORDS INTO relations from '%s/" file_relations "' USING DELIMITERS ',', '\\n', '''';\n", count_relations, current);
536 printf("COPY %lu RECORDS INTO relation_tags from '%s/" file_relation_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_relation_tags, current);
537 printf("COPY %lu RECORDS INTO relation_members_node from '%s/" file_relation_member_node "' USING DELIMITERS ',', '\\n', '''';\n", count_members_node, current);
538 printf("COPY %lu RECORDS INTO relation_members_relation from '%s/" file_relation_member_relation "' USING DELIMITERS ',', '\\n', '''';\n", count_members_relation, current);
539 printf("COPY %lu RECORDS INTO relation_members_way from '%s/" file_relation_member_way "' USING DELIMITERS ',', '\\n', '''';\n", count_members_way, current);
541 printf("COMMIT;\n");
543 printf("START TRANSACTION;\n");
545 printf("CREATE SEQUENCE s_nodes AS INTEGER;\n");
546 printf("ALTER SEQUENCE s_nodes RESTART WITH (SELECT MAX(id) FROM nodes_legacy);\n");
547 printf("ALTER TABLE nodes_legacy ALTER COLUMN id SET NOT NULL;\n");
548 printf("ALTER TABLE nodes_legacy ALTER COLUMN id SET DEFAULT NEXT VALUE FOR \"sys\".\"s_nodes\";\n");
549 printf("ALTER TABLE nodes_legacy ADD CONSTRAINT pk_nodes_id PRIMARY KEY (id);\n");
551 printf("CREATE SEQUENCE s_ways AS INTEGER;\n");
552 printf("ALTER SEQUENCE s_ways RESTART WITH (SELECT MAX(id) FROM ways);\n");
553 printf("ALTER TABLE ways ALTER COLUMN id SET NOT NULL;\n");
554 printf("ALTER TABLE ways ALTER COLUMN id SET DEFAULT NEXT VALUE FOR \"sys\".\"s_ways\";\n");
555 printf("ALTER TABLE ways ADD CONSTRAINT pk_ways_id PRIMARY KEY (id);\n");
557 printf("CREATE SEQUENCE s_relations AS INTEGER;\n");
558 printf("ALTER SEQUENCE s_relations RESTART WITH (SELECT MAX(id) FROM relations);\n");
559 printf("ALTER TABLE relations ALTER COLUMN id SET NOT NULL;\n");
560 printf("ALTER TABLE relations ALTER COLUMN id SET DEFAULT NEXT VALUE FOR \"sys\".\"s_relations\";\n");
561 printf("ALTER TABLE relations ADD CONSTRAINT pk_relations_id PRIMARY KEY (id);\n");
563 printf("ALTER TABLE relation_members_node ADD CONSTRAINT pk_relation_members_node PRIMARY KEY (relation, idx);\n");
564 printf("ALTER TABLE relation_members_way ADD CONSTRAINT pk_relation_members_way PRIMARY KEY (relation,idx);\n");
565 printf("ALTER TABLE relation_members_relation ADD CONSTRAINT pk_relation_members_relation PRIMARY KEY (relation,idx);\n");
567 printf("COMMIT;\n");
570 printf("START TRANSACTION;\n");
572 printf("ALTER TABLE node_tags ADD CONSTRAINT pk_node_tags PRIMARY KEY (node, k);\n");
573 printf("ALTER TABLE node_tags ADD CONSTRAINT fk_node_tags_node FOREIGN KEY (node) REFERENCES nodes_legacy (id);\n");
575 printf("ALTER TABLE way_tags ADD CONSTRAINT pk_way_tags PRIMARY KEY (way, k);\n");
576 printf("ALTER TABLE way_tags ADD CONSTRAINT fk_way_tags_way FOREIGN KEY (way) REFERENCES ways (id);\n");
578 printf("ALTER TABLE way_nds ADD CONSTRAINT pk_way_nds PRIMARY KEY (way, idx);\n");
579 printf("ALTER TABLE way_nds ADD CONSTRAINT fk_way_nds_way FOREIGN KEY (way) REFERENCES ways (id);\n");
580 printf("ALTER TABLE way_nds ADD CONSTRAINT fk_way_nds_node FOREIGN KEY (to_node) REFERENCES nodes_legacy (id);\n");
582 printf("ALTER TABLE relation_tags ADD CONSTRAINT pk_relation_tags PRIMARY KEY (relation, k);\n");
583 printf("ALTER TABLE relation_tags ADD CONSTRAINT fk_relation_tags FOREIGN KEY (relation) REFERENCES relations (id);\n");
585 printf("ALTER TABLE relation_members_node ADD CONSTRAINT fk_relation_members_node FOREIGN KEY (relation) REFERENCES relations (id);\n");
586 printf("ALTER TABLE relation_members_node ADD CONSTRAINT fk_relation_members_tonode FOREIGN KEY (to_node) REFERENCES nodes_legacy (id);\n");
588 printf("ALTER TABLE relation_members_way ADD CONSTRAINT fk_relation_members_way FOREIGN KEY (relation) REFERENCES relations (id);\n");
589 printf("ALTER TABLE relation_members_way ADD CONSTRAINT fk_relation_members_toway FOREIGN KEY (to_way) REFERENCES ways (id);\n");
591 printf("ALTER TABLE relation_members_relation ADD CONSTRAINT fk_relation_members_relation FOREIGN KEY (relation) REFERENCES relations (id);\n");
592 printf("ALTER TABLE relation_members_relation ADD CONSTRAINT fk_relation_members_torelation FOREIGN KEY (to_relation) REFERENCES relations (id);\n");
594 printf("COMMIT;\n");
596 free(current);
/*
 * Program entry point: convert the OSM XML on stdin and report success.
 * Command-line arguments are not used.
 */
int main(int argc, char *argv[]) {
	(void) argc;
	(void) argv;

	parser();

	return 0;
}