Osmsucker is the streaming equivalent of osmparser. Pipe something in
[handlerosm.git] / osmparser.c
blob4de811a2726c7514dc3a7ee500f8723faa7523f4
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <sys/mman.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <fcntl.h>
7 #include <string.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <unistd.h>
/*
 * Element layout handled by the parser:
 *
 * <osm>
 *   <node>
 *     <tag k=".." v=".." />
 *   </node>
 *   <way>
 *     <nd>
 *     <tag>
 *   </way>
 *   <relation>
 *     <member>
 *     <tag>
 *   </relation>
 */
/*
 * Names of the CSV files produced by parser(), relative to the current
 * working directory.  They must remain string-literal macros: parser()
 * pastes them into its COPY statements by literal concatenation.
 */
#define file_nodes "nodes.csv"
#define file_nodes_uint "nodes_uint.csv"
#define file_nodes_gis "nodes_gis.csv"
#define file_node_tags "node_tags.csv"
#define file_ways "ways.csv"
#define file_way_tags "way_tags.csv"
#define file_way_nds "way_nds.csv"
#define file_relations "relations.csv"
#define file_relation_tags "relation_tags.csv"
#define file_relation_member_node "relation_member_node.csv"
#define file_relation_member_relation "relation_member_relation.csv"
#define file_relation_member_way "relation_member_way.csv"
/*
 * Convert a decimal-degree coordinate string into a fixed-point integer.
 *
 * The value parsed by strtod() is scaled by 2^32 / 360 and truncated toward
 * zero.  Negative coordinates wrap modulo UINT_MAX + 1, so with a 32-bit
 * unsigned int -90 degrees yields the same value as 270 degrees.
 *
 * input: NUL-terminated text; if strtod() cannot parse a number the result
 *        is 0.  The scaled value is assumed to fit in a long long, which
 *        holds for any sane latitude/longitude.
 */
unsigned int coordtouint(char *input) {
    const double maxbit = 4294967296.0 / 360.0;
    const double proper = strtod(input, NULL) * maxbit;

    /*
     * Converting a negative double directly to an unsigned type is undefined
     * behaviour.  Go through a signed 64-bit integer first: the following
     * signed -> unsigned conversion is well defined (modular).
     */
    return (unsigned int) (long long) proper;
}
/*
 * Return a newly allocated copy of instr in which every backslash and every
 * single quote is preceded by a backslash.
 *
 * instr: NUL-terminated input; it is not modified.
 *
 * Returns the escaped string, which the caller must free(), or NULL if the
 * allocation fails.
 */
char * escape_string(char *instr)
{
    const size_t len = strlen(instr);
    size_t need = 0;
    size_t i, j = 0;
    char *outstr;

    /* First pass: count the characters that need an extra backslash. */
    for (i = 0; i < len; i++) {
        if (instr[i] == '\\' || instr[i] == '\'')
            need++;
    }

    outstr = malloc(len + need + 1);
    if (outstr == NULL)
        return NULL;

    /* Second pass: "<=" so that the terminating NUL is copied as well. */
    for (i = 0; i <= len; i++) {
        if (instr[i] == '\\' || instr[i] == '\'')
            outstr[j++] = '\\';
        outstr[j++] = instr[i];
    }

    return outstr;
}
67 static void parser(char *range, unsigned long int max) {
68 typedef enum { OSM = 0, NODE = 1, WAY = 2, RELATION = 3, TAG = 4, ND = 5, MEMBER = 6 } osm_state_t;
69 typedef enum { UNKNOWN = 0, ID, LAT, LON, USER, TIMESTAMP, KEY, VALUE, TYPE, REF, ROLE} key_state_t;
70 char *attr_id = NULL, *attr_lat = NULL, *attr_lon = NULL, *attr_user = NULL, *attr_timestamp = NULL, *attr_key = NULL, *attr_value = NULL,
71 *attr_type = NULL, *attr_ref = NULL, *attr_role = NULL;
73 unsigned int attr_lat_uint = 0;
74 unsigned int attr_lon_uint = 0;
76 FILE *fd_nodes = fopen(file_nodes, "w");
77 if (fd_nodes == NULL) { perror("Open:"); exit(-1); }
78 FILE *fd_nodes_uint = fopen(file_nodes_uint, "w");
79 if (fd_nodes_uint == NULL) { perror("Open:"); exit(-1); }
80 FILE *fd_nodes_gis = fopen(file_nodes_gis, "w");
81 if (fd_nodes_gis == NULL) { perror("Open:"); exit(-1); }
82 FILE *fd_node_tags = fopen(file_node_tags, "w");
83 if (fd_node_tags == NULL) { perror("Open:"); exit(-1); }
84 FILE *fd_ways = fopen(file_ways, "w");
85 if (fd_ways == NULL) { perror("Open:"); exit(-1); }
86 FILE *fd_way_tags = fopen(file_way_tags, "w");
87 if (fd_way_tags == NULL) { perror("Open:"); exit(-1); }
88 FILE *fd_way_nds = fopen(file_way_nds, "w");
89 if (fd_way_nds == NULL) { perror("Open:"); exit(-1); }
90 FILE *fd_relations = fopen(file_relations, "w");
91 if (fd_relations == NULL) { perror("Open:"); exit(-1); }
92 FILE *fd_relation_tags = fopen(file_relation_tags, "w");
93 if (fd_relation_tags == NULL) { perror("Open:"); exit(-1); }
94 FILE *fd_members_node = fopen(file_relation_member_node, "w");
95 if (fd_members_node == NULL) { perror("Open:"); exit(-1); }
96 FILE *fd_members_relation = fopen(file_relation_member_relation, "w");
97 if (fd_members_relation == NULL) { perror("Open:"); exit(-1); }
98 FILE *fd_members_way = fopen(file_relation_member_way, "w");
99 if (fd_members_way == NULL) { perror("Open:"); exit(-1); }
101 unsigned long int count_nodes = 0, count_node_tags = 0,
102 count_ways = 0, count_way_tags = 0, count_way_nds = 0,
103 count_relations = 0, count_relation_tags = 0, count_members_node = 0, count_members_relation = 0, count_members_way = 0;
105 unsigned long int sequence = 0;
108 osm_state_t current_tag = OSM;
109 osm_state_t parent_tag = OSM;
111 char *start, *end, *nodename, *nodename_end;
113 start = range;
114 end = strchrnul((const char*) start, '\n');
116 if (strncmp(start, "<?xml", 5) != 0)
117 return;
119 start = end + 1;
120 end = strchrnul((const char*) start, '\n');
122 if (strncmp(start, "<osm", 4) != 0)
123 return;
125 start = end + 1;
127 do {
128 end = strchrnul((const char*) start, '\n');
130 nodename = strchrnul(start, '<') + 1;
131 nodename_end = strchrnul(nodename, ' ');
133 if (nodename[0] == '/') {
134 free(attr_id);
135 free(attr_lat);
136 free(attr_lon);
137 free(attr_timestamp);
138 free(attr_user);
140 attr_id = NULL;
141 attr_lat = NULL;
142 attr_lon = NULL;
143 attr_user = NULL;
144 attr_timestamp = NULL;
146 sequence = 0;
148 start = end + 1;
149 continue;
152 switch (nodename_end - nodename) {
153 case 2:
154 current_tag = ND;
155 break;
156 case 3: {
157 switch (nodename[0]) {
158 case 'o':
159 current_tag = OSM;
160 break;
161 case 'w':
162 current_tag = WAY;
163 break;
164 case 't':
165 current_tag = TAG;
166 break;
167 default:
168 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
170 break;
172 case 4:
173 current_tag = NODE;
174 break;
175 case 6:
176 current_tag = MEMBER;
177 break;
178 case 8:
179 current_tag = RELATION;
180 break;
181 default:
182 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
186 char *key, *key_end, *value_end;
187 key = nodename_end + 1;
189 do {
190 char *value;
191 key_state_t current_key = UNKNOWN;
192 key_end = strchrnul(key, '=');
194 if (key_end == NULL || key_end >= end)
195 break;
197 switch (key_end - key) {
198 case 1: {
199 switch (key[0]) {
200 case 'k':
201 current_key = KEY;
202 break;
203 case 'v':
204 current_key = VALUE;
205 break;
206 default:
207 current_key = UNKNOWN;
209 break;
211 case 2:
212 current_key = ID;
213 break;
214 case 3: {
215 switch (key[1]) {
216 case 'a':
217 current_key = LAT;
218 break;
219 case 'o':
220 current_key = LON;
221 break;
222 case 'e':
223 current_key = REF;
224 break;
225 default:
226 current_key = UNKNOWN;
227 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
229 break;
231 case 4: {
232 switch (key[0]) {
233 case 'u':
234 current_key = USER;
235 break;
236 case 'r':
237 current_key = ROLE;
238 break;
239 case 't':
240 current_key = TYPE;
241 break;
242 default:
243 current_key = UNKNOWN;
244 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
246 break;
248 case 9:
249 current_key = TIMESTAMP;
250 break;
251 default: {
252 char *thingie = strndup(key, (key_end - key));
253 current_key = UNKNOWN;
255 fprintf(stderr, "UNKNOWN ATTR %s-> %c%c\n", thingie, key[0], key[1]);
256 free(thingie);
260 value = key_end + 2;
261 value_end = value;
262 value_end = strchr(value_end, '"');
264 if (value_end > end)
265 break;
267 switch (current_key) {
268 case ID:
269 if (attr_id) free(attr_id);
270 attr_id = strndup(value, (value_end - value));
271 break;
273 case LAT:
274 if (attr_lat) free(attr_lat);
275 attr_lat = strndup(value, (value_end - value));
276 attr_lat_uint = coordtouint(attr_lat);
277 break;
279 case LON:
280 if (attr_lon) free(attr_lon);
281 attr_lon = strndup(value, (value_end - value));
282 attr_lon_uint = coordtouint(attr_lon);
283 break;
285 case TIMESTAMP:
286 if (attr_timestamp) free(attr_timestamp);
287 attr_timestamp = strndup(value, (value_end - value));
288 // attr_timestamp[10] = ' '; /* Stupid timestamp fix */
289 break;
291 case USER: {
292 char *tmp;
293 if (attr_user) free(attr_user);
294 attr_user = strndup(value, (value_end - value));
295 tmp = escape_string(attr_user);
296 free(attr_user);
297 attr_user = tmp;
298 break;
301 case KEY: {
302 char *tmp;
303 if (attr_key) free(attr_key);
304 attr_key = strndup(value, (value_end - value));
305 tmp = escape_string(attr_key);
306 free(attr_key);
307 attr_key = tmp;
308 break;
311 case VALUE: {
312 char *tmp;
313 if (attr_value) free(attr_value);
314 attr_value = strndup(value, (value_end - value));
315 tmp = escape_string(attr_value);
316 free(attr_value);
317 attr_value = tmp;
318 break;
321 case TYPE:
322 if (attr_type) free(attr_type);
323 attr_type = strndup(value, (value_end - value));
324 break;
326 case REF:
327 if (attr_ref) free(attr_ref);
328 attr_ref = strndup(value, (value_end - value));
329 break;
331 case ROLE: {
332 char *tmp;
333 if (attr_role) free(attr_role);
334 attr_role = strndup(value, (value_end - value));
335 tmp = escape_string(attr_role);
336 free(attr_role);
337 attr_role = tmp;
338 break;
341 default:
342 fprintf(stderr, "--> %c%c\n", value[0], value[1]);
345 key = value_end + 2;
346 } while (key < end);
348 switch (current_tag) {
349 case NODE:
350 fprintf(fd_nodes, "%s, %s, %s, '%s', %s\n", attr_id, attr_lat, attr_lon, attr_user, attr_timestamp);
351 fprintf(fd_nodes_uint, "%s, %d, %d, '%s', %s\n", attr_id, attr_lat_uint, attr_lon_uint, attr_user, attr_timestamp);
352 fprintf(fd_nodes_gis, "%s, 'POINT( %s %s )', '%s', %s\n", attr_id, attr_lon, attr_lat, attr_user, attr_timestamp);
353 count_nodes++;
354 break;
355 case TAG: {
356 switch (parent_tag) {
357 case NODE:
358 fprintf(fd_node_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
359 count_node_tags++;
360 break;
361 case WAY:
362 fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
363 count_way_tags++;
364 break;
365 case RELATION:
366 fprintf(fd_relation_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
367 count_relation_tags++;
368 break;
369 default:
370 break;
372 break;
374 case WAY:
375 fprintf(fd_ways, "%s, '%s', '%s'\n", attr_id, attr_user, attr_timestamp);
376 count_ways++;
377 // fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, "type", "way");
378 // count_way_tags++;
379 break;
380 case RELATION:
381 fprintf(fd_relations, "%s, '%s', '%s'\n", attr_id, attr_user, attr_timestamp);
382 count_relations++;
383 break;
384 case MEMBER:
385 if (strcmp(attr_type, "node") == 0) {
386 fprintf(fd_members_node, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
387 count_members_node++;
388 } else if (strcmp(attr_type, "way") == 0) {
389 fprintf(fd_members_way, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
390 count_members_way++;
391 } else if (strcmp(attr_type, "relation") == 0) {
392 fprintf(fd_members_relation, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
393 count_members_relation++;
395 sequence++;
396 break;
397 case ND:
398 fprintf(fd_way_nds, "%s, %lu, %s\n", attr_id, sequence, attr_ref);
399 sequence++;
400 count_way_nds++;
401 break;
402 default:
403 break;
406 if (end[-2] == '/') {
407 switch (current_tag) {
408 case NODE:
409 free(attr_lat);
410 free(attr_lon);
411 attr_lat = NULL;
412 attr_lon = NULL;
413 attr_lat_uint = 0;
414 attr_lon_uint = 0;
415 /* no break! */
417 case WAY:
418 case RELATION:
419 free(attr_id);
420 free(attr_timestamp);
421 free(attr_user);
423 attr_id = NULL;
424 attr_user = NULL;
425 attr_timestamp = NULL;
427 sequence = 0;
428 break;
430 case TAG:
431 free(attr_key);
432 free(attr_value);
434 attr_key = NULL;
435 attr_value = NULL;
436 break;
438 case ND:
439 case MEMBER:
440 free(attr_type);
441 free(attr_ref);
442 free(attr_role);
444 attr_type = NULL;
445 attr_ref = NULL;
446 attr_role = NULL;
447 default:
448 break;
450 } else if (current_tag == NODE || current_tag == WAY || current_tag == RELATION) {
451 parent_tag = current_tag;
454 } while ((start = ++end) < (range + max));
456 free(attr_id);
457 free(attr_lat);
458 free(attr_lon);
459 free(attr_timestamp);
460 free(attr_user);
462 free(attr_key);
463 free(attr_value);
465 fclose(fd_nodes);
466 fclose(fd_nodes_uint);
467 fclose(fd_nodes_gis);
468 fclose(fd_node_tags);
469 fclose(fd_ways);
470 fclose(fd_way_tags);
471 fclose(fd_way_nds);
472 fclose(fd_relations);
473 fclose(fd_relation_tags);
474 fclose(fd_members_node);
475 fclose(fd_members_relation);
477 char *current = get_current_dir_name();
479 printf("START TRANSACTION;\n");
481 printf("CREATE TABLE nodes_legacy (id serial, long double, lat double, username varchar(255), timestamp timestamptz);\n");
482 printf("CREATE TABLE nodes_legacy_uint (id serial, long integer, lat integer, username varchar(255), timestamp timestamptz);\n");
483 printf("CREATE TABLE nodes_legacy_gis (id serial, poi point, username varchar(255), timestamp timestamptz);\n");
484 printf("CREATE TABLE node_tags (node integer, k varchar(255), v varchar(1024), primary key (node, k), foreign key(node) references nodes_legacy);\n");
485 printf("CREATE TABLE ways (id serial,username varchar(255), timestamp timestamptz);\n");
486 printf("CREATE TABLE way_tags (way integer, k varchar(255), v varchar(1024), primary key (way, k), foreign key(way) references ways);\n");
487 printf("CREATE TABLE way_nds (way integer, idx integer, to_node integer, foreign key(way) references ways, foreign key(to_node) references nodes_legacy, primary key(way, idx));\n");
488 printf("CREATE TABLE relations(id serial, username varchar(255), timestamp timestamptz);\n");
489 printf("CREATE TABLE relation_members_node (relation integer, idx integer, to_node integer, role varchar(255), foreign key(relation) references relations, foreign key(to_node) references nodes_legacy, primary key(relation, idx));\n");
490 printf("CREATE TABLE relation_members_relation (relation integer, idx integer, to_relation integer, role varchar(255), foreign key(relation) references relations, foreign key(to_relation) references relations, primary key(relation, idx));\n");
491 printf("CREATE TABLE relation_members_way (relation integer, idx integer, to_way integer, role varchar(255), foreign key(relation) references relations, foreign key(to_way) references ways, primary key(relation, idx));\n");
492 printf("CREATE TABLE relation_tags (relation integer, k varchar(255), v varchar(1024), foreign key(relation) references relations, primary key(relation, k));\n");
494 printf("COPY %lu RECORDS INTO nodes_legacy from '%s/" file_nodes "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
495 printf("COPY %lu RECORDS INTO nodes_legacy_uint from '%s/" file_nodes_uint "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
496 printf("COPY %lu RECORDS INTO nodes_legacy_gis from '%s/" file_nodes_gis "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
497 printf("COPY %lu RECORDS INTO node_tags from '%s/" file_node_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_node_tags, current);
498 printf("COPY %lu RECORDS INTO ways from '%s/" file_ways "' USING DELIMITERS ',', '\\n', '''';\n", count_ways, current);
499 printf("COPY %lu RECORDS INTO way_tags from '%s/" file_way_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_way_tags, current);
500 printf("COPY %lu RECORDS INTO way_nds from '%s/" file_way_nds "' USING DELIMITERS ',', '\\n', '''';\n", count_way_nds, current);
501 printf("COPY %lu RECORDS INTO relations from '%s/" file_relations "' USING DELIMITERS ',', '\\n', '''';\n", count_relations, current);
502 printf("COPY %lu RECORDS INTO relation_tags from '%s/" file_relation_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_relation_tags, current);
503 printf("COPY %lu RECORDS INTO relation_members_node from '%s/" file_relation_member_node "' USING DELIMITERS ',', '\\n', '''';\n", count_members_node, current);
504 printf("COPY %lu RECORDS INTO relation_members_relation from '%s/" file_relation_member_relation "' USING DELIMITERS ',', '\\n', '''';\n", count_members_relation, current);
505 printf("COPY %lu RECORDS INTO relation_members_way from '%s/" file_relation_member_way "' USING DELIMITERS ',', '\\n', '''';\n", count_members_way, current);
506 printf("COMMIT;\n");
508 free(current);
/*
 * Entry point: map the file named by the single command-line argument
 * read-only into memory and hand it to parser().
 *
 * Exits with status 0 on success (an empty file is accepted and produces no
 * output) and with a non-zero status after printing a diagnostic to stderr
 * when the argument count is wrong or the file cannot be opened, inspected
 * or mapped.
 */
int main(int argc, char *argv[]) {
    int fd;
    struct stat statbuf;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <file.osm>\n", argc > 0 ? argv[0] : "osmparser");
        exit(-1);
    }

    fprintf(stderr, "Analysing %s...\n", argv[1]);

    fd = open(argv[1], O_RDONLY);

    if (fd < 0) { perror("Open:"); exit(-1); }

    if (fstat (fd, &statbuf) == -1) { perror("fstat:"); exit(-1); }

    /* mmap() rejects a zero length, so an empty file is simply skipped. */
    if (statbuf.st_size > 0) {
        const size_t length = (size_t) statbuf.st_size;
        char *range = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, (off_t) 0);

        if (range == MAP_FAILED) {
            perror("Mmap:");
            /* stderr, not stdout: stdout carries the SQL written by parser(). */
            fprintf(stderr, "(did you compile PAE in the kernel?)\n");
            exit(-1);
        }

        parser(range, length / sizeof(char));
        munmap(range, length);
    }

    close(fd);
    exit(0);
}