Updates with respect to the SQL table lat/lon y/x bug.
[handlerosm.git] / osmparser.c
blob5992434d961bf36992e8c8ccb7d39ca2cd2f4df8
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <sys/mman.h>
4 #include <sys/types.h>
5 #include <sys/stat.h>
6 #include <fcntl.h>
7 #include <string.h>
8 #include <errno.h>
9 #include <math.h>
10 #include <unistd.h>
/*
 * Element layout of the OSM XML input handled by the parser:
 *
 * <osm>
 *   <node>
 *     <tag k=".." v=".." />
 *   </node>
 *   <way>
 *     <nd>
 *     <tag>
 *   </way>
 *   <relation>
 *     <member>
 *     <tag>
 *   </relation>
 */
/*
 * Names of the CSV files written by the parser, one per target SQL table.
 * They are opened with relative paths, and they stay plain string-literal
 * macros because the COPY statements are built by literal concatenation.
 */

/* nodes and their tags */
#define file_nodes                     "nodes.csv"
#define file_node_tags                 "node_tags.csv"

/* ways, their tags and their node references */
#define file_ways                      "ways.csv"
#define file_way_tags                  "way_tags.csv"
#define file_way_nds                   "way_nds.csv"

/* relations, their tags and their members split by member type */
#define file_relations                 "relations.csv"
#define file_relation_tags             "relation_tags.csv"
#define file_relation_member_node      "relation_member_node.csv"
#define file_relation_member_relation  "relation_member_relation.csv"
#define file_relation_member_way       "relation_member_way.csv"
/*
 * Return a newly allocated copy of instr in which every backslash and every
 * single quote is preceded by a backslash, so the text can be placed between
 * single quotes in the generated CSV.
 *
 * instr: NUL-terminated input string; it is not modified.
 *
 * Returns the escaped string, which the caller must free(), or NULL when
 * instr is NULL or the allocation fails.
 */
char * escape_string(char *instr)
{
    size_t len, need = 0, i, j = 0;
    char *outstr;

    if (instr == NULL) {
        return NULL;
    }

    /* Measure once; the length is reused by both passes below. */
    len = strlen(instr);

    /* First pass: count the characters that need an extra backslash. */
    for (i = 0; i < len; i++) {
        if (instr[i] == '\\' || instr[i] == '\'') {
            need++;
        }
    }

    outstr = malloc(len + need + 1);
    if (outstr == NULL) {
        return NULL;
    }

    /* Second pass: i runs up to and including len so the NUL is copied too. */
    for (i = 0; i <= len; i++) {
        if (instr[i] == '\\' || instr[i] == '\'') {
            outstr[j++] = '\\';
        }
        outstr[j++] = instr[i];
    }

    return outstr;
}
59 static void parser(char *range, unsigned long int max) {
60 typedef enum { OSM = 0, NODE = 1, WAY = 2, RELATION = 3, TAG = 4, ND = 5, MEMBER = 6 } osm_state_t;
61 typedef enum { UNKNOWN = 0, ID, LAT, LON, USER, TIMESTAMP, KEY, VALUE, TYPE, REF, ROLE} key_state_t;
62 char *attr_id = NULL, *attr_lat = NULL, *attr_lon = NULL, *attr_user = NULL, *attr_timestamp = NULL, *attr_key = NULL, *attr_value = NULL,
63 *attr_type = NULL, *attr_ref = NULL, *attr_role = NULL;
65 FILE *fd_nodes = fopen(file_nodes, "w");
66 if (fd_nodes == NULL) { perror("Open:"); exit(-1); }
67 FILE *fd_node_tags = fopen(file_node_tags, "w");
68 if (fd_node_tags == NULL) { perror("Open:"); exit(-1); }
69 FILE *fd_ways = fopen(file_ways, "w");
70 if (fd_ways == NULL) { perror("Open:"); exit(-1); }
71 FILE *fd_way_tags = fopen(file_way_tags, "w");
72 if (fd_way_tags == NULL) { perror("Open:"); exit(-1); }
73 FILE *fd_way_nds = fopen(file_way_nds, "w");
74 if (fd_way_nds == NULL) { perror("Open:"); exit(-1); }
75 FILE *fd_relations = fopen(file_relations, "w");
76 if (fd_relations == NULL) { perror("Open:"); exit(-1); }
77 FILE *fd_relation_tags = fopen(file_relation_tags, "w");
78 if (fd_relation_tags == NULL) { perror("Open:"); exit(-1); }
79 FILE *fd_members_node = fopen(file_relation_member_node, "w");
80 if (fd_members_node == NULL) { perror("Open:"); exit(-1); }
81 FILE *fd_members_relation = fopen(file_relation_member_relation, "w");
82 if (fd_members_relation == NULL) { perror("Open:"); exit(-1); }
83 FILE *fd_members_way = fopen(file_relation_member_way, "w");
84 if (fd_members_way == NULL) { perror("Open:"); exit(-1); }
86 unsigned long int count_nodes = 0, count_node_tags = 0,
87 count_ways = 0, count_way_tags = 0, count_way_nds = 0,
88 count_relations = 0, count_relation_tags = 0, count_members_node = 0, count_members_relation = 0, count_members_way = 0;
90 unsigned long int sequence = 0;
93 osm_state_t current_tag = OSM;
94 osm_state_t parent_tag = OSM;
96 char *start, *end, *nodename, *nodename_end;
98 start = range;
99 end = strchrnul((const char*) start, '\n');
101 if (strncmp(start, "<?xml", 5) != 0)
102 return;
104 start = end + 1;
105 end = strchrnul((const char*) start, '\n');
107 if (strncmp(start, "<osm", 4) != 0)
108 return;
110 start = end + 1;
112 do {
113 end = strchrnul((const char*) start, '\n');
115 nodename = strchrnul(start, '<') + 1;
116 nodename_end = strchrnul(nodename, ' ');
118 if (nodename[0] == '/') {
119 free(attr_id);
120 free(attr_lat);
121 free(attr_lon);
122 free(attr_timestamp);
123 free(attr_user);
125 attr_id = NULL;
126 attr_lat = NULL;
127 attr_lon = NULL;
128 attr_user = NULL;
129 attr_timestamp = NULL;
131 sequence = 0;
133 start = end + 1;
134 continue;
137 switch (nodename_end - nodename) {
138 case 2:
139 current_tag = ND;
140 break;
141 case 3: {
142 switch (nodename[0]) {
143 case 'o':
144 current_tag = OSM;
145 break;
146 case 'w':
147 current_tag = WAY;
148 break;
149 case 't':
150 current_tag = TAG;
151 break;
152 default:
153 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
155 break;
157 case 4:
158 current_tag = NODE;
159 break;
160 case 6:
161 current_tag = MEMBER;
162 break;
163 case 8:
164 current_tag = RELATION;
165 break;
166 default:
167 fprintf(stderr, "--> %c%c", nodename[0], nodename[1]);
171 char *key, *key_end, *value_end;
172 key = nodename_end + 1;
174 do {
175 char *value;
176 key_state_t current_key = UNKNOWN;
177 key_end = strchrnul(key, '=');
179 if (key_end == NULL || key_end >= end)
180 break;
182 switch (key_end - key) {
183 case 1: {
184 switch (key[0]) {
185 case 'k':
186 current_key = KEY;
187 break;
188 case 'v':
189 current_key = VALUE;
190 break;
191 default:
192 current_key = UNKNOWN;
194 break;
196 case 2:
197 current_key = ID;
198 break;
199 case 3: {
200 switch (key[1]) {
201 case 'a':
202 current_key = LAT;
203 break;
204 case 'o':
205 current_key = LON;
206 break;
207 case 'e':
208 current_key = REF;
209 break;
210 default:
211 current_key = UNKNOWN;
212 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
214 break;
216 case 4: {
217 switch (key[0]) {
218 case 'u':
219 current_key = USER;
220 break;
221 case 'r':
222 current_key = ROLE;
223 break;
224 case 't':
225 current_key = TYPE;
226 break;
227 default:
228 current_key = UNKNOWN;
229 fprintf(stderr, "--> %c%c\n", key[0], key[1]);
231 break;
233 case 9:
234 current_key = TIMESTAMP;
235 break;
236 default: {
237 char *thingie = strndup(key, (key_end - key));
238 current_key = UNKNOWN;
240 fprintf(stderr, "UNKNOWN ATTR %s-> %c%c\n", thingie, key[0], key[1]);
241 free(thingie);
245 value = key_end + 2;
246 value_end = value;
247 value_end = strchr(value_end, '"');
249 if (value_end > end)
250 break;
252 switch (current_key) {
253 case ID:
254 if (attr_id) free(attr_id);
255 attr_id = strndup(value, (value_end - value));
256 break;
258 case LAT:
259 if (attr_lat) free(attr_lat);
260 attr_lat = strndup(value, (value_end - value));
261 break;
263 case LON:
264 if (attr_lon) free(attr_lon);
265 attr_lon = strndup(value, (value_end - value));
266 break;
268 case TIMESTAMP:
269 if (attr_timestamp) free(attr_timestamp);
270 attr_timestamp = strndup(value, (value_end - value));
271 // attr_timestamp[10] = ' '; /* Stupid timestamp fix */
272 break;
274 case USER: {
275 char *tmp;
276 if (attr_user) free(attr_user);
277 attr_user = strndup(value, (value_end - value));
278 tmp = escape_string(attr_user);
279 free(attr_user);
280 attr_user = tmp;
281 break;
284 case KEY: {
285 char *tmp;
286 if (attr_key) free(attr_key);
287 attr_key = strndup(value, (value_end - value));
288 tmp = escape_string(attr_key);
289 free(attr_key);
290 attr_key = tmp;
291 break;
294 case VALUE: {
295 char *tmp;
296 if (attr_value) free(attr_value);
297 attr_value = strndup(value, (value_end - value));
298 tmp = escape_string(attr_value);
299 free(attr_value);
300 attr_value = tmp;
301 break;
304 case TYPE:
305 if (attr_type) free(attr_type);
306 attr_type = strndup(value, (value_end - value));
307 break;
309 case REF:
310 if (attr_ref) free(attr_ref);
311 attr_ref = strndup(value, (value_end - value));
312 break;
314 case ROLE: {
315 char *tmp;
316 if (attr_role) free(attr_role);
317 attr_role = strndup(value, (value_end - value));
318 tmp = escape_string(attr_role);
319 free(attr_role);
320 attr_role = tmp;
321 break;
324 default:
325 fprintf(stderr, "--> %c%c\n", value[0], value[1]);
328 key = value_end + 2;
329 } while (key < end);
331 switch (current_tag) {
332 case NODE:
333 fprintf(fd_nodes, "%s, %s, %s, '%s', %s\n", attr_id, attr_lat, attr_lon, attr_user, attr_timestamp);
334 count_nodes++;
335 break;
336 case TAG: {
337 switch (parent_tag) {
338 case NODE:
339 fprintf(fd_node_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
340 count_node_tags++;
341 break;
342 case WAY:
343 fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
344 count_way_tags++;
345 break;
346 case RELATION:
347 fprintf(fd_relation_tags, "%s, '%s', '%s'\n", attr_id, attr_key, attr_value);
348 count_relation_tags++;
349 break;
350 default:
351 break;
353 break;
355 case WAY:
356 fprintf(fd_ways, "%s, '%s', '%s'\n", attr_id, attr_user, attr_timestamp);
357 count_ways++;
358 // fprintf(fd_way_tags, "%s, '%s', '%s'\n", attr_id, "type", "way");
359 // count_way_tags++;
360 break;
361 case RELATION:
362 fprintf(fd_relations, "%s, '%s', '%s'\n", attr_id, attr_user, attr_timestamp);
363 count_relations++;
364 break;
365 case MEMBER:
366 if (strcmp(attr_type, "node") == 0) {
367 fprintf(fd_members_node, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
368 count_members_node++;
369 } else if (strcmp(attr_type, "way") == 0) {
370 fprintf(fd_members_way, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
371 count_members_way++;
372 } else if (strcmp(attr_type, "relation") == 0) {
373 fprintf(fd_members_relation, "%s, %lu, %s, '%s'\n", attr_id, sequence, attr_ref, attr_role);
374 count_members_relation++;
376 sequence++;
377 break;
378 case ND:
379 fprintf(fd_way_nds, "%s, %lu, %s\n", attr_id, sequence, attr_ref);
380 sequence++;
381 count_way_nds++;
382 break;
383 default:
384 break;
387 if (end[-2] == '/') {
388 switch (current_tag) {
389 case NODE:
390 free(attr_lat);
391 free(attr_lon);
392 attr_lat = NULL;
393 attr_lon = NULL;
394 /* no break! */
396 case WAY:
397 case RELATION:
398 free(attr_id);
399 free(attr_timestamp);
400 free(attr_user);
402 attr_id = NULL;
403 attr_user = NULL;
404 attr_timestamp = NULL;
406 sequence = 0;
407 break;
409 case TAG:
410 free(attr_key);
411 free(attr_value);
413 attr_key = NULL;
414 attr_value = NULL;
415 break;
417 case ND:
418 case MEMBER:
419 free(attr_type);
420 free(attr_ref);
421 free(attr_role);
423 attr_type = NULL;
424 attr_ref = NULL;
425 attr_role = NULL;
426 default:
427 break;
429 } else if (current_tag == NODE || current_tag == WAY || current_tag == RELATION) {
430 parent_tag = current_tag;
433 } while ((start = ++end) < (range + max));
435 free(attr_id);
436 free(attr_lat);
437 free(attr_lon);
438 free(attr_timestamp);
439 free(attr_user);
441 free(attr_key);
442 free(attr_value);
444 fclose(fd_nodes);
445 fclose(fd_node_tags);
446 fclose(fd_ways);
447 fclose(fd_way_tags);
448 fclose(fd_way_nds);
449 fclose(fd_relations);
450 fclose(fd_relation_tags);
451 fclose(fd_members_node);
452 fclose(fd_members_relation);
454 char *current = get_current_dir_name();
456 printf("START TRANSACTION;\n");
458 printf("CREATE TABLE nodes_legacy (id serial, long float, lat float, username varchar(255), timestamp timestamptz);\n");
459 printf("CREATE TABLE node_tags (node integer, k varchar(255), v varchar(1024), primary key (node, k), foreign key(node) references nodes_legacy);\n");
460 printf("CREATE TABLE ways (id serial,username varchar(255), timestamp timestamptz);\n");
461 printf("CREATE TABLE way_tags (way integer, k varchar(255), v varchar(1024), primary key (way, k), foreign key(way) references ways);\n");
462 printf("CREATE TABLE way_nds (way integer, idx integer, to_node integer, foreign key(way) references ways, foreign key(to_node) references nodes_legacy, primary key(way, idx));\n");
463 printf("CREATE TABLE relations(id serial, username varchar(255), timestamp timestamptz);\n");
464 printf("CREATE TABLE relation_members_node (relation integer, idx integer, to_node integer, role varchar(255), foreign key(relation) references relations, foreign key(to_node) references nodes_legacy, primary key(relation, idx));\n");
465 printf("CREATE TABLE relation_members_relation (relation integer, idx integer, to_relation integer, role varchar(255), foreign key(relation) references relations, foreign key(to_relation) references relations, primary key(relation, idx));\n");
466 printf("CREATE TABLE relation_members_way (relation integer, idx integer, to_way integer, role varchar(255), foreign key(relation) references relations, foreign key(to_way) references ways, primary key(relation, idx));\n");
467 printf("CREATE TABLE relation_tags (relation integer, k varchar(255), v varchar(1024), foreign key(relation) references relations, primary key(relation, k));\n");
469 printf("COPY %lu RECORDS INTO nodes_legacy from '%s/" file_nodes "' USING DELIMITERS ',', '\\n', '''';\n", count_nodes, current);
470 printf("COPY %lu RECORDS INTO node_tags from '%s/" file_node_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_node_tags, current);
471 printf("COPY %lu RECORDS INTO ways from '%s/" file_ways "' USING DELIMITERS ',', '\\n', '''';\n", count_ways, current);
472 printf("COPY %lu RECORDS INTO way_tags from '%s/" file_way_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_way_tags, current);
473 printf("COPY %lu RECORDS INTO way_nds from '%s/" file_way_nds "' USING DELIMITERS ',', '\\n', '''';\n", count_way_nds, current);
474 printf("COPY %lu RECORDS INTO relations from '%s/" file_relations "' USING DELIMITERS ',', '\\n', '''';\n", count_relations, current);
475 printf("COPY %lu RECORDS INTO relation_tags from '%s/" file_relation_tags "' USING DELIMITERS ',', '\\n', '''';\n", count_relation_tags, current);
476 printf("COPY %lu RECORDS INTO relation_members_node from '%s/" file_relation_member_node "' USING DELIMITERS ',', '\\n', '''';\n", count_members_node, current);
477 printf("COPY %lu RECORDS INTO relation_members_relation from '%s/" file_relation_member_relation "' USING DELIMITERS ',', '\\n', '''';\n", count_members_relation, current);
478 printf("COPY %lu RECORDS INTO relation_members_way from '%s/" file_relation_member_way "' USING DELIMITERS ',', '\\n', '''';\n", count_members_way, current);
479 printf("COMMIT;\n");
481 free(current);
/*
 * Entry point. Expects exactly one argument: the path of the OSM XML file.
 *
 * The file is mapped read-only and handed to parser() together with its
 * size in bytes; an empty file is accepted and produces no output. All
 * diagnostics go to stderr, because stdout carries the generated SQL.
 *
 * Exits with 0 on success and with -1 on a usage error or when the file
 * cannot be opened, inspected or mapped.
 */
int main(int argc, char *argv[]) {
    int fd;
    struct stat statbuf;

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <file.osm>\n", argc > 0 ? argv[0] : "osmparser");
        exit(-1);
    }

    fprintf(stderr, "Analysing %s...\n", argv[1]);

    fd = open(argv[1], O_RDONLY);
    if (fd < 0) { perror("Open:"); exit(-1); }

    if (fstat (fd, &statbuf) == -1) { perror("fstat:"); exit(-1); }

    /* mmap() rejects a zero length, so an empty file is skipped. */
    if (statbuf.st_size > 0) {
        char *range = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, (off_t) 0);
        if (range == MAP_FAILED) {
            perror("Mmap:");
            fprintf(stderr, "(did you compile PAE in the kernel?)\n");
            exit(-1);
        }
        parser(range, statbuf.st_size / sizeof(char));
        munmap(range, statbuf.st_size);
    }

    close(fd);
    exit(0);
}