8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / cmd / cmd-inet / usr.bin / nca / ncab2clf.c
blobd0d78c4df7841ab60b9a1a2b8c771e9d6ecd6414
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
20 * CDDL HEADER END
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
29 * Converts binary log files to CLF (Common Log Format).
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <strings.h>
36 #include <sys/types.h>
37 #include <fcntl.h>
38 #include <stdio.h>
39 #include <locale.h>
40 #include <errno.h>
41 #include <time.h>
42 #include <synch.h>
43 #include <syslog.h>
45 #ifndef TRUE
46 #define TRUE 1
47 #endif /* TRUE */
49 #ifndef FALSE
50 #define FALSE 0
51 #endif /* FALSE */
53 #include "ncadoorhdr.h"
54 #include "ncalogd.h"
56 extern char *gettext();
/*
 * File-local Boolean type.  The enumerators are ordinary enum constants
 * (false is 0, true is 1); this file does not use <stdbool.h>.
 */
typedef enum {
	false,		/* 0 */
	true		/* 1 */
} bool;
/*
 * Printable HTTP method names, looked up by http_method() using the
 * numeric method code as the index.  Entry 0 ("UNKNOWN") is what
 * http_method() hands back for any code outside the table.
 */
static const char *const g_method_strings[8] = {
	"UNKNOWN", "OPTIONS", "GET", "HEAD",
	"POST", "PUT", "DELETE", "TRACE"
};
/*
 * Three-letter month abbreviations used in CLF timestamps, indexed by a
 * zero-based month number (0 is "Jan", 11 is "Dec").
 */
static const char * const sMonthStr [12] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
/* Time unit conversions, all expressed in seconds */
#define	SEC_PER_MIN		(60)
#define	SEC_PER_HOUR		(60 * SEC_PER_MIN)
#define	SEC_PER_DAY		(24 * SEC_PER_HOUR)
#define	SEC_PER_YEAR		(365 * SEC_PER_DAY)
#define	LEAP_TO_70		(70 / 4)

/* Binary size units, in bytes */
#define	KILO_BYTE		(1024)
#define	MEGA_BYTE		(KILO_BYTE * KILO_BYTE)
#define	GIGA_BYTE		(KILO_BYTE * MEGA_BYTE)

/* Buffer sizes: one formatted CLF date, and the text output buffer */
#define	CLF_DATE_BUF_LENGTH	(128)
#define	OUTFILE_BUF_SIZE	(256 * KILO_BYTE)
104 static bool g_enable_directio = true;
105 static ssize_t g_invalid_count = 0;
106 static ssize_t g_skip_count = 0;
107 static char *g_start_time_str = NULL;
109 /* init value must match logd & NCA kmod */
110 static ssize_t g_n_log_upcall = 0;
112 /* input binary file was written in 64k chunks by default */
113 static ssize_t g_infile_blk_size = NCA_DEFAULT_LOG_BUF_SIZE;
115 /* num of output records, by default infinite */
116 static ssize_t g_out_records = -1;
118 /* start time for log output, default none (i.e. output all) */
119 static struct tm g_start_time;
122 * http_version(version)
124 * Returns out the string of a given http version
127 static char *
128 http_version(int http_ver)
130 char *ver_num;
132 switch (http_ver) {
133 case HTTP_0_9:
134 case HTTP_0_0:
135 ver_num = "HTTP/0.9";
136 break;
137 case HTTP_ERR:
138 case HTTP_1_0:
139 ver_num = "HTTP/1.0";
140 break;
141 case HTTP_1_1:
142 ver_num = "HTTP/1.1";
143 break;
144 default:
145 ver_num = "HTTP/unknown";
148 return (ver_num);
151 static bool
152 valid_version(int http_ver)
154 switch (http_ver) {
155 case HTTP_0_9:
156 case HTTP_0_0:
157 case HTTP_1_0:
158 case HTTP_1_1:
159 return (true);
160 default:
161 break;
164 return (false);
167 static bool
168 valid_method(int method)
170 switch (method) {
171 case NCA_OPTIONS:
172 case NCA_GET:
173 case NCA_HEAD:
174 case NCA_POST:
175 case NCA_PUT:
176 case NCA_DELETE:
177 case NCA_TRACE:
178 return (true);
179 default:
180 break;
183 return (false);
187 * http_method
189 * Returns the method string for the given method.
192 static char *
193 http_method(int method)
195 if (method < sizeof (g_method_strings) / sizeof (g_method_strings[0]))
196 return ((char *)(g_method_strings[method]));
197 else
198 return ((char *)(g_method_strings[0]));
201 /* sMonth: Return short month string */
203 static const char *
204 sMonth(int index)
206 return (sMonthStr[index]);
210 * Debug formatting routine. Returns a character string representation of the
211 * addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address
212 * as a pointer. The "xxx" parts including left zero padding so the final
213 * string will fit easily in tables. It would be nice to take a padding
214 * length argument instead.
217 static char *
218 ip_dot_saddr(uchar_t *addr, char *buf)
220 (void) sprintf(buf, "%03d.%03d.%03d.%03d",
221 addr[0] & 0xFF, addr[1] & 0xFF, addr[2] & 0xFF, addr[3] & 0xFF);
222 return (buf);
226 * Debug formatting routine. Returns a character string representation of the
227 * addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address
228 * in the form of a ipaddr_t and calls ip_dot_saddr with a pointer.
231 static char *
232 ip_dot_addr(ipaddr_t addr, char *buf)
234 return (ip_dot_saddr((uchar_t *)&addr, buf));
237 static int
238 http_clf_date(char *buf, int bufsize, time_t t)
240 struct tm local_time;
241 long time_zone_info;
242 char sign;
244 if (localtime_r(&t, &local_time) == NULL)
245 return (0);
247 if (g_start_time.tm_year > 0 &&
248 (local_time.tm_year < g_start_time.tm_year ||
249 (local_time.tm_year == g_start_time.tm_year &&
250 local_time.tm_mon < g_start_time.tm_mon ||
251 (local_time.tm_mon == g_start_time.tm_mon &&
252 local_time.tm_mday < g_start_time.tm_mday ||
253 (local_time.tm_mday == g_start_time.tm_mday &&
254 local_time.tm_hour < g_start_time.tm_hour ||
255 (local_time.tm_hour == g_start_time.tm_hour &&
256 local_time.tm_min < g_start_time.tm_min ||
257 (local_time.tm_min == g_start_time.tm_min &&
258 local_time.tm_sec < g_start_time.tm_sec))))))) {
259 /* clf record before the specified start time */
260 return (1);
263 if (local_time.tm_isdst)
264 time_zone_info = -timezone + SEC_PER_HOUR;
265 else
266 time_zone_info = -timezone;
268 if (time_zone_info < 0) {
269 sign = '-';
270 time_zone_info = -time_zone_info;
271 } else {
272 sign = '+';
275 (void) snprintf(buf, bufsize,
276 "[%02d/%s/%04d:%02d:%02d:%02d %c%02ld%02ld]",
277 local_time.tm_mday, sMonth(local_time.tm_mon),
278 1900 + local_time.tm_year, local_time.tm_hour,
279 local_time.tm_min, local_time.tm_sec,
280 sign, time_zone_info / SEC_PER_HOUR,
281 time_zone_info % SEC_PER_HOUR);
283 return (0);
/*
 * xmalloc(size)
 *
 * malloc() wrapper that never returns NULL.  A request for zero bytes
 * is rounded up to one byte.  If the allocation fails, an error is
 * sent to syslog and the process is aborted.
 */
static void *
xmalloc(size_t size)
{
	void *mem = malloc(size != 0 ? size : 1);

	if (mem == NULL) {
		syslog(LOG_ERR, gettext("Error: ncab2clf: Out of memory\n"));
		abort();
	}

	return (mem);
}
/*
 * xstrdup(string)
 *
 * Returns a copy of string in memory obtained from xmalloc(), or NULL
 * when string itself is NULL.
 */
static char *
xstrdup(const char *string)
{
	char *copy;

	if (string == NULL)
		return (NULL);

	copy = xmalloc(strlen(string) + 1);

	/* strcpy() returns its destination, i.e. the new copy */
	return (strcpy(copy, string));
}
/*
 * usage()
 *
 * Prints the command synopsis and option summary on stderr, then
 * terminates the process with exit status 3.  Does not return.
 */
static void
usage(void)
{
	const char *text = gettext(
	    "\nncab2clf [-Dhv] [-b <block-size>] [-i <binary-log-file>] "
	    "[-n <n>]\n"
	    " [-o <output-file>] [-s <date/time>]\n"
	    "\tconverts a NCA binary log file to HTTP CLF"
	    " (Common Log Format)\n\n"
	    "\t-b <block-size>\n"
	    "\t\tinput file blocking size in KB\n"
	    "\t\t- default is 64K bytes\n"
	    "\t-D\tdisable directio on <output-file-name>\n"
	    "\t-h\tthis usage message\n"
	    "\t-i <binary-log-file>\n"
	    "\t\tspecify input file\n"
	    "\t-n <n>\n"
	    "\t\toutput <n> CLF records\n"
	    "\t-o <output-file>\n"
	    "\t\tspecify output file\n"
	    "\t-s <date/time>\n"
	    "\t\tskip any records before <date/time>\n"
	    "\t\t- <date/time> may be in CLF format\n"
	    "\t\t- <date/time> may be in time format as specified "
	    "by touch(1)\n"
	    "\t-v\tverbose output\n"
	    "\tNote: if no <output-file> - output goes to standard output\n"
	    "\tNote: if no <binary-log-file> - input is taken from standard "
	    "input\n");

	(void) fprintf(stderr, text);

	exit(3);
}
/*
 * atoi_for2(p, value)
 * - stores the numerical value (0-99) of the two decimal digits at the
 *   start of string p into *value
 * - returns TRUE (1) upon success and FALSE (0) upon failure
 * - fails, leaving *value untouched, unless both of the first two
 *   characters are decimal digits; the second character is not read
 *   when the first one is not a digit, so an empty string is safe
 * - characters after the first two are ignored
 */
static int
atoi_for2(char *p, int *value)
{
	/* && short-circuits: p[1] is only examined once p[0] is a digit */
	int both_digits = (p[0] >= '0' && p[0] <= '9' &&
	    p[1] >= '0' && p[1] <= '9');

	if (both_digits)
		*value = (p[0] - '0') * 10 + (p[1] - '0');

	/* the comparison result is exactly 1 or 0, i.e. TRUE or FALSE */
	return (both_digits);
}
377 * parse_time(t, tm)
378 * - parses the string t to retrieve the UNIX time format as specified by
379 * touch(1).
380 * - return TRUE upon success and FALSE upon failure
383 static int
384 parse_time(char *t, struct tm *tm)
386 int century = 0;
387 int seconds = 0;
388 time_t when;
389 char *p;
392 * time in the following format (defined by the touch(1) spec):
393 * [[CC]YY]MMDDhhmm[.SS]
395 if ((p = strchr(t, '.')) != NULL) {
396 if (strchr(p+1, '.') != NULL)
397 return (FALSE);
398 if (!atoi_for2(p+1, &seconds))
399 return (FALSE);
400 *p = '\0';
403 when = time(0);
404 bzero(tm, sizeof (struct tm));
405 tm->tm_year = localtime(&when)->tm_year;
407 switch (strlen(t)) {
408 case 12: /* CCYYMMDDhhmm */
409 if (!atoi_for2(t, &century))
410 return (FALSE);
411 t += 2;
412 /* FALLTHROUGH */
413 case 10: /* YYMMDDhhmm */
414 if (!atoi_for2(t, &tm->tm_year))
415 return (FALSE);
416 t += 2;
417 if (century == 0) {
418 if (tm->tm_year < 69)
419 tm->tm_year += 100;
420 } else
421 tm->tm_year += (century - 19) * 100;
422 /* FALLTHROUGH */
423 case 8: /* MMDDhhmm */
424 if (!atoi_for2(t, &tm->tm_mon))
425 return (FALSE);
426 tm->tm_mon--;
427 t += 2;
429 if (!atoi_for2(t, &tm->tm_mday))
430 return (FALSE);
431 t += 2;
433 if (!atoi_for2(t, &tm->tm_hour))
434 return (FALSE);
435 t += 2;
437 if (!atoi_for2(t, &tm->tm_min))
438 return (FALSE);
440 tm->tm_sec = seconds;
441 break;
442 default:
443 return (FALSE);
446 return (TRUE);
/*
 * close_files(ifd, ofd)
 *
 * Closes the input and output descriptors, except that standard input
 * and standard output are left open.  Errors from close() are ignored.
 */
static void
close_files(int ifd, int ofd)
{
	int in_is_stdin = (ifd == STDIN_FILENO);
	int out_is_stdout = (ofd == STDOUT_FILENO);

	if (!in_is_stdin) {
		(void) close(ifd);
	}

	if (!out_is_stdout) {
		(void) close(ofd);
	}
}
/*
 * Read the requested number of bytes from the given file descriptor.
 *
 * Keeps calling read() until bufsize bytes have been stored in buf or
 * end of file is reached; reads interrupted by a signal (EINTR) are
 * retried.  Returns the number of bytes actually read, which is less
 * than bufsize only at end of file, or -1 after printing a message to
 * stderr when read() fails for any other reason.
 */
static ssize_t
read_n_bytes(int fd, char *buf, ssize_t bufsize)
{
	ssize_t total = 0;

	while (total < bufsize) {
		ssize_t got = read(fd, buf + total, bufsize - total);

		if (got == 0)
			break;		/* end of input */

		if (got < 0) {
			if (errno == EINTR)
				continue;	/* interrupted, try again */

			(void) fprintf(stderr, gettext(
			    "Error: ncab2clf: "
			    "reading input file: %s\n"),
			    strerror(errno));
			return (-1);
		}

		total += got;
	}

	return (total);
}
/*
 * Write the requested number of bytes to the given file descriptor.
 *
 * Keeps calling write() until all bufsize bytes of buf have been
 * written; writes interrupted by a signal (EINTR) are retried.
 * Returns the number of bytes written, or -1 after printing a message
 * to stderr when write() fails for any other reason.
 */
static ssize_t
write_n_bytes(int fd, char *buf, ssize_t bufsize)
{
	ssize_t total = 0;

	while (total < bufsize) {
		ssize_t put = write(fd, buf + total, bufsize - total);

		if (put < 0) {
			if (errno == EINTR)
				continue;	/* interrupted, try again */

			(void) fprintf(stderr, gettext(
			    "Error: ncab2clf: "
			    "writing output file: %s\n"),
			    strerror(errno));
			return (-1);
		}

		total += put;
	}

	return (total);
}
526 /* do constraint checks and determine if it's a valid header */
528 static bool
529 is_valid_header(void *ibuf)
531 nca_log_buf_hdr_t *h;
532 nca_log_stat_t *s;
534 h = (nca_log_buf_hdr_t *)ibuf;
536 /* Do some validity checks on ibuf */
538 if (((h->nca_loghdr).nca_version != NCA_LOG_VERSION1) ||
539 ((h->nca_loghdr).nca_op != log_op)) {
540 return (false);
543 s = &(h->nca_logstats);
545 if (g_n_log_upcall == 0) {
546 g_n_log_upcall = s->n_log_upcall;
547 } else {
548 if ((++g_n_log_upcall) != (ssize_t)s->n_log_upcall) {
549 (void) fprintf(stderr, gettext(
550 "Warning: ncab2clf:"
551 " expected record number (%d) is"
552 " different from the one seen (%d)\n."
553 " Resetting the expected record"
554 " number.\n"), g_n_log_upcall, s->n_log_upcall);
556 g_n_log_upcall = s->n_log_upcall;
560 return (true);
563 /* convert input binary buffer into CLF */
565 static int
566 b2clf_buf(
567 void *ibuf,
568 char *obuf,
569 ssize_t isize,
570 ssize_t osize,
571 ssize_t *out_size)
573 nca_log_buf_hdr_t *h;
574 nca_log_stat_t *s;
575 nca_request_log_t *r;
577 char *br;
578 void *er;
579 char ip_buf[64];
580 ssize_t max_input_size, num_bytes_read;
581 int n_recs;
582 bool error_seen;
584 ssize_t count;
585 char clf_timebuf[CLF_DATE_BUF_LENGTH];
586 char *method;
587 char *http_version_string;
588 char *ruser;
589 char *req_url;
590 char *remote_ip;
592 h = (nca_log_buf_hdr_t *)ibuf;
593 s = &(h->nca_logstats);
594 r = (nca_request_log_t *)(&(h[1]));
596 /* OK, it's a valid buffer which we can use, go ahead and convert it */
598 max_input_size = (ssize_t)isize - sizeof (nca_log_buf_hdr_t);
600 *out_size = 0;
601 error_seen = false;
602 num_bytes_read = 0;
603 for (n_recs = 0; n_recs < s->n_log_recs; n_recs++) {
605 /* Make sure there is enough space in the output buffer */
607 if ((*out_size >= osize) ||
608 (num_bytes_read >= max_input_size)) {
609 error_seen = true;
610 break;
613 if (http_clf_date(clf_timebuf, sizeof (clf_timebuf),
614 ((time_t)r->start_process_time))) {
615 /* A start time was speced and we're not there yet */
616 ++g_skip_count;
617 goto skip;
620 /* Only logs valid HTTP ops */
622 if ((! valid_method((int)r->method)) ||
623 (! valid_version((int)r->version))) {
624 ++g_invalid_count;
625 goto skip;
628 method = http_method((int)r->method);
629 http_version_string = http_version((int)r->version);
631 remote_ip = ip_dot_addr(r->remote_host, (char *)&ip_buf);
632 if (r->remote_user_len) {
633 ruser = NCA_REQLOG_RDATA(r, remote_user);
634 } else {
635 ruser = "-";
638 if (r->request_url_len) {
639 req_url = NCA_REQLOG_RDATA(r, request_url);
640 } else {
641 req_url = "UNKNOWN";
644 count = (ssize_t)snprintf(&(obuf[*out_size]), osize - *out_size,
645 "%s %s %s %s \"%s %s %s\" %d %d\n",
646 ((remote_ip) ? remote_ip : "-"),
647 /* should be remote_log_name */
648 "-",
649 ruser,
650 clf_timebuf,
651 method,
652 req_url,
653 http_version_string,
654 r->response_status,
655 r->response_len);
657 *out_size += count;
658 skip:
659 br = (char *)r;
660 er = ((char *)r) + NCA_LOG_REC_SIZE(r);
662 /*LINTED*/
663 r = (nca_request_log_t *)NCA_LOG_ALIGN(er);
664 num_bytes_read += (ssize_t)(((char *)r) - br);
665 if (g_out_records > 0 && --g_out_records == 0)
666 break;
669 if (error_seen) {
670 (void) fprintf(stderr, gettext(
671 "Error: ncab2clf: Input buffer not fully converted.\n"));
673 if (n_recs != s->n_log_recs)
674 (void) fprintf(stderr, gettext(
675 "Warning: ncab2clf: "
676 "Converted only %d of %d records\n"),
677 n_recs, s->n_log_recs);
680 return (0);
683 static int
684 b2clf(int ifd, int ofd)
686 char *ibuf;
687 char *obuf;
688 bool error_seen;
689 bool eof_seen;
690 ssize_t num_iterations, ni, nh, no, olen;
692 nca_log_buf_hdr_t *h;
693 nca_log_stat_t *s;
695 ibuf = xmalloc(g_infile_blk_size);
696 obuf = xmalloc(OUTFILE_BUF_SIZE);
697 error_seen = false;
699 eof_seen = false;
700 num_iterations = 0;
701 while (! eof_seen && g_out_records != 0) {
702 ++num_iterations;
704 nh = ni = no = 0;
706 /* read the binary header first */
707 nh = read_n_bytes(ifd, ibuf, sizeof (nca_log_buf_hdr_t));
708 if (nh != sizeof (nca_log_buf_hdr_t)) {
709 eof_seen = true;
710 break;
713 if (! is_valid_header(ibuf)) {
714 (void) fprintf(stderr, gettext(
715 "Error: ncab2clf: "
716 "Can't convert the input data to CLF\n"));
717 continue;
720 /* read the data to be converted */
721 /* LINTED */
722 h = (nca_log_buf_hdr_t *)ibuf;
723 s = &(h->nca_logstats);
725 if (s->n_log_size == 0)
726 continue;
728 ni = read_n_bytes(ifd, &(ibuf[nh]), (ssize_t)s->n_log_size);
729 if (ni < 0) {
730 error_seen = true;
731 break;
732 } else if (ni < (ssize_t)s->n_log_size) {
733 eof_seen = true;
736 if (ni == 0)
737 break;
739 /* convert binary input into text output */
741 if (b2clf_buf(ibuf, obuf, ni + nh, OUTFILE_BUF_SIZE, &olen)) {
742 (void) fprintf(stderr, gettext(
743 "Error: ncab2clf: "
744 "Can't convert the input data to CLF\n"));
745 error_seen = true;
746 break;
749 /* write out the text data */
750 no = write_n_bytes(ofd, obuf, olen);
751 if (no != olen) {
752 error_seen = true;
753 break;
756 bzero(ibuf, nh + ni);
757 bzero(obuf, no);
760 free(ibuf);
761 free(obuf);
763 if (error_seen)
764 return (-1);
766 return (0);
771 main(int argc, char **argv)
773 int c;
774 int ifd; /* input fd - binary log file */
775 int ofd;
776 struct tm t;
778 char *infile = NULL; /* input file name */
779 char *outfile = NULL; /* output file name */
781 char monstr[64];
783 (void) setlocale(LC_ALL, "");
785 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
786 #define TEXT_DOMAIN "SYS_TEST"
787 #endif
789 (void) textdomain(TEXT_DOMAIN);
791 /* parse any arguments */
792 while ((c = getopt(argc, argv, "hvDi:o:b:n:s:")) != EOF) {
793 switch (c) {
794 case 'h':
795 usage();
796 break;
797 case 'i':
798 infile = xstrdup(optarg);
799 break;
800 case 'D':
801 g_enable_directio = false;
802 break;
803 case 'o':
804 outfile = xstrdup(optarg);
805 break;
806 case 'b':
807 g_infile_blk_size = (KILO_BYTE * atoi(optarg));
808 break;
809 case 'n':
810 g_out_records = atoi(optarg);
811 break;
812 case 's':
813 g_start_time_str = strdup(optarg);
814 bzero(&t, sizeof (t));
815 if (sscanf(optarg, "%d/%3s/%d:%d:%d:%d", &t.tm_mday,
816 &monstr[0], &t.tm_year, &t.tm_hour, &t.tm_min,
817 &t.tm_sec) == 6) {
818 /* Valid CLF time (e.g. 06/Apr/2001:09:14:14) */
819 t.tm_mon = 0;
820 do {
821 if (strcasecmp(monstr,
822 sMonthStr[t.tm_mon]) == 0)
823 break;
824 } while (t.tm_mon++ < 12);
825 t.tm_year -= 1900;
826 g_start_time = t;
827 } else if (parse_time(optarg, &t)) {
828 g_start_time = t;
829 } else {
830 (void) fprintf(stderr,
831 gettext("Error: ncab2clf:"
832 " %s: unrecognized date/time.\n"),
833 optarg);
835 break;
836 case 'v':
837 (void) fprintf(stderr, gettext("Error: ncab2clf: "
838 "verbose functionality not yet supported\n"));
839 exit(3);
840 break;
841 case '?':
842 usage();
843 break;
847 /* set up the input stream */
849 if (infile) {
851 if ((ifd = open(infile, O_RDONLY)) < 0) {
852 (void) fprintf(stderr,
853 gettext("Error: ncab2clf: "
854 "Failure to open binary log file %s: %s\n"),
855 infile, strerror(errno));
856 exit(1);
859 } else {
860 ifd = STDIN_FILENO;
863 /* set up the output stream */
865 if (outfile) {
867 if ((ofd = open(outfile, O_WRONLY|O_CREAT, 0644)) < 0) {
868 (void) fprintf(stderr, gettext(
869 "Error: ncab2clf: "
870 "Failure to open output file %s: %s\n"),
871 outfile, strerror(errno));
872 exit(1);
875 /* Enable directio on output stream if specified */
877 if (g_enable_directio)
878 (void) directio(ofd, DIRECTIO_ON);
880 } else {
881 ofd = STDOUT_FILENO;
884 if ((b2clf(ifd, ofd) != 0)) {
885 close_files(ifd, ofd);
886 exit(2);
889 close_files(ifd, ofd);
891 if (g_invalid_count) {
892 (void) fprintf(stderr, gettext("Warning: ncab2clf: %d"
893 " number of invalid log records encountered in binary input"
894 " file were skipped\n"), g_invalid_count);
896 if (g_skip_count) {
897 (void) fprintf(stderr, gettext("Warning: ncab2clf:"
898 " %d log records in binary input file before %s"
899 " were skipped\n"),
900 g_skip_count, g_start_time_str);
903 return (0);