4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
29 * Converts binary log files to CLF (Common Log Format).
36 #include <sys/types.h>
53 #include "ncadoorhdr.h"
56 extern char *gettext();
58 typedef enum { /* Boolean type */
63 static const char *const
64 g_method_strings
[8] = {
75 /* Short month strings */
76 static const char * const sMonthStr
[12] = {
91 #define SEC_PER_MIN (60)
92 #define SEC_PER_HOUR (60*60)
93 #define SEC_PER_DAY (24*60*60)
94 #define SEC_PER_YEAR (365*24*60*60)
95 #define LEAP_TO_70 (70/4)
97 #define KILO_BYTE (1024)
98 #define MEGA_BYTE (KILO_BYTE * KILO_BYTE)
99 #define GIGA_BYTE (KILO_BYTE * MEGA_BYTE)
101 #define CLF_DATE_BUF_LENGTH (128)
102 #define OUTFILE_BUF_SIZE (256 * KILO_BYTE)
104 static bool g_enable_directio
= true;
105 static ssize_t g_invalid_count
= 0;
106 static ssize_t g_skip_count
= 0;
107 static char *g_start_time_str
= NULL
;
109 /* init value must match logd & NCA kmod */
110 static ssize_t g_n_log_upcall
= 0;
112 /* input binary file was written in 64k chunks by default */
113 static ssize_t g_infile_blk_size
= NCA_DEFAULT_LOG_BUF_SIZE
;
115 /* num of output records, by default infinite */
116 static ssize_t g_out_records
= -1;
118 /* start time for log output, default none (i.e. output all) */
119 static struct tm g_start_time
;
122 * http_version(version)
124 * Returns the string for a given http version
128 http_version(int http_ver
)
135 ver_num
= "HTTP/0.9";
139 ver_num
= "HTTP/1.0";
142 ver_num
= "HTTP/1.1";
145 ver_num
= "HTTP/unknown";
152 valid_version(int http_ver
)
168 valid_method(int method
)
189 * Returns the method string for the given method.
193 http_method(int method
)
195 if (method
< sizeof (g_method_strings
) / sizeof (g_method_strings
[0]))
196 return ((char *)(g_method_strings
[method
]));
198 return ((char *)(g_method_strings
[0]));
201 /* sMonth: Return short month string */
206 return (sMonthStr
[index
]);
210 * Debug formatting routine. Returns a character string representation of the
211 * addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address
212 * as a pointer. The "xxx" parts include left zero padding so the final
213 * string will fit easily in tables. It would be nice to take a padding
214 * length argument instead.
218 ip_dot_saddr(uchar_t
*addr
, char *buf
)
220 (void) sprintf(buf
, "%03d.%03d.%03d.%03d",
221 addr
[0] & 0xFF, addr
[1] & 0xFF, addr
[2] & 0xFF, addr
[3] & 0xFF);
226 * Debug formatting routine. Returns a character string representation of the
227 * addr in buf, of the form xxx.xxx.xxx.xxx. This routine takes the address
228 * in the form of an ipaddr_t and calls ip_dot_saddr with a pointer.
232 ip_dot_addr(ipaddr_t addr
, char *buf
)
234 return (ip_dot_saddr((uchar_t
*)&addr
, buf
));
238 http_clf_date(char *buf
, int bufsize
, time_t t
)
240 struct tm local_time
;
244 if (localtime_r(&t
, &local_time
) == NULL
)
247 if (g_start_time
.tm_year
> 0 &&
248 (local_time
.tm_year
< g_start_time
.tm_year
||
249 (local_time
.tm_year
== g_start_time
.tm_year
&&
250 local_time
.tm_mon
< g_start_time
.tm_mon
||
251 (local_time
.tm_mon
== g_start_time
.tm_mon
&&
252 local_time
.tm_mday
< g_start_time
.tm_mday
||
253 (local_time
.tm_mday
== g_start_time
.tm_mday
&&
254 local_time
.tm_hour
< g_start_time
.tm_hour
||
255 (local_time
.tm_hour
== g_start_time
.tm_hour
&&
256 local_time
.tm_min
< g_start_time
.tm_min
||
257 (local_time
.tm_min
== g_start_time
.tm_min
&&
258 local_time
.tm_sec
< g_start_time
.tm_sec
))))))) {
259 /* clf record before the specified start time */
263 if (local_time
.tm_isdst
)
264 time_zone_info
= -timezone
+ SEC_PER_HOUR
;
266 time_zone_info
= -timezone
;
268 if (time_zone_info
< 0) {
270 time_zone_info
= -time_zone_info
;
275 (void) snprintf(buf
, bufsize
,
276 "[%02d/%s/%04d:%02d:%02d:%02d %c%02ld%02ld]",
277 local_time
.tm_mday
, sMonth(local_time
.tm_mon
),
278 1900 + local_time
.tm_year
, local_time
.tm_hour
,
279 local_time
.tm_min
, local_time
.tm_sec
,
280 sign
, time_zone_info
/ SEC_PER_HOUR
,
281 time_zone_info
% SEC_PER_HOUR
);
288 * Abort if malloc fails
299 if ((p
= malloc(size
)) == NULL
) {
300 syslog(LOG_ERR
, gettext("Error: ncab2clf: Out of memory\n"));
313 xstrdup(const char *string
)
318 new_string
= xmalloc(strlen(string
) + 1);
319 (void) strcpy(new_string
, string
);
330 (void) fprintf(stderr
, gettext(
331 "\nncab2clf [-Dhv] [-b <block-size>] [-i <binary-log-file>] "
333 " [-o <output-file>] [-s <date/time>]\n"
334 "\tconverts a NCA binary log file to HTTP CLF"
335 " (Common Log Format)\n\n"
336 "\t-b <block-size>\n"
337 "\t\tinput file blocking size in KB\n"
338 "\t\t- default is 64K bytes\n"
339 "\t-D\tdisable directio on <output-file-name>\n"
340 "\t-h\tthis usage message\n"
341 "\t-i <binary-log-file>\n"
342 "\t\tspecify input file\n"
344 "\t\toutput <n> CLF records\n"
345 "\t-o <output-file>\n"
346 "\t\tspecify output file\n"
348 "\t\tskip any records before <date/time>\n"
349 "\t\t- <date/time> may be in CLF format\n"
350 "\t\t- <date/time> may be in time format as specified "
352 "\t-v\tverbose output\n"
353 "\tNote: if no <output-file> - output goes to standard output\n"
354 "\tNote: if no <binary-log-file> - input is taken from standard "
361 * atoi_for2(p, value)
362 * - stores the numerical value of the two digit string p into value
363 * - return TRUE upon success and FALSE upon failure
367 atoi_for2(char *p
, int *value
)
370 *value
= (*p
- '0') * 10 + *(p
+1) - '0';
371 if ((*value
< 0) || (*value
> 99))
378 * - parses the string t to retrieve the UNIX time format as specified by
380 * - return TRUE upon success and FALSE upon failure
384 parse_time(char *t
, struct tm
*tm
)
392 * time in the following format (defined by the touch(1) spec):
393 * [[CC]YY]MMDDhhmm[.SS]
395 if ((p
= strchr(t
, '.')) != NULL
) {
396 if (strchr(p
+1, '.') != NULL
)
398 if (!atoi_for2(p
+1, &seconds
))
404 bzero(tm
, sizeof (struct tm
));
405 tm
->tm_year
= localtime(&when
)->tm_year
;
408 case 12: /* CCYYMMDDhhmm */
409 if (!atoi_for2(t
, ¢ury
))
413 case 10: /* YYMMDDhhmm */
414 if (!atoi_for2(t
, &tm
->tm_year
))
418 if (tm
->tm_year
< 69)
421 tm
->tm_year
+= (century
- 19) * 100;
423 case 8: /* MMDDhhmm */
424 if (!atoi_for2(t
, &tm
->tm_mon
))
429 if (!atoi_for2(t
, &tm
->tm_mday
))
433 if (!atoi_for2(t
, &tm
->tm_hour
))
437 if (!atoi_for2(t
, &tm
->tm_min
))
440 tm
->tm_sec
= seconds
;
450 close_files(int ifd
, int ofd
)
452 if (ifd
!= STDIN_FILENO
)
455 if (ofd
!= STDOUT_FILENO
)
460 * Read the requested number of bytes from the given file descriptor
464 read_n_bytes(int fd
, char *buf
, ssize_t bufsize
)
466 ssize_t num_to_read
= bufsize
;
467 ssize_t num_already_read
= 0;
470 while (num_to_read
> 0) {
472 i
= read(fd
, &(buf
[num_already_read
]), num_to_read
);
477 (void) fprintf(stderr
, gettext(
479 "reading input file: %s\n"),
481 return (-1); /* some weird interrupt */
487 num_already_read
+= i
;
491 return (num_already_read
);
495 * Write the requested number of bytes to the given file descriptor
499 write_n_bytes(int fd
, char *buf
, ssize_t bufsize
)
501 ssize_t num_to_write
= bufsize
;
502 ssize_t num_written
= 0;
505 while (num_to_write
> 0) {
507 i
= write(fd
, &(buf
[num_written
]), num_to_write
);
512 (void) fprintf(stderr
, gettext(
514 "writing output file: %s\n"),
516 return (-1); /* some weird interrupt */
523 return (num_written
);
526 /* do constraint checks and determine if it's a valid header */
529 is_valid_header(void *ibuf
)
531 nca_log_buf_hdr_t
*h
;
534 h
= (nca_log_buf_hdr_t
*)ibuf
;
536 /* Do some validity checks on ibuf */
538 if (((h
->nca_loghdr
).nca_version
!= NCA_LOG_VERSION1
) ||
539 ((h
->nca_loghdr
).nca_op
!= log_op
)) {
543 s
= &(h
->nca_logstats
);
545 if (g_n_log_upcall
== 0) {
546 g_n_log_upcall
= s
->n_log_upcall
;
548 if ((++g_n_log_upcall
) != (ssize_t
)s
->n_log_upcall
) {
549 (void) fprintf(stderr
, gettext(
551 " expected record number (%d) is"
552 " different from the one seen (%d)\n."
553 " Resetting the expected record"
554 " number.\n"), g_n_log_upcall
, s
->n_log_upcall
);
556 g_n_log_upcall
= s
->n_log_upcall
;
563 /* convert input binary buffer into CLF */
573 nca_log_buf_hdr_t
*h
;
575 nca_request_log_t
*r
;
580 ssize_t max_input_size
, num_bytes_read
;
585 char clf_timebuf
[CLF_DATE_BUF_LENGTH
];
587 char *http_version_string
;
592 h
= (nca_log_buf_hdr_t
*)ibuf
;
593 s
= &(h
->nca_logstats
);
594 r
= (nca_request_log_t
*)(&(h
[1]));
596 /* OK, it's a valid buffer which we can use, go ahead and convert it */
598 max_input_size
= (ssize_t
)isize
- sizeof (nca_log_buf_hdr_t
);
603 for (n_recs
= 0; n_recs
< s
->n_log_recs
; n_recs
++) {
605 /* Make sure there is enough space in the output buffer */
607 if ((*out_size
>= osize
) ||
608 (num_bytes_read
>= max_input_size
)) {
613 if (http_clf_date(clf_timebuf
, sizeof (clf_timebuf
),
614 ((time_t)r
->start_process_time
))) {
615 /* A start time was specified and we're not there yet */
620 /* Only log valid HTTP ops */
622 if ((! valid_method((int)r
->method
)) ||
623 (! valid_version((int)r
->version
))) {
628 method
= http_method((int)r
->method
);
629 http_version_string
= http_version((int)r
->version
);
631 remote_ip
= ip_dot_addr(r
->remote_host
, (char *)&ip_buf
);
632 if (r
->remote_user_len
) {
633 ruser
= NCA_REQLOG_RDATA(r
, remote_user
);
638 if (r
->request_url_len
) {
639 req_url
= NCA_REQLOG_RDATA(r
, request_url
);
644 count
= (ssize_t
)snprintf(&(obuf
[*out_size
]), osize
- *out_size
,
645 "%s %s %s %s \"%s %s %s\" %d %d\n",
646 ((remote_ip
) ? remote_ip
: "-"),
647 /* should be remote_log_name */
660 er
= ((char *)r
) + NCA_LOG_REC_SIZE(r
);
663 r
= (nca_request_log_t
*)NCA_LOG_ALIGN(er
);
664 num_bytes_read
+= (ssize_t
)(((char *)r
) - br
);
665 if (g_out_records
> 0 && --g_out_records
== 0)
670 (void) fprintf(stderr
, gettext(
671 "Error: ncab2clf: Input buffer not fully converted.\n"));
673 if (n_recs
!= s
->n_log_recs
)
674 (void) fprintf(stderr
, gettext(
675 "Warning: ncab2clf: "
676 "Converted only %d of %d records\n"),
677 n_recs
, s
->n_log_recs
);
684 b2clf(int ifd
, int ofd
)
690 ssize_t num_iterations
, ni
, nh
, no
, olen
;
692 nca_log_buf_hdr_t
*h
;
695 ibuf
= xmalloc(g_infile_blk_size
);
696 obuf
= xmalloc(OUTFILE_BUF_SIZE
);
701 while (! eof_seen
&& g_out_records
!= 0) {
706 /* read the binary header first */
707 nh
= read_n_bytes(ifd
, ibuf
, sizeof (nca_log_buf_hdr_t
));
708 if (nh
!= sizeof (nca_log_buf_hdr_t
)) {
713 if (! is_valid_header(ibuf
)) {
714 (void) fprintf(stderr
, gettext(
716 "Can't convert the input data to CLF\n"));
720 /* read the data to be converted */
722 h
= (nca_log_buf_hdr_t
*)ibuf
;
723 s
= &(h
->nca_logstats
);
725 if (s
->n_log_size
== 0)
728 ni
= read_n_bytes(ifd
, &(ibuf
[nh
]), (ssize_t
)s
->n_log_size
);
732 } else if (ni
< (ssize_t
)s
->n_log_size
) {
739 /* convert binary input into text output */
741 if (b2clf_buf(ibuf
, obuf
, ni
+ nh
, OUTFILE_BUF_SIZE
, &olen
)) {
742 (void) fprintf(stderr
, gettext(
744 "Can't convert the input data to CLF\n"));
749 /* write out the text data */
750 no
= write_n_bytes(ofd
, obuf
, olen
);
756 bzero(ibuf
, nh
+ ni
);
771 main(int argc
, char **argv
)
774 int ifd
; /* input fd - binary log file */
778 char *infile
= NULL
; /* input file name */
779 char *outfile
= NULL
; /* output file name */
783 (void) setlocale(LC_ALL
, "");
785 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
786 #define TEXT_DOMAIN "SYS_TEST"
789 (void) textdomain(TEXT_DOMAIN
);
791 /* parse any arguments */
792 while ((c
= getopt(argc
, argv
, "hvDi:o:b:n:s:")) != EOF
) {
798 infile
= xstrdup(optarg
);
801 g_enable_directio
= false;
804 outfile
= xstrdup(optarg
);
807 g_infile_blk_size
= (KILO_BYTE
* atoi(optarg
));
810 g_out_records
= atoi(optarg
);
813 g_start_time_str
= strdup(optarg
);
814 bzero(&t
, sizeof (t
));
815 if (sscanf(optarg
, "%d/%3s/%d:%d:%d:%d", &t
.tm_mday
,
816 &monstr
[0], &t
.tm_year
, &t
.tm_hour
, &t
.tm_min
,
818 /* Valid CLF time (e.g. 06/Apr/2001:09:14:14) */
821 if (strcasecmp(monstr
,
822 sMonthStr
[t
.tm_mon
]) == 0)
824 } while (t
.tm_mon
++ < 12);
827 } else if (parse_time(optarg
, &t
)) {
830 (void) fprintf(stderr
,
831 gettext("Error: ncab2clf:"
832 " %s: unrecognized date/time.\n"),
837 (void) fprintf(stderr
, gettext("Error: ncab2clf: "
838 "verbose functionality not yet supported\n"));
847 /* set up the input stream */
851 if ((ifd
= open(infile
, O_RDONLY
)) < 0) {
852 (void) fprintf(stderr
,
853 gettext("Error: ncab2clf: "
854 "Failure to open binary log file %s: %s\n"),
855 infile
, strerror(errno
));
863 /* set up the output stream */
867 if ((ofd
= open(outfile
, O_WRONLY
|O_CREAT
, 0644)) < 0) {
868 (void) fprintf(stderr
, gettext(
870 "Failure to open output file %s: %s\n"),
871 outfile
, strerror(errno
));
875 /* Enable directio on output stream if specified */
877 if (g_enable_directio
)
878 (void) directio(ofd
, DIRECTIO_ON
);
884 if ((b2clf(ifd
, ofd
) != 0)) {
885 close_files(ifd
, ofd
);
889 close_files(ifd
, ofd
);
891 if (g_invalid_count
) {
892 (void) fprintf(stderr
, gettext("Warning: ncab2clf: %d"
893 " number of invalid log records encountered in binary input"
894 " file were skipped\n"), g_invalid_count
);
897 (void) fprintf(stderr
, gettext("Warning: ncab2clf:"
898 " %d log records in binary input file before %s"
900 g_skip_count
, g_start_time_str
);