/*
 * gooscan.c -- [aur-mirror.git] / gooscan / gooscan.c
 * blob 1e565ca8409854a2dc852cb4f45c46782c527649
 * updated on Thu Jan 26 16:09:46 UTC 2012
 */
/* V1.0 Security Patches and upgrades by Mike Schiraldi */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <getopt.h>
#include <unistd.h>
/*
 * Program-wide state shared between main() and the query helpers.
 */
char *type = NULL;          /* not referenced in the code shown here; main() and
                               google_query() use their own 'type' names */
char *description = NULL;   /* description text; no function here reads it */
char *google_server = NULL; /* target host given with -t */
char *proxyaddr = NULL;     /* proxy host name parsed from -p */
char recvbuf[65000];        /* response received by inet_send() */
char pretty_query[1024];    /* human-readable query built by google_query() */
char mid_query[1024];       /* "&q=..." query string handed to inet_send() */
char out_query[1024];       /* converted query produced in inet_send() */
char *site = NULL;          /* site restriction given with -s */
char total_string[256];     /* per-query summary text built by google_query() */
char *xtra_stuff = NULL;    /* extra request fields given with -x */

int aflag = 0;              /* not referenced in the code shown here */
int dflag = 0;              /* -d: hex-encode non-alpha characters */
int oflag = 0;              /* -o: write an HTML report */
int pflag = 0;              /* -p: send requests through a proxy */
int sflag = 0;              /* -s: restrict searches to 'site' */
int vflag = 0;              /* -v: verbose output */
int xflag = 0;              /* -x: append 'xtra_stuff' to each query */
int total = 0;              /* sum of result counts, reset by main() per input line */
int cooked = 0;             /* set by main() for "cooked" lines; read by inet_send() */
int proxyport = 0;          /* proxy port parsed from -p */
long results = 0;           /* result count of the most recent query */

struct hostent *he;         /* resolved server/proxy, cached by inet_send() */
struct hostent *pr;         /* not referenced in the code shown here */

long google_query(char *gooflag, char *type, char *value );
void writeit(FILE *fp, char *out_query, char *mid_query, int results, char *flag);
long parse_results(char *recvbuf);
char* inet_convert(char *inbound, int dflag);
int inet_send(char *sendbuf);
int usage(void);
/*
 * Split a "host:port" proxy specification in place.
 *
 * On success the ':' in spec is overwritten with a NUL, *host points at the
 * host part inside spec and *port holds the port number (1-65535).
 * Returns 0 on success, -1 when the host or port is missing or the port is
 * not a valid number.
 */
static int parse_proxy_spec(char *spec, char **host, int *port)
{
    char *colon = strchr(spec, ':');
    char *end = NULL;
    long value;

    if (colon == NULL || colon == spec || colon[1] == '\0')
        return -1;

    value = strtol(colon + 1, &end, 10);
    if (*end != '\0' || value < 1 || value > 65535)
        return -1;

    *colon = '\0';
    *host = spec;
    *port = (int)value;
    return 0;
}

/*
 * Entry point: parse the command line, then run either a single query (-q)
 * or every query listed in an input file (-i), optionally appending an HTML
 * report to the file given with -o.
 *
 * Input file lines have the form  type|value|count|description ; only the
 * first two fields are used. "file" lines are queried with an index.of search
 * (unless the site-wide index.of probe returned nothing) and an inurl search;
 * for any other line the type itself is passed to google_query() as the
 * search operator.
 *
 * Returns 0 on completion; argument and I/O errors terminate via usage() or
 * exit(1).
 */
int main(int argc, char *argv[])
{
    extern char *proxyaddr;
    extern char *google_server;
    extern char pretty_query[1024];
    extern char mid_query[1024];
    extern char out_query[1024];
    extern char *site;
    extern char *xtra_stuff;
    extern char total_string[256];
    extern int dflag, oflag, pflag, sflag, vflag, xflag;
    extern int total;
    extern int cooked;
    extern int proxyport;
    extern long results;

    char *in_query = NULL;
    char *in_filename = NULL;
    char *out_filename = NULL;
    char *proxy_host = NULL;
    char list[1024];
    char build_buf[256];

    int c;
    int fflag = 0, qflag = 0;
    int indexofflag = 0;

    FILE *scanlist = NULL;
    FILE *outfile = NULL;

    time_t timer;

    if (argc == 1) usage();

    while ((c = getopt(argc, argv, "acdhvi:o:p:q:s:t:x:")) != -1) {
        switch (c) {
        case 'd': // decode flag
            dflag = 1;
            break;
        case 'p': // proxy server
            if (parse_proxy_spec(optarg, &proxy_host, &proxyport) != 0) {
                printf("Error: Proxy address must be like address:port.\n");
                usage();
            }
            pflag = 1;
            proxyaddr = proxy_host; // points into argv, valid for the whole run
            break;
        case 'q': // inline query
            qflag = 1;
            in_query = optarg;
            break;
        case 'i': // input query file
            fflag = 1;
            in_filename = optarg;
            break;
        case 'o': // output html file
            oflag = 1;
            out_filename = optarg;
            break;
        case 's': // site variable
            sflag = 1;
            site = optarg;
            break;
        case 't': // target_server
            google_server = optarg;
            break;
        case 'v': // verbose
            vflag = 1;
            break;
        case 'x': // extra stuff for appliances
            xflag = 1;
            xtra_stuff = optarg;
            break;
        case 'h': // help
            usage();
            break;
        default:
            usage();
            break;
        }
    }

    // Clean up the server, if necessary: drop a leading scheme by advancing
    // the pointer instead of copying within the argv string.
    if (google_server != NULL && strncmp(google_server, "http://", 7) == 0)
        google_server += 7;

    // A bunch of errors for mixed-up user parameters
    if (google_server == NULL || google_server[0] == '\0') {
        printf("You must provide the google server with the -t argument!\n");
        usage();
    }

    if (strstr(google_server, "www.google.com") != NULL) {
        printf("\n***!!! WARNING: You are querying a www.google.com server !!!***\n");
        printf("This tool was designed to query Google appliances, not the google.com website.\n");
        printf("The google.com scanning functionality is included for EDUCATIONAL PURPOSES ONLY \n");
        printf("to help webmasters determine the potential Google exposure of their sites.\n");
        printf("\nDo you acknowledge that: \n");
        printf(" - You are knowingly violating Google's terms of service found at\n");
        printf(" http://www.google.com/terms_of_service.html\n");
        printf(" - You are using this tool to assess your own web site's exposure\n");
        printf(" - The use of this tool in this way is not condoned by the author\n");
        printf(" - You will not hold the author liable in any way for the use of this tool\n\n");
        printf("Agree? (y/n) [n] ");
        if (getchar() != 'y') exit(1);
    }

    if (!qflag && !fflag) {
        printf("Error: supply either -q or -i flag.\n");
        usage();
    }
    if (qflag && fflag) {
        printf("Error: -q and -i should not be used together.\n");
        printf("-q allows for a single Google query from the command line,\n");
        printf("-i allows for multiple Google queries read from a file.\n");
        usage();
    }

    // OK, the user wants output. Open the file, write the header.
    if (oflag) {
        if ((outfile = fopen(out_filename, "a")) == NULL) {
            perror(out_filename);
            exit(1);
        }
        writeit(outfile, "", "<br><CENTER><b>Gooscan Results</b></CENTER><br>", 0, "raw");
        snprintf(build_buf, sizeof build_buf, "<center>site: %s<br>input file: %s<br>",
                 sflag ? site : "none", fflag ? in_filename : "none");
        writeit(outfile, " ", build_buf, 0, "raw");
        timer = time(NULL);
        snprintf(build_buf, sizeof build_buf, "Executed: %s</CENTER><br>",
                 asctime(localtime(&timer)));
        writeit(outfile, " ", build_buf, 0, "raw");
        writeit(outfile, NULL, NULL, 0, "header");
    }

    // OK, user wants one query. Run it, write it, close the outfile, exit.
    if (qflag) {
        results = google_query("raw", "raw", in_query);
        printf("\"%s\" returned %ld results.\n", in_query, results);
        if (oflag) writeit(outfile, out_query, pretty_query, (int)results, "line");
    }
    // Here's the fun stuff. Queries from an infile.
    else if (fflag) {
        if (sflag) printf("Searching google for site %s...\n", site);

        if ((scanlist = fopen(in_filename, "r")) == NULL) {
            perror(in_filename);
            exit(1);
        }

        // If we're searching a site, do a generic index.of.
        // If that fails, never try index.of search again.
        if (sflag) {
            results = google_query("raw", "raw", "intitle:index.of");
            if (results == 0) {
                indexofflag = 0;
                printf("Generic index.of search returned no results.\n");
                printf("Skipping index.of tests.\n");
            } else {
                indexofflag = 1;
                printf("Generic index.of search returned %ld results\n", results);
            }
        } else {
            indexofflag = 1;
        }

        // The main file reading loop
        while (fgets(list, sizeof list, scanlist) != NULL) {
            // Sized so that "type:value" always fits in total_string[256].
            char q_type[48];
            char q_value[200];
            char *field;

            total = 0;
            cooked = 0;
            memset(total_string, '\0', sizeof total_string);
            memset(mid_query, '\0', sizeof mid_query);
            memset(out_query, '\0', sizeof out_query);

            // Drop the line terminator so it cannot end up inside a field.
            list[strcspn(list, "\r\n")] = '\0';

            // A usable line needs at least "type|value"; skip anything else
            // instead of dereferencing a NULL token.
            if ((field = strtok(list, "|")) == NULL) {
                if (vflag) printf("Malformed line in data file. Skipping...\n");
                continue;
            }
            snprintf(q_type, sizeof q_type, "%s", field);

            if ((field = strtok(NULL, "|")) == NULL) {
                if (vflag) printf("Malformed line in data file. Skipping...\n");
                continue;
            }
            snprintf(q_value, sizeof q_value, "%s", field);

            if (strcmp(q_type, "cooked") == 0) cooked = 1;

            // for "file" lines, we do inurl and indexof searches
            if (strcmp(q_type, "file") == 0) {
                if (indexofflag == 1) {
                    results = google_query("indexof", q_type, q_value);
                    if (oflag) writeit(outfile, out_query, pretty_query, (int)results, "line");
                }
                // inurl: test
                results = google_query("inurl", q_type, q_value);
                if (oflag) writeit(outfile, out_query, pretty_query, (int)results, "line");

                if (results > 0 || vflag) printf("%sTotal:%d\n", total_string, total);
            } else {
                // The line's type doubles as the search operator.
                results = google_query(q_type, "Results", q_value);
                if (results >= 0) {
                    if (oflag) writeit(outfile, out_query, pretty_query, (int)results, "line");
                    if (results > 0 || vflag) printf("%s\n", total_string);
                } else if (vflag) {
                    printf("Unknown type in data file (%s). Skipping...\n", q_type);
                }
            }
        }
        fclose(scanlist);
    }

    if (oflag) {
        writeit(outfile, NULL, NULL, 0, "footer");
        if (fclose(outfile) != 0) perror(out_filename);
    }

    return 0;
}
/*
 * Build a query for one search operator, send it and record the outcome.
 *
 * gooflag selects the operator: "indexof", "inurl", "intitle", "raw" or
 * "filetype". type is only used as the label at the start of total_string.
 * value is the search term.
 *
 * Side effects: fills pretty_query and mid_query, appends "gooflag:count" to
 * total_string, stores the count in the global 'results' and adds it to
 * 'total'.
 *
 * Returns the result count, 0 if the query could not be sent, or -1 when
 * gooflag is not a known operator.
 */
long google_query(char *gooflag, char *type, char *value ) {
    extern char pretty_query[1024]; // a human-readable version
    extern char mid_query[1024];    // a version for the web server
    extern char recvbuf[65000];
    extern char total_string[256];
    extern char *site;
    extern char *xtra_stuff;
    extern int sflag;
    extern int vflag;
    extern int xflag;
    extern int total;
    extern long results;

    static const struct {
        const char *flag;
        const char *prefix;
    } operators[] = {
        { "indexof",  "intitle:index.of+" },
        { "inurl",    "inurl:" },
        { "intitle",  "intitle:" },
        { "raw",      "" },
        { "filetype", "filetype:" },
    };

    char scratch[1024];
    const char *prefix = NULL;
    size_t i;

    // The first query for an input line labels the summary string.
    if (total_string[0] == '\0')
        snprintf(total_string, sizeof total_string, "%s:%s", type, value);

    if (vflag) printf("(verbose) command line query => %s\n", value);

    for (i = 0; i < sizeof operators / sizeof operators[0]; i++) {
        if (strcmp(gooflag, operators[i].flag) == 0) {
            prefix = operators[i].prefix;
            break;
        }
    }
    if (prefix == NULL)
        return -1;

    // pretty_query will not have any of the below ugliness
    snprintf(pretty_query, sizeof pretty_query, "%s%s", prefix, value);
    snprintf(mid_query, sizeof mid_query, "&q=%s", pretty_query);

    // Each addition is formatted into scratch first because snprintf must
    // not read from the buffer it is writing to.
    if (sflag) {
        snprintf(scratch, sizeof scratch, "%s+site:%s", mid_query, site);
        snprintf(mid_query, sizeof mid_query, "%s", scratch);
        if (vflag)
            printf("(verbose) site added. query is now: %s\n", mid_query);
    }

    if (xflag) {
        snprintf(scratch, sizeof scratch, "%s&%s", mid_query, xtra_stuff);
        snprintf(mid_query, sizeof mid_query, "%s", scratch);
        if (vflag)
            printf("(verbose) extra stuff added. query is now: %s\n", mid_query);
    }

    if (vflag) printf("(verbose) Sending query => %s\n", mid_query);

    if (inet_send(mid_query) != 0) {
        printf("Error sending query.\n");
        return 0;
    }
    results = parse_results(recvbuf);

    snprintf(scratch, sizeof scratch, "%s %s:%ld ", total_string, gooflag, results);
    snprintf(total_string, sizeof total_string, "%s", scratch);
    total += (int)results;
    return results;
}
/*
 * Write one piece of the HTML report to fp.
 *
 * flag selects what is written:
 *   "header" - style block and the opening of the results table
 *   "line"   - one table row: mid_query as the label (bold when results is
 *              non-zero), a link built from google_server and the
 *              hex-encoded out_query, and the result count
 *   "footer" - table close and credits
 *   "raw"    - mid_query verbatim
 * Any other flag writes nothing. out_query, mid_query and results are only
 * read by the branches described above.
 */
void writeit(FILE *fp, char *out_query, char *mid_query, int results, char *flag) {
    extern char *google_server;

    if (strcmp(flag, "header") == 0) {
        fprintf(fp, "<style>BODY {FONT-FAMILY: Verdana,Helvetica; FONT-SIZE: 12px}</style>");
        fprintf(fp, "<CENTER><TABLE BORDER=1>\n");
        fprintf(fp, "<TR><TD>Search</TD><TD>Link</TD><TD>Results</TD></TR>");
    }
    else if (strcmp(flag, "line") == 0) {
        const char *bold_start = (results == 0) ? "" : "<b>";
        const char *bold_end = (results == 0) ? "" : "</b>";
        // inet_convert() hands back a heap buffer that this function owns.
        char *encoded = inet_convert(out_query, 1);

        fprintf(fp, "<TR><TD>%s%s%s</TD><TD><A HREF=\"http://%s/search?%s\">link</A></TD>"
                "<TD align=\"RIGHT\">%d</TD></TR>\n", bold_start, mid_query, bold_end,
                google_server, encoded != NULL ? encoded : "", results);
        free(encoded);
    }
    else if (strcmp(flag, "footer") == 0) {
        fprintf(fp, "</TABLE>\n");
        fprintf(fp, "<BR>gooscan by j0hnny<br><A HREF=\"http://johnny.ihackstuff.com\">http://johnny.ihackstuff.com</A><BR>");
    }
    else if (strcmp(flag, "raw") == 0) {
        fprintf(fp, "%s", mid_query);
    }
}
/*
 * Convert a count such as "1,000" to a long, ignoring thousands separators.
 * At most 24 characters are kept; a NULL text yields 0.
 */
static long parse_count(const char *text)
{
    char digits[25];
    size_t i;
    size_t j = 0;

    if (text == NULL)
        return 0;

    for (i = 0; text[i] != '\0' && j < sizeof digits - 1; i++) {
        if (text[i] != ',')
            digits[j++] = text[i];
    }
    digits[j] = '\0';
    return atol(digits);
}

//Pull apart Google's HTML to find this string:
// "Results <b>1</b> - <b>10</b> of <b>1000</b>."
// (numbers and [of|of about] may vary) This means we got hits.
//
// recvbuf is the full response including the status line. It is modified in
// place, because strtok() writes NUL bytes into it.
//
// Returns the number of results, or 0 when the status line does not contain
// "200" or no result count can be found.
long parse_results(char *recvbuf) {
    extern int vflag;
    char status_line[1024];
    char *ptr;
    char *buf;
    size_t len;

    // Grab the first line from the web server, without its line terminator
    // and never more than the buffer can hold.
    len = strcspn(recvbuf, "\r\n");
    if (len >= sizeof status_line)
        len = sizeof status_line - 1;
    memcpy(status_line, recvbuf, len);
    status_line[len] = '\0';

    if (vflag)
        printf("(verbose) Status line from server: \"%s\"\n", status_line);

    if (strstr(status_line, "200") == NULL) {
        printf("Error! Received \"%s\" from server!\n", status_line);
        return 0;
    }

    ptr = strstr(recvbuf, "Results <b>");
    if (ptr != NULL) {
        // Find the actual number of results. strtok() treats its second
        // argument as a SET of delimiter characters, so the calls below walk
        // over: the first number, the "-", the second number, the "/b" of the
        // closing tag, the " of [about] <b" text, and finally the total that
        // sits between '>' and '<'.
        buf = strtok(ptr, "Results <b>");
        buf = strtok(NULL, " </b>");
        buf = strtok(NULL, " </b>");
        buf = strtok(NULL, ">");
        buf = strtok(NULL, ">");
        buf = strtok(NULL, "<");
        return parse_count(buf);
    }

    ptr = strstr(recvbuf, "of about <b>");
    if (ptr != NULL) {
        // Same delimiter-set trick: skips "of about <b>" and stops at '<'.
        buf = strtok(ptr, "of about <b>");
        return parse_count(buf);
    }

    return 0;
}
// Convert a string (the query) to modified web-hex (! becomes %21, etc)
// oddities: (space) becomes +
//           + stays +
//           & and = stay as they are
//           (alpha) stays (alpha)
// Every other character is copied unchanged when dflag is 0 and written as
// a two-digit %xx escape otherwise.
//
// Returns a newly allocated, NUL-terminated buffer of 1024 bytes that the
// caller must free(). Output that would not fit is cut off at a character
// boundary (never in the middle of an escape). A NULL inbound is treated as
// an empty string. Exits the program if the buffer cannot be allocated.
char* inet_convert(char *inbound, int dflag) {
    enum { OUT_SIZE = 1024 };
    extern int vflag;
    char *outbound;
    size_t used = 0;
    size_t i;

    if ((outbound = calloc(OUT_SIZE, sizeof(char))) == NULL) {
        printf("Error allocating memory for outbound. Aborting.\n");
        exit(1);
    }
    if (inbound == NULL)
        inbound = "";

    for (i = 0; inbound[i] != '\0'; i++) {
        // unsigned so bytes >= 0x80 are not sign-extended by %x
        unsigned char ch = (unsigned char)inbound[i];
        int is_alpha = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');

        if (ch == ' ')
            ch = '+';

        if (is_alpha || ch == '+' || ch == '&' || ch == '=' || dflag == 0) {
            if (used + 1 >= OUT_SIZE)
                break;
            outbound[used++] = (char)ch;
        } else {
            if (used + 3 >= OUT_SIZE)
                break;
            snprintf(outbound + used, OUT_SIZE - used, "%%%02x", (unsigned)ch);
            used += 3;
        }
    }
    outbound[used] = '\0';

    if (vflag) printf("(verbose) post-converted string: \"%s\"\n", outbound);
    return outbound;
}
//Format and send constructed string to Google
//
// sendbuf is the "&q=..." query string. Unless the global 'cooked' flag is
// set, it is run through inet_convert() into out_query and wrapped in an
// HTTP/1.0 GET request (an absolute URL is used when going through a proxy).
// With 'cooked' set, out_query is sent as-is.
//
// The host (or proxy) is resolved once and cached in the global 'he'. The
// response is read into recvbuf and is always NUL-terminated.
//
// Returns 0 on success and 1 when the request could not be built, connected,
// sent or answered. Exits the program if name resolution or socket creation
// fails.
int inet_send(char *sendbuf) {
    extern char *proxyaddr;
    extern char *google_server;
    extern char recvbuf[65000];
    extern char out_query[1024];
    extern int proxyport;
    extern int cooked, vflag, pflag, dflag;
    extern struct hostent *he;

    char get_line[2048];
    struct sockaddr_in http;
    int sock;
    int len;
    size_t request_len;
    size_t sent = 0;
    size_t received = 0;
    ssize_t n;

    // if you pass in a "cooked" line, it's assumed that this
    // is ready to send without any formatting
    if (cooked) {
        len = snprintf(get_line, sizeof get_line, "%s", out_query);
    } else {
        char *encoded = inet_convert(sendbuf, dflag);

        snprintf(out_query, sizeof out_query, "%s", encoded != NULL ? encoded : "");
        free(encoded);

        if (!pflag)
            len = snprintf(get_line, sizeof get_line,
                           "GET /search?%s HTTP/1.0\n\n", out_query);
        else
            len = snprintf(get_line, sizeof get_line,
                           "GET http://%s/search?%s HTTP/1.0\n\n",
                           google_server, out_query);
    }
    if (len < 0 || (size_t)len >= sizeof get_line) {
        printf("Error: request line too long.\n");
        return 1;
    }

    if (vflag) printf("(verbose) GET Line => %s\n", get_line);

    // if there's no resolved hostname...
    if (he == NULL) {
        printf("doing lookup of %s...\n", google_server);
        // get one.
        if (pflag) {
            if ((he = gethostbyname(proxyaddr)) == NULL) {
                perror("gethostbyname");
                exit(1);
            }
        } else {
            if ((he = gethostbyname(google_server)) == NULL) {
                printf("Host not found: %s\n", google_server);
                exit(1);
            }
        }
    }

    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
        perror("socket");
        exit(1);
    }

    memset(&http, 0, sizeof http);
    http.sin_family = AF_INET;
    http.sin_port = htons((unsigned short)(pflag ? proxyport : 80));
    memcpy(&http.sin_addr, he->h_addr_list[0], sizeof http.sin_addr);

    if (connect(sock, (struct sockaddr *)&http, sizeof http) == -1) {
        close(sock);
        perror("connect");
        return 1;
    }

    // send() may accept only part of the request; keep going until done.
    request_len = strlen(get_line);
    while (sent < request_len) {
        n = send(sock, get_line + sent, request_len - sent, 0);
        if (n < 0) {
            if (errno == EINTR)
                continue;
            perror("send");
            close(sock);
            return 1;
        }
        sent += (size_t)n;
    }

    // Read until the server closes the connection or the buffer is full,
    // keeping one byte free for the terminator that parse_results() needs.
    while (received < sizeof recvbuf - 1) {
        n = recv(sock, recvbuf + received, sizeof recvbuf - 1 - received, 0);
        if (n == 0)
            break;
        if (n < 0) {
            if (errno == EINTR)
                continue;
            if (received == 0) {
                perror("recv");
                close(sock);
                return 1;
            }
            break; // keep what was already received
        }
        received += (size_t)n;
    }
    recvbuf[received] = '\0';
    close(sock);

    return 0;
}
/*
 * Print the command-line help to stdout and terminate the program with
 * exit status 1. This function never returns to its caller.
 */
int usage(void) {
    static const char *const help_lines[] = {
        "gooscan <-q query | -i query_file> <-t target>\n",
        " [-o output_file] [-p proxy:port] [-v] [-d] \n",
        " [-s site] [-x xtra_appliance_fields]\n",
        " ----------------------------------------------------------------\n",
        " (query) is a standard google query (EX: \"intitle:index.of\")\n",
        " (query_file) is a list of google queries (see README)\n",
        " (target) is the Google appliance/server\n",
        " (output_file) is where the HTML-formatted list of results goes\n",
        " (proxy:port) address:port of a valid HTTP proxy for bouncing\n",
        " (site) restricts search to one domain, like microsoft.com\n",
        " (xtra_appliance_fields) are required for appliance scans\n",
        " -v turns on verbose mode\n",
        " -d hex-encodes all non-alpha characters\n",
        "Friendly example: \n",
        "gooscan -t google.fda.gov -q food \n",
        " -x \"&client=FDA&site=FDA&output=xml_no_dtd&oe=&lr=&proxystylesheet=FDA\"\n",
        "Google terms-of-service violations:\n",
        "gooscan -t www.google.com -q \"linux\" \n",
        "gooscan -t www.google.com -q \"linux\" -s microsoft.com \n",
        // The query-file option is -i; "-f" is not accepted by getopt.
        "gooscan -t www.google.com -i gdork.gs\n\n",
        "Gooscan google scanner by j0hnny http://johnny.ihackstuff.com\n",
    };
    size_t i;

    for (i = 0; i < sizeof help_lines / sizeof help_lines[0]; i++)
        fputs(help_lines[i], stdout);

    exit(1);
}