9 #include "MurmurHash3.h"
11 #include "2bitarray.h"
16 //KSEQ_INIT(gzFile, gzread)
19 const char *argp_program_version =
20 "readscorr 0.1 @"__TIME__ "," __DATE__;
21 const char *argp_program_bug_address =
22 "<huxuesong@genomics.org.cn>";
24 /* Program documentation. */
26 "a Solexa Reads Corrector using Bloom filter"
32 /* A description of the arguments we accept. */
33 static char args_doc[] = "input_list (of FASTA or FASTQ files, gzipped is OK)";
35 /* The options we understand. */
36 static struct argp_option options[] = {
37 {"interactive", 'i', 0, 0, "Prompt arguments before procedure"},
38 {"verbose", 'v', 0, 0, "Produce verbose output" },
39 {"quiet", 'q', 0, 0, "Don't produce any output" },
40 //{"silent", 's', 0, OPTION_ALIAS },
41 {"outprefix",'o', "./out",0, "Output to [./out.{dat,stat,log}]" },
42 {"kmersize", 'k', "21", 0, "K-mer size, must be odd number" },
43 {"bloomsize",'b', "256", 0, "Size in MiB for Bloom Filter" },
47 /* Used by main to communicate with parse_opt. */
49 char *args[1]; /* arg1 */
50 uint_fast8_t silent, verbose, interactive; //_Bool is the same under amd64, as typedef unsigned char uint_fast8_t;
51 int bloomsize, kmersize;
55 /* Parse a single option. */
57 parse_opt (int key, char *arg, struct argp_state *state) {
58 /* Get the input argument from argp_parse, which we
59 know is a pointer to our arguments structure. */
60 struct arguments *arguments = state->input;
65 arguments->silent = 1;
68 arguments->verbose = 1;
71 arguments->interactive = 1;
74 arguments->outprefix = arg;
77 tmpArgValue = atoi(arg);
78 if (tmpArgValue>2 && tmpArgValue%2) { // odd numbers only
79 arguments->kmersize = tmpArgValue;
81 errx(2,"-k \"%s\"=%i is not a positive odd number !",arg,tmpArgValue);
85 tmpArgValue = atoi(arg);
87 arguments->bloomsize = tmpArgValue;
89 errx(2,"-b \"%s\"=%i is not a positive number !",arg,tmpArgValue);
93 if (state->arg_num >= 1)
94 /* Too many arguments. */
97 arguments->args[state->arg_num] = arg;
102 if (state->arg_num < 1) /* Not enough arguments. */
107 return ARGP_ERR_UNKNOWN;
112 /* Our argp parser. */
113 static struct argp argp = { options, parse_opt, args_doc, doc };
116 int main (int argc, char **argv)
119 MurmurHash3_x64_128("test",4,123,tout);
121 struct arguments arguments;
124 arguments.silent = 0;
125 arguments.verbose = 0;
126 arguments.outprefix = "./out";
128 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
129 argp_parse (&argp, argc, argv, 0, 0, &arguments);
131 printf ("ARG1 = %s\nOutputPrefix = %s\n"
132 "VERBOSE = %s\nSILENT = %s\n",
135 arguments.verbose ? "yes" : "no",
136 arguments.silent ? "yes" : "no");
138 char * str=(char *) arguments.outprefix;
139 size_t len=strlen(str);
140 uint32_t key=atoi(arguments.args[0]);
141 MurmurHash3_x64_128(str,len,key,tout);
142 printf("Str:[%s] Seed:[%i] -> [%016lx %016lx]\n",str,key,tout[0],tout[1]);
148 fp = gzopen(arguments.args[0], "r");
150 while ((l = kseq_read(seq)) >= 0) {
151 printf("name: %s\n", seq->name.s);
152 if (seq->comment.l) printf("comment: %s\n", seq->comment.s);
153 printf("seq: %s\n", seq->seq.s);
154 if (seq->qual.l) printf("qual: %s\n", seq->qual.s);
156 printf("return value: %d\n", l);
165 int main (int argc, char **argv) {
166 struct arguments arguments;
169 arguments.silent = 0;
170 arguments.verbose = 0;
171 arguments.interactive = 0;
172 arguments.outprefix = "./out";
173 arguments.kmersize = 21;
174 arguments.bloomsize = 256;
176 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
177 argp_parse (&argp, argc, argv, 0, 0, &arguments);
179 size_t bloomLen = 512*1024*arguments.bloomsize;
180 if (arguments.interactive) {
181 printf ("ARG1 = %s\nOutputPrefix = %s\n"
182 "VERBOSE = %s\nSILENT = %s\n"
183 "kmersize = %i, bloomsize = %i MiB (%zu Bytes)\n",
186 arguments.verbose ? "yes" : "no",
187 arguments.silent ? "yes" : "no",
188 arguments.kmersize,arguments.bloomsize,bloomLen);
190 //printf("[%i]\n",pressAnyKey());
194 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
197 for (t=0;t<=65;t++) {
200 printf("%d -> %d\n",t,k);
208 fp = fopen(arguments.args[0], "r");
209 if (fp == NULL) err(EXIT_FAILURE, "Cannot open input_list [%s]", arguments.args[0]); //exit(EXIT_FAILURE);
211 fputs("\nFirst pass:\n", stderr);
212 DLeftArray_t *dleftp = dleft_arrayinit(9,27,1000,4);
213 fputs("DLA nfo: ", stderr);
214 fprintDLAnfo(stderr,dleftp);
215 while ((read = getline(&line, &len, fp)) != -1) {
217 if (*line=='\n') continue; // skip empty lines, which is definitely impossible
218 if (*(line+read-1)=='\n') *(line+(--read))='\0'; // line[read-1] = '\0';
219 fprintf(stderr, " <%s> ...", line);
220 //sleep(1); // the Call ...
221 SeqFileObj *seqobj = inSeqFinit(line,GFIOCHRBASE|GFIODIBBASE);
223 while ( (readlength = (*seqobj->getNextSeq)(seqobj) >= 0) ) {
225 printf("-ID:[%s,%s] %zu %zu\nSeq:[%s]\nQ:[%s] %zx\n",
226 seqobj->name,seqobj->comment,seqobj->readlength,seqobj->binMallocedQQWord,
227 seqobj->seq,seqobj->qual,(size_t)seqobj->seq);
230 for (i=0;i<seqobj->readlength/32;i++) {
231 printf("[%s]",tmpstr=unit2basechr(seqobj->diBseq[i]));
235 printf("[%s]\n",tmpstr=dib2basechr(seqobj->diBseq,seqobj->readlength));
237 uint64_t *revcomseq=dibrevcomp(seqobj->diBseq,seqobj->readlength);
238 printf("[%s]\n",tmpstr=dib2basechr(revcomseq,seqobj->readlength));
240 uint64_t *revcomseq2=dibrevcomp(revcomseq,seqobj->readlength);
241 printf("[%s]\n",tmpstr=dib2basechr(revcomseq2,seqobj->readlength));
245 fputs("\b\b\b\b, done !\n", stderr);
246 inSeqFdestroy(seqobj);
250 fputs("\nSecond pass:\n", stderr);
251 while ((read = getline(&line, &len, fp)) != -1) {
252 if (*(line+read-1)=='\n') *(line+(--read))='\0';
253 printf("[%zu]+%zu <%s>\n", read, len, line);
255 fputs("\nHashing Done!\n", stderr);
257 dleft_arraydestroy(dleftp);