modified: nfig1.py
[GalaxyCodeBases.git] / c_cpp / coverages / constator.c
blob4f0f5277f1830fba2c9b5da59515e51b73979da3
1 #define _GNU_SOURCE
2 #include <stdlib.h> //EXIT_FAILURE
3 #include <stdio.h>
4 #include <stdint.h>
5 #include <string.h>
6 #include <err.h>
7 #include <argp.h>
8 #include <math.h>
9 #include <bam/sam.h>
10 #include "uthash/utarray.h"
11 #include "getch.h"
12 #include "timer.h"
13 #include "chrtable.h"
15 //#define MAXREADLEN (8ul*1024*1024)
16 //uint64_t ReadsLenArr[MAXREADLEN];
18 const char *argp_program_version =
19 "Continuous Stator 0.1 (SAM/BAM) @"__TIME__ "," __DATE__;
20 const char *argp_program_bug_address =
21 "<galaxy001@gmail.com>";
23 /* Program documentation. */
24 static char doc[] =
25 "Continuous Stator"
26 #ifdef DEBUG
27 " (Debug Version)"
28 #endif
29 #ifdef TEST
30 " (Test Version)"
31 #endif
34 int UT_array_intsort(const void *a,const void*b) {
35 int _a = *(int*)a;
36 int _b = *(int*)b;
37 return _a - _b;
39 //struct ChrData_hash_struct *ChrData = NULL; /* important! initialize to NULL */
41 /* A description of the arguments we accept. */
42 static char args_doc[] = "input_files (SAM or BAM)";
44 /* The options we understand. */
45 static struct argp_option options[] = {
46 {"interactive", 'i', 0, 0, "pause before procedure"},
47 {"sam", 's', 0, 0, "input is SAM" },
48 {"overlap_length", 'l', "25", 0, "min overlap to connect reads" },
49 {"min_depths", 'd', "1,2,3,5",0, "list of min acceptable depth" },
50 {"out_prefix", 'o', "./ctg", 0, "prefix of output files" },
51 { 0 }
54 /* Used by main to communicate with parse_opt. */
55 struct arguments {
56 uint_fast8_t isSAM, interactive;
57 uint16_t overlap;
58 char *deplstStr;
59 UT_array *deplst;
60 char **args;
61 char *outfile;
64 /* Parse a single option. */
65 static error_t
66 parse_opt (int key, char *arg, struct argp_state *state) {
67 /* Get the input argument from argp_parse, which we
68 know is a pointer to our arguments structure. */
69 struct arguments *arguments = state->input;
70 //utarray_new(arguments->deplst,&ut_int_icd);
71 int tmpArgValue;
72 switch (key) {
73 case 'l':
74 tmpArgValue = atoi(arg);
75 if (tmpArgValue>=1 && tmpArgValue < UINT16_MAX) { // k=l+1, so not == UINT16_MAX
76 arguments->overlap = tmpArgValue;
77 } else {
78 errx(2,"-%c \"%s\"=%i is not a integer of [1,%u] !",key,arg,tmpArgValue,UINT16_MAX-1);
80 break;
81 case 'd':
83 char *tmpStr, *tmpStrN;
84 if ( (tmpStrN = tmpStr = malloc(strlen(arg)+2)) == NULL ) // +1 for '\0' and +1 for extra ','
85 err(3,NULL);
86 //strcpy(tmpStr,arg);
87 //printf("[%s] %s\n",tmpStr,arg);
88 utarray_init(arguments->deplst,&ut_int_icd);
89 for ( strcpy(tmpStrN,arg);
91 tmpStrN = NULL) {
92 char *token = strtok(tmpStrN, ",; .");
93 if (token == NULL) break;
94 tmpArgValue = atoi(token);
95 if (tmpArgValue>=1 && tmpArgValue <= UINT8_MAX) {
96 utarray_push_back(arguments->deplst,&tmpArgValue);
97 //printf("[%s] %s %d\n",tmpStr,token,tmpArgValue);
98 } else {
99 errx(2,"In -%c \"%s\":[%i] is not a integer of [1,%u] !",key,arg,tmpArgValue,UINT8_MAX);
102 utarray_sort(arguments->deplst,&UT_array_intsort);
103 //free(tmpStr);
104 *tmpStr = '\0';
105 if ( (tmpStrN = malloc(strlen(arg)+2)) == NULL ) err(3,NULL);
106 for ( int *p=(int*)utarray_front(arguments->deplst);
107 p!=NULL;
108 p=(int*)utarray_next(arguments->deplst,p) ) {
109 sprintf(tmpStrN,"%i,",*p);
110 strcat(tmpStr,tmpStrN);
112 free(tmpStrN);
113 arguments->deplstStr = tmpStr;
114 tmpStr += strlen(tmpStr)-1;
115 *tmpStr = '\0';
116 //printf("[%s] %s\n",arguments->deplstStr,arg);
117 break;
118 case 'o':
119 arguments->outfile = arg;
120 break;
121 case 's':
122 arguments->isSAM = 1;
123 break;
124 case 'i':
125 arguments->interactive = 1;
126 break;
128 case ARGP_KEY_ARG:
129 arguments->args[state->arg_num] = arg;
130 break;
132 case ARGP_KEY_END:
133 if (state->arg_num < 1) /* Not enough arguments. */
134 //argp_usage (state);
135 argp_state_help (state,stderr,ARGP_HELP_STD_HELP);
136 break;
138 default:
139 return ARGP_ERR_UNKNOWN;
141 return 0;
144 /* Our argp parser. */
145 static struct argp argp = { options, parse_opt, args_doc, doc };
147 int ChrDat_init(bam_header_t *samhead) {
148 char **lpChrID = samhead->target_name;
149 uint32_t *lpChrLen = samhead->target_len;
150 if (Data.n_targets == 0) {
151 Data.n_targets = samhead->n_targets;
152 Data.ChrDat = malloc( Data.n_targets * sizeof(size_t));
153 Data.target_len = malloc( Data.n_targets * sizeof(size_t));
154 Data.target_name = malloc( Data.n_targets * sizeof(size_t));
155 puts("\nChr NFO:");
156 for (int32_t i=0; i < Data.n_targets; ++i) {
157 //Data.ChrDat[i] = malloc(*(lpChrLen+i) * sizeof(uint8_t));
158 Data.target_len[i] = *(lpChrLen+i);
159 Data.ChrDat[i] = calloc(Data.target_len[i], sizeof(uint8_t));
160 Data.target_name[i] = malloc(strlen(*(lpChrID+i))+1);
161 strcpy(Data.target_name[i],*(lpChrID+i));
162 //printf("%i: %s\t%u\n",i,*(lpChrID+i),*(lpChrLen+i));
163 printf("%i: %s\t%u\n",i,Data.target_name[i],Data.target_len[i]);
165 } else {
166 if (Data.n_targets == samhead->n_targets) {
167 printf("Same reference count: %i.\n",samhead->n_targets);
168 // more check ?
169 } else {
170 errx(1,"Different reference.");
173 return 0;
176 int main (int argc, char **argv) {
177 struct arguments arguments;
178 arguments.args=calloc(sizeof(size_t),argc);
180 // Default values.
181 arguments.isSAM = 0;
182 arguments.interactive = 0;
183 arguments.deplstStr = "1,2,3,5";
184 int tmpArgValue;
185 utarray_new(arguments.deplst,&ut_int_icd);
186 tmpArgValue = 1; utarray_push_back(arguments.deplst,&tmpArgValue);
187 tmpArgValue = 2; utarray_push_back(arguments.deplst,&tmpArgValue);
188 tmpArgValue = 3; utarray_push_back(arguments.deplst,&tmpArgValue);
189 tmpArgValue = 5; utarray_push_back(arguments.deplst,&tmpArgValue);
190 arguments.overlap = 25;
191 arguments.outfile = "./ctg";
193 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
194 argp_parse (&argp, argc, argv, 0, 0, &arguments);
196 printf("Options:\noverlap_length = %u,\nmin_depths = [%s],\nout_prefix = [%s]\nInput file(s) type = %s .\n",
197 arguments.overlap,arguments.deplstStr,arguments.outfile,arguments.isSAM?"SAM":"BAM");
198 if (arguments.interactive) {
199 pressAnyKey();
202 char *const*line = arguments.args-1; // so that we can use *(++line)
204 G_TIMER_START;
206 samfile_t *samfp;
207 fputs("\nParsing SAM/BAM Files:\n", stderr);
208 while ( *(++line) ) {
209 fprintf(stderr, " <%s> ... ", *line);
210 if (arguments.isSAM) {
211 samfp = samopen(*line, "r", 0);
212 } else {
213 samfp = samopen(*line, "rb", 0);
215 if (samfp) {
216 bam_header_t *samhead = samfp->header;
217 if ( samhead == NULL ) errx(3,"File Header Error.");
218 ChrDat_init(samhead);
220 bam1_t *balignd = bam_init1();
221 while (samread(samfp, balignd) >= 0) do_stat(balignd, arguments.overlap, &Data);
222 bam_destroy1(balignd);
224 fputs("done !\n", stderr);
225 samclose(samfp);
226 continue;
228 warn("failed to open file");
229 //fputs("failed !\n", stderr);
231 free(arguments.args);
232 fputs("\nReading Done!\n\nBegin Stat:\n", stderr);
234 for ( int *p=(int*)utarray_front(arguments.deplst);
235 p!=NULL;
236 p=(int*)utarray_next(arguments.deplst,p) ) {
237 fprintf(stderr,"minDepth = %i: ",*p);
238 do_contig(*p, &Data, arguments.outfile);
239 fputs("done !\n", stderr);
242 FILE *fp = fopen(arguments.outfile, "w");
243 fprintf(fp,"#Main\n");
244 fclose(fp);
246 G_TIMER_END;
247 G_TIMER_PRINT;
249 exit(EXIT_SUCCESS);