2 #include <stdlib.h> //EXIT_FAILURE
10 #include "uthash/utarray.h"
15 //#define MAXREADLEN (8ul*1024*1024)
16 //uint64_t ReadsLenArr[MAXREADLEN];
18 const char *argp_program_version
=
19 "Continuous Stator 0.1 (SAM/BAM) @"__TIME__
"," __DATE__
;
20 const char *argp_program_bug_address
=
21 "<galaxy001@gmail.com>";
23 /* Program documentation. */
34 int UT_array_intsort(const void *a
,const void*b
) {
39 //struct ChrData_hash_struct *ChrData = NULL; /* important! initialize to NULL */
41 /* A description of the arguments we accept. */
42 static char args_doc
[] = "input_files (SAM or BAM)";
44 /* The options we understand. */
45 static struct argp_option options
[] = {
46 {"interactive", 'i', 0, 0, "pause before procedure"},
47 {"sam", 's', 0, 0, "input is SAM" },
48 {"overlap_length", 'l', "25", 0, "min overlap to connect reads" },
49 {"min_depths", 'd', "1,2,3,5",0, "list of min acceptable depth" },
50 {"out_prefix", 'o', "./ctg", 0, "prefix of output files" },
54 /* Used by main to communicate with parse_opt. */
56 uint_fast8_t isSAM
, interactive
;
64 /* Parse a single option. */
66 parse_opt (int key
, char *arg
, struct argp_state
*state
) {
67 /* Get the input argument from argp_parse, which we
68 know is a pointer to our arguments structure. */
69 struct arguments
*arguments
= state
->input
;
70 //utarray_new(arguments->deplst,&ut_int_icd);
74 tmpArgValue
= atoi(arg
);
75 if (tmpArgValue
>=1 && tmpArgValue
< UINT16_MAX
) { // k=l+1, so not == UINT16_MAX
76 arguments
->overlap
= tmpArgValue
;
78 errx(2,"-%c \"%s\"=%i is not a integer of [1,%u] !",key
,arg
,tmpArgValue
,UINT16_MAX
-1);
83 char *tmpStr
, *tmpStrN
;
84 if ( (tmpStrN
= tmpStr
= malloc(strlen(arg
)+2)) == NULL
) // +1 for '\0' and +1 for extra ','
87 //printf("[%s] %s\n",tmpStr,arg);
88 utarray_init(arguments
->deplst
,&ut_int_icd
);
89 for ( strcpy(tmpStrN
,arg
);
92 char *token
= strtok(tmpStrN
, ",; .");
93 if (token
== NULL
) break;
94 tmpArgValue
= atoi(token
);
95 if (tmpArgValue
>=1 && tmpArgValue
<= UINT8_MAX
) {
96 utarray_push_back(arguments
->deplst
,&tmpArgValue
);
97 //printf("[%s] %s %d\n",tmpStr,token,tmpArgValue);
99 errx(2,"In -%c \"%s\":[%i] is not a integer of [1,%u] !",key
,arg
,tmpArgValue
,UINT8_MAX
);
102 utarray_sort(arguments
->deplst
,&UT_array_intsort
);
105 if ( (tmpStrN
= malloc(strlen(arg
)+2)) == NULL
) err(3,NULL
);
106 for ( int *p
=(int*)utarray_front(arguments
->deplst
);
108 p
=(int*)utarray_next(arguments
->deplst
,p
) ) {
109 sprintf(tmpStrN
,"%i,",*p
);
110 strcat(tmpStr
,tmpStrN
);
113 arguments
->deplstStr
= tmpStr
;
114 tmpStr
+= strlen(tmpStr
)-1;
116 //printf("[%s] %s\n",arguments->deplstStr,arg);
119 arguments
->outfile
= arg
;
122 arguments
->isSAM
= 1;
125 arguments
->interactive
= 1;
129 arguments
->args
[state
->arg_num
] = arg
;
133 if (state
->arg_num
< 1) /* Not enough arguments. */
134 //argp_usage (state);
135 argp_state_help (state
,stderr
,ARGP_HELP_STD_HELP
);
139 return ARGP_ERR_UNKNOWN
;
144 /* Our argp parser. */
145 static struct argp argp
= { options
, parse_opt
, args_doc
, doc
};
147 int ChrDat_init(bam_header_t
*samhead
) {
148 char **lpChrID
= samhead
->target_name
;
149 uint32_t *lpChrLen
= samhead
->target_len
;
150 if (Data
.n_targets
== 0) {
151 Data
.n_targets
= samhead
->n_targets
;
152 Data
.ChrDat
= malloc( Data
.n_targets
* sizeof(size_t));
153 Data
.target_len
= malloc( Data
.n_targets
* sizeof(size_t));
154 Data
.target_name
= malloc( Data
.n_targets
* sizeof(size_t));
156 for (int32_t i
=0; i
< Data
.n_targets
; ++i
) {
157 //Data.ChrDat[i] = malloc(*(lpChrLen+i) * sizeof(uint8_t));
158 Data
.target_len
[i
] = *(lpChrLen
+i
);
159 Data
.ChrDat
[i
] = calloc(Data
.target_len
[i
], sizeof(uint8_t));
160 Data
.target_name
[i
] = malloc(strlen(*(lpChrID
+i
))+1);
161 strcpy(Data
.target_name
[i
],*(lpChrID
+i
));
162 //printf("%i: %s\t%u\n",i,*(lpChrID+i),*(lpChrLen+i));
163 printf("%i: %s\t%u\n",i
,Data
.target_name
[i
],Data
.target_len
[i
]);
166 if (Data
.n_targets
== samhead
->n_targets
) {
167 printf("Same reference count: %i.\n",samhead
->n_targets
);
170 errx(1,"Different reference.");
176 int main (int argc
, char **argv
) {
177 struct arguments arguments
;
178 arguments
.args
=calloc(sizeof(size_t),argc
);
182 arguments
.interactive
= 0;
183 arguments
.deplstStr
= "1,2,3,5";
185 utarray_new(arguments
.deplst
,&ut_int_icd
);
186 tmpArgValue
= 1; utarray_push_back(arguments
.deplst
,&tmpArgValue
);
187 tmpArgValue
= 2; utarray_push_back(arguments
.deplst
,&tmpArgValue
);
188 tmpArgValue
= 3; utarray_push_back(arguments
.deplst
,&tmpArgValue
);
189 tmpArgValue
= 5; utarray_push_back(arguments
.deplst
,&tmpArgValue
);
190 arguments
.overlap
= 25;
191 arguments
.outfile
= "./ctg";
193 // Parse our arguments; every option seen by parse_opt will be reflected in arguments.
194 argp_parse (&argp
, argc
, argv
, 0, 0, &arguments
);
196 printf("Options:\noverlap_length = %u,\nmin_depths = [%s],\nout_prefix = [%s]\nInput file(s) type = %s .\n",
197 arguments
.overlap
,arguments
.deplstStr
,arguments
.outfile
,arguments
.isSAM
?"SAM":"BAM");
198 if (arguments
.interactive
) {
202 char *const*line
= arguments
.args
-1; // so that we can use *(++line)
207 fputs("\nParsing SAM/BAM Files:\n", stderr
);
208 while ( *(++line
) ) {
209 fprintf(stderr
, " <%s> ... ", *line
);
210 if (arguments
.isSAM
) {
211 samfp
= samopen(*line
, "r", 0);
213 samfp
= samopen(*line
, "rb", 0);
216 bam_header_t
*samhead
= samfp
->header
;
217 if ( samhead
== NULL
) errx(3,"File Header Error.");
218 ChrDat_init(samhead
);
220 bam1_t
*balignd
= bam_init1();
221 while (samread(samfp
, balignd
) >= 0) do_stat(balignd
, arguments
.overlap
, &Data
);
222 bam_destroy1(balignd
);
224 fputs("done !\n", stderr
);
228 warn("failed to open file");
229 //fputs("failed !\n", stderr);
231 free(arguments
.args
);
232 fputs("\nReading Done!\n\nBegin Stat:\n", stderr
);
234 for ( int *p
=(int*)utarray_front(arguments
.deplst
);
236 p
=(int*)utarray_next(arguments
.deplst
,p
) ) {
237 fprintf(stderr
,"minDepth = %i: ",*p
);
238 do_contig(*p
, &Data
, arguments
.outfile
);
239 fputs("done !\n", stderr
);
242 FILE *fp
= fopen(arguments
.outfile
, "w");
243 fprintf(fp
,"#Main\n");