4 * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
6 * This file is part of SOAPdenovo.
8 * SOAPdenovo is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
13 * SOAPdenovo is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with SOAPdenovo. If not, see <http://www.gnu.org/licenses/>.
29 static void initenv ( int argc
, char ** argv
);
30 static char shortrdsfile
[256];
31 static char graphfile
[256];
33 static void display_map_usage ();
35 /*************************************************
39 Parses file *.preGraphBasic, and get vertex number,
40 K size, MaxReadLen, MinReadLen, MaxNameLen.
42 1. gfile the graph file prefix
47 *************************************************/
48 static int getMinOverlap ( char * gfile
)
52 int num_kmer
, overlaplen
= 23;
54 sprintf ( name
, "%s.preGraphBasic", gfile
);
55 fp
= fopen ( name
, "r" );
62 while ( fgets ( line
, sizeof ( line
), fp
) != NULL
)
66 sscanf ( line
+ 6, "%d %c %d", &num_kmer
, &ch
, &overlaplen
);
68 else if ( line
[0] == 'M' )
70 sscanf ( line
, "MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxReadLen
, &minReadLen
, &maxNameLen
);
78 /*************************************************
82 This is the main function for map step. It includes the following steps:
83 1.Chops contig (>=k+2) into kmer sets, marks the kmer occured twice or more as deleted,
84 records the unique kmers related contig's id and the position of the kmer on the contig.
85 2.Maps reads to edges.
87 @see display_map_usage()
94 *************************************************/
96 int call_align ( int argc
, char ** argv
)
98 time_t start_t
, stop_t
, time_bef
, time_aft
;
100 fprintf ( stderr
, "\n********************\n" );
101 fprintf ( stderr
, "Map\n" );
102 fprintf ( stderr
, "********************\n\n" );
103 initenv ( argc
, argv
);
104 overlaplen
= getMinOverlap ( graphfile
);
107 if ( smallKmer
> 12 && smallKmer
< 128 && smallKmer
% 2 == 1 )
109 deltaKmer
= overlaplen
- smallKmer
;
110 overlaplen
= smallKmer
;
115 if ( smallKmer
> 12 && smallKmer
< 64 && smallKmer
% 2 == 1 )
117 deltaKmer
= overlaplen
- smallKmer
;
118 overlaplen
= smallKmer
;
122 fprintf ( stderr
, "Kmer size: %d.\n", overlaplen
);
124 ctg_short
= overlaplen
+ 2;
125 fprintf ( stderr
, "Contig length cutoff: %d.\n", ctg_short
);
126 prlContig2nodes ( graphfile
, ctg_short
);
128 fprintf ( stderr
, "Time spent on graph construction: %ds.\n\n", ( int ) ( time_aft
- time_bef
) );
129 //map long read (asm_flags=4) to edge one by one
131 prlLongRead2Ctg ( shortrdsfile
, graphfile
);
133 fprintf ( stderr
, "Time spent on aligning long reads: %ds.\n\n", ( int ) ( time_aft
- time_bef
) );
134 //map read to edge one by one
136 prlRead2Ctg ( shortrdsfile
, graphfile
);
138 fprintf ( stderr
, "Time spent on aligning reads: %ds.\n\n", ( int ) ( time_aft
- time_bef
) );
139 free_Sets ( KmerSets
, thrd_num
);
141 fprintf ( stderr
, "Overall time spent on alignment: %dm.\n\n", ( int ) ( stop_t
- start_t
) / 60 );
148 /*****************************************************************************
149 * Parse command line switches
150 *****************************************************************************/
152 void initenv ( int argc
, char ** argv
)
156 extern char * optarg
;
160 fprintf ( stderr
, "Parameters: map " );
162 while ( ( copt
= getopt ( argc
, argv
, "s:g:K:p:k:f" ) ) != EOF
)
164 //printf("get option\n");
168 fprintf ( stderr
, "-s %s ", optarg
);
170 sscanf ( optarg
, "%s", shortrdsfile
);
173 fprintf ( stderr
, "-g %s ", optarg
);
175 sscanf ( optarg
, "%s", graphfile
);
178 fprintf ( stderr
, "-K %s ", optarg
);
179 sscanf ( optarg
, "%s", temp
);
180 overlaplen
= atoi ( temp
);
183 fprintf ( stderr
, "-p %s ", optarg
);
184 sscanf ( optarg
, "%s", temp
);
185 thrd_num
= atoi ( temp
);
188 fprintf ( stderr
, "-k %s ", optarg
);
189 sscanf ( optarg
, "%s", temp
);
190 smallKmer
= atoi ( temp
);
194 fprintf ( stderr
, "-f " );
198 if ( inpseq
== 0 || outseq
== 0 )
200 display_map_usage ();
206 fprintf ( stderr
, "\n\n" );
208 if ( inpseq
== 0 || outseq
== 0 )
210 display_map_usage ();
215 static void display_map_usage ()
217 fprintf ( stderr
, "\nmap -s configFile -g inputGraph [-f] [-p n_cpu -k kmer_R2C]\n" );
218 fprintf ( stderr
, " -s <string> configFile: the config file of solexa reads\n" );
219 fprintf ( stderr
, " -g <string> inputGraph: prefix of input graph file names\n" );
220 fprintf ( stderr
, " -f (optional) output gap related reads in map step for using SRkgf to fill gap, [NO]\n" );
221 fprintf ( stderr
, " -p <int> n_cpu: number of cpu for use, [8]\n" );
223 fprintf ( stderr
, " -k <int> kmer_R2C(min 13, max 127): kmer size used for mapping read to contig, [K]\n" );
225 fprintf ( stderr
, " -k <int> kmer_R2C(min 13, max 63): kmer size used for mapping read to contig, [K]\n" );