modified: makefile
[GalaxyCodeBases.git] / BGI / soap_src / soap_builder / lookupBuilder.c
blob33b8b792c6b29495c6ec65fed5d5d5f239190c50
1 // vim:set autoindent shiftwidth=4 tabstop=4 noexpandtab:
2 #include "lookupBuilder.h"
4 int LEN;
5 int TOP;
6 ULL NR_TOP;
7 ULL ALL_MASK;
9 const unsigned IN_BUF_SIZE = 100u Mibi;
10 const unsigned step = 1 Mibi;
12 unsigned char * ibuf;
14 ULL text_pos; // Text position, 0-based
15 ULL window; // Last length-LEN characters
17 int fin_src, fout_int;
18 long long start_time;
20 unsigned * otop;
23 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
26 void process(int v) {
27 ++text_pos;
28 //if ((text_pos & ((1ull << 27) - 1)) == 0)
29 //printf("Time %4llds: text_pos=%lluMi\n", time(0) - start_time, text_pos / 1048576);
30 window = (window << 2) | v;
31 if (text_pos >= -((ULL)LEN)) return; // First LEN-1 characters
32 unsigned top = (window & ALL_MASK);
33 ++otop[top];
36 void gen() {
37 // init
38 text_pos = -(ULL)LEN;
39 window = 0;
40 // start
41 TRY(lseek(fin_src, 0, SEEK_SET));
42 while (1) {
43 unsigned int i;
44 unsigned nbuf = read(fin_src, ibuf, sizeof(*ibuf) * IN_BUF_SIZE);
45 if (nbuf < IN_BUF_SIZE) nbuf -= 2;
46 for (i = 0; i < nbuf; ++i) {
47 unsigned short c = ibuf[i];
48 int j;
49 for (j = 6; j >= 0; j -= 2) process((c >> j) & 3);
51 if (nbuf < IN_BUF_SIZE) {
52 int rem = ibuf[nbuf + 1];
53 unsigned short c = ibuf[nbuf];
54 int j;
55 for (j = 6; j >= 8 - (rem * 2); j -= 2) process((c >> j) & 3);
56 break;
63 int buildLookupTable(char * PackedDNAFileName, char * LookupTableFileName, int lookupTableSize) {
65 ibuf = malloc(sizeof(unsigned char)*IN_BUF_SIZE);
66 LEN = lookupTableSize;
67 TOP = lookupTableSize;
68 start_time = time(0);
69 NR_TOP = 1 << TOP * 2;
70 ALL_MASK = NR_TOP - 1;
71 otop = malloc(sizeof(unsigned)*NR_TOP);
72 unsigned int i;
75 TRY(fin_src = open(PackedDNAFileName, O_RDONLY ));
76 TRY(fout_int = open(LookupTableFileName, O_CREAT | O_WRONLY , 0664));
77 gen();
78 for (i = 1; i < LEN; ++i) process(0);
79 for (i = 1; i < NR_TOP; ++i) otop[i] += otop[i-1];
80 for (i = 0; i < NR_TOP; i += step) {
81 TRYEQ(write(fout_int, otop + i, step * sizeof(*otop)), step * sizeof(*otop));
83 TRY(close(fin_src));
84 TRY(close(fout_int));
87 free(ibuf);
88 free(otop);
93 void process(int v) {
94 ++text_pos;
95 if ((text_pos & ((1ull << 27) - 1)) == 0)
96 printf("Time %4llds: text_pos=%lluMi\n", time(0) - start_time, text_pos / 1048576);
97 window = (window << 2) | v;
98 if (text_pos >= -((ULL)LEN)) return; // First LEN-1 characters
99 //ULL curr = rotation ? ((window & ALL_MASK) >> BOTTOM * 2) | (window << (TOP * 2)) : window;
100 unsigned top = (window & ALL_MASK);
101 ++otop[top];
104 void gen() {
105 // init
106 text_pos = -(ULL)LEN;
107 window = 0;
108 // start
109 TRY(fseek(fin_src, 0, SEEK_SET));
110 while (1) {
111 unsigned int i;
112 unsigned nbuf = fread(ibuf,sizeof(unsigned char) * IN_BUF_SIZE,1,fin_src);
113 if (nbuf < IN_BUF_SIZE) nbuf -= 2;
114 for (i = 0; i < nbuf; ++i) {
115 unsigned short c = ibuf[i];
116 int j;
117 for (j = 6; j >= 0; j -= 2) process((c >> j) & 3);
119 if (nbuf < IN_BUF_SIZE) {
120 int rem = ibuf[nbuf + 1];
121 unsigned short c = ibuf[nbuf];
122 int j;
123 for (j = 6; j >= 8 - (rem * 2); j -= 2) process((c >> j) & 3);
124 break;
131 int buildLookupTable(char * PackedDNAFileName, char * LookupTableFileName, int lookupTableSize) {
133 ibuf = malloc(sizeof(unsigned char)*IN_BUF_SIZE);
134 LEN = lookupTableSize;
135 TOP = lookupTableSize;
136 start_time = time(0);
137 NR_TOP = 1 << TOP * 2;
138 ALL_MASK = NR_TOP - 1;
139 otop = malloc(sizeof(unsigned)*NR_TOP);
140 unsigned int i;
141 TRY(fin_src = fopen(PackedDNAFileName, "rb"));
142 TRY(fout_int = fopen(LookupTableFileName, "wb"));
143 printf("MK\n");
144 gen();
145 for (i = 1; i < LEN; ++i) process(0);
146 for (i = 1; i < NR_TOP; ++i) otop[i] += otop[i-1];
147 const unsigned step = 1 Mibi;
148 for (i = 0; i < NR_TOP; i += step) {
149 TRYEQ(fwrite(otop + i,step * sizeof(*otop),1,fout_int), step * sizeof(*otop));
151 TRY(fclose(fin_src));
152 TRY(fclose(fout_int));
153 free(ibuf);
154 free(otop);