1 // vim:set autoindent shiftwidth=4 tabstop=4 noexpandtab:
2 #include "lookupBuilder.h"
9 const unsigned IN_BUF_SIZE
= 100u Mibi
;
10 const unsigned step
= 1 Mibi
;
14 ULL text_pos
; // Text position, 0-based
15 ULL window
; // Last length-LEN characters
17 int fin_src
, fout_int
;
23 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
28 //if ((text_pos & ((1ull << 27) - 1)) == 0)
29 //printf("Time %4llds: text_pos=%lluMi\n", time(0) - start_time, text_pos / 1048576);
30 window
= (window
<< 2) | v
;
31 if (text_pos
>= -((ULL
)LEN
)) return; // First LEN-1 characters
32 unsigned top
= (window
& ALL_MASK
);
41 TRY(lseek(fin_src
, 0, SEEK_SET
));
44 unsigned nbuf
= read(fin_src
, ibuf
, sizeof(*ibuf
) * IN_BUF_SIZE
);
45 if (nbuf
< IN_BUF_SIZE
) nbuf
-= 2;
46 for (i
= 0; i
< nbuf
; ++i
) {
47 unsigned short c
= ibuf
[i
];
49 for (j
= 6; j
>= 0; j
-= 2) process((c
>> j
) & 3);
51 if (nbuf
< IN_BUF_SIZE
) {
52 int rem
= ibuf
[nbuf
+ 1];
53 unsigned short c
= ibuf
[nbuf
];
55 for (j
= 6; j
>= 8 - (rem
* 2); j
-= 2) process((c
>> j
) & 3);
63 int buildLookupTable(char * PackedDNAFileName
, char * LookupTableFileName
, int lookupTableSize
) {
65 ibuf
= malloc(sizeof(unsigned char)*IN_BUF_SIZE
);
66 LEN
= lookupTableSize
;
67 TOP
= lookupTableSize
;
69 NR_TOP
= 1 << TOP
* 2;
70 ALL_MASK
= NR_TOP
- 1;
71 otop
= malloc(sizeof(unsigned)*NR_TOP
);
75 TRY(fin_src
= open(PackedDNAFileName
, O_RDONLY
));
76 TRY(fout_int
= open(LookupTableFileName
, O_CREAT
| O_WRONLY
, 0664));
78 for (i
= 1; i
< LEN
; ++i
) process(0);
79 for (i
= 1; i
< NR_TOP
; ++i
) otop
[i
] += otop
[i
-1];
80 for (i
= 0; i
< NR_TOP
; i
+= step
) {
81 TRYEQ(write(fout_int
, otop
+ i
, step
* sizeof(*otop
)), step
* sizeof(*otop
));
95 if ((text_pos & ((1ull << 27) - 1)) == 0)
96 printf("Time %4llds: text_pos=%lluMi\n", time(0) - start_time, text_pos / 1048576);
97 window = (window << 2) | v;
98 if (text_pos >= -((ULL)LEN)) return; // First LEN-1 characters
99 //ULL curr = rotation ? ((window & ALL_MASK) >> BOTTOM * 2) | (window << (TOP * 2)) : window;
100 unsigned top = (window & ALL_MASK);
106 text_pos = -(ULL)LEN;
109 TRY(fseek(fin_src, 0, SEEK_SET));
112 unsigned nbuf = fread(ibuf,sizeof(unsigned char) * IN_BUF_SIZE,1,fin_src);
113 if (nbuf < IN_BUF_SIZE) nbuf -= 2;
114 for (i = 0; i < nbuf; ++i) {
115 unsigned short c = ibuf[i];
117 for (j = 6; j >= 0; j -= 2) process((c >> j) & 3);
119 if (nbuf < IN_BUF_SIZE) {
120 int rem = ibuf[nbuf + 1];
121 unsigned short c = ibuf[nbuf];
123 for (j = 6; j >= 8 - (rem * 2); j -= 2) process((c >> j) & 3);
131 int buildLookupTable(char * PackedDNAFileName, char * LookupTableFileName, int lookupTableSize) {
133 ibuf = malloc(sizeof(unsigned char)*IN_BUF_SIZE);
134 LEN = lookupTableSize;
135 TOP = lookupTableSize;
136 start_time = time(0);
137 NR_TOP = 1 << TOP * 2;
138 ALL_MASK = NR_TOP - 1;
139 otop = malloc(sizeof(unsigned)*NR_TOP);
141 TRY(fin_src = fopen(PackedDNAFileName, "rb"));
142 TRY(fout_int = fopen(LookupTableFileName, "wb"));
145 for (i = 1; i < LEN; ++i) process(0);
146 for (i = 1; i < NR_TOP; ++i) otop[i] += otop[i-1];
147 const unsigned step = 1 Mibi;
148 for (i = 0; i < NR_TOP; i += step) {
149 TRYEQ(fwrite(otop + i,step * sizeof(*otop),1,fout_int), step * sizeof(*otop));
151 TRY(fclose(fin_src));
152 TRY(fclose(fout_int));