modified: src1/worker.c
[GalaxyCodeBases.git] / c_cpp / readscorr / 2bitseq.c
blob9a47e2f7a83c7e3dc7c5136851ee6169005d0e92
1 #include "2bitseq.h"
2 #include <string.h> // strcpy
/*
 * Decode one packed 64-bit unit into a NUL-terminated string of 32 base
 * characters.
 *
 * Every base occupies two bits. The two lowest bits of seq32mer become the
 * first output character, the two highest bits the last one. Each 2-bit code
 * is turned into a character by DBIT2BASE.
 *
 * Returns a malloc'ed 33-byte buffer owned by the caller (release it with
 * free()), or NULL when the allocation fails.
 */
char *unit2basechr(uint64_t seq32mer){
    char *basechar = malloc(33);    // 32 bases + terminating NUL
    if (basechar == NULL) {
        return NULL;
    }
    char *tmpbc = basechar;
    for (int i = 0; i < 64; i += 2) {
        // Isolate the 2-bit code at bit offset i and move it down to bits 0..1.
        *tmpbc++ = DBIT2BASE((seq32mer & (3LLU << i)) >> i);
    }
    *tmpbc = '\0';
    return basechar;
}
/*
 * Decode a 2-bit packed sequence of `len` bases into a NUL-terminated string.
 *
 * diBseq holds 32 bases per 64-bit unit, so (len+31)/32 units are read.
 * Only the first len % 32 bases of a partially filled last unit are copied.
 * When len is a multiple of 32 no extra unit is touched (the previous code
 * always decoded diBseq[len/32], reading one unit past the input in that
 * case).
 *
 * Returns a malloc'ed buffer of len+1 bytes owned by the caller (release it
 * with free()), or NULL when an allocation fails.
 */
char *dib2basechr(uint64_t *diBseq, size_t len){
    char *basechar = malloc(len + 1);
    if (basechar == NULL) {
        return NULL;
    }
    for (size_t done = 0; done < len; done += 32) {
        // A full unit yields 32 characters; the last unit may yield fewer.
        size_t chunk = (len - done < 32) ? (len - done) : 32;
        char *tmpstr = unit2basechr(diBseq[done / 32]);
        if (tmpstr == NULL) {
            free(basechar);
            return NULL;
        }
        memcpy(basechar + done, tmpstr, chunk);
        free(tmpstr);
    }
    basechar[len] = '\0';
    return basechar;
}
/*
Layout sketch for dibrevcomp, drawn with toy 10-slot words:
|000000000>||111111111>||2222222>...|, lastBits=7, nullBits=3 (%10)
|...0000000||00>1111111||11>2222222>|. Well, left is right here ...
|11>2222222>||00>1111111||...0000000|
|<2222222<11||1111111<00||0000000...|
*/
/*
 * Build the reverse complement of a 2-bit packed sequence of `len` bases.
 *
 * inseq holds 32 bases per 64-bit unit; (len+31)/32 units are read and the
 * same number are written. The whole multi-unit value is first shifted up by
 * the number of unused bits in the last unit, so that the unused bits sit at
 * the low end of unit 0. Each shifted unit is then passed through
 * unitReverseComp and stored at the mirrored index.
 *
 * NOTE(review): unitReverseComp is defined outside this block; it is presumed
 * to reverse the 32 two-bit bases of one unit and complement them - confirm.
 * Under that assumption the unused high bits of the last output unit hold the
 * complement of zero padding, not zeros.
 *
 * Fixes over the previous version:
 *  - the used-bit count of the last unit is 2*(len % 32); the former
 *    2*(len % 64) produced shift counts >= 64 (and a negative nullBits) for
 *    len % 64 >= 32, which is undefined behaviour;
 *  - when the last unit is full no shift is needed; the former code shifted
 *    by 64, which is undefined behaviour as well;
 *  - the allocation result is checked.
 *
 * Returns a malloc'ed array owned by the caller (release it with free()), or
 * NULL when the allocation fails. For len == 0 the result is whatever
 * malloc(0) returns.
 */
uint64_t *dibrevcomp(uint64_t const *const inseq, size_t len){
    size_t needtomallocQQW = (len + 31u) >> 5;   // units of 32 bases, rounded up
    uint64_t *outseq = malloc(needtomallocQQW * sizeof *outseq);
    if (outseq == NULL) {
        return NULL;
    }

    unsigned lastBits = (unsigned)(2 * (len % 32));  // bits used in the last unit, 0..62
    if (lastBits == 0) {
        // Every unit is full: reverse the unit order, no bit shifting needed.
        for (size_t i = 0; i < needtomallocQQW; i++) {
            outseq[needtomallocQQW - 1 - i] = unitReverseComp(inseq[i]);
        }
        return outseq;
    }

    unsigned nullBits = 64 - lastBits;   // unused bits in the last unit, 2..62
    uint64_t carry = 0;                  // top bits of the previous unit, moved to the low end
    for (size_t i = 0; i < needtomallocQQW; i++) {
        outseq[needtomallocQQW - 1 - i] = unitReverseComp((inseq[i] << nullBits) | carry);
        // The nullBits highest bits of this unit spill into the next one.
        carry = inseq[i] >> lastBits;
    }
    return outseq;
}