modified: nfig1.py
[GalaxyCodeBases.git] / c_cpp / readscorr / 2bitseqinline.h
blob325bed4f73584b4d4ddf8ababbf70df4e73853c5
1 #ifndef _G_DBSEQ_INLINE_H
2 #define _G_DBSEQ_INLINE_H
4 #include "gtypendef.h"
5 #include <stdint.h> // int_fast8_t
6 #include <stdlib.h> // malloc
7 //#include "2bitseq.h"
8 #ifdef DEBUG
9 #include <stdio.h>
10 #endif
12 FORCE_INLINE uint64_t *dibmalloc(size_t len){
13 size_t needtomallocQQW = (len+31u)>>5;
14 uint64_t *outseq = malloc(needtomallocQQW*8);
15 return outseq;
18 FORCE_INLINE uint64_t singlebase2dbitplus(const char *const base, unsigned char *const hexBQchr, size_t *const Ncountp) {
19 switch (*base) {
20 case 'a': case 'A':
21 return 0;
22 break;
23 case 't': case 'T':
24 return 3;
25 break;
26 case 'c': case 'C':
27 return 1;
28 break;
29 case 'g': case 'G':
30 return 2;
31 break;
32 default:
33 *hexBQchr = 1u<<7; // 128 for N
34 ++(*Ncountp);
35 // DONOT use `|=` since the memory is just malloced
36 return 0;
37 break;
38 } // Whether a looking-up array[64] can be faster ?
41 // for gFileIO only
42 FORCE_INLINE size_t base2dbit(size_t seqlen,
43 const char *const baseschr, const char *const qstr,
44 uint64_t *diBseq, unsigned char *hexBQ) {
45 // First, do the bases, overwrite output arrays since we have just malloc without setting to zero.
46 size_t Ncount=0;
47 size_t i,j;
48 const size_t seqlenDowntoDW = seqlen & ~((2u<<4)-1);
49 #ifdef DEBUG
50 printf("[b2d]%zu %zu\n",seqlen,seqlenDowntoDW);
51 #endif
52 uint64_t tmpqdbase;
53 for (i=0;i<seqlenDowntoDW;i+=32u) {
54 tmpqdbase=0;
55 for (j=0;j<32;j++) {
56 //tmpqdbase |= singlebase2dbit(baseschr+i+j,hexBQ+i+j)<<(62u-j);
57 tmpqdbase |= singlebase2dbitplus(baseschr+i+j,hexBQ+i+j,&Ncount)<<(j*2);
58 // printf(" A{%lx,%.1s,%lx}",tmpqdbase,baseschr+i+j,singlebase2dbit(baseschr+i+j,hexBQ+i+j)<<(j*2));
60 *diBseq++ = tmpqdbase;
62 // printf("[b]%zu %zu\n",i,j);
63 tmpqdbase = 0;
64 for (j=0;j<seqlen-i;j++) { // seqlen starts from 0.
65 // Cannot use 'j<=seqlen-i-1' here since 'size_t j' cannot be negative.
66 tmpqdbase |= singlebase2dbitplus(baseschr+i+j,hexBQ+i+j,&Ncount)<<(j*2);
67 // printf(" B{%lx,%.1s,%lx}",tmpqdbase,baseschr+i+j,singlebase2dbit(baseschr+i+j,hexBQ+i+j)<<(j*2));
69 *diBseq++ = tmpqdbase;
70 // Everything should done in for-cycle when seqlen%4 == 0.
71 // No need to add '/0' since we have got seqlen, and there may be poly-A.
72 // Then, for the Q values ...
73 if (qstr != NULL) {
74 for (i=0;i<=seqlen;i++) {
75 if ( qstr[i] >= '@' && qstr[i] <= 'h' )
76 tmpqdbase = qstr[i] - '?'; // @=1, A=2, B=3
77 else tmpqdbase = 0;
78 hexBQ[i] |= tmpqdbase; // lower 7bit already Zero-ed.
81 // Eamss-masking will be considered later.
82 return Ncount;
85 FORCE_INLINE uint64_t unitReverseComp(uint64_t seq32mer){
86 seq32mer = ~seq32mer;
87 seq32mer = ((seq32mer & 0x3333333333333333LLU)<< 2) | ((seq32mer & 0xCCCCCCCCCCCCCCCCLLU)>> 2);
88 seq32mer = ((seq32mer & 0x0F0F0F0F0F0F0F0FLLU)<< 4) | ((seq32mer & 0xF0F0F0F0F0F0F0F0LLU)>> 4);
89 #if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 3)
90 // see also https://github.com/damm/ffi/raw/7d11ff675759ca47243bb901e4226ee3ec6de0c6/ext/ffi_c/AbstractMemory.c
91 seq32mer = ((seq32mer & 0x00FF00FF00FF00FFLLU)<< 8) | ((seq32mer & 0xFF00FF00FF00FF00LLU)>> 8);
92 seq32mer = ((seq32mer & 0x0000FFFF0000FFFFLLU)<<16) | ((seq32mer & 0xFFFF0000FFFF0000LLU)>>16);
93 seq32mer = ((seq32mer & 0x00000000FFFFFFFFLLU)<<32) | ((seq32mer & 0xFFFFFFFF00000000LLU)>>32);
94 #else
95 seq32mer = __builtin_bswap64(seq32mer);
96 #endif
97 return seq32mer;
100 0xABCDEF0123456789 -> cgagtcgcccactagacaaattgtctattggg
101 revcomp -> cccaatagacaatttgtctagtgggcgactcg
103 //FORCE_INLINE uint64_t QQWkmerMovHigher(uint64_t *seq32mer, unsigned char bphigher, uint64_t ){}
105 // *base must have been Normalized to /[ATCGN]*/
106 FORCE_INLINE uint64_t singlebase2dbit(const char *const base, size_t *const Ncountp) {
107 #ifdef NEWTRY /* slower than switch */
108 const unsigned char baseswitcher[]={
109 4,0,4,1,4, // 64-68
110 4,4,2,4,4, // 69-73
111 4,4,4,4,4, // 74-78
112 4,4,4,4,4, // 79-83
113 3 // 84
115 if (*base == 'N') {
116 ++(*Ncountp);
117 return 0;
119 // Now, only /[ATCG]/
120 unsigned char thebase = *base - 64;
121 thebase = baseswitcher[thebase];
122 return thebase;
123 #else
124 switch (*base) {
125 case 'A':
126 return 0;
127 break;
128 case 'T':
129 return 3;
130 break;
131 case 'C':
132 return 1;
133 break;
134 case 'G':
135 return 2;
136 break;
137 default:
138 ++(*Ncountp);
139 // DONOT use `|=` since the memory is just malloced
140 return 0;
141 break;
142 } // Whether a looking-up array[32] can be faster ?
143 #endif
145 FORCE_INLINE size_t ChrSeq2dib(const char *const baseschr, size_t seqlen, uint64_t **diBseqp, size_t *const puint64cnt){
146 size_t needtomallocQQW = (seqlen+31u)>>5; // in fact length in sizeof(uint64_t)
147 if (needtomallocQQW > *puint64cnt) {
148 *diBseqp = realloc(*diBseqp, needtomallocQQW*8);
149 *puint64cnt = needtomallocQQW;
151 uint64_t *tmpdiBseqp = *diBseqp;
152 size_t Ncount = 0;
153 size_t i,j;
154 const size_t seqlenDowntoDW = seqlen & ~((2u<<4)-1);
155 uint64_t tmpqdbase;
156 for (i=0;i<seqlenDowntoDW;i+=32u) {
157 tmpqdbase=0;
158 for (j=0;j<32;j++) {
159 tmpqdbase |= singlebase2dbit(baseschr+i+j,&Ncount)<<(j*2);
161 *tmpdiBseqp++ = tmpqdbase;
163 tmpqdbase = 0;
164 for (j=0;j<seqlen-i;j++) { // seqlen starts from 0.
165 tmpqdbase |= singlebase2dbit(baseschr+i+j,&Ncount)<<(j*2);
167 *tmpdiBseqp++ = tmpqdbase;
168 #ifdef EP
169 while (tmpdiBseqp < (*diBseqp)+*puint64cnt) {
170 *tmpdiBseqp++ = 0LLU;
172 #endif // not nessary since we already have seqlen and the last uint64 is always with tailing 0s.
173 #ifdef DEBUG
174 printf("[s2d]%zu %zu [%lx] [%.*s]\n",seqlen,*puint64cnt,**diBseqp,(int)seqlen,baseschr); // just show the 1st one.
175 #endif
176 return Ncount;
179 #endif // 2bitseqinline.h
182 NEWTRY(use array instead of switch) is slower ...
183 and User: 183.165154(s) if const uint64_t baseswitcher[]
185 ==> tnne.log <==
186 User: 143.039254(s), System: 0.682896(s). Real: 144.603003(s).
187 Sleep: 0.880853(s). Block I/O times: 0/0. MaxRSS: 0 kiB.
188 Wait(s): 117(nvcsw) + 14195(nivcsw). Page Fault(s): 41291(minflt) + 0(majflt).
190 ==> ttmp.log <==
191 User: 132.749818(s), System: 1.740735(s). Real: 136.756637(s).
192 Sleep: 2.266084(s). Block I/O times: 0/0. MaxRSS: 0 kiB.
193 Wait(s): 71(nvcsw) + 42227(nivcsw). Page Fault(s): 41292(minflt) + 0(majflt).