1 #ifndef _G_DBSEQ_INLINE_H
2 #define _G_DBSEQ_INLINE_H
5 #include <stdint.h> // int_fast8_t
6 #include <stdlib.h> // malloc
12 FORCE_INLINE
uint64_t *dibmalloc(size_t len
){
13 size_t needtomallocQQW
= (len
+31u)>>5;
14 uint64_t *outseq
= malloc(needtomallocQQW
*8);
18 FORCE_INLINE
uint64_t singlebase2dbitplus(const char *const base
, unsigned char *const hexBQchr
, size_t *const Ncountp
) {
33 *hexBQchr
= 1u<<7; // 128 for N
35 // DONOT use `|=` since the memory is just malloced
38 } // Whether a looking-up array[64] can be faster ?
42 FORCE_INLINE
size_t base2dbit(size_t seqlen
,
43 const char *const baseschr
, const char *const qstr
,
44 uint64_t *diBseq
, unsigned char *hexBQ
) {
45 // First, do the bases, overwrite output arrays since we have just malloc without setting to zero.
48 const size_t seqlenDowntoDW
= seqlen
& ~((2u<<4)-1);
50 printf("[b2d]%zu %zu\n",seqlen
,seqlenDowntoDW
);
53 for (i
=0;i
<seqlenDowntoDW
;i
+=32u) {
56 //tmpqdbase |= singlebase2dbit(baseschr+i+j,hexBQ+i+j)<<(62u-j);
57 tmpqdbase
|= singlebase2dbitplus(baseschr
+i
+j
,hexBQ
+i
+j
,&Ncount
)<<(j
*2);
59 *diBseq
++ = tmpqdbase
;
62 for (j
=0;j
<seqlen
-i
;j
++) { // seqlen starts from 0.
63 // Cannot use 'j<=seqlen-i-1' here since 'size_t j' cannot be negative.
64 tmpqdbase
|= singlebase2dbitplus(baseschr
+i
+j
,hexBQ
+i
+j
,&Ncount
)<<(j
*2);
66 *diBseq
++ = tmpqdbase
;
67 // Everything should done in for-cycle when seqlen%4 == 0.
68 // No need to add '/0' since we have got seqlen, and there may be poly-A.
69 // Then, for the Q values ...
71 for (i
=0;i
<=seqlen
;i
++) {
72 if ( qstr
[i
] >= '@' && qstr
[i
] <= 'h' )
73 tmpqdbase
= qstr
[i
] - '?'; // @=1, A=2, B=3
75 hexBQ
[i
] |= tmpqdbase
; // lower 7bit already Zero-ed.
78 // Eamss-masking will be considered later.
82 FORCE_INLINE
uint64_t unitReverseComp(uint64_t seq32mer
){
84 seq32mer
= ((seq32mer
& 0x3333333333333333LLU
)<< 2) | ((seq32mer
& 0xCCCCCCCCCCCCCCCCLLU
)>> 2);
85 seq32mer
= ((seq32mer
& 0x0F0F0F0F0F0F0F0FLLU
)<< 4) | ((seq32mer
& 0xF0F0F0F0F0F0F0F0LLU
)>> 4);
86 #if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 3)
87 seq32mer
= ((seq32mer
& 0x00FF00FF00FF00FFLLU
)<< 8) | ((seq32mer
& 0xFF00FF00FF00FF00LLU
)>> 8);
88 seq32mer
= ((seq32mer
& 0x0000FFFF0000FFFFLLU
)<<16) | ((seq32mer
& 0xFFFF0000FFFF0000LLU
)>>16);
89 seq32mer
= ((seq32mer
& 0x00000000FFFFFFFFLLU
)<<32) | ((seq32mer
& 0xFFFFFFFF00000000LLU
)>>32);
91 seq32mer
= __builtin_bswap64(seq32mer
);
96 0xABCDEF0123456789 -> cgagtcgcccactagacaaattgtctattggg
97 revcomp -> cccaatagacaatttgtctagtgggcgactcg
99 //FORCE_INLINE uint64_t QQWkmerMovHigher(uint64_t *seq32mer, unsigned char bphigher, uint64_t ){}
101 // *base must have been Normalized to /[ATCGN]*/
102 FORCE_INLINE
uint64_t singlebase2dbit(const char *const base
, size_t *const Ncountp
) {
118 // DONOT use `|=` since the memory is just malloced
121 } // Whether a looking-up array[32] can be faster ?
123 FORCE_INLINE
size_t ChrSeq2dib(const char *const baseschr
, size_t seqlen
, uint64_t **diBseqp
, size_t *const puint64cnt
){
124 size_t needtomallocQQW
= (seqlen
+31u)>>5; // in fact length in sizeof(uint64_t)
125 if (needtomallocQQW
> *puint64cnt
) {
126 *diBseqp
= realloc(*diBseqp
, needtomallocQQW
*8);
127 *puint64cnt
= needtomallocQQW
;
129 uint64_t *tmpdiBseqp
= *diBseqp
;
132 const size_t seqlenDowntoDW
= seqlen
& ~((2u<<4)-1);
134 for (i
=0;i
<seqlenDowntoDW
;i
+=32u) {
137 tmpqdbase
|= singlebase2dbit(baseschr
+i
+j
,&Ncount
)<<(j
*2);
139 *tmpdiBseqp
++ = tmpqdbase
;
142 for (j
=0;j
<seqlen
-i
;j
++) { // seqlen starts from 0.
143 tmpqdbase
|= singlebase2dbit(baseschr
+i
+j
,&Ncount
)<<(j
*2);
145 *tmpdiBseqp
++ = tmpqdbase
;
147 while (tmpdiBseqp
< (*diBseqp
)+*puint64cnt
) {
148 *tmpdiBseqp
++ = 0LLU;
150 #endif // not nessary since we already have seqlen
152 printf("[s2d]%zu %zu [%lx] [%.*s]\n",seqlen
,*puint64cnt
,**diBseqp
,(int)seqlen
,baseschr
); // just show the 1st one.
157 #endif // 2bitseqinline.h