[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / libclc / generic / lib / integer / mul_hi.cl
blob174d893afb14f9ad5d8ac902b5ddf8d25dc13009
1 #include <clc/clc.h>
//For all types EXCEPT long, which is implemented separately.
//
//Defines the scalar mul_hi overload for GENTYPE: both operands are converted
//to the wider type BGENTYPE and multiplied there, then the product is shifted
//right by GENSIZE (the bit width of GENTYPE) to keep only its upper half.
//The final line deliberately carries no continuation backslash, so the macro
//ends here regardless of what the next line of the file contains.
#define __CLC_MUL_HI_IMPL(BGENTYPE, GENTYPE, GENSIZE) \
_CLC_OVERLOAD _CLC_DEF GENTYPE mul_hi(GENTYPE x, GENTYPE y){ \
    return (GENTYPE)(((BGENTYPE)x * (BGENTYPE)y) >> (GENSIZE)); \
}
9 //FOIL-based long mul_hi
11 // Summary: Treat mul_hi(long x, long y) as:
12 // (a+b) * (c+d) where a and c are the high-order parts of x and y respectively
13 // and b and d are the low-order parts of x and y.
14 // Thinking back to algebra, we use FOIL to do the work.
16 _CLC_OVERLOAD _CLC_DEF long mul_hi(long x, long y){
17 long f, o, i;
18 ulong l;
20 //Move the high/low halves of x/y into the lower 32-bits of variables so
21 //that we can multiply them without worrying about overflow.
22 long x_hi = x >> 32;
23 long x_lo = x & UINT_MAX;
24 long y_hi = y >> 32;
25 long y_lo = y & UINT_MAX;
27 //Multiply all of the components according to FOIL method
28 f = x_hi * y_hi;
29 o = x_hi * y_lo;
30 i = x_lo * y_hi;
31 l = x_lo * y_lo;
33 //Now add the components back together in the following steps:
34 //F: doesn't need to be modified
35 //O/I: Need to be added together.
36 //L: Shift right by 32-bits, then add into the sum of O and I
37 //Once O/I/L are summed up, then shift the sum by 32-bits and add to F.
39 //We use hadd to give us a bit of extra precision for the intermediate sums
40 //but as a result, we shift by 31 bits instead of 32
41 return (long)(f + (hadd(o, (i + (long)((ulong)l>>32))) >> 31));
44 _CLC_OVERLOAD _CLC_DEF ulong mul_hi(ulong x, ulong y){
45 ulong f, o, i;
46 ulong l;
48 //Move the high/low halves of x/y into the lower 32-bits of variables so
49 //that we can multiply them without worrying about overflow.
50 ulong x_hi = x >> 32;
51 ulong x_lo = x & UINT_MAX;
52 ulong y_hi = y >> 32;
53 ulong y_lo = y & UINT_MAX;
55 //Multiply all of the components according to FOIL method
56 f = x_hi * y_hi;
57 o = x_hi * y_lo;
58 i = x_lo * y_hi;
59 l = x_lo * y_lo;
61 //Now add the components back together, taking care to respect the fact that:
62 //F: doesn't need to be modified
63 //O/I: Need to be added together.
64 //L: Shift right by 32-bits, then add into the sum of O and I
65 //Once O/I/L are summed up, then shift the sum by 32-bits and add to F.
67 //We use hadd to give us a bit of extra precision for the intermediate sums
68 //but as a result, we shift by 31 bits instead of 32
69 return (f + (hadd(o, (i + (l>>32))) >> 31));
//Defines the vector overloads of mul_hi for GENTYPE##2 through GENTYPE##16.
//The 2- and 3-component forms call mul_hi on each component; the 4-, 8- and
//16-component forms split both operands into their .lo/.hi halves and call
//mul_hi on each half.
//The final line deliberately carries no continuation backslash, so the macro
//ends here and cannot absorb the directive that follows it.
#define __CLC_MUL_HI_VEC(GENTYPE) \
_CLC_OVERLOAD _CLC_DEF GENTYPE##2 mul_hi(GENTYPE##2 x, GENTYPE##2 y){ \
    return (GENTYPE##2){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1)}; \
} \
_CLC_OVERLOAD _CLC_DEF GENTYPE##3 mul_hi(GENTYPE##3 x, GENTYPE##3 y){ \
    return (GENTYPE##3){mul_hi(x.s0, y.s0), mul_hi(x.s1, y.s1), mul_hi(x.s2, y.s2)}; \
} \
_CLC_OVERLOAD _CLC_DEF GENTYPE##4 mul_hi(GENTYPE##4 x, GENTYPE##4 y){ \
    return (GENTYPE##4){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \
} \
_CLC_OVERLOAD _CLC_DEF GENTYPE##8 mul_hi(GENTYPE##8 x, GENTYPE##8 y){ \
    return (GENTYPE##8){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \
} \
_CLC_OVERLOAD _CLC_DEF GENTYPE##16 mul_hi(GENTYPE##16 x, GENTYPE##16 y){ \
    return (GENTYPE##16){mul_hi(x.lo, y.lo), mul_hi(x.hi, y.hi)}; \
}
//Emits the complete mul_hi family for one element type: the scalar overload
//for NARROW (computed in the wider type WIDE, shifting by WIDTH bits),
//followed by the vector overloads for NARROW.
#define __CLC_MUL_HI_DEC_IMPL(WIDE, NARROW, WIDTH) \
    __CLC_MUL_HI_IMPL(WIDE, NARROW, WIDTH) \
    __CLC_MUL_HI_VEC(NARROW)
93 #define __CLC_MUL_HI_TYPES() \
94 __CLC_MUL_HI_DEC_IMPL(short, char, 8) \
95 __CLC_MUL_HI_DEC_IMPL(ushort, uchar, 8) \
96 __CLC_MUL_HI_DEC_IMPL(int, short, 16) \
97 __CLC_MUL_HI_DEC_IMPL(uint, ushort, 16) \
98 __CLC_MUL_HI_DEC_IMPL(long, int, 32) \
99 __CLC_MUL_HI_DEC_IMPL(ulong, uint, 32) \
100 __CLC_MUL_HI_VEC(long) \
101 __CLC_MUL_HI_VEC(ulong)
103 __CLC_MUL_HI_TYPES()
105 #undef __CLC_MUL_HI_TYPES
106 #undef __CLC_MUL_HI_DEC_IMPL
107 #undef __CLC_MUL_HI_IMPL
108 #undef __CLC_MUL_HI_VEC
109 #undef __CLC_B32