4 ** The author disclaims copyright to this source code. In place of
5 ** a legal notice, here is a blessing:
7 ** May you do good and not evil.
8 ** May you find forgiveness for yourself and forgive others.
9 ** May you share freely, never taking more than you give.
11 ******************************************************************************
13 ** This file contains code to implement the percentile(Y,P) SQL function
14 ** as described below:
16 ** (1) The percentile(Y,P) function is an aggregate function taking
17 ** exactly two arguments.
19 ** (2) If the P argument to percentile(Y,P) is not the same for every
20 ** row in the aggregate then an error is thrown. The word "same"
21 ** in the previous sentence means that the value differ by less
24 ** (3) If the P argument to percentile(Y,P) evaluates to anything other
25 ** than a number in the range of 0.0 to 100.0 inclusive then an
28 ** (4) If any Y argument to percentile(Y,P) evaluates to a value that
29 ** is not NULL and is not numeric then an error is thrown.
31 ** (5) If any Y argument to percentile(Y,P) evaluates to plus or minus
32 ** infinity then an error is thrown. (SQLite always interprets NaN
35 ** (6) Both Y and P in percentile(Y,P) can be arbitrary expressions,
36 ** including CASE WHEN expressions.
38 ** (7) The percentile(Y,P) aggregate is able to handle inputs of at least
39 ** one million (1,000,000) rows.
41 ** (8) If there are no non-NULL values for Y, then percentile(Y,P)
44 ** (9) If there is exactly one non-NULL value for Y, the percentile(Y,P)
45 ** returns the one Y value.
47 ** (10) If there N non-NULL values of Y where N is two or more and
48 ** the Y values are ordered from least to greatest and a graph is
49 ** drawn from 0 to N-1 such that the height of the graph at J is
50 ** the J-th Y value and such that straight lines are drawn between
51 ** adjacent Y values, then the percentile(Y,P) function returns
52 ** the height of the graph at P*(N-1)/100.
54 ** (11) The percentile(Y,P) function always returns either a floating
55 ** point number or NULL.
57 ** (12) The percentile(Y,P) is implemented as a single C99 source-code
58 ** file that compiles into a shared-library or DLL that can be loaded
59 ** into SQLite using the sqlite3_load_extension() interface.
61 #include "sqlite3ext.h"
62 SQLITE_EXTENSION_INIT1
67 /* The following object is the session context for a single percentile()
68 ** function. We have to remember all input Y values until the very end.
69 ** Those values are accumulated in the Percentile.a[] array.
71 typedef struct Percentile Percentile
;
73 unsigned nAlloc
; /* Number of slots allocated for a[] */
74 unsigned nUsed
; /* Number of slots actually used in a[] */
75 double rPct
; /* 1.0 more than the value for P */
76 double *a
; /* Array of Y values */
80 ** Return TRUE if the input floating-point number is an infinity.
82 static int isInfinity(double r
){
84 assert( sizeof(u
)==sizeof(r
) );
85 memcpy(&u
, &r
, sizeof(u
));
86 return ((u
>>52)&0x7ff)==0x7ff;
90 ** Return TRUE if two doubles differ by 0.001 or less
92 static int sameValue(double a
, double b
){
94 return a
>=-0.001 && a
<=0.001;
98 ** The "step" function for percentile(Y,P) is called once for each
101 static void percentStep(sqlite3_context
*pCtx
, int argc
, sqlite3_value
**argv
){
108 /* Requirement 3: P must be a number between 0 and 100 */
109 eType
= sqlite3_value_numeric_type(argv
[1]);
110 rPct
= sqlite3_value_double(argv
[1]);
111 if( (eType
!=SQLITE_INTEGER
&& eType
!=SQLITE_FLOAT
)
112 || rPct
<0.0 || rPct
>100.0 ){
113 sqlite3_result_error(pCtx
, "2nd argument to percentile() is not "
114 "a number between 0.0 and 100.0", -1);
118 /* Allocate the session context. */
119 p
= (Percentile
*)sqlite3_aggregate_context(pCtx
, sizeof(*p
));
122 /* Remember the P value. Throw an error if the P value is different
123 ** from any prior row, per Requirement (2). */
126 }else if( !sameValue(p
->rPct
,rPct
+1.0) ){
127 sqlite3_result_error(pCtx
, "2nd argument to percentile() is not the "
128 "same for all input rows", -1);
132 /* Ignore rows for which Y is NULL */
133 eType
= sqlite3_value_type(argv
[0]);
134 if( eType
==SQLITE_NULL
) return;
136 /* If not NULL, then Y must be numeric. Otherwise throw an error.
138 if( eType
!=SQLITE_INTEGER
&& eType
!=SQLITE_FLOAT
){
139 sqlite3_result_error(pCtx
, "1st argument to percentile() is not "
144 /* Throw an error if the Y value is infinity or NaN */
145 y
= sqlite3_value_double(argv
[0]);
147 sqlite3_result_error(pCtx
, "Inf input to percentile()", -1);
151 /* Allocate and store the Y */
152 if( p
->nUsed
>=p
->nAlloc
){
153 unsigned n
= p
->nAlloc
*2 + 250;
154 double *a
= sqlite3_realloc64(p
->a
, sizeof(double)*n
);
157 memset(p
, 0, sizeof(*p
));
158 sqlite3_result_error_nomem(pCtx
);
164 p
->a
[p
->nUsed
++] = y
;
168 ** Compare to doubles for sorting using qsort()
170 static int SQLITE_CDECL
doubleCmp(const void *pA
, const void *pB
){
171 double a
= *(double*)pA
;
172 double b
= *(double*)pB
;
179 ** Called to compute the final output of percentile() and to clean
180 ** up all allocated memory.
182 static void percentFinal(sqlite3_context
*pCtx
){
187 p
= (Percentile
*)sqlite3_aggregate_context(pCtx
, 0);
189 if( p
->a
==0 ) return;
191 qsort(p
->a
, p
->nUsed
, sizeof(double), doubleCmp
);
192 ix
= (p
->rPct
-1.0)*(p
->nUsed
-1)*0.01;
194 i2
= ix
==(double)i1
|| i1
==p
->nUsed
-1 ? i1
: i1
+1;
197 vx
= v1
+ (v2
-v1
)*(ix
-i1
);
198 sqlite3_result_double(pCtx
, vx
);
201 memset(p
, 0, sizeof(*p
));
206 __declspec(dllexport
)
208 int sqlite3_percentile_init(
211 const sqlite3_api_routines
*pApi
214 SQLITE_EXTENSION_INIT2(pApi
);
215 (void)pzErrMsg
; /* Unused parameter */
216 rc
= sqlite3_create_function(db
, "percentile", 2,
217 SQLITE_UTF8
|SQLITE_INNOCUOUS
, 0,
218 0, percentStep
, percentFinal
);