From 561d0a0fe27e6e868b51f671eb569440c46563fa Mon Sep 17 00:00:00 2001 From: tgl Date: Sat, 4 Apr 2009 04:53:25 +0000 Subject: [PATCH] Rewrite interval_hash() so that the hashcodes are equal for values that interval_eq() considers equal. I'm not sure how that fundamental requirement escaped us through multiple revisions of this hash function, but there it is; it's been wrong since interval_hash was first written for PG 7.1. Per bug #4748 from Roman Kononov. Backpatch to all supported releases. This patch changes the contents of hash indexes for interval columns. That's no particular problem for PG 8.4, since we've broken on-disk compatibility of hash indexes already; but it will require a migration warning note in the next minor releases of all existing branches: "if you have any hash indexes on columns of type interval, REINDEX them after updating". --- src/backend/utils/adt/timestamp.c | 61 ++++++++++++++++------------------ src/test/regress/expected/interval.out | 13 ++++++++ src/test/regress/sql/interval.sql | 4 +++ 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 21c7beea62..6956497968 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -2041,27 +2041,30 @@ timestamptz_cmp_timestamp(PG_FUNCTION_ARGS) * * collate invalid interval at the end */ -static int -interval_cmp_internal(Interval *interval1, Interval *interval2) +static inline TimeOffset +interval_cmp_value(const Interval *interval) { - TimeOffset span1, - span2; + TimeOffset span; - span1 = interval1->time; - span2 = interval2->time; + span = interval->time; #ifdef HAVE_INT64_TIMESTAMP - span1 += interval1->month * INT64CONST(30) * USECS_PER_DAY; - span1 += interval1->day * INT64CONST(24) * USECS_PER_HOUR; - span2 += interval2->month * INT64CONST(30) * USECS_PER_DAY; - span2 += interval2->day * INT64CONST(24) * USECS_PER_HOUR; + span += interval->month * INT64CONST(30) * USECS_PER_DAY; + span += interval->day * INT64CONST(24) * USECS_PER_HOUR; #else - span1 += interval1->month * ((double) DAYS_PER_MONTH * SECS_PER_DAY); - span1 += interval1->day * ((double) HOURS_PER_DAY * SECS_PER_HOUR); - span2 += interval2->month * ((double) DAYS_PER_MONTH * SECS_PER_DAY); - span2 += interval2->day * ((double) HOURS_PER_DAY * SECS_PER_HOUR); + span += interval->month * ((double) DAYS_PER_MONTH * SECS_PER_DAY); + span += interval->day * ((double) HOURS_PER_DAY * SECS_PER_HOUR); #endif + return span; +} + +static int +interval_cmp_internal(Interval *interval1, Interval *interval2) +{ + TimeOffset span1 = interval_cmp_value(interval1); + TimeOffset span2 = interval_cmp_value(interval2); + return ((span1 < span2) ? -1 : (span1 > span2) ? 1 : 0); } @@ -2128,32 +2131,24 @@ interval_cmp(PG_FUNCTION_ARGS) PG_RETURN_INT32(interval_cmp_internal(interval1, interval2)); } +/* + * Hashing for intervals + * + * We must produce equal hashvals for values that interval_cmp_internal() + * considers equal. So, compute the net span the same way it does, + * and then hash that, using either int64 or float8 hashing. + */ Datum interval_hash(PG_FUNCTION_ARGS) { - Interval *key = PG_GETARG_INTERVAL_P(0); - uint32 thash; - uint32 mhash; + Interval *interval = PG_GETARG_INTERVAL_P(0); + TimeOffset span = interval_cmp_value(interval); - /* - * To avoid any problems with padding bytes in the struct, we figure the - * field hashes separately and XOR them. This also provides a convenient - * framework for dealing with the fact that the time field might be either - * double or int64. - */ #ifdef HAVE_INT64_TIMESTAMP - thash = DatumGetUInt32(DirectFunctionCall1(hashint8, - Int64GetDatumFast(key->time))); + return DirectFunctionCall1(hashint8, Int64GetDatumFast(span)); #else - thash = DatumGetUInt32(DirectFunctionCall1(hashfloat8, - Float8GetDatumFast(key->time))); + return DirectFunctionCall1(hashfloat8, Float8GetDatumFast(span)); #endif - thash ^= DatumGetUInt32(hash_uint32(key->day)); - /* Shift so "k days" and "k months" don't hash to the same thing */ - mhash = DatumGetUInt32(hash_uint32(key->month)); - thash ^= mhash << 24; - thash ^= mhash >> 8; - PG_RETURN_UINT32(thash); } /* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator. diff --git a/src/test/regress/expected/interval.out b/src/test/regress/expected/interval.out index f788e369e7..9effe61f42 100644 --- a/src/test/regress/expected/interval.out +++ b/src/test/regress/expected/interval.out @@ -717,3 +717,16 @@ select interval '0:0:0.7', interval '@ 0.70 secs', interval '0.7 seconds'; @ 0.7 secs | @ 0.7 secs | @ 0.7 secs (1 row) +-- check that '30 days' equals '1 month' according to the hash function +select '30 days'::interval = '1 month'::interval as t; + t +--- + t +(1 row) + +select interval_hash('30 days'::interval) = interval_hash('1 month'::interval) as t; + t +--- + t +(1 row) + diff --git a/src/test/regress/sql/interval.sql b/src/test/regress/sql/interval.sql index 0498739b83..2a903a940d 100644 --- a/src/test/regress/sql/interval.sql +++ b/src/test/regress/sql/interval.sql @@ -241,3 +241,7 @@ SET IntervalStyle to postgres_verbose; select interval '-10 mons -3 days +03:55:06.70'; select interval '1 year 2 mons 3 days 04:05:06.699999'; select interval '0:0:0.7', interval '@ 0.70 secs', interval '0.7 seconds'; + +-- check that '30 days' equals '1 month' according to the hash function +select '30 days'::interval = '1 month'::interval as t; +select interval_hash('30 days'::interval) = interval_hash('1 month'::interval) as t; -- 2.11.4.GIT