| From f2a571dae7d70f7e3b59022834d8003ecd2df884 Mon Sep 17 00:00:00 2001 |
| From: Yves Orton <demerphq@gmail.com> |
| Date: Tue, 12 Feb 2013 10:53:05 +0100 |
| Subject: [PATCH] Prevent premature hsplit() calls, and only trigger REHASH |
| after hsplit() |
| |
| Triggering an hsplit due to long chain length allows an attacker |
| to create a carefully chosen set of keys which can cause the hash |
| to use 2 * (2**32) * sizeof(void *) bytes of RAM, i.e. a DoS via |
| memory exhaustion. Doing so also takes non-trivial time. |
| |
| Eliminating this check, and only inspecting chain length after a |
| normal hsplit() (triggered when keys > buckets), prevents the |
| attack entirely and makes such attacks relatively benign. |
| |
| (cherry picked from commit f1220d61455253b170e81427c9d0357831ca0fac) |
| --- |
| ext/Hash-Util-FieldHash/t/10_hash.t | 18 ++++++++++++++++-- |
| hv.c | 26 ++++++-------------------- |
| t/op/hash.t | 20 +++++++++++++++++--- |
| 3 files changed, 39 insertions(+), 25 deletions(-) |
| |
| diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t |
| index 2cfb4e8..d58f053 100644 |
| --- a/ext/Hash-Util-FieldHash/t/10_hash.t |
| +++ b/ext/Hash-Util-FieldHash/t/10_hash.t |
| @@ -38,15 +38,29 @@ use constant START => "a"; |
| |
| # some initial hash data |
| fieldhash my %h2; |
| -%h2 = map {$_ => 1} 'a'..'cc'; |
| +my $counter= "a"; |
| +$h2{$counter++}++ while $counter ne 'cd'; |
| |
| ok (!Internals::HvREHASH(%h2), |
| "starting with pre-populated non-pathological hash (rehash flag if off)"); |
| |
| my @keys = get_keys(\%h2); |
| +my $buckets= buckets(\%h2); |
| $h2{$_}++ for @keys; |
| +$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split |
| ok (Internals::HvREHASH(%h2), |
| - scalar(@keys) . " colliding into the same bucket keys are triggering rehash"); |
| + scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split"); |
| + |
| +# returns the number of buckets in a hash |
| +sub buckets { |
| + my $hr = shift; |
| + my $keys_buckets= scalar(%$hr); |
| + if ($keys_buckets=~m!/([0-9]+)\z!) { |
| + return 0+$1; |
| + } else { |
| + return 8; |
| + } |
| +} |
| |
| sub get_keys { |
| my $hr = shift; |
| diff --git a/hv.c b/hv.c |
| index 89c6456..8659678 100644 |
| --- a/hv.c |
| +++ b/hv.c |
| @@ -35,7 +35,8 @@ holds the key and hash value. |
| #define PERL_HASH_INTERNAL_ACCESS |
| #include "perl.h" |
| |
| -#define HV_MAX_LENGTH_BEFORE_SPLIT 14 |
| +#define HV_MAX_LENGTH_BEFORE_REHASH 14 |
| +#define SHOULD_DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */ |
| |
| static const char S_strtab_error[] |
| = "Cannot modify shared string table in hv_%s"; |
| @@ -818,23 +819,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen, |
| xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ |
| if (!counter) { /* initial entry? */ |
| xhv->xhv_fill++; /* HvFILL(hv)++ */ |
| - } else if (xhv->xhv_keys > (IV)xhv->xhv_max) { |
| + } else if ( SHOULD_DO_HSPLIT(xhv) ) { |
| hsplit(hv); |
| - } else if(!HvREHASH(hv)) { |
| - U32 n_links = 1; |
| - |
| - while ((counter = HeNEXT(counter))) |
| - n_links++; |
| - |
| - if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) { |
| - /* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit |
| - bucket splits on a rehashed hash, as we're not going to |
| - split it again, and if someone is lucky (evil) enough to |
| - get all the keys in one list they could exhaust our memory |
| - as we repeatedly double the number of buckets on every |
| - entry. Linear search feels a less worse thing to do. */ |
| - hsplit(hv); |
| - } |
| } |
| } |
| |
| @@ -1180,7 +1166,7 @@ S_hsplit(pTHX_ HV *hv) |
| |
| |
| /* Pick your policy for "hashing isn't working" here: */ |
| - if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */ |
| + if (longest_chain <= HV_MAX_LENGTH_BEFORE_REHASH /* split worked? */ |
| || HvREHASH(hv)) { |
| return; |
| } |
| @@ -2551,8 +2537,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags) |
| xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */ |
| if (!next) { /* initial entry? */ |
| xhv->xhv_fill++; /* HvFILL(hv)++ */ |
| - } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) { |
| - hsplit(PL_strtab); |
| + } else if ( SHOULD_DO_HSPLIT(xhv) ) { |
| + hsplit(PL_strtab); |
| } |
| } |
| |
| diff --git a/t/op/hash.t b/t/op/hash.t |
| index 9bde518..45eb782 100644 |
| --- a/t/op/hash.t |
| +++ b/t/op/hash.t |
| @@ -39,22 +39,36 @@ use constant THRESHOLD => 14; |
| use constant START => "a"; |
| |
| # some initial hash data |
| -my %h2 = map {$_ => 1} 'a'..'cc'; |
| +my %h2; |
| +my $counter= "a"; |
| +$h2{$counter++}++ while $counter ne 'cd'; |
| |
| ok (!Internals::HvREHASH(%h2), |
| "starting with pre-populated non-pathological hash (rehash flag if off)"); |
| |
| my @keys = get_keys(\%h2); |
| +my $buckets= buckets(\%h2); |
| $h2{$_}++ for @keys; |
| +$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split |
| ok (Internals::HvREHASH(%h2), |
| - scalar(@keys) . " colliding into the same bucket keys are triggering rehash"); |
| + scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split"); |
| + |
| +# returns the number of buckets in a hash |
| +sub buckets { |
| + my $hr = shift; |
| + my $keys_buckets= scalar(%$hr); |
| + if ($keys_buckets=~m!/([0-9]+)\z!) { |
| + return 0+$1; |
| + } else { |
| + return 8; |
| + } |
| +} |
| |
| sub get_keys { |
| my $hr = shift; |
| |
| # the minimum of bits required to mount the attack on a hash |
| my $min_bits = log(THRESHOLD)/log(2); |
| - |
| # if the hash has already been populated with a significant amount |
| # of entries the number of mask bits can be higher |
| my $keys = scalar keys %$hr; |
| -- |
| 1.8.1.3 |
| |