dev-lang/perl/files/perl-5.12.5-rehash-5.12.5.patch - mirrors/cros/chromiumos/overlays/portage-stable - Git at Google

 From f2a571dae7d70f7e3b59022834d8003ecd2df884 Mon Sep 17 00:00:00 2001
 From: Yves Orton <demerphq@gmail.com>
 Date: Tue, 12 Feb 2013 10:53:05 +0100
 Subject: [PATCH] Prevent premature hsplit() calls, and only trigger REHASH
  after hsplit()

 Triggering a hsplit due to long chain length allows an attacker
 to create a carefully chosen set of keys which can cause the hash
 to use 2 * (2**32) * sizeof(void *) bytes ram. AKA a DOS via memory
 exhaustion. Doing so also takes non trivial time.

 Eliminating this check, and only inspecting chain length after a
 normal hsplit() (triggered when keys>buckets) prevents the attack
 entirely, and makes such attacks relatively benign.

 (cherry picked from commit f1220d61455253b170e81427c9d0357831ca0fac)
 ---
  ext/Hash-Util-FieldHash/t/10_hash.t | 18 ++++++++++++++++--
  hv.c                                | 26 ++++++--------------------
  t/op/hash.t                         | 20 +++++++++++++++++---
  3 files changed, 39 insertions(+), 25 deletions(-)

 diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t
 index 2cfb4e8..d58f053 100644
 --- a/ext/Hash-Util-FieldHash/t/10_hash.t
 +++ b/ext/Hash-Util-FieldHash/t/10_hash.t
 @@ -38,15 +38,29 @@ use constant START     => "a";

  # some initial hash data
  fieldhash my %h2;
 -%h2 = map {$_ => 1} 'a'..'cc';
 +my $counter= "a";
 +$h2{$counter++}++ while $counter ne 'cd';

  ok (!Internals::HvREHASH(%h2),
      "starting with pre-populated non-pathological hash (rehash flag if off)");

  my @keys = get_keys(\%h2);
 +my $buckets= buckets(\%h2);
  $h2{$_}++ for @keys;
 +$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
  ok (Internals::HvREHASH(%h2),
 -    scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
 +    scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
 +
 +# returns the number of buckets in a hash
 +sub buckets {
 +    my $hr = shift;
 +    my $keys_buckets= scalar(%$hr);
 +    if ($keys_buckets=~m!/([0-9]+)\z!) {
 +        return 0+$1;
 +    } else {
 +        return 8;
 +    }
 +}

  sub get_keys {
      my $hr = shift;
 diff --git a/hv.c b/hv.c
 index 89c6456..8659678 100644
 --- a/hv.c
 +++ b/hv.c
 @@ -35,7 +35,8 @@ holds the key and hash value.
  #define PERL_HASH_INTERNAL_ACCESS
  #include "perl.h"

 -#define HV_MAX_LENGTH_BEFORE_SPLIT 14
 +#define HV_MAX_LENGTH_BEFORE_REHASH 14
 +#define SHOULD_DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */

  static const char S_strtab_error[]
      = "Cannot modify shared string table in hv_%s";
 @@ -818,23 +819,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
  	xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
  	if (!counter) {				/* initial entry? */
  	    xhv->xhv_fill++; /* HvFILL(hv)++ */
 -	} else if (xhv->xhv_keys > (IV)xhv->xhv_max) {
 +	} else if ( SHOULD_DO_HSPLIT(xhv) ) {
  	    hsplit(hv);
 -	} else if(!HvREHASH(hv)) {
 -	    U32 n_links = 1;
 -
 -	    while ((counter = HeNEXT(counter)))
 -		n_links++;
 -
 -	    if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) {
 -		/* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit
 -		   bucket splits on a rehashed hash, as we're not going to
 -		   split it again, and if someone is lucky (evil) enough to
 -		   get all the keys in one list they could exhaust our memory
 -		   as we repeatedly double the number of buckets on every
 -		   entry. Linear search feels a less worse thing to do.  */
 -		hsplit(hv);
 -	    }
  	}
      }

 @@ -1180,7 +1166,7 @@ S_hsplit(pTHX_ HV *hv)


      /* Pick your policy for "hashing isn't working" here:  */
 -    if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked?  */
 +    if (longest_chain <= HV_MAX_LENGTH_BEFORE_REHASH /* split worked?  */
  	|| HvREHASH(hv)) {
  	return;
      }
 @@ -2551,8 +2537,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags)
  	xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
  	if (!next) {			/* initial entry? */
  	    xhv->xhv_fill++; /* HvFILL(hv)++ */
 -	} else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
 -		hsplit(PL_strtab);
 +	} else if ( SHOULD_DO_HSPLIT(xhv) ) {
 +            hsplit(PL_strtab);
  	}
      }

 diff --git a/t/op/hash.t b/t/op/hash.t
 index 9bde518..45eb782 100644
 --- a/t/op/hash.t
 +++ b/t/op/hash.t
 @@ -39,22 +39,36 @@ use constant THRESHOLD => 14;
  use constant START     => "a";

  # some initial hash data
 -my %h2 = map {$_ => 1} 'a'..'cc';
 +my %h2;
 +my $counter= "a";
 +$h2{$counter++}++ while $counter ne 'cd';

  ok (!Internals::HvREHASH(%h2),
      "starting with pre-populated non-pathological hash (rehash flag if off)");

  my @keys = get_keys(\%h2);
 +my $buckets= buckets(\%h2);
  $h2{$_}++ for @keys;
 +$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
  ok (Internals::HvREHASH(%h2),
 -    scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
 +    scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
 +
 +# returns the number of buckets in a hash
 +sub buckets {
 +    my $hr = shift;
 +    my $keys_buckets= scalar(%$hr);
 +    if ($keys_buckets=~m!/([0-9]+)\z!) {
 +        return 0+$1;
 +    } else {
 +        return 8;
 +    }
 +}

  sub get_keys {
      my $hr = shift;

      # the minimum of bits required to mount the attack on a hash
      my $min_bits = log(THRESHOLD)/log(2);
 -
      # if the hash has already been populated with a significant amount
      # of entries the number of mask bits can be higher
      my $keys = scalar keys %$hr;
 --
 1.8.1.3
	From f2a571dae7d70f7e3b59022834d8003ecd2df884 Mon Sep 17 00:00:00 2001
	From: Yves Orton <demerphq@gmail.com>
	Date: Tue, 12 Feb 2013 10:53:05 +0100
	Subject: [PATCH] Prevent premature hsplit() calls, and only trigger REHASH
	after hsplit()

	Triggering a hsplit due to long chain length allows an attacker
	to create a carefully chosen set of keys which can cause the hash
	to use 2 * (2**32) * sizeof(void *) bytes ram. AKA a DOS via memory
	exhaustion. Doing so also takes non trivial time.

	Eliminating this check, and only inspecting chain length after a
	normal hsplit() (triggered when keys>buckets) prevents the attack
	entirely, and makes such attacks relatively benign.

	(cherry picked from commit f1220d61455253b170e81427c9d0357831ca0fac)
	---
	ext/Hash-Util-FieldHash/t/10_hash.t | 18 ++++++++++++++++--
	hv.c | 26 ++++++--------------------
	t/op/hash.t | 20 +++++++++++++++++---
	3 files changed, 39 insertions(+), 25 deletions(-)

	diff --git a/ext/Hash-Util-FieldHash/t/10_hash.t b/ext/Hash-Util-FieldHash/t/10_hash.t
	index 2cfb4e8..d58f053 100644
	--- a/ext/Hash-Util-FieldHash/t/10_hash.t
	+++ b/ext/Hash-Util-FieldHash/t/10_hash.t
	@@ -38,15 +38,29 @@ use constant START => "a";

	# some initial hash data
	fieldhash my %h2;
	-%h2 = map {$_ => 1} 'a'..'cc';
	+my $counter= "a";
	+$h2{$counter++}++ while $counter ne 'cd';

	ok (!Internals::HvREHASH(%h2),
	"starting with pre-populated non-pathological hash (rehash flag if off)");

	my @keys = get_keys(\%h2);
	+my $buckets= buckets(\%h2);
	$h2{$_}++ for @keys;
	+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
	ok (Internals::HvREHASH(%h2),
	- scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
	+ scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
	+
	+# returns the number of buckets in a hash
	+sub buckets {
	+ my $hr = shift;
	+ my $keys_buckets= scalar(%$hr);
	+ if ($keys_buckets=~m!/([0-9]+)\z!) {
	+ return 0+$1;
	+ } else {
	+ return 8;
	+ }
	+}

	sub get_keys {
	my $hr = shift;
	diff --git a/hv.c b/hv.c
	index 89c6456..8659678 100644
	--- a/hv.c
	+++ b/hv.c
	@@ -35,7 +35,8 @@ holds the key and hash value.
	#define PERL_HASH_INTERNAL_ACCESS
	#include "perl.h"

	-#define HV_MAX_LENGTH_BEFORE_SPLIT 14
	+#define HV_MAX_LENGTH_BEFORE_REHASH 14
	+#define SHOULD_DO_HSPLIT(xhv) ((xhv)->xhv_keys > (xhv)->xhv_max) /* HvTOTALKEYS(hv) > HvMAX(hv) */

	static const char S_strtab_error[]
	= "Cannot modify shared string table in hv_%s";
	@@ -818,23 +819,8 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, STRLEN klen,
	xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
	if (!counter) { /* initial entry? */
	xhv->xhv_fill++; /* HvFILL(hv)++ */
	- } else if (xhv->xhv_keys > (IV)xhv->xhv_max) {
	+ } else if ( SHOULD_DO_HSPLIT(xhv) ) {
	hsplit(hv);
	- } else if(!HvREHASH(hv)) {
	- U32 n_links = 1;
	-
	- while ((counter = HeNEXT(counter)))
	- n_links++;
	-
	- if (n_links > HV_MAX_LENGTH_BEFORE_SPLIT) {
	- /* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit
	- bucket splits on a rehashed hash, as we're not going to
	- split it again, and if someone is lucky (evil) enough to
	- get all the keys in one list they could exhaust our memory
	- as we repeatedly double the number of buckets on every
	- entry. Linear search feels a less worse thing to do. */
	- hsplit(hv);
	- }
	}
	}

	@@ -1180,7 +1166,7 @@ S_hsplit(pTHX_ HV *hv)


	/* Pick your policy for "hashing isn't working" here: */
	- if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */
	+ if (longest_chain <= HV_MAX_LENGTH_BEFORE_REHASH /* split worked? */
	|| HvREHASH(hv)) {
	return;
	}
	@@ -2551,8 +2537,8 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags)
	xhv->xhv_keys++; /* HvTOTALKEYS(hv)++ */
	if (!next) { /* initial entry? */
	xhv->xhv_fill++; /* HvFILL(hv)++ */
	- } else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
	- hsplit(PL_strtab);
	+ } else if ( SHOULD_DO_HSPLIT(xhv) ) {
	+ hsplit(PL_strtab);
	}
	}

	diff --git a/t/op/hash.t b/t/op/hash.t
	index 9bde518..45eb782 100644
	--- a/t/op/hash.t
	+++ b/t/op/hash.t
	@@ -39,22 +39,36 @@ use constant THRESHOLD => 14;
	use constant START => "a";

	# some initial hash data
	-my %h2 = map {$_ => 1} 'a'..'cc';
	+my %h2;
	+my $counter= "a";
	+$h2{$counter++}++ while $counter ne 'cd';

	ok (!Internals::HvREHASH(%h2),
	"starting with pre-populated non-pathological hash (rehash flag if off)");

	my @keys = get_keys(\%h2);
	+my $buckets= buckets(\%h2);
	$h2{$_}++ for @keys;
	+$h2{$counter++}++ while buckets(\%h2) == $buckets; # force a split
	ok (Internals::HvREHASH(%h2),
	- scalar(@keys) . " colliding into the same bucket keys are triggering rehash");
	+ scalar(@keys) . " colliding into the same bucket keys are triggering rehash after split");
	+
	+# returns the number of buckets in a hash
	+sub buckets {
	+ my $hr = shift;
	+ my $keys_buckets= scalar(%$hr);
	+ if ($keys_buckets=~m!/([0-9]+)\z!) {
	+ return 0+$1;
	+ } else {
	+ return 8;
	+ }
	+}

	sub get_keys {
	my $hr = shift;

	# the minimum of bits required to mount the attack on a hash
	my $min_bits = log(THRESHOLD)/log(2);
	-
	# if the hash has already been populated with a significant amount
	# of entries the number of mask bits can be higher
	my $keys = scalar keys %$hr;
	--
	1.8.1.3