From 0ec271ded31ff8e74f66c145035b40048561df75 Mon Sep 17 00:00:00 2001 From: Elton Date: Thu, 10 Aug 2023 15:29:32 +0200 Subject: [PATCH 1/4] Fix division by zero in WeightedRandomStreamRanker --- .../StreamRankers/WeightedRandomStreamRanker.php | 8 ++++++-- .../StreamRankers/WeightedRandomStreamRankerTest.php | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php b/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php index 5fc2d17..5b3e9bb 100644 --- a/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php +++ b/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php @@ -51,8 +51,12 @@ protected function rank_inner(array $stream_elements, StreamTracer $tracer = nul foreach ($valid_elements as $element) { /** @var RecommendationLeafStreamElementTrait $original_element */ $original_element = $element->get_original_element(); - // calculate sampling score - $r = pow(mt_rand() / $max_rand, (1 / $original_element->get_score())); + if ($original_element->get_score() === 0.0) { + $r = 0; + } else { + // calculate sampling score + $r = pow(mt_rand() / $max_rand, (1 / $original_element->get_score())); + } $H[strval($r)] = $element; } // sort by key in descending order diff --git a/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php b/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php index fcdbc01..84b4207 100644 --- a/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php +++ b/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php @@ -105,12 +105,14 @@ public function test_ranker_with_stream_elements(): void 1234 => 1.0, 2345 => 2.0, 3456 => 3.0, + 4567 => 0.0, ]; $stream_elements = $this->build_blog_stream_elements($bid2score); mt_srand(0); $ranked_elements = $this->ranker->rank($stream_elements); mt_srand(0); $weighted_score = array_map(function ($score) { + if ($score === 0.0) return 0; return pow(mt_rand() / mt_getrandmax(), (1 / $score)); }, $bid2score); // sort by value in descending order From 038a543a04d28b1bf860ecb45391701d5d73349f Mon Sep 17 00:00:00 2001 From: Elton Date: Thu, 10 Aug 2023 15:51:00 +0200 Subject: [PATCH 2/4] Add braces to if statement --- .../StreamRankers/WeightedRandomStreamRankerTest.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php b/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php index 84b4207..9435c34 100644 --- a/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php +++ b/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php @@ -112,7 +112,9 @@ public function test_ranker_with_stream_elements(): void $ranked_elements = $this->ranker->rank($stream_elements); mt_srand(0); $weighted_score = array_map(function ($score) { - if ($score === 0.0) return 0; + if ($score === 0.0) { + return 0; + } return pow(mt_rand() / mt_getrandmax(), (1 / $score)); }, $bid2score); // sort by value in descending order From c877a55aded7fbcece4cd8ab3c6bd7f7ecb58bec Mon Sep 17 00:00:00 2001 From: lucilastancato Date: Thu, 17 Aug 2023 15:23:19 -0300 Subject: [PATCH 3/4] dont store 2 elements on the same key if the key is the same --- .../WeightedRandomStreamRanker.php | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php b/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php index 5b3e9bb..2684ca0 100644 --- a/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php +++ b/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php @@ -51,14 +51,23 @@ protected function rank_inner(array $stream_elements, StreamTracer $tracer = nul foreach ($valid_elements as $element) { /** @var RecommendationLeafStreamElementTrait $original_element */ $original_element = $element->get_original_element(); - if ($original_element->get_score() === 0.0) { - $r = 0; - } else { - // calculate sampling score - $r = pow(mt_rand() / $max_rand, (1 / $original_element->get_score())); + $score = $original_element->get_score(); + if ($score == 0.0) { + $score = 0.001; + } + // calculate sampling score + $r = pow(mt_rand() / $max_rand, (1 / $score)); + + // store the element in $H, using $r as key. + $key = strval($r); + if (array_key_exists($key, $H)) { + // We don't want to replace an element that was previously added to $H. + // so we append the element id, if the key already exists. + $key = sprintf('%s_%s', $key, $original_element->get_element_id()); } - $H[strval($r)] = $element; + $H[$key] = $element; } + // sort by key in descending order krsort($H); $ranked_elements = array_values($H); From e5b83ef8d70a8b6ee1e9f29ead572bb5e5b78abe Mon Sep 17 00:00:00 2001 From: lucilastancato Date: Thu, 17 Aug 2023 15:36:15 -0300 Subject: [PATCH 4/4] fix test --- .../WeightedRandomStreamRanker.php | 28 +++++++++++++------ .../WeightedRandomStreamRankerTest.php | 3 -- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php b/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php index 2684ca0..2dd17cc 100644 --- a/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php +++ b/lib/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRanker.php @@ -46,24 +46,17 @@ protected function rank_inner(array $stream_elements, StreamTracer $tracer = nul // weighted sampling array, $H: ['sampling score' => $stream_element] $H = []; - $max_rand = mt_getrandmax(); /** @var StreamElement $element */ foreach ($valid_elements as $element) { - /** @var RecommendationLeafStreamElementTrait $original_element */ - $original_element = $element->get_original_element(); - $score = $original_element->get_score(); - if ($score == 0.0) { - $score = 0.001; - } // calculate sampling score - $r = pow(mt_rand() / $max_rand, (1 / $score)); + $r = $this->get_element_random_score($element); // store the element in $H, using $r as key. $key = strval($r); if (array_key_exists($key, $H)) { // We don't want to replace an element that was previously added to $H. // so we append the element id, if the key already exists. - $key = sprintf('%s_%s', $key, $original_element->get_element_id()); + $key = sprintf('%s_%s', $key, $element->get_element_id()); } $H[$key] = $element; } @@ -74,6 +67,23 @@ protected function rank_inner(array $stream_elements, StreamTracer $tracer = nul return array_merge($ranked_elements, $not_valid_elements); } + /** + * @param StreamElement $element Stream element to rank randomly + * @return float|int|object + */ + protected function get_element_random_score(StreamElement $element) + { + /** @var RecommendationLeafStreamElementTrait $original_element */ + $original_element = $element->get_original_element(); + $max_rand = mt_getrandmax(); + $score = $original_element->get_score(); + if ($score == 0.0) { + $score = 0.001; + } + // calculate sampling score + return pow(mt_rand() / $max_rand, (1 / $score)); + } + /** * Selects stream elements that have a score * @param StreamElement[] $stream_elements Stream elements diff --git a/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php b/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php index 9435c34..01f8372 100644 --- a/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php +++ b/tests/unit/Tumblr/StreamBuilder/StreamRankers/WeightedRandomStreamRankerTest.php @@ -112,9 +112,6 @@ public function test_ranker_with_stream_elements(): void $ranked_elements = $this->ranker->rank($stream_elements); mt_srand(0); $weighted_score = array_map(function ($score) { - if ($score === 0.0) { - return 0; - } return pow(mt_rand() / mt_getrandmax(), (1 / $score)); }, $bid2score); // sort by value in descending order