Skip to content

Commit f956339

Browse files
Merge pull request #458 from MaheshGPai/mahesh_pr
Provide const_iter implementation for tdigest to iterate the centroids
2 parents ed7aee2 + 5be04f2 commit f956339

File tree

3 files changed

+112
-2
lines changed

3 files changed

+112
-2
lines changed

tdigest/include/tdigest.hpp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,16 +257,29 @@ class tdigest {
257257
*/
258258
static tdigest deserialize(const void* bytes, size_t size, const Allocator& allocator = Allocator());
259259

260+
class const_iterator;
261+
262+
/**
263+
* Iterator pointing to the first centroid in the sketch.
264+
* If the sketch is empty, the returned iterator must not be dereferenced or incremented.
265+
* @return iterator pointing to the first centroid in the sketch
266+
*/
267+
const_iterator begin() const;
268+
269+
/**
270+
* Iterator pointing to the past-the-end centroid in the sketch.
271+
* It does not point to any centroid, and must not be dereferenced or incremented.
272+
* @return iterator pointing to the past-the-end centroid in the sketch
273+
*/
274+
const_iterator end() const;
260275
private:
261276
bool reverse_merge_;
262277
uint16_t k_;
263-
uint16_t internal_k_;
264278
T min_;
265279
T max_;
266280
size_t centroids_capacity_;
267281
vector_centroid centroids_;
268282
uint64_t centroids_weight_;
269-
size_t buffer_capacity_;
270283
vector_t buffer_;
271284

272285
static const size_t BUFFER_MULTIPLIER = 4;
@@ -297,6 +310,27 @@ class tdigest {
297310
static inline void check_split_points(const T* values, uint32_t size);
298311
};
299312

313+
template<typename T, typename A>
314+
class tdigest<T, A>::const_iterator {
315+
public:
316+
using iterator_category = std::input_iterator_tag;
317+
using value_type = std::pair<const T&, const W>;
318+
using difference_type = void;
319+
using pointer = const return_value_holder<value_type>;
320+
using reference = const value_type;
321+
322+
const_iterator& operator++();
323+
const_iterator& operator++(int);
324+
bool operator==(const const_iterator& other) const;
325+
bool operator!=(const const_iterator& other) const;
326+
reference operator*() const;
327+
pointer operator->() const;
328+
private:
329+
friend class tdigest;
330+
uint32_t index_;
331+
vector_centroid centroids_;
332+
const_iterator(const tdigest& tdigest_, bool is_end);
333+
};
300334
} /* namespace datasketches */
301335

302336
#include "tdigest_impl.hpp"

tdigest/include/tdigest_impl.hpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,65 @@ void tdigest<T, A>::check_split_points(const T* values, uint32_t size) {
627627
}
628628
}
629629

630+
template <typename T, typename A>
631+
typename tdigest<T, A>::const_iterator tdigest<T, A>::begin() const {
632+
return tdigest<T, A>::const_iterator(*this, false);
633+
}
634+
635+
template <typename T, typename A>
636+
typename tdigest<T, A>::const_iterator tdigest<T, A>::end() const {
637+
return tdigest::const_iterator(*this, true);
638+
}
639+
640+
template<typename T, typename A>
641+
tdigest<T, A>::const_iterator::const_iterator(const tdigest& tdigest_, const bool is_end):
642+
centroids_(tdigest_.get_allocator())
643+
{
644+
// Create a copy of the tdigest to generate the centroids after processing the buffered values
645+
tdigest tmp(tdigest_);
646+
tmp.compress();
647+
centroids_.insert(centroids_.end(), tmp.centroids_.begin(), tmp.centroids_.end());
648+
649+
if (is_end) {
650+
index_ = centroids_.size();
651+
} else {
652+
index_ = 0;
653+
}
654+
}
655+
656+
template<typename T, typename A>
657+
typename tdigest<T, A>::const_iterator& tdigest<T, A>::const_iterator::operator++() {
658+
++index_;
659+
return *this;
660+
}
661+
662+
template<typename T, typename A>
663+
typename tdigest<T, A>::const_iterator& tdigest<T, A>::const_iterator::operator++(int) {
664+
const_iterator tmp(*this);
665+
operator++();
666+
return tmp;
667+
}
668+
669+
template<typename T, typename A>
670+
bool tdigest<T, A>::const_iterator::operator==(const const_iterator& other) const {
671+
return index_ == other.index_;
672+
}
673+
674+
template<typename T, typename A>
675+
bool tdigest<T, A>::const_iterator::operator!=(const const_iterator& other) const {
676+
return !operator==(other);
677+
}
678+
679+
template<typename T, typename A>
680+
auto tdigest<T, A>::const_iterator::operator*() const -> reference {
681+
return value_type(centroids_[index_].get_mean(), centroids_[index_].get_weight());
682+
}
683+
684+
template<typename T, typename A>
685+
auto tdigest<T, A>::const_iterator::operator->() const -> pointer {
686+
return **this;
687+
}
688+
630689
} /* namespace datasketches */
631690

632691
#endif // _TDIGEST_IMPL_HPP_

tdigest/test/tdigest_test.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,4 +453,21 @@ TEST_CASE("deserialize from reference implementation bytes float", "[tdigest]")
453453
REQUIRE(td.get_rank(n) == 1);
454454
}
455455

456+
// Iterating a sketch must visit every centroid, and the visited weights must add up
// to the total weight reported by the sketch.
TEST_CASE("iterate centroids", "[tdigest]") {
  tdigest_double td(100);
  for (int value = 0; value < 10; ++value) td.update(value);

  int centroid_count = 0;
  uint64_t total_weight = 0;
  for (const auto& entry: td) {
    ++centroid_count;
    total_weight += entry.second; // second element of the pair is the centroid weight
  }

  // The updates above are expected to still be buffered, so this also checks
  // that buffered values are reflected in the iterated centroids
  REQUIRE(centroid_count == 10);
  REQUIRE(td.get_total_weight() == total_weight);
}
472+
456473
} /* namespace datasketches */

0 commit comments

Comments
 (0)