From 041e815d8f422311e7e0b99d52d587c61c7ac02a Mon Sep 17 00:00:00 2001
From: Siddhant Deshmukh
Date: Mon, 21 Apr 2025 17:01:53 -0700
Subject: [PATCH 1/2] Histogram field indexing and unit tests

Signed-off-by: Siddhant Deshmukh
---
 .../index/mapper/HistogramFieldMapper.java    | 238 ++++++++++
 .../org/opensearch/indices/IndicesModule.java |   2 +
 .../opensearch/index/fielddata/Histogram.java |  31 ++
 .../mapper/HistogramFieldMapperTests.java     | 413 ++++++++++++++++++
 .../index/query/HistogramFieldQueryTests.java | 258 +++++++++++
 5 files changed, 942 insertions(+)
 create mode 100644 server/src/main/java/org/opensearch/index/mapper/HistogramFieldMapper.java
 create mode 100644 server/src/test/java/org/opensearch/index/fielddata/Histogram.java
 create mode 100644 server/src/test/java/org/opensearch/index/mapper/HistogramFieldMapperTests.java
 create mode 100644 server/src/test/java/org/opensearch/index/query/HistogramFieldQueryTests.java

diff --git a/server/src/main/java/org/opensearch/index/mapper/HistogramFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/HistogramFieldMapper.java
new file mode 100644
index 0000000000000..1503791c57f69
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/mapper/HistogramFieldMapper.java
@@ -0,0 +1,238 @@
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.core.xcontent.XContentParser;
+import org.opensearch.index.fielddata.IndexFieldData;
+import org.opensearch.index.fielddata.plain.BytesBinaryIndexFieldData;
+import org.opensearch.index.query.QueryShardContext;
+import org.opensearch.search.DocValueFormat;
+import org.opensearch.search.aggregations.support.CoreValuesSourceType;
+import org.opensearch.search.lookup.SearchLookup;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.time.ZoneId;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Supplier;
+
+public class HistogramFieldMapper extends ParametrizedFieldMapper {
+    public static final String CONTENT_TYPE = "histogram";
+    public static final String VALUES_FIELD = "values";
+    public static final String COUNTS_FIELD = "counts";
+
+    private static final FieldType FIELD_TYPE = new FieldType();
+    static {
+        FIELD_TYPE.setTokenized(false);
+        FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
+        FIELD_TYPE.setDocValuesType(DocValuesType.BINARY);
+        FIELD_TYPE.freeze();
+    }
+
+    public static class Builder extends ParametrizedFieldMapper.Builder {
+        private final Parameter<Boolean> docValues = Parameter.docValuesParam(m -> toType(m).hasDocValues, true);
+        private final Parameter<Boolean> indexed = Parameter.indexParam(m -> toType(m).indexed, true);
+        private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).stored, false);
+        private final Parameter<Map<String, String>> meta = Parameter.metaParam();
+
+        public Builder(String name) {
+            super(name);
+        }
+
+        @Override
+        protected List<Parameter<?>> getParameters() {
+            return List.of(docValues, indexed, stored, meta);
+        }
+
+        @Override
+        public HistogramFieldMapper build(BuilderContext context) {
+            return new HistogramFieldMapper(
+                name,
+                new HistogramFieldType(buildFullName(context), meta.getValue()),
+                multiFieldsBuilder.build(this, context),
+                copyTo.build(),
+                indexed.getValue(),
+                docValues.getValue(),
+                stored.getValue()
+            );
+        }
+    }
+
+    public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n));
+
+    private final boolean indexed;
+    private final boolean hasDocValues;
+    private final boolean stored;
+
+    protected HistogramFieldMapper(String simpleName,
+                                   MappedFieldType mappedFieldType,
+                                   MultiFields multiFields,
+                                   CopyTo copyTo,
+                                   boolean indexed,
+                                   boolean hasDocValues,
+                                   boolean stored) {
+        super(simpleName, mappedFieldType, multiFields, copyTo);
+        this.indexed = indexed;
+        this.hasDocValues = hasDocValues;
+        this.stored = stored;
+    }
+
+    private static HistogramFieldMapper toType(FieldMapper mapper) {
+        return (HistogramFieldMapper) mapper;
+    }
+
+    @Override
+    public ParametrizedFieldMapper.Builder getMergeBuilder() {
+        return new Builder(simpleName()).init(this);
+    }
+
+    @Override
+    protected void parseCreateField(ParseContext context) throws IOException {
+        if (!hasDocValues) {
+            // Consume the histogram object so the parser is left in a consistent
+            // position for the rest of the document.
+            context.parser().skipChildren();
+            return;
+        }
+
+        XContentParser parser = context.parser();
+        XContentParser.Token token = parser.currentToken();
+
+        if (token == XContentParser.Token.VALUE_NULL) {
+            return;
+        }
+
+        List<Double> values = new ArrayList<>();
+        List<Long> counts = new ArrayList<>();
+        String currentFieldName = null;
+
+        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+            if (token == XContentParser.Token.FIELD_NAME) {
+                currentFieldName = parser.currentName();
+            } else if (token == XContentParser.Token.START_ARRAY) {
+                if (VALUES_FIELD.equals(currentFieldName)) {
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                        if (token == XContentParser.Token.VALUE_NUMBER) {
+                            values.add(parser.doubleValue());
+                        } else {
+                            throw new IllegalArgumentException("values must be numbers");
+                        }
+                    }
+                } else if (COUNTS_FIELD.equals(currentFieldName)) {
+                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
+                        if (token == XContentParser.Token.VALUE_NUMBER) {
+                            // Check if the number is a decimal
+                            if (parser.numberType() == XContentParser.NumberType.FLOAT
+                                || parser.numberType() == XContentParser.NumberType.DOUBLE) {
+                                throw new IllegalArgumentException("counts must be integers");
+                            }
+                            counts.add(parser.longValue());
+                        } else {
+                            throw new IllegalArgumentException("counts must be numbers");
+                        }
+                    }
+                }
+            }
+        }
+
+        if (values.isEmpty()) {
+            throw new IllegalArgumentException("values array cannot be empty");
+        }
+        if (counts.isEmpty()) {
+            throw new IllegalArgumentException("counts array cannot be empty");
+        }
+
+        validateHistogramData(values, counts);
+
+        byte[] encodedData = encodeHistogram(values, counts);
+        context.doc().add(new BinaryDocValuesField(fieldType().name(), new BytesRef(encodedData)));
+    }
+
+    private void validateHistogramData(List<Double> values, List<Long> counts) {
+        if (values.size() != counts.size()) {
+            throw new IllegalArgumentException("values and counts arrays must have the same length");
+        }
+
+        double previousValue = Double.NEGATIVE_INFINITY;
+        for (int i = 0; i < values.size(); i++) {
+            double value = values.get(i);
+            long count = counts.get(i);
+
+            if (value <= previousValue) {
+                throw new IllegalArgumentException("values must be in strictly ascending order");
+            }
+
+            if (count < 0) {
+                throw new IllegalArgumentException("counts must be non-negative");
+            }
+
+            previousValue = value;
+        }
+    }
+
+    private byte[] encodeHistogram(List<Double> values, List<Long> counts) {
+        // Binary layout: [int size] followed by (double value, long count) pairs,
+        // one pair per bucket, interleaved in bucket order.
+        ByteBuffer buffer = ByteBuffer.allocate(4 + (values.size() * (Double.BYTES + Long.BYTES)));
+        buffer.putInt(values.size());
+
+        for (int i = 0; i < values.size(); i++) {
+            buffer.putDouble(values.get(i));
+            buffer.putLong(counts.get(i));
+        }
+
+        return buffer.array();
+    }
+
+    @Override
+    protected String contentType() {
+        return CONTENT_TYPE;
+    }
+
+    public static class HistogramFieldType extends MappedFieldType {
+
+        public HistogramFieldType(String name, Map<String, String> meta) {
+            super(name, true, false, true, TextSearchInfo.NONE, meta);
+        }
+
+        @Override
+        public String typeName() {
+            return CONTENT_TYPE;
+        }
+
+        @Override
+        public Query termQuery(Object value, QueryShardContext context) {
+            throw new UnsupportedOperationException("Histogram fields do not support term queries");
+        }
+
+        @Override
+        public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
+            return new SourceValueFetcher(name(), context, format) {
+                @Override
+                protected Object parseSourceValue(Object value) {
+                    return value;
+                }
+            };
+        }
+
+        @Override
+        public DocValueFormat docValueFormat(String format, ZoneId timeZone) {
+            if (format != null) {
+                throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support custom formats");
+            }
+            if (timeZone != null) {
+                throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] does not support custom time zones");
+            }
+            return DocValueFormat.BINARY;
+        }
+
+        @Override
+        public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier<SearchLookup> searchLookup) {
+            failIfNoDocValues();
+            return new BytesBinaryIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java
index 2192872c6c752..f30ca0a65f9b0 100644
--- a/server/src/main/java/org/opensearch/indices/IndicesModule.java
+++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java
@@ -56,6 +56,7 @@ import org.opensearch.index.mapper.FieldNamesFieldMapper;
 import org.opensearch.index.mapper.FlatObjectFieldMapper;
 import org.opensearch.index.mapper.GeoPointFieldMapper;
+import org.opensearch.index.mapper.HistogramFieldMapper;
 import org.opensearch.index.mapper.IdFieldMapper;
 import org.opensearch.index.mapper.IgnoredFieldMapper;
 import org.opensearch.index.mapper.IndexFieldMapper;
@@ -180,6 +181,7 @@ public static Map<String, Mapper.TypeParser> getMappers(List<MapperPlugin> mappe
         mappers.put(StarTreeMapper.CONTENT_TYPE, new StarTreeMapper.TypeParser());
         mappers.put(SemanticVersionFieldMapper.CONTENT_TYPE, SemanticVersionFieldMapper.PARSER);
         mappers.put(ContextAwareGroupingFieldMapper.CONTENT_TYPE, ContextAwareGroupingFieldMapper.PARSER);
+        mappers.put(HistogramFieldMapper.CONTENT_TYPE, HistogramFieldMapper.PARSER);
 
         for (MapperPlugin mapperPlugin : mapperPlugins) {
             for (Map.Entry<String, Mapper.TypeParser> entry : mapperPlugin.getMappers().entrySet()) {
diff --git a/server/src/test/java/org/opensearch/index/fielddata/Histogram.java b/server/src/test/java/org/opensearch/index/fielddata/Histogram.java
new file mode 100644
index 0000000000000..ee17e17e05ccd
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/fielddata/Histogram.java
@@ -0,0 +1,31 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.fielddata;
+
+public class Histogram {
+    private final double[] values;
+    private final long[] counts;
+
+    public Histogram(double[] values, long[] counts) {
+        this.values = values;
+        this.counts = counts;
+    }
+
+    public double[] getValues() {
+        return values;
+    }
+
+    public long[] getCounts() {
+        return counts;
+    }
+
+    public int getSize() {
+        return values.length;
+    }
+}
diff --git a/server/src/test/java/org/opensearch/index/mapper/HistogramFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/HistogramFieldMapperTests.java
new file mode 100644
index 0000000000000..269b70d9137ff
--- /dev/null
+++ b/server/src/test/java/org/opensearch/index/mapper/HistogramFieldMapperTests.java
@@ -0,0 +1,413 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.common.xcontent.XContentType;
+import org.opensearch.common.xcontent.json.JsonXContent;
+import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.xcontent.ToXContent;
+import org.opensearch.core.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
+
+/**
+ * Basic unit test for {@link HistogramFieldMapper}.
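+ * Covers document parsing, input validation, and the binary doc-values encoding.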
+ */ +public class HistogramFieldMapperTests extends MapperTestCase { + + @Override + protected void minimalMapping(XContentBuilder b) throws IOException { + b.field("type", "histogram"); + } + + @Override + protected void registerParameters(ParameterChecker checker) throws IOException { + checker.registerConflictCheck("doc_values", b -> b.field("doc_values", false)); + checker.registerConflictCheck("index", b -> b.field("index", false)); + checker.registerConflictCheck("store", b -> b.field("store", true)); + } + + @Override + protected void writeFieldValue(XContentBuilder builder) throws IOException { + builder.startObject() + .array("values", 0.1, 0.2, 0.3) + .array("counts", 3L, 7L, 23L) + .endObject(); + } + + public void testParserPresent() throws IOException { + MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping)); + Mapper.TypeParser parser = mapperService.mapperRegistry.getMapperParsers().get("histogram"); + assertNotNull("Histogram type parser should be registered", parser); + assertTrue("Type parser should be an instance of HistogramFieldMapper.TypeParser", + parser instanceof HistogramFieldMapper.TypeParser); + } + + public void testHistogramValue() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + XContentBuilder source = JsonXContent.contentBuilder() + .startObject() + .startObject("field") + .array("values", 0.1, 0.2, 0.3) + .array("counts", 3L, 7L, 23L) + .endObject() + .endObject(); + + ParsedDocument doc = mapper.parse(new SourceToParse("test", "1", + BytesReference.bytes(source), XContentType.JSON)); + + IndexableField field = doc.rootDoc().getField("field"); + assertThat("Histogram field should not be null", field, notNullValue()); + assertTrue("Field should be a BinaryDocValuesField", field instanceof BinaryDocValuesField); + + BytesRef bytesRef = field.binaryValue(); + ByteBuffer buffer = ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length); + + int size = buffer.getInt(); + assertEquals("Wrong number of values", 3, size); + assertEquals("First value wrong", 0.1, buffer.getDouble(), 0.001); + assertEquals("First count wrong", 3L, buffer.getLong()); + assertEquals("Second value wrong", 0.2, buffer.getDouble(), 0.001); + assertEquals("Second count wrong", 7L, buffer.getLong()); + assertEquals("Third value wrong", 0.3, buffer.getDouble(), 0.001); + assertEquals("Third count wrong", 23L, buffer.getLong()); + } + + public void testBothArraysEmpty() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + MapperParsingException e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .startArray("values").endArray() + .startArray("counts").endArray() + .endObject())); + }); + + assertThat(e.getMessage(), + containsString("failed to parse field [field] of type [histogram]")); + + Throwable cause = e.getCause(); + assertNotNull("Cause should not be null", cause); + assertThat(cause.getMessage(), containsString("values array cannot be empty")); + } + + public void testEmptyValuesArray() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + MapperParsingException e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .startArray("values").endArray() + .array("counts", 1L, 2L, 3L) + .endObject())); + }); + + assertThat(e.getMessage(), + containsString("failed to parse field 
[field] of type [histogram]")); + + Throwable cause = e.getCause(); + assertNotNull("Cause should not be null", cause); + assertThat(cause.getMessage(), containsString("values array cannot be empty")); + } + + public void testEmptyCountsArray() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + MapperParsingException e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .array("values", 0.1, 0.2, 0.3) + .startArray("counts").endArray() + .endObject())); + }); + + assertThat(e.getMessage(), + containsString("failed to parse field [field] of type [histogram]")); + + Throwable cause = e.getCause(); + assertNotNull("Cause should not be null", cause); + assertThat(cause.getMessage(), containsString("counts array cannot be empty")); + } + + public void testMissingArrays() throws Exception { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + MapperParsingException e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .endObject())); + }); + + assertThat(e.getMessage(), + containsString("failed to parse field [field] of type [histogram]")); + + Throwable cause = e.getCause(); + assertNotNull("Cause should not be null", cause); + assertThat(cause.getMessage(), containsString("values array cannot be empty")); + } + + public void testMultipleHistogramFields() throws IOException { + // Test document with multiple histogram fields + XContentBuilder mapping = XContentBuilder.builder(JsonXContent.jsonXContent) + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("histogram1") + .field("type", "histogram") + .endObject() + .startObject("histogram2") + .field("type", "histogram") + .endObject() + .endObject() + .endObject() + .endObject(); + + DocumentMapper mapper = createDocumentMapper(mapping); + + ParsedDocument doc = mapper.parse(source(b -> b + .startObject("histogram1") + .array("values", 0.1, 0.2, 0.3) + .array("counts", 3, 7, 5) + .endObject() + .startObject("histogram2") + .array("values", 1.0, 2.0, 3.0) + .array("counts", 10, 20, 30) + .endObject())); + + assertNotNull(doc.rootDoc().getField("histogram1")); + assertNotNull(doc.rootDoc().getField("histogram2")); + } + + public void testHistogramWithLargeValues() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + ParsedDocument doc = mapper.parse(source(b -> b.startObject("field") + .array("values", 1000.0, 2000.0, 3000.0) + .array("counts", 1000000L, 2000000L, 3000000L) + .endObject())); + + IndexableField field = doc.rootDoc().getField("field"); + assertNotNull(field); + + BytesRef bytesRef = field.binaryValue(); + ByteBuffer buffer = ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length); + + assertEquals(3, buffer.getInt()); + assertEquals(1000.0, buffer.getDouble(), 0.001); + assertEquals(1000000L, buffer.getLong()); + assertEquals(2000.0, buffer.getDouble(), 0.001); + assertEquals(2000000L, buffer.getLong()); + assertEquals(3000.0, buffer.getDouble(), 0.001); + assertEquals(3000000L, buffer.getLong()); + } + + public void testHistogramWithSingleValue() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + ParsedDocument doc = mapper.parse(source(b -> b.startObject("field") + .array("values", 1.0) + .array("counts", 100L) + .endObject())); + + IndexableField field = 
doc.rootDoc().getField("field"); + assertNotNull(field); + + BytesRef bytesRef = field.binaryValue(); + ByteBuffer buffer = ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length); + + assertEquals(1, buffer.getInt()); + assertEquals(1.0, buffer.getDouble(), 0.001); + assertEquals(100L, buffer.getLong()); + } + + public void testHistogramUpdate() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + // First document + ParsedDocument doc1 = mapper.parse(source(b -> b.startObject("field") + .array("values", 0.1, 0.2, 0.3) + .array("counts", 3, 7, 5) + .endObject())); + + // Updated document + ParsedDocument doc2 = mapper.parse(source(b -> b.startObject("field") + .array("values", 0.1, 0.2, 0.3) + .array("counts", 4, 8, 6) + .endObject())); + + assertNotNull(doc1.rootDoc().getField("field")); + assertNotNull(doc2.rootDoc().getField("field")); + assertNotEquals( + doc1.rootDoc().getField("field").binaryValue(), + doc2.rootDoc().getField("field").binaryValue() + ); + } + + public void testHistogramWithVariousInvalidCounts() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + // Test decimal numbers + MapperParsingException e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .array("values", 0.1, 0.2, 0.3) + .startArray("counts") + .value(3.5d) + .endArray() + .endObject())); + }); + assertThat(e.getMessage(), containsString("failed to parse field [field] of type [histogram]")); + assertThat(e.getCause().getMessage(), containsString("counts must be integers")); + + // Test non-numbers + e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .array("values", 0.1, 0.2, 0.3) + .startArray("counts") + .value("not a number") + .endArray() + .endObject())); + }); + assertThat(e.getMessage(), containsString("failed to parse field [field] of type [histogram]")); + assertThat(e.getCause().getMessage(), containsString("counts must be numbers")); + + // Test negative numbers + e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .array("values", 0.1, 0.2, 0.3) + .startArray("counts") + .value(-1) + .value(-2) + .value(-3) + .endArray() + .endObject())); + }); + assertThat(e.getMessage(), containsString("failed to parse field [field] of type [histogram]")); + assertThat(e.getCause().getMessage(), containsString("counts must be non-negative")); + } + + public void testHistogramWithVariousInvalidValues() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + // Test non-numeric values + MapperParsingException e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .startArray("values") + .value("not a number") + .endArray() + .array("counts", 1) + .endObject())); + }); + assertThat(e.getMessage(), containsString("failed to parse field [field] of type [histogram]")); + assertThat(e.getCause().getMessage(), containsString("values must be numbers")); + + // Test infinite values + e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .startArray("values") + .value(Double.POSITIVE_INFINITY) + .value(0.2) + .value(0.3) + .endArray() + .array("counts", 1, 2, 3) + .endObject())); + }); + assertThat(e.getMessage(), containsString("failed to parse field [field] of type [histogram]")); 
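+        // Assumption: the JSON generator writes non-finite doubles (Infinity/NaN) in a
+        // non-numeric form, so the mapper rejects them with the generic "must be numbers" error.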
+ assertThat(e.getCause().getMessage(), containsString("values must be numbers")); + + // Test NaN values + e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .startArray("values") + .value(Double.NaN) + .value(0.2) + .value(0.3) + .endArray() + .array("counts", 1, 2, 3) + .endObject())); + }); + assertThat(e.getMessage(), containsString("failed to parse field [field] of type [histogram]")); + assertThat(e.getCause().getMessage(), containsString("values must be numbers")); + + // Test non-ascending values + e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .array("values", 0.3, 0.2, 0.1) // Descending order + .array("counts", 1, 2, 3) + .endObject())); + }); + assertThat(e.getMessage(), containsString("failed to parse field [field] of type [histogram]")); + assertThat(e.getCause().getMessage(), containsString("values must be in strictly ascending order")); + } + + public void testHistogramArraysNotSameLength() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + + // Test when counts array is shorter than values + MapperParsingException e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .array("values", 0.1, 0.2, 0.3) + .array("counts", 1, 2) // One count missing + .endObject())); + }); + assertThat(e.getMessage(), containsString("failed to parse field [field] of type [histogram]")); + assertThat(e.getCause().getMessage(), containsString("values and counts arrays must have the same length")); + + // Test when counts array is longer than values + e = expectThrows(MapperParsingException.class, () -> { + mapper.parse(source(b -> b.startObject("field") + .array("values", 0.1, 0.2) + .array("counts", 1, 2, 3) // Extra count + .endObject())); + }); + assertThat(e.getMessage(), containsString("failed to parse field [field] of type [histogram]")); + assertThat(e.getCause().getMessage(), containsString("values and counts arrays must have the same length")); + + // Test valid case with same length + ParsedDocument doc = mapper.parse(source(b -> b.startObject("field") + .array("values", 0.1, 0.2, 0.3) + .array("counts", 1, 2, 3) + .endObject())); + assertNotNull(doc.rootDoc().getField("field")); + } + + public void testHistogramFieldSerialization() throws IOException { + XContentBuilder mapping = XContentBuilder.builder(JsonXContent.jsonXContent) + .startObject() + .startObject("_doc") + .startObject("properties") + .startObject("histogram_field") + .field("type", "histogram") + .endObject() + .endObject() + .endObject() + .endObject(); + + DocumentMapper mapper = createDocumentMapper(mapping); + assertNotNull(mapper); + + Mapper fieldMapper = mapper.mappers().getMapper("histogram_field"); + assertNotNull("Histogram field mapper should not be null", fieldMapper); + + XContentBuilder builder = jsonBuilder().startObject(); + fieldMapper.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + + assertEquals("{\"histogram_field\":{\"type\":\"histogram\"}}", builder.toString()); + } +} diff --git a/server/src/test/java/org/opensearch/index/query/HistogramFieldQueryTests.java b/server/src/test/java/org/opensearch/index/query/HistogramFieldQueryTests.java new file mode 100644 index 0000000000000..0512c63726137 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/query/HistogramFieldQueryTests.java @@ -0,0 +1,258 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + 
* The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.opensearch.action.search.SearchResponse; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.search.aggregations.bucket.histogram.Histogram; +import org.opensearch.search.aggregations.metrics.Max; +import org.opensearch.search.aggregations.metrics.Min; +import org.opensearch.search.aggregations.metrics.Percentiles; +import org.opensearch.search.aggregations.metrics.Sum; +import org.opensearch.search.aggregations.metrics.ValueCount; +import org.opensearch.test.OpenSearchSingleNodeTestCase; +import org.opensearch.action.support.WriteRequest.RefreshPolicy; + +import java.util.List; + +import static org.opensearch.search.aggregations.AggregationBuilders.min; +import static org.opensearch.search.aggregations.AggregationBuilders.max; +import static org.opensearch.search.aggregations.AggregationBuilders.sum; +import static org.opensearch.search.aggregations.AggregationBuilders.count; +import static org.opensearch.search.aggregations.AggregationBuilders.percentiles; +import static org.opensearch.search.aggregations.AggregationBuilders.histogram; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertSearchResponse; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.notNullValue; + +public class HistogramFieldQueryTests extends OpenSearchSingleNodeTestCase { + + private static final String defaultHistogramField = "histogram_field_name"; + private static final String defaultIndexName = "test"; + + protected XContentBuilder createMapping() throws Exception { + return XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject(defaultHistogramField) + .field("type", "histogram") + .endObject() + .endObject() + .endObject(); + } + + public void testExistsQuery() throws Exception { + XContentBuilder mapping = createMapping(); + client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get(); + ensureGreen(); + + // Index document with histogram + client().prepareIndex(defaultIndexName) + .setId("1") + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject(defaultHistogramField) + .array("values", new double[]{0.1, 0.2, 0.3}) + .array("counts", new long[]{3, 7, 23}) + .endObject() + .endObject()) + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .get(); + + // Index document without histogram + client().prepareIndex(defaultIndexName) + .setId("2") + .setSource(XContentFactory.jsonBuilder() + .startObject() + .field("other_field", "value") + .endObject()) + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .get(); + + SearchResponse response = client().prepareSearch(defaultIndexName) + .setQuery(QueryBuilders.existsQuery(defaultHistogramField)) + .get(); + + assertSearchResponse(response); + assertHitCount(response, 1); + assertThat(response.getHits().getAt(0).getId(), equalTo("1")); + } + + public void testMinAggregation() throws Exception { + XContentBuilder mapping = createMapping(); + client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get(); + ensureGreen(); + + client().prepareIndex(defaultIndexName) + .setId("1") + 
.setSource(XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject(defaultHistogramField)
+                .array("values", new double[]{0.1, 0.2, 0.3})
+                .array("counts", new long[]{3, 7, 23})
+                .endObject()
+                .endObject())
+            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
+            .get();
+
+        SearchResponse response = client().prepareSearch(defaultIndexName)
+            .addAggregation(min("min_value").field(defaultHistogramField))
+            .get();
+
+        assertSearchResponse(response);
+        Min min = response.getAggregations().get("min_value");
+        assertThat(min.getValue(), equalTo(0.1));
+    }
+
+    public void testMaxAggregation() throws Exception {
+        XContentBuilder mapping = createMapping();
+        client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get();
+        ensureGreen();
+
+        client().prepareIndex(defaultIndexName)
+            .setId("1")
+            .setSource(XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject(defaultHistogramField)
+                .array("values", new double[]{0.1, 0.2, 0.3})
+                .array("counts", new long[]{3, 7, 23})
+                .endObject()
+                .endObject())
+            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
+            .get();
+
+        SearchResponse response = client().prepareSearch(defaultIndexName)
+            .addAggregation(max("max_value").field(defaultHistogramField))
+            .get();
+
+        assertSearchResponse(response);
+        Max max = response.getAggregations().get("max_value");
+        assertThat(max.getValue(), equalTo(0.3));
+    }
+
+    public void testSumAggregation() throws Exception {
+        XContentBuilder mapping = createMapping();
+        client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get();
+        ensureGreen();
+
+        client().prepareIndex(defaultIndexName)
+            .setId("1")
+            .setSource(XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject(defaultHistogramField)
+                .array("values", new double[]{0.1, 0.2, 0.3})
+                .array("counts", new long[]{3, 7, 23})
+                .endObject()
+                .endObject())
+            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
+            .get();
+
+        SearchResponse response = client().prepareSearch(defaultIndexName)
+            .addAggregation(sum("sum_value").field(defaultHistogramField))
+            .get();
+
+        assertSearchResponse(response);
+        Sum sum = response.getAggregations().get("sum_value");
+        // Expected sum: (0.1 * 3) + (0.2 * 7) + (0.3 * 23) = 0.3 + 1.4 + 6.9 = 8.6
+        assertEquals(8.6, sum.getValue(), 0.001);
+    }
+
+    public void testValueCountAggregation() throws Exception {
+        XContentBuilder mapping = createMapping();
+        client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get();
+        ensureGreen();
+
+        client().prepareIndex(defaultIndexName)
+            .setId("1")
+            .setSource(XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject(defaultHistogramField)
+                .array("values", new double[]{0.1, 0.2, 0.3})
+                .array("counts", new long[]{3, 7, 23})
+                .endObject()
+                .endObject())
+            .setRefreshPolicy(RefreshPolicy.IMMEDIATE)
+            .get();
+
+        SearchResponse response = client().prepareSearch(defaultIndexName)
+            .addAggregation(count("value_count").field(defaultHistogramField))
+            .get();
+
+        assertSearchResponse(response);
+        ValueCount valueCount = response.getAggregations().get("value_count");
+        assertThat(valueCount.getValue(), equalTo(33L)); // Sum of counts: 3 + 7 + 23
+    }
+
+    public void testPercentilesAggregation() throws Exception {
+        XContentBuilder mapping = createMapping();
+        client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get();
+        ensureGreen();
+
+        client().prepareIndex(defaultIndexName)
+            .setId("1")
+            .setSource(XContentFactory.jsonBuilder()
+                .startObject()
+                .startObject(defaultHistogramField)
+                .array("values", new
double[]{0.1, 0.2, 0.3}) + .array("counts", new long[]{3, 7, 23}) + .endObject() + .endObject()) + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .get(); + + SearchResponse response = client().prepareSearch(defaultIndexName) + .addAggregation( + percentiles("percentiles") + .field(defaultHistogramField) + .percentiles(50.0, 95.0) + ) + .get(); + + assertSearchResponse(response); + Percentiles percentiles = response.getAggregations().get("percentiles"); + assertThat(percentiles, notNullValue()); + assertThat(percentiles.percentile(50.0), greaterThan(0.0)); + assertThat(percentiles.percentile(95.0), greaterThan(0.0)); + } + + public void testHistogramAggregation() throws Exception { + XContentBuilder mapping = createMapping(); + client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get(); + ensureGreen(); + + client().prepareIndex(defaultIndexName) + .setId("1") + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject(defaultHistogramField) + .array("values", new double[]{0.1, 0.2, 0.3, 0.4, 0.5}) + .array("counts", new long[]{3, 7, 23, 12, 6}) + .endObject() + .endObject()) + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .get(); + + SearchResponse response = client().prepareSearch(defaultIndexName) + .addAggregation( + histogram("histogram_agg") + .field(defaultHistogramField) + .interval(0.1) + ) + .get(); + + assertSearchResponse(response); + Histogram histogram = response.getAggregations().get("histogram_agg"); + assertThat(histogram, notNullValue()); + List buckets = histogram.getBuckets(); + assertThat(buckets.size(), greaterThan(0)); + } +} From 2079fb11c273085c78b80adb819383ba83d8c43a Mon Sep 17 00:00:00 2001 From: Siddhant Deshmukh Date: Mon, 5 Jan 2026 15:21:55 -0800 Subject: [PATCH 2/2] Histogram field aggregations Signed-off-by: Siddhant Deshmukh --- .../fielddata/HistogramIndexFieldData.java | 62 ++++++ .../fielddata/HistogramLeafFieldData.java | 76 +++++++ .../index/fielddata/HistogramValues.java | 4 +- .../fielddata/HistogramValuesSource.java | 134 ++++++++++++ .../index/mapper/HistogramFieldMapper.java | 10 +- .../indices}/fielddata/Histogram.java | 2 +- .../indices/fielddata/HistogramValues.java | 17 ++ .../AbstractHistogramAggregator.java | 1 + .../histogram/HistogramAggregatorFactory.java | 2 +- .../bucket/histogram/InternalHistogram.java | 1 + .../histogram/NumericHistogramAggregator.java | 6 +- .../histogram/RangeHistogramAggregator.java | 6 +- .../range/AbstractRangeAggregatorFactory.java | 13 +- .../bucket/range/RangeAggregator.java | 94 ++++++--- .../bucket/range/RangeAggregatorFactory.java | 6 + .../AbstractHDRPercentilesAggregator.java | 59 ++++-- .../AbstractInternalTDigestPercentiles.java | 1 + .../AbstractTDigestPercentilesAggregator.java | 5 +- .../aggregations/metrics/AvgAggregator.java | 76 ++++++- .../metrics/AvgAggregatorFactory.java | 2 +- .../metrics/ExtendedStatsAggregator.java | 28 ++- .../ExtendedStatsAggregatorFactory.java | 2 +- .../aggregations/metrics/MaxAggregator.java | 59 +++++- .../metrics/MaxAggregatorFactory.java | 2 +- .../aggregations/metrics/MinAggregator.java | 72 ++++++- .../metrics/MinAggregatorFactory.java | 2 +- .../PercentileRanksAggregatorFactory.java | 2 +- .../metrics/PercentilesAggregatorFactory.java | 2 +- .../aggregations/metrics/StatsAggregator.java | 40 +++- .../metrics/StatsAggregatorFactory.java | 2 +- .../aggregations/metrics/SumAggregator.java | 70 ++++++- .../metrics/SumAggregatorFactory.java | 2 +- .../metrics/ValueCountAggregator.java | 38 +++- 
 .../support/CoreValuesSourceType.java         |  29 +++
 .../support/HistogramValuesSourceType.java    |  47 +++++
 .../support/ValuesSourceConfig.java           |  15 ++
 .../index/query/HistogramFieldQueryTests.java | 193 +++++++++++++++++-
 37 files changed, 1089 insertions(+), 93 deletions(-)
 create mode 100644 server/src/main/java/org/opensearch/index/fielddata/HistogramIndexFieldData.java
 create mode 100644 server/src/main/java/org/opensearch/index/fielddata/HistogramLeafFieldData.java
 create mode 100644 server/src/main/java/org/opensearch/index/fielddata/HistogramValuesSource.java
 rename server/src/{test/java/org/opensearch/index => main/java/org/opensearch/indices}/fielddata/Histogram.java (93%)
 create mode 100644 server/src/main/java/org/opensearch/indices/fielddata/HistogramValues.java
 create mode 100644 server/src/main/java/org/opensearch/search/aggregations/support/HistogramValuesSourceType.java

diff --git a/server/src/main/java/org/opensearch/index/fielddata/HistogramIndexFieldData.java b/server/src/main/java/org/opensearch/index/fielddata/HistogramIndexFieldData.java
new file mode 100644
index 0000000000000..caf3f707cac76
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/fielddata/HistogramIndexFieldData.java
@@ -0,0 +1,62 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+package org.opensearch.index.fielddata;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.SortField;
+import org.opensearch.common.util.BigArrays;
+import org.opensearch.search.DocValueFormat;
+import org.opensearch.search.MultiValueMode;
+import org.opensearch.search.aggregations.support.CoreValuesSourceType;
+import org.opensearch.search.aggregations.support.ValuesSourceType;
+import org.opensearch.search.sort.BucketedSort;
+import org.opensearch.search.sort.SortOrder;
+
+public class HistogramIndexFieldData implements IndexFieldData<HistogramLeafFieldData> {
+    private final String fieldName;
+
+    public HistogramIndexFieldData(String fieldName) {
+        this.fieldName = fieldName;
+    }
+
+    @Override
+    public String getFieldName() {
+        return fieldName;
+    }
+
+    @Override
+    public ValuesSourceType getValuesSourceType() {
+        return CoreValuesSourceType.HISTOGRAM;
+    }
+
+    @Override
+    public HistogramLeafFieldData load(LeafReaderContext context) {
+        return new HistogramLeafFieldData(context.reader(), fieldName);
+    }
+
+    @Override
+    public HistogramLeafFieldData loadDirect(LeafReaderContext context) throws Exception {
+        return load(context);
+    }
+
+    @Override
+    public SortField wideSortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
+        return IndexFieldData.super.wideSortField(missingValue, sortMode, nested, reverse);
+    }
+
+    @Override
+    public BucketedSort newBucketedSort(BigArrays bigArrays, Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, SortOrder sortOrder, DocValueFormat format, int bucketSize, BucketedSort.ExtraData extra) {
+        throw new UnsupportedOperationException("Histogram fields do not support bucketed sorting");
+    }
+
+    @Override
+    public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
+        throw new UnsupportedOperationException("Histogram fields do not support sorting");
+    }
+}
+
diff --git a/server/src/main/java/org/opensearch/index/fielddata/HistogramLeafFieldData.java b/server/src/main/java/org/opensearch/index/fielddata/HistogramLeafFieldData.java
new file
mode 100644 index 0000000000000..55c3d76eab913 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/fielddata/HistogramLeafFieldData.java @@ -0,0 +1,76 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.fielddata; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.util.BytesRef; +import org.opensearch.indices.fielddata.Histogram; + +import java.io.IOException; +import java.nio.ByteBuffer; + +public class HistogramLeafFieldData implements LeafFieldData { + private final LeafReader reader; + private final String fieldName; + + public HistogramLeafFieldData(LeafReader reader, String fieldName) { + this.reader = reader; + this.fieldName = fieldName; + } + + public HistogramValues getHistogramValues() throws IOException { + final BinaryDocValues values = DocValues.getBinary(reader, fieldName); + return new HistogramValues() { + @Override + public boolean advanceExact(int doc) throws IOException { + return values.advanceExact(doc); + } + + @Override + public Histogram histogram() throws IOException { + BytesRef bytesRef = values.binaryValue(); + return decodeHistogram(bytesRef); + } + }; + } + + private Histogram decodeHistogram(BytesRef bytesRef) { + ByteBuffer buffer = ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length); + int size = buffer.getInt(); + double[] values = new double[size]; + long[] counts = new long[size]; + + for (int i = 0; i < size; i++) { + values[i] = buffer.getDouble(); + counts[i] = buffer.getLong(); + } + + return new Histogram(values, counts); + } + + @Override + public void close() {} + + @Override + public ScriptDocValues getScriptValues() { + return null; + } + + @Override + public SortedBinaryDocValues getBytesValues() { + return null; + } + + @Override + public long ramBytesUsed() { + return 0; + } +} diff --git a/server/src/main/java/org/opensearch/index/fielddata/HistogramValues.java b/server/src/main/java/org/opensearch/index/fielddata/HistogramValues.java index 001d9fdb7b8eb..28c5c469eb121 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/HistogramValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/HistogramValues.java @@ -32,6 +32,8 @@ package org.opensearch.index.fielddata; +import org.opensearch.indices.fielddata.Histogram; + import java.io.IOException; /** @@ -51,6 +53,6 @@ public abstract class HistogramValues { * Get the {@link HistogramValue} associated with the current document. * The returned {@link HistogramValue} might be reused across calls. */ - public abstract HistogramValue histogram() throws IOException; + public abstract Histogram histogram() throws IOException; } diff --git a/server/src/main/java/org/opensearch/index/fielddata/HistogramValuesSource.java b/server/src/main/java/org/opensearch/index/fielddata/HistogramValuesSource.java new file mode 100644 index 0000000000000..a4455718bbb50 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/fielddata/HistogramValuesSource.java @@ -0,0 +1,134 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.fielddata; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.util.BytesRef; +import org.opensearch.indices.fielddata.Histogram; +import org.opensearch.search.aggregations.support.ValuesSource; + +import java.io.IOException; +import java.nio.ByteBuffer; + +public class HistogramValuesSource extends ValuesSource.Numeric { + private final HistogramIndexFieldData indexFieldData; + + public HistogramValuesSource(HistogramIndexFieldData indexFieldData) { + this.indexFieldData = indexFieldData; + } + + public HistogramIndexFieldData getHistogramFieldData() { + return indexFieldData; + } + + @Override + public boolean isFloatingPoint() { + return true; + } + + @Override + public boolean isBigInteger() { + return false; + } + + @Override + public SortedNumericDocValues longValues(LeafReaderContext context) throws IOException { + throw new UnsupportedOperationException("Histogram fields only support double values"); + } + + @Override + public SortedNumericDoubleValues doubleValues(LeafReaderContext context) throws IOException { + final HistogramLeafFieldData leafFieldData = indexFieldData.load(context); + final HistogramValues histogramValues = leafFieldData.getHistogramValues(); + + return new SortedNumericDoubleValues() { + private double[] currentValues; + private int currentValueIndex; + + @Override + public boolean advanceExact(int doc) throws IOException { + if (histogramValues.advanceExact(doc)) { + Histogram histogram = histogramValues.histogram(); + currentValues = histogram.getValues(); + currentValueIndex = 0; + return currentValues.length > 0; + } + currentValues = null; + return false; + } + + @Override + public double nextValue() throws IOException { + if (currentValues == null || currentValueIndex >= currentValues.length) { + throw new IllegalStateException("Cannot call nextValue() when there are no more values"); + } + return currentValues[currentValueIndex++]; + } + + @Override + public int docValueCount() { + return currentValues == null ? 0 : currentValues.length; + } + }; + } + + /** + * Returns the counts values for the histogram buckets as doubles. + * These represent the frequency/count per bucket. + */ + public SortedNumericDoubleValues getCounts(LeafReaderContext context) throws IOException { + final HistogramLeafFieldData leafFieldData = indexFieldData.load(context); + final HistogramValues histogramValues = leafFieldData.getHistogramValues(); + + return new SortedNumericDoubleValues() { + private long[] currentCounts; + private int currentIndex; + + @Override + public boolean advanceExact(int doc) throws IOException { + if (histogramValues.advanceExact(doc)) { + Histogram histogram = histogramValues.histogram(); + currentCounts = histogram.getCounts(); + currentIndex = 0; + return currentCounts.length > 0; + } + currentCounts = null; + return false; + } + + @Override + public double nextValue() throws IOException { + if (currentCounts == null || currentIndex >= currentCounts.length) { + throw new IllegalStateException("Cannot call nextValue() when there are no more count values"); + } + return (double) currentCounts[currentIndex++]; + } + + @Override + public int docValueCount() { + return currentCounts == null ? 
0 : currentCounts.length; + } + }; + } + + private double decodeMin(BytesRef bytesRef) { + ByteBuffer buffer = ByteBuffer.wrap(bytesRef.bytes, bytesRef.offset, bytesRef.length); + int size = buffer.getInt(); + if (size > 0) { + return buffer.getDouble(); + } + return Double.NaN; + } + + @Override + public SortedBinaryDocValues bytesValues(LeafReaderContext context) throws IOException { + return null; + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/HistogramFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/HistogramFieldMapper.java index 1503791c57f69..2c14fb16f6d13 100644 --- a/server/src/main/java/org/opensearch/index/mapper/HistogramFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/HistogramFieldMapper.java @@ -6,8 +6,11 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; +import org.opensearch.core.indices.breaker.CircuitBreakerService; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.fielddata.HistogramIndexFieldData; import org.opensearch.index.fielddata.IndexFieldData; +import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.fielddata.plain.BytesBinaryIndexFieldData; import org.opensearch.index.query.QueryShardContext; import org.opensearch.search.DocValueFormat; @@ -232,7 +235,12 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) { @Override public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) { failIfNoDocValues(); - return new BytesBinaryIndexFieldData.Builder(name(), CoreValuesSourceType.BYTES); + return new IndexFieldData.Builder() { + @Override + public IndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) { + return new HistogramIndexFieldData(name()); + } + }; } } } diff --git a/server/src/test/java/org/opensearch/index/fielddata/Histogram.java b/server/src/main/java/org/opensearch/indices/fielddata/Histogram.java similarity index 93% rename from server/src/test/java/org/opensearch/index/fielddata/Histogram.java rename to server/src/main/java/org/opensearch/indices/fielddata/Histogram.java index ee17e17e05ccd..ec4f756660582 100644 --- a/server/src/test/java/org/opensearch/index/fielddata/Histogram.java +++ b/server/src/main/java/org/opensearch/indices/fielddata/Histogram.java @@ -6,7 +6,7 @@ * compatible open source license. */ -package org.opensearch.index.fielddata; +package org.opensearch.indices.fielddata; public class Histogram { private final double[] values; diff --git a/server/src/main/java/org/opensearch/indices/fielddata/HistogramValues.java b/server/src/main/java/org/opensearch/indices/fielddata/HistogramValues.java new file mode 100644 index 0000000000000..3340eb3dc062a --- /dev/null +++ b/server/src/main/java/org/opensearch/indices/fielddata/HistogramValues.java @@ -0,0 +1,17 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.indices.fielddata; + +import java.io.IOException; + +public interface HistogramValues { + boolean advanceExact(int doc) throws IOException; + Histogram histogram() throws IOException; +} + diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java index f41ef17330212..8c51966a7690f 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java @@ -34,6 +34,7 @@ import org.apache.lucene.util.CollectionUtil; import org.opensearch.common.lease.Releasables; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java index 7506dcde23641..493d0b1cb3acd 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/HistogramAggregatorFactory.java @@ -68,7 +68,7 @@ static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( HistogramAggregationBuilder.REGISTRY_KEY, - List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), NumericHistogramAggregator::new, true ); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/InternalHistogram.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/InternalHistogram.java index a988b911de5a3..e2816c32fa578 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/InternalHistogram.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/InternalHistogram.java @@ -36,6 +36,7 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregations; import org.opensearch.search.aggregations.BucketOrder; diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/NumericHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/NumericHistogramAggregator.java index a8b4e2fea40ff..05e1bcfa488b6 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/NumericHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/NumericHistogramAggregator.java @@ -35,7 +35,9 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.DocIdStream; import org.apache.lucene.search.ScoreMode; +import org.opensearch.index.fielddata.HistogramValuesSource; import 
org.opensearch.index.fielddata.SortedNumericDoubleValues; +import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.BucketOrder; @@ -86,7 +88,9 @@ public NumericHistogramAggregator( minDocCount, extendedBounds, hardBounds, - valuesSourceConfig.format(), + valuesSourceConfig.getValuesSource() instanceof HistogramValuesSource + ? DocValueFormat.RAW + : valuesSourceConfig.format(), context, parent, cardinalityUpperBound, diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/RangeHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/RangeHistogramAggregator.java index 1390f51297dbf..8e882dba6ffb1 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/RangeHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/RangeHistogramAggregator.java @@ -35,9 +35,11 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.DocIdStream; import org.apache.lucene.util.BytesRef; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.SortedBinaryDocValues; import org.opensearch.index.mapper.RangeFieldMapper; import org.opensearch.index.mapper.RangeType; +import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.BucketOrder; @@ -86,7 +88,9 @@ public RangeHistogramAggregator( minDocCount, extendedBounds, hardBounds, - valuesSourceConfig.format(), + valuesSourceConfig.getValuesSource() instanceof HistogramValuesSource + ? 
DocValueFormat.RAW + : valuesSourceConfig.format(), context, parent, cardinality, diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java index fd334638a0c1f..a4e948a342ac2 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/AbstractRangeAggregatorFactory.java @@ -32,7 +32,9 @@ package org.opensearch.search.aggregations.bucket.range; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.AggregatorFactory; @@ -68,7 +70,7 @@ public static void registerAggregators( ) { builder.register( registryKey, - List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), RangeAggregator::new, true ); @@ -106,13 +108,20 @@ protected Aggregator doCreateInternal( Map metadata ) throws IOException { + DocValueFormat format; + if (config.getValuesSource() instanceof HistogramValuesSource) { + format = DocValueFormat.RAW; + } else { + format = config.format(); + } + return queryShardContext.getValuesSourceRegistry() .getAggregator(registryKey, config) .build( name, factories, (Numeric) config.getValuesSource(), - config.format(), + format, rangeFactory, ranges, keyed, diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index aad698a7a2ba8..961996ab63503 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -51,8 +51,13 @@ import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.fielddata.HistogramIndexFieldData; +import org.opensearch.index.fielddata.HistogramLeafFieldData; +import org.opensearch.index.fielddata.HistogramValues; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.SortedNumericDoubleValues; import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.indices.fielddata.Histogram; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; @@ -346,39 +351,70 @@ protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); - return new LeafBucketCollectorBase(sub, values) { - @Override - public void collect(int doc, long bucket) throws IOException { - if 
(values.advanceExact(doc)) { - final int valuesCount = values.docValueCount(); - for (int i = 0, lo = 0; i < valuesCount; ++i) { - final double value = values.nextValue(); - lo = collect(doc, value, bucket, lo); + if (valuesSource == null) { + return LeafBucketCollector.NO_OP_COLLECTOR; + } + + // Check if we're dealing with histogram values + if (valuesSource instanceof HistogramValuesSource) { + final HistogramValues histogramValues = ((HistogramValuesSource) valuesSource).getHistogramFieldData() + .load(ctx) + .getHistogramValues(); + + return new LeafBucketCollectorBase(sub, null) { + @Override + public void collect(int doc, long owningBucketOrd) throws IOException { + if (histogramValues.advanceExact(doc)) { + Histogram histogram = histogramValues.histogram(); + double[] values = histogram.getValues(); + long[] counts = histogram.getCounts(); + + for (int i = 0; i < values.length; i++) { + double value = values[i]; + long count = counts[i]; + + // Use existing MatchedRange logic + MatchedRange matchedRange = new MatchedRange(ranges, 0, value, maxTo); + + for (int j = matchedRange.startLo; j <= matchedRange.endHi; ++j) { + if (ranges[j].matches(value)) { + long bucketOrd = subBucketOrdinal(owningBucketOrd, j); + collectBucket(sub, doc, bucketOrd); + + // collectBucket already recorded this doc once; repeat the remaining count - 1 occurrences + for (long k = 1; k < count; k++) { + collectExistingBucket(sub, doc, bucketOrd); + } + } + } + } } } - } - - @Override - public void collect(DocIdStream stream, long owningBucketOrd) throws IOException { - super.collect(stream, owningBucketOrd); - } - - @Override - public void collectRange(int min, int max) throws IOException { - super.collectRange(min, max); - } - - private int collect(int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { - MatchedRange range = new MatchedRange(ranges, lowBound, value, maxTo); - for (int i = range.startLo; i <= range.endHi; ++i) { - if (ranges[i].matches(value)) { - collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, i)); + }; + } else { + final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + if (values.advanceExact(doc)) { + final int valuesCount = values.docValueCount(); + for (int i = 0, lo = 0; i < valuesCount; ++i) { + final double value = values.nextValue(); + lo = collect(doc, value, bucket, lo); + } } } - return range.endHi + 1; - } - }; + private int collect(int doc, double value, long owningBucketOrdinal, int lowBound) throws IOException { + MatchedRange range = new MatchedRange(ranges, lowBound, value, maxTo); + for (int i = range.startLo; i <= range.endHi; ++i) { + if (ranges[i].matches(value)) { + collectBucket(sub, doc, subBucketOrdinal(owningBucketOrdinal, i)); + } + } + return range.endHi + 1; + } + }; + } } private void preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo starTree) throws IOException { diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorFactory.java index c58b2e881803c..7c910237ccc12 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorFactory.java @@ -32,12 +32,18 @@ package
org.opensearch.search.aggregations.bucket.range; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.query.QueryShardContext; +import org.opensearch.search.DocValueFormat; +import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.AggregatorFactory; +import org.opensearch.search.aggregations.CardinalityUpperBound; import org.opensearch.search.aggregations.bucket.range.InternalRange.Factory; import org.opensearch.search.aggregations.bucket.range.RangeAggregator.Range; +import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; +import org.opensearch.search.internal.SearchContext; import java.io.IOException; import java.util.Map; diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java index e2edfd32f7ff8..86eb556882754 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractHDRPercentilesAggregator.java @@ -38,7 +38,12 @@ import org.opensearch.common.util.ArrayUtils; import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.ObjectArray; +import org.opensearch.index.fielddata.HistogramIndexFieldData; +import org.opensearch.index.fielddata.HistogramLeafFieldData; +import org.opensearch.index.fielddata.HistogramValues; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.SortedNumericDoubleValues; +import org.opensearch.indices.fielddata.Histogram; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.LeafBucketCollector; @@ -83,7 +88,9 @@ private static int indexOfKey(double[] keys, double key) { super(name, context, parent, metadata); this.valuesSource = valuesSource; this.keyed = keyed; - this.format = formatter; + this.format = valuesSource instanceof HistogramValuesSource + ? 
DocValueFormat.RAW + : formatter; this.states = context.bigArrays().newObjectArray(1); this.keys = keys; this.numberOfSignificantValueDigits = numberOfSignificantValueDigits; @@ -101,22 +108,48 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc } final BigArrays bigArrays = context.bigArrays(); - final SortedNumericDoubleValues values = ((ValuesSource.Numeric) valuesSource).doubleValues(ctx); - return new LeafBucketCollectorBase(sub, values) { - @Override - public void collect(int doc, long bucket) throws IOException { - DoubleHistogram state = getExistingOrNewHistogram(bigArrays, bucket); - if (values.advanceExact(doc)) { - final int valueCount = values.docValueCount(); - for (int i = 0; i < valueCount; i++) { - state.recordValue(values.nextValue()); + // Check if we're dealing with histogram values + if (valuesSource instanceof HistogramValuesSource) { + HistogramIndexFieldData indexFieldData = ((HistogramValuesSource) valuesSource).getHistogramFieldData(); + HistogramLeafFieldData leafFieldData = indexFieldData.load(ctx); + HistogramValues histogramValues = leafFieldData.getHistogramValues(); + + return new LeafBucketCollectorBase(sub, null) { + @Override + public void collect(int doc, long bucket) throws IOException { + DoubleHistogram state = getExistingOrNewHistogram(bigArrays, bucket); + + if (histogramValues.advanceExact(doc)) { + Histogram histogram = histogramValues.histogram(); + double[] histValues = histogram.getValues(); + long[] counts = histogram.getCounts(); + + // Record each value with its count using recordValueWithCount + for (int i = 0; i < histValues.length; i++) { + state.recordValueWithCount(histValues[i], counts[i]); + } } } - } - }; - + }; + } else { + // Original logic for non-histogram fields + final SortedNumericDoubleValues values = ((ValuesSource.Numeric) valuesSource).doubleValues(ctx); + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + DoubleHistogram state = getExistingOrNewHistogram(bigArrays, bucket); + if (values.advanceExact(doc)) { + final int valueCount = values.docValueCount(); + for (int i = 0; i < valueCount; i++) { + state.recordValue(values.nextValue()); + } + } + } + }; + } } + private DoubleHistogram getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) { states = bigArrays.grow(states, bucket + 1); DoubleHistogram state = states.get(bucket); diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractInternalTDigestPercentiles.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractInternalTDigestPercentiles.java index 398d0054403ac..6372cb7701fdd 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractInternalTDigestPercentiles.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractInternalTDigestPercentiles.java @@ -35,6 +35,7 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.InternalAggregation; diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java index 
8c79a80a26b9e..6c5b9d55505e0 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/AbstractTDigestPercentilesAggregator.java @@ -38,6 +38,7 @@ import org.opensearch.common.util.ArrayUtils; import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.ObjectArray; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.SortedNumericDoubleValues; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; @@ -81,7 +82,9 @@ private static int indexOfKey(double[] keys, double key) { super(name, context, parent, metadata); this.valuesSource = valuesSource; this.keyed = keyed; - this.formatter = formatter; + this.formatter = valuesSource instanceof HistogramValuesSource + ? DocValueFormat.RAW + : formatter; this.states = context.bigArrays().newObjectArray(1); this.keys = keys; this.compression = compression; diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java index 1031a0370e57d..b78f19ec790c7 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java @@ -46,7 +46,12 @@ import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.fielddata.HistogramIndexFieldData; +import org.opensearch.index.fielddata.HistogramLeafFieldData; +import org.opensearch.index.fielddata.HistogramValues; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.SortedNumericDoubleValues; +import org.opensearch.indices.fielddata.Histogram; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.InternalAggregation; @@ -89,7 +94,9 @@ class AvgAggregator extends NumericMetricsAggregator.SingleValue implements Star super(name, context, parent, metadata); // TODO Stop expecting nulls here this.valuesSource = valuesSourceConfig.hasValues() ? (ValuesSource.Numeric) valuesSourceConfig.getValuesSource() : null; - this.format = valuesSourceConfig.format(); + this.format = valuesSource instanceof HistogramValuesSource + ? 
DocValueFormat.RAW + : valuesSourceConfig.format(); if (valuesSource != null) { final BigArrays bigArrays = context.bigArrays(); counts = bigArrays.newLongArray(1, true); @@ -194,8 +201,75 @@ private void setKahanSummation(long bucket) { kahanSummation.reset(sum, compensation); } }; + // Check if we're dealing with histogram values + if (valuesSource instanceof HistogramValuesSource) { + HistogramIndexFieldData indexFieldData = ((HistogramValuesSource) valuesSource).getHistogramFieldData(); + HistogramLeafFieldData leafFieldData = indexFieldData.load(ctx); + HistogramValues histogramValues = leafFieldData.getHistogramValues(); + + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + counts = bigArrays.grow(counts, bucket + 1); + sums = bigArrays.grow(sums, bucket + 1); + compensations = bigArrays.grow(compensations, bucket + 1); + + if (histogramValues.advanceExact(doc)) { + Histogram histogram = histogramValues.histogram(); + double[] histValues = histogram.getValues(); + long[] histCounts = histogram.getCounts(); + + // Use Kahan summation for better accuracy + double sum = sums.get(bucket); + double compensation = compensations.get(bucket); + kahanSummation.reset(sum, compensation); + + long totalCount = 0; + // Calculate weighted sum (value * count) for each bucket + for (int i = 0; i < histValues.length; i++) { + kahanSummation.add(histValues[i] * histCounts[i]); + totalCount += histCounts[i]; + } + + counts.increment(bucket, totalCount); + sums.set(bucket, kahanSummation.value()); + compensations.set(bucket, kahanSummation.delta()); + } + } + }; + } else { + // Original logic for non-histogram fields + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + counts = bigArrays.grow(counts, bucket + 1); + sums = bigArrays.grow(sums, bucket + 1); + compensations = bigArrays.grow(compensations, bucket + 1); + + if (values.advanceExact(doc)) { + final int valueCount = values.docValueCount(); + counts.increment(bucket, valueCount); + // Compute the sum of double values with Kahan summation algorithm which is more + // accurate than naive summation. 
+ double sum = sums.get(bucket); + double compensation = compensations.get(bucket); + + kahanSummation.reset(sum, compensation); + + for (int i = 0; i < valueCount; i++) { + double value = values.nextValue(); + kahanSummation.add(value); + } + + sums.set(bucket, kahanSummation.value()); + compensations.set(bucket, kahanSummation.delta()); + } + } + }; + } } + private void precomputeLeafUsingStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo starTree) throws IOException { StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); assert starTreeValues != null; diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregatorFactory.java index 57389f19b4577..1c59c79bd512a 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregatorFactory.java @@ -73,7 +73,7 @@ public MetricStat getMetricStat() { static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( AvgAggregationBuilder.REGISTRY_KEY, - List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), AvgAggregator::new, true ); diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/ExtendedStatsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/ExtendedStatsAggregator.java index d926bb1d0d273..0b4e354d68042 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/ExtendedStatsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/ExtendedStatsAggregator.java @@ -39,6 +39,7 @@ import org.opensearch.common.util.DoubleArray; import org.opensearch.common.util.LongArray; import org.opensearch.core.ParseField; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.SortedNumericDoubleValues; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; @@ -84,7 +85,9 @@ class ExtendedStatsAggregator extends NumericMetricsAggregator.MultiValue { super(name, context, parent, metadata); // TODO: stop depending on nulls here this.valuesSource = valuesSourceConfig.hasValues() ? (ValuesSource.Numeric) valuesSourceConfig.getValuesSource() : null; - this.format = valuesSourceConfig.format(); + this.format = valuesSource instanceof HistogramValuesSource + ? 
DocValueFormat.RAW + : valuesSourceConfig.format(); this.sigma = sigma; if (valuesSource != null) { final BigArrays bigArrays = context.bigArrays(); @@ -111,9 +114,15 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); - final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); + + // Only histogram fields carry per-value counts; guard the cast so plain numeric fields keep working + final boolean isHistogram = valuesSource instanceof HistogramValuesSource; + final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); + final SortedNumericDoubleValues countsValues = isHistogram ? ((HistogramValuesSource) valuesSource).getCounts(ctx) : null; + final CompensatedSum compensatedSum = new CompensatedSum(0, 0); final CompensatedSum compensatedSumOfSqr = new CompensatedSum(0, 0); + return new LeafBucketCollectorBase(sub, values) { @Override @@ -132,13 +141,11 @@ public void collect(int doc, long bucket) throws IOException { maxes.fill(from, overSize, Double.NEGATIVE_INFINITY); } - if (values.advanceExact(doc)) { + if (values.advanceExact(doc) && (countsValues == null || countsValues.advanceExact(doc))) { final int valuesCount = values.docValueCount(); - counts.increment(bucket, valuesCount); double min = mins.get(bucket); double max = maxes.get(bucket); - // Compute the sum and sum of squires for double values with Kahan summation algorithm - // which is more accurate than naive summation. + double sum = sums.get(bucket); double compensation = compensations.get(bucket); compensatedSum.reset(sum, compensation); @@ -147,14 +154,20 @@ public void collect(int doc, long bucket) throws IOException { double compensationOfSqr = compensationOfSqrs.get(bucket); compensatedSumOfSqr.reset(sumOfSqr, compensationOfSqr); + long totalCount = counts.get(bucket); + for (int i = 0; i < valuesCount; i++) { double value = values.nextValue(); - compensatedSum.add(value); - compensatedSumOfSqr.add(value * value); + double count = countsValues == null ? 1.0 : countsValues.nextValue(); // per-value weight; plain numeric values count once + + totalCount += (long) count; + compensatedSum.add(value * count); + compensatedSumOfSqr.add(value * value * count); min = Math.min(min, value); max = Math.max(max, value); } + counts.set(bucket, totalCount); sums.set(bucket, compensatedSum.value()); compensations.set(bucket, compensatedSum.delta()); sumOfSqrs.set(bucket, compensatedSumOfSqr.value()); diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/ExtendedStatsAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/ExtendedStatsAggregatorFactory.java index 99b3d09517a1f..41e29fdae1201 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/ExtendedStatsAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/ExtendedStatsAggregatorFactory.java @@ -72,7 +72,7 @@ class ExtendedStatsAggregatorFactory extends ValuesSourceAggregatorFactory { static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( ExtendedStatsAggregationBuilder.REGISTRY_KEY, - Arrays.asList(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + Arrays.asList(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), ExtendedStatsAggregator::new, true ); diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java index 8a656d768cee2..d3b86726014b8 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java @@ -44,8 +44,13 @@ import org.opensearch.common.util.DoubleArray; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.fielddata.HistogramIndexFieldData; +import org.opensearch.index.fielddata.HistogramLeafFieldData; +import org.opensearch.index.fielddata.HistogramValues; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.NumericDoubleValues; import org.opensearch.index.fielddata.SortedNumericDoubleValues; +import org.opensearch.indices.fielddata.Histogram; import org.opensearch.search.DocValueFormat; import org.opensearch.search.MultiValueMode; import org.opensearch.search.aggregations.Aggregator; @@ -93,7 +98,9 @@ class MaxAggregator extends NumericMetricsAggregator.SingleValue implements Star maxes = context.bigArrays().newDoubleArray(1, false); maxes.fill(0, maxes.size(), Double.NEGATIVE_INFINITY); } - this.formatter = config.format(); + this.formatter = valuesSource instanceof HistogramValuesSource + ? DocValueFormat.RAW + : config.format(); this.pointConverter = pointReaderIfAvailable(config); if (pointConverter != null) { pointField = config.fieldContext().field(); @@ -148,7 +155,6 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc if (parent != null) { return LeafBucketCollector.NO_OP_COLLECTOR; } else { - // we have no parent and the values source is empty so we can skip collecting hits. throw new CollectionTerminatedException(); } } @@ -200,8 +206,57 @@ private void growMaxes(long bucket) { } } }; + + // Check if we're dealing with histogram values + if (valuesSource instanceof HistogramValuesSource) { + HistogramIndexFieldData indexFieldData = ((HistogramValuesSource) valuesSource).getHistogramFieldData(); + HistogramLeafFieldData leafFieldData = indexFieldData.load(ctx); + HistogramValues histogramValues = leafFieldData.getHistogramValues(); + + return new LeafBucketCollectorBase(sub, allValues) { + @Override + public void collect(int doc, long bucket) throws IOException { + if (bucket >= maxes.size()) { + long from = maxes.size(); + maxes = bigArrays.grow(maxes, bucket + 1); + maxes.fill(from, maxes.size(), Double.NEGATIVE_INFINITY); + } + + if (histogramValues.advanceExact(doc)) { + Histogram histogram = histogramValues.histogram(); + double[] values = histogram.getValues(); + if (values.length > 0) { + // For max aggregation, use the last value since values are sorted + double value = values[values.length - 1]; + double max = maxes.get(bucket); + maxes.set(bucket, Math.max(max, value)); + } + } + } + }; + } else { + // Original logic for non-histogram fields + final NumericDoubleValues values = MultiValueMode.MAX.select(allValues); + return new LeafBucketCollectorBase(sub, allValues) { + @Override + public void collect(int doc, long bucket) throws IOException { + if (bucket >= maxes.size()) { + long from = maxes.size(); + maxes = bigArrays.grow(maxes, bucket + 1); + maxes.fill(from, maxes.size(), Double.NEGATIVE_INFINITY); + } + if (values.advanceExact(doc)) { + final double value = values.doubleValue(); + double max = maxes.get(bucket); + max = Math.max(max, value); + maxes.set(bucket, max); + } + } + }; + } } + private void precomputeLeafUsingStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo starTree)
throws IOException { AtomicReference max = new AtomicReference<>(maxes.get(0)); StarTreeQueryHelper.precomputeLeafUsingStarTree(context, valuesSource, ctx, starTree, MetricStat.MAX.getTypeName(), value -> { diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregatorFactory.java index c0ee471c87f29..8cddf7349f0ca 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregatorFactory.java @@ -57,7 +57,7 @@ class MaxAggregatorFactory extends MetricAggregatorFactory { static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( MaxAggregationBuilder.REGISTRY_KEY, - List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), MaxAggregator::new, true ); diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java index cb4b530b5bda2..eb9dcd1509a3f 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java @@ -44,8 +44,13 @@ import org.opensearch.common.util.DoubleArray; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.fielddata.HistogramIndexFieldData; +import org.opensearch.index.fielddata.HistogramLeafFieldData; +import org.opensearch.index.fielddata.HistogramValues; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.NumericDoubleValues; import org.opensearch.index.fielddata.SortedNumericDoubleValues; +import org.opensearch.indices.fielddata.Histogram; import org.opensearch.search.DocValueFormat; import org.opensearch.search.MultiValueMode; import org.opensearch.search.aggregations.Aggregator; @@ -93,7 +98,9 @@ class MinAggregator extends NumericMetricsAggregator.SingleValue implements Star mins = context.bigArrays().newDoubleArray(1, false); mins.fill(0, mins.size(), Double.POSITIVE_INFINITY); } - this.format = config.format(); + this.format = valuesSource instanceof HistogramValuesSource + ? DocValueFormat.RAW + : config.format(); this.pointConverter = pointReaderIfAvailable(config); if (pointConverter != null) { pointField = config.fieldContext().field(); @@ -148,23 +155,72 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc if (parent != null) { return LeafBucketCollector.NO_OP_COLLECTOR; } else { - // we have no parent and the values source is empty so we can skip collecting hits.
throw new CollectionTerminatedException(); } } final BigArrays bigArrays = context.bigArrays(); final SortedNumericDoubleValues allValues = valuesSource.doubleValues(ctx); + + // Check if we're dealing with histogram values + if (valuesSource instanceof HistogramValuesSource) { + HistogramIndexFieldData indexFieldData = ((HistogramValuesSource) valuesSource).getHistogramFieldData(); + HistogramLeafFieldData leafFieldData = indexFieldData.load(ctx); + HistogramValues histogramValues = leafFieldData.getHistogramValues(); + + return new LeafBucketCollectorBase(sub, allValues) { + @Override + public void collect(int doc, long bucket) throws IOException { + if (bucket >= mins.size()) { + long from = mins.size(); + mins = bigArrays.grow(mins, bucket + 1); + mins.fill(from, mins.size(), Double.POSITIVE_INFINITY); + } + + if (histogramValues.advanceExact(doc)) { + Histogram histogram = histogramValues.histogram(); + double[] values = histogram.getValues(); + if (values.length > 0) { + // For min aggregation, use the first value since values are sorted + double value = values[0]; + double min = mins.get(bucket); + mins.set(bucket, Math.min(min, value)); + } + } + } + }; + } else { + // Original logic for non-histogram fields + final NumericDoubleValues values = MultiValueMode.MIN.select(allValues); + return new LeafBucketCollectorBase(sub, allValues) { + @Override + public void collect(int doc, long bucket) throws IOException { + if (bucket >= mins.size()) { + long from = mins.size(); + mins = bigArrays.grow(mins, bucket + 1); + mins.fill(from, mins.size(), Double.POSITIVE_INFINITY); + } + + if (values.advanceExact(doc)) { + final double value = values.doubleValue(); + double min = mins.get(bucket); + mins.set(bucket, Math.min(min, value)); + } + } + }; + } + } + + private LeafBucketCollector getRegularLeafCollector(BigArrays bigArrays, SortedNumericDoubleValues allValues, LeafBucketCollector sub) { final NumericDoubleValues values = MultiValueMode.MIN.select(allValues); return new LeafBucketCollectorBase(sub, allValues) { @Override public void collect(int doc, long bucket) throws IOException { growMins(bucket); // growMins already resizes and fills the array, so no extra grow is needed here if (values.advanceExact(doc)) { final double value = values.doubleValue(); double min = mins.get(bucket); - min = Math.min(min, value); - mins.set(bucket, min); + mins.set(bucket, Math.min(min, value)); } } @@ -202,6 +258,6 @@ private void growMins(long bucket) { }; } private void precomputeLeafUsingStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo starTree) throws IOException { AtomicReference min = new AtomicReference<>(mins.get(0)); StarTreeQueryHelper.precomputeLeafUsingStarTree(context, valuesSource, ctx, starTree, MetricStat.MIN.getTypeName(), value -> { diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregatorFactory.java index 44c0d9d7d11eb..169f9fd7af9f4 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregatorFactory.java @@ -57,7 +57,7 @@ class MinAggregatorFactory extends MetricAggregatorFactory { static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( MinAggregationBuilder.REGISTRY_KEY, -
List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), MinAggregator::new, true ); diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/PercentileRanksAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/PercentileRanksAggregatorFactory.java index d3c18bcad1a43..f98ac51757571 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/PercentileRanksAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/PercentileRanksAggregatorFactory.java @@ -61,7 +61,7 @@ class PercentileRanksAggregatorFactory extends ValuesSourceAggregatorFactory { static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( PercentileRanksAggregationBuilder.REGISTRY_KEY, - Arrays.asList(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + Arrays.asList(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), (name, valuesSource, context, parent, percents, percentilesConfig, keyed, formatter, metadata) -> percentilesConfig .createPercentileRanksAggregator(name, valuesSource, context, parent, percents, keyed, formatter, metadata), true diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/PercentilesAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/PercentilesAggregatorFactory.java index 148e26e038923..b79e6c0a9f59b 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/PercentilesAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/PercentilesAggregatorFactory.java @@ -62,7 +62,7 @@ class PercentilesAggregatorFactory extends ValuesSourceAggregatorFactory { static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( PercentilesAggregationBuilder.REGISTRY_KEY, - Arrays.asList(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + Arrays.asList(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), (name, valuesSource, context, parent, percents, percentilesConfig, keyed, formatter, metadata) -> percentilesConfig .createPercentilesAggregator(name, valuesSource, context, parent, percents, keyed, formatter, metadata), true diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/StatsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/StatsAggregator.java index 98fc5cc4d6d42..246532936bdb5 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/StatsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/StatsAggregator.java @@ -38,6 +38,7 @@ import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.DoubleArray; import org.opensearch.common.util.LongArray; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.SortedNumericDoubleValues; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; @@ -87,7 +88,9 @@ class StatsAggregator extends NumericMetricsAggregator.MultiValue { maxes = bigArrays.newDoubleArray(1, false); maxes.fill(0, maxes.size(), 
Double.NEGATIVE_INFINITY); } - this.format = valuesSourceConfig.format(); + this.format = valuesSource instanceof HistogramValuesSource + ? DocValueFormat.RAW + : valuesSourceConfig.format(); } @Override @@ -101,7 +104,11 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc return LeafBucketCollector.NO_OP_COLLECTOR; } final BigArrays bigArrays = context.bigArrays(); + final boolean isHistogram = valuesSource instanceof HistogramValuesSource; final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); + final SortedNumericDoubleValues histogramCounts = isHistogram + ? ((HistogramValuesSource) valuesSource).getCounts(ctx) + : null; final CompensatedSum kahanSummation = new CompensatedSum(0, 0); return new LeafBucketCollectorBase(sub, values) { @@ -111,18 +118,44 @@ public void collect(int doc, long bucket) throws IOException { if (values.advanceExact(doc)) { final int valuesCount = values.docValueCount(); - counts.increment(bucket, valuesCount); + + long totalDocCount = 0L; double min = mins.get(bucket); double max = maxes.get(bucket); + double sum = sums.get(bucket); + double compensation = compensations.get(bucket); + kahanSummation.reset(sum, compensation); + + long[] countArray = new long[valuesCount]; + + if (isHistogram) { + assert histogramCounts != null; + if (histogramCounts.advanceExact(doc) == false || histogramCounts.docValueCount() != valuesCount) { + throw new IllegalStateException("Histogram counts and values must align"); + } + for (int i = 0; i < valuesCount; i++) { + countArray[i] = (long) histogramCounts.nextValue(); + } + } else { + for (int i = 0; i < valuesCount; i++) { + countArray[i] = 1L; // Each value counts once + } + } + // Process both values and counts for (int i = 0; i < valuesCount; i++) { double value = values.nextValue(); - kahanSummation.add(value); + long count = countArray[i]; + totalDocCount += count; + // Add the weighted contribution once rather than looping count times + kahanSummation.add(value * count); min = Math.min(min, value); max = Math.max(max, value); } + sums.set(bucket, kahanSummation.value()); compensations.set(bucket, kahanSummation.delta()); + counts.increment(bucket, totalDocCount); mins.set(bucket, min); maxes.set(bucket, max); } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/StatsAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/StatsAggregatorFactory.java index 0e96e631044dd..80d9b9931cda8 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/StatsAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/StatsAggregatorFactory.java @@ -68,7 +68,7 @@ class StatsAggregatorFactory extends ValuesSourceAggregatorFactory { static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( StatsAggregationBuilder.REGISTRY_KEY, - List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), StatsAggregator::new, true ); diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java index 29228afb8ce8e..5250345bf2a7a 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java @@ -40,7
+40,12 @@ import org.opensearch.common.util.DoubleArray; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.fielddata.HistogramIndexFieldData; +import org.opensearch.index.fielddata.HistogramLeafFieldData; +import org.opensearch.index.fielddata.HistogramValues; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.SortedNumericDoubleValues; +import org.opensearch.indices.fielddata.Histogram; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.InternalAggregation; @@ -81,7 +86,9 @@ public class SumAggregator extends NumericMetricsAggregator.SingleValue implemen super(name, context, parent, metadata); // TODO: stop expecting nulls here this.valuesSource = valuesSourceConfig.hasValues() ? (ValuesSource.Numeric) valuesSourceConfig.getValuesSource() : null; - this.format = valuesSourceConfig.format(); + this.format = valuesSource instanceof HistogramValuesSource + ? DocValueFormat.RAW + : valuesSourceConfig.format(); if (valuesSource != null) { sums = context.bigArrays().newDoubleArray(1, true); compensations = context.bigArrays().newDoubleArray(1, true); @@ -113,10 +120,10 @@ protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - if (valuesSource == null) { return LeafBucketCollector.NO_OP_COLLECTOR; } + final BigArrays bigArrays = context.bigArrays(); final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); final CompensatedSum kahanSummation = new CompensatedSum(0, 0); @@ -172,6 +179,65 @@ private void setKahanSummation(long bucket) { kahanSummation.reset(sum, compensation); } }; + + // Check if we're dealing with histogram values + if (valuesSource instanceof HistogramValuesSource) { + HistogramIndexFieldData indexFieldData = ((HistogramValuesSource) valuesSource).getHistogramFieldData(); + HistogramLeafFieldData leafFieldData = indexFieldData.load(ctx); + HistogramValues histogramValues = leafFieldData.getHistogramValues(); + + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + sums = bigArrays.grow(sums, bucket + 1); + compensations = bigArrays.grow(compensations, bucket + 1); + + if (histogramValues.advanceExact(doc)) { + Histogram histogram = histogramValues.histogram(); + double[] histValues = histogram.getValues(); + long[] counts = histogram.getCounts(); + + // Initialize Kahan summation with current values + double sum = sums.get(bucket); + double compensation = compensations.get(bucket); + kahanSummation.reset(sum, compensation); + + // Calculate sum directly from histogram values and counts + for (int i = 0; i < histValues.length; i++) { + kahanSummation.add(histValues[i] * counts[i]); + } + + // We don't need to process the values from doubleValues() as they're already counted + compensations.set(bucket, kahanSummation.delta()); + sums.set(bucket, kahanSummation.value()); + } + } + }; + } else { + // Original logic for non-histogram fields + return new LeafBucketCollectorBase(sub, values) { + @Override + public void collect(int doc, long bucket) throws IOException { + sums = bigArrays.grow(sums, bucket + 1); + compensations = bigArrays.grow(compensations, bucket + 1); + + if 
(values.advanceExact(doc)) { + final int valuesCount = values.docValueCount(); + double sum = sums.get(bucket); + double compensation = compensations.get(bucket); + kahanSummation.reset(sum, compensation); + + for (int i = 0; i < valuesCount; i++) { + double value = values.nextValue(); + kahanSummation.add(value); + } + + compensations.set(bucket, kahanSummation.delta()); + sums.set(bucket, kahanSummation.value()); + } + } + }; + } } private void precomputeLeafUsingStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo starTree) throws IOException { diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregatorFactory.java index e2e25a8c25a87..18c7ffc31a489 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregatorFactory.java @@ -73,7 +73,7 @@ public MetricStat getMetricStat() { static void registerAggregators(ValuesSourceRegistry.Builder builder) { builder.register( SumAggregationBuilder.REGISTRY_KEY, - List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN), + List.of(CoreValuesSourceType.NUMERIC, CoreValuesSourceType.DATE, CoreValuesSourceType.BOOLEAN, CoreValuesSourceType.HISTOGRAM), SumAggregator::new, true ); diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java index 3541753d94e6f..7516197f075e2 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java @@ -39,8 +39,13 @@ import org.opensearch.common.util.LongArray; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.fielddata.HistogramIndexFieldData; +import org.opensearch.index.fielddata.HistogramLeafFieldData; +import org.opensearch.index.fielddata.HistogramValues; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.MultiGeoPointValues; import org.opensearch.index.fielddata.SortedBinaryDocValues; +import org.opensearch.indices.fielddata.Histogram; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; @@ -111,10 +116,33 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc } final BigArrays bigArrays = context.bigArrays(); - if (valuesSource instanceof ValuesSource.Numeric) { + if (valuesSource instanceof HistogramValuesSource) { + HistogramIndexFieldData indexFieldData = ((HistogramValuesSource) valuesSource).getHistogramFieldData(); + HistogramLeafFieldData leafFieldData = indexFieldData.load(ctx); + HistogramValues histogramValues = leafFieldData.getHistogramValues(); + + return new LeafBucketCollectorBase(sub, null) { + @Override + public void collect(int doc, long bucket) throws IOException { + counts = bigArrays.grow(counts, bucket + 1); + + if (histogramValues.advanceExact(doc)) { + Histogram histogram = histogramValues.histogram(); + long[] histCounts = histogram.getCounts(); + + // Sum up all counts in the histogram + long totalCount = 0; + for (long 
count : histCounts) { + totalCount += count; + } + + counts.increment(bucket, totalCount); + } + } + }; + } else if (valuesSource instanceof ValuesSource.Numeric) { final SortedNumericDocValues values = ((ValuesSource.Numeric) valuesSource).longValues(ctx); return new LeafBucketCollectorBase(sub, values) { - @Override public void collect(int doc, long bucket) throws IOException { counts = bigArrays.grow(counts, bucket + 1); @@ -123,11 +151,9 @@ public void collect(int doc, long bucket) throws IOException { } } }; - } - if (valuesSource instanceof ValuesSource.Bytes.GeoPoint) { + } else if (valuesSource instanceof ValuesSource.Bytes.GeoPoint) { MultiGeoPointValues values = ((ValuesSource.GeoPoint) valuesSource).geoPointValues(ctx); return new LeafBucketCollectorBase(sub, null) { - @Override public void collect(int doc, long bucket) throws IOException { counts = bigArrays.grow(counts, bucket + 1); @@ -140,7 +166,6 @@ public void collect(int doc, long bucket) throws IOException { // The following is default collector. Including the keyword FieldType final SortedBinaryDocValues values = valuesSource.bytesValues(ctx); return new LeafBucketCollectorBase(sub, values) { - @Override public void collect(int doc, long bucket) throws IOException { counts = bigArrays.grow(counts, bucket + 1); @@ -151,6 +176,7 @@ public void collect(int doc, long bucket) throws IOException { }; } + private void precomputeLeafUsingStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo starTree) throws IOException { StarTreeQueryHelper.precomputeLeafUsingStarTree( context, diff --git a/server/src/main/java/org/opensearch/search/aggregations/support/CoreValuesSourceType.java b/server/src/main/java/org/opensearch/search/aggregations/support/CoreValuesSourceType.java index 79a5e1334aac8..cbff2894e2daf 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/support/CoreValuesSourceType.java +++ b/server/src/main/java/org/opensearch/search/aggregations/support/CoreValuesSourceType.java @@ -41,6 +41,8 @@ import org.opensearch.common.time.DateFormatter; import org.opensearch.geometry.Geometry; import org.opensearch.geometry.utils.WellKnownText; +import org.opensearch.index.fielddata.HistogramIndexFieldData; +import org.opensearch.index.fielddata.HistogramValuesSource; import org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.IndexGeoPointFieldData; import org.opensearch.index.fielddata.IndexNumericFieldData; @@ -158,6 +160,33 @@ public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, } } }, + HISTOGRAM() { + @Override + public ValuesSource getEmpty() { + return ValuesSource.Numeric.EMPTY; + } + + @Override + public ValuesSource getScript(AggregationScript.LeafFactory script, ValueType scriptValueType) { + throw new AggregationExecutionException("Histogram fields do not support scripts"); + } + + @Override + public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFactory script) { + if (!(fieldContext.indexFieldData() instanceof HistogramIndexFieldData)) { + throw new IllegalArgumentException( + "Expected histogram field data but got " + fieldContext.indexFieldData().getClass() + ); + } + + return new HistogramValuesSource((HistogramIndexFieldData) fieldContext.indexFieldData()); + } + + @Override + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { + throw new IllegalArgumentException("Missing values not supported for histogram fields"); + } + }, 
GEOPOINT() { @Override public ValuesSource getEmpty() { diff --git a/server/src/main/java/org/opensearch/search/aggregations/support/HistogramValuesSourceType.java b/server/src/main/java/org/opensearch/search/aggregations/support/HistogramValuesSourceType.java new file mode 100644 index 0000000000000..6a8801599794e --- /dev/null +++ b/server/src/main/java/org/opensearch/search/aggregations/support/HistogramValuesSourceType.java @@ -0,0 +1,50 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.support; + +import org.opensearch.script.AggregationScript; +import org.opensearch.search.DocValueFormat; + +import java.util.function.LongSupplier; + +/** + * Placeholder {@link ValuesSourceType} for histogram fields. Aggregations currently resolve + * histogram fields through CoreValuesSourceType.HISTOGRAM, so this type fails loudly rather + * than silently returning null. + */ +public class HistogramValuesSourceType implements ValuesSourceType { + public static final HistogramValuesSourceType HISTOGRAM = new HistogramValuesSourceType(); + + private HistogramValuesSourceType() {} + + @Override + public ValuesSource getEmpty() { + throw new UnsupportedOperationException("histogram fields are always backed by a mapped field"); + } + + @Override + public ValuesSource getScript(AggregationScript.LeafFactory script, ValueType scriptValueType) { + throw new UnsupportedOperationException("histogram fields do not support scripts"); + } + + @Override + public ValuesSource getField(FieldContext fieldContext, AggregationScript.LeafFactory script) { + throw new UnsupportedOperationException("use CoreValuesSourceType.HISTOGRAM to resolve histogram fields"); + } + + @Override + public ValuesSource replaceMissing(ValuesSource valuesSource, Object rawMissing, DocValueFormat docValueFormat, LongSupplier now) { + throw new UnsupportedOperationException("missing values are not supported for histogram fields"); + } + + @Override + public String typeName() { + return "histogram"; + } +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSourceConfig.java b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSourceConfig.java index 580a880875918..b7b7cd3579af7 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSourceConfig.java +++ b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSourceConfig.java @@ -37,6 +37,7 @@ import org.opensearch.index.fielddata.IndexGeoPointFieldData; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.DerivedFieldType; +import org.opensearch.index.mapper.HistogramFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.RangeFieldMapper; import org.opensearch.index.query.QueryShardContext; @@ -190,6 +191,20 @@ private static ValuesSourceConfig internalResolve( aggregationScript = ((DerivedFieldType) fieldType).getAggregationScript(context); } + if (fieldType instanceof HistogramFieldMapper.HistogramFieldType) { + return new ValuesSourceConfig( + CoreValuesSourceType.HISTOGRAM, // ValuesSourceType + fieldContext, // FieldContext + unmapped, // boolean unmapped + aggregationScript, // AggregationScript.LeafFactory script + ValueType.DOUBLE, // ValueType scriptValueType (use DOUBLE for histogram) + missing, // Object missing + timeZone, // ZoneId timeZone + docValueFormat, // DocValueFormat format + context::nowInMillis // LongSupplier nowSupplier + ); + } + config = new ValuesSourceConfig( valuesSourceType, fieldContext, diff --git a/server/src/test/java/org/opensearch/index/query/HistogramFieldQueryTests.java b/server/src/test/java/org/opensearch/index/query/HistogramFieldQueryTests.java index 0512c63726137..776ae534cb3d0 100644 --- a/server/src/test/java/org/opensearch/index/query/HistogramFieldQueryTests.java +++
b/server/src/test/java/org/opensearch/index/query/HistogramFieldQueryTests.java @@ -9,12 +9,18 @@ package org.opensearch.index.query; import org.opensearch.action.search.SearchResponse; +import org.opensearch.action.support.WriteRequest; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.search.aggregations.bucket.histogram.Histogram; +import org.opensearch.search.aggregations.bucket.range.Range; +import org.opensearch.search.aggregations.metrics.Avg; +import org.opensearch.search.aggregations.metrics.ExtendedStats; import org.opensearch.search.aggregations.metrics.Max; import org.opensearch.search.aggregations.metrics.Min; +import org.opensearch.search.aggregations.metrics.PercentileRanks; import org.opensearch.search.aggregations.metrics.Percentiles; +import org.opensearch.search.aggregations.metrics.Stats; import org.opensearch.search.aggregations.metrics.Sum; import org.opensearch.search.aggregations.metrics.ValueCount; import org.opensearch.test.OpenSearchSingleNodeTestCase; @@ -22,8 +28,16 @@ import java.util.List; +import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.opensearch.search.aggregations.AggregationBuilders.avg; +import static org.opensearch.search.aggregations.AggregationBuilders.extendedStats; import static org.opensearch.search.aggregations.AggregationBuilders.min; import static org.opensearch.search.aggregations.AggregationBuilders.max; +import static org.opensearch.search.aggregations.AggregationBuilders.percentileRanks; +import static org.opensearch.search.aggregations.AggregationBuilders.range; +import static org.opensearch.search.aggregations.AggregationBuilders.stats; import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.opensearch.search.aggregations.AggregationBuilders.count; import static org.opensearch.search.aggregations.AggregationBuilders.percentiles; @@ -162,8 +176,8 @@ public void testSumAggregation() throws Exception { assertSearchResponse(response); Sum sum = response.getAggregations().get("sum_value"); - // Expected sum: (0.1 * 3) + (0.2 * 7) + (0.3 * 23) = 8.0 - assertThat(sum.getValue(), equalTo(8.0)); + // Expected sum: (0.1 * 3) + (0.2 * 7) + (0.3 * 23) = 8.6 + assertThat(sum.getValue(), equalTo(8.6)); } public void testValueCountAggregation() throws Exception { @@ -192,6 +206,33 @@ public void testValueCountAggregation() throws Exception { assertThat(valueCount.getValue(), equalTo(33L)); // Sum of counts: 3 + 7 + 23 } + public void testAvgAggregation() throws Exception { + XContentBuilder mapping = createMapping(); + client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get(); + ensureGreen(); + + client().prepareIndex(defaultIndexName) + .setId("1") + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject(defaultHistogramField) + .array("values", new double[]{0.1, 0.2, 0.3}) + .array("counts", new long[]{3, 7, 23}) + .endObject() + .endObject()) + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .get(); + + SearchResponse response = client().prepareSearch(defaultIndexName) + .addAggregation(avg("avg_value").field(defaultHistogramField)) + .get(); + + assertSearchResponse(response); + Avg avg = response.getAggregations().get("avg_value"); + // Expected avg: ((0.1 * 3) + (0.2 * 7) + (0.3 * 23)) / (3 + 7 + 23) = 8.6 / 33 ≈ 0.26 + assertThat(avg.getValue(), closeTo(0.26, 
0.01)); + } + public void testPercentilesAggregation() throws Exception { XContentBuilder mapping = createMapping(); client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get(); @@ -220,8 +261,141 @@ public void testPercentilesAggregation() throws Exception { assertSearchResponse(response); Percentiles percentiles = response.getAggregations().get("percentiles"); assertThat(percentiles, notNullValue()); - assertThat(percentiles.percentile(50.0), greaterThan(0.0)); - assertThat(percentiles.percentile(95.0), greaterThan(0.0)); + + // Values should be within reasonable bounds given our distribution + double p50 = percentiles.percentile(50.0); + double p95 = percentiles.percentile(95.0); + + // Basic sanity checks + assertThat(p50, greaterThanOrEqualTo(0.1)); // Should be at least our minimum value + assertThat(p50, lessThanOrEqualTo(0.3)); // Should not exceed our maximum value + assertThat(p95, greaterThanOrEqualTo(p50)); // 95th percentile should be >= 50th percentile + assertThat(p95, lessThanOrEqualTo(0.3)); // Should not exceed our maximum value + } + + public void testStatsAggregation() throws Exception { + XContentBuilder mapping = createMapping(); + client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get(); + ensureGreen(); + + client().prepareIndex(defaultIndexName) + .setId("1") + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject(defaultHistogramField) + .array("values", new double[]{0.1, 0.2, 0.3}) + .array("counts", new long[]{3, 7, 23}) + .endObject() + .endObject()) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .get(); + + SearchResponse response = client().prepareSearch(defaultIndexName) + .addAggregation(stats("stats_value").field(defaultHistogramField)) + .get(); + + assertSearchResponse(response); + Stats stats = response.getAggregations().get("stats_value"); + assertThat(stats.getMin(), equalTo(0.1)); + assertThat(stats.getMax(), equalTo(0.3)); + assertThat(stats.getSum(), equalTo(8.6)); // 0.1*3 + 0.2*7 + 0.3*23 + assertThat(stats.getCount(), equalTo(33L)); // sum of counts + assertThat(stats.getAvg(), closeTo(0.26, 0.01)); // 8.6 / 33 + } + + public void testExtendedStatsAggregation() throws Exception { + XContentBuilder mapping = createMapping(); + client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get(); + ensureGreen(); + + client().prepareIndex(defaultIndexName) + .setId("1") + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject(defaultHistogramField) + .array("values", new double[]{0.1, 0.2, 0.3}) + .array("counts", new long[]{3, 7, 23}) + .endObject() + .endObject()) + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .get(); + + SearchResponse response = client().prepareSearch(defaultIndexName) + .addAggregation(extendedStats("extended_stats").field(defaultHistogramField)) + .get(); + + assertSearchResponse(response); + ExtendedStats stats = response.getAggregations().get("extended_stats"); + assertThat(stats.getCount(), equalTo(33L)); + assertThat(stats.getMin(), equalTo(0.1)); + assertThat(stats.getMax(), equalTo(0.3)); + assertThat(stats.getSum(), equalTo(8.6)); + assertThat(stats.getAvg(), closeTo(0.26, 0.01)); + assertThat(stats.getVariance(), greaterThanOrEqualTo(0.0)); + assertThat(stats.getStdDeviation(), greaterThanOrEqualTo(0.0)); + } + + public void testPercentileRanksAggregation() throws Exception { + XContentBuilder mapping = createMapping(); + 
client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get(); + ensureGreen(); + + client().prepareIndex(defaultIndexName) + .setId("1") + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject(defaultHistogramField) + .array("values", new double[]{0.1, 0.2, 0.3}) + .array("counts", new long[]{3, 7, 23}) + .endObject() + .endObject()) + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .get(); + + SearchResponse response = client().prepareSearch(defaultIndexName) + .addAggregation(percentileRanks("percentile_ranks", new double[]{0.2, 0.3}).field(defaultHistogramField)) + .get(); + + assertSearchResponse(response); + PercentileRanks ranks = response.getAggregations().get("percentile_ranks"); + assertThat(ranks, notNullValue()); + assertThat(ranks.percent(0.2), greaterThanOrEqualTo(0.0)); + assertThat(ranks.percent(0.3), greaterThanOrEqualTo(0.0)); + } + + public void testRangeAggregation() throws Exception { + XContentBuilder mapping = createMapping(); + client().admin().indices().prepareCreate(defaultIndexName).setMapping(mapping).get(); + ensureGreen(); + + client().prepareIndex(defaultIndexName) + .setId("1") + .setSource(XContentFactory.jsonBuilder() + .startObject() + .startObject(defaultHistogramField) + .array("values", new double[]{0.1, 0.2, 0.3}) + .array("counts", new long[]{3, 7, 23}) + .endObject() + .endObject()) + .setRefreshPolicy(RefreshPolicy.IMMEDIATE) + .get(); + + SearchResponse response = client().prepareSearch(defaultIndexName) + .addAggregation( + range("range_agg") + .field(defaultHistogramField) + .addUnboundedTo(0.15) + .addRange(0.15, 0.25) + .addUnboundedFrom(0.25) + ) + .get(); + + assertSearchResponse(response); + Range range = response.getAggregations().get("range_agg"); + assertThat(range, notNullValue()); + for (Range.Bucket bucket : range.getBuckets()) { + assertThat(bucket.getDocCount(), greaterThanOrEqualTo(0L)); + } } public void testHistogramAggregation() throws Exception { @@ -234,8 +408,8 @@ public void testHistogramAggregation() throws Exception { .setSource(XContentFactory.jsonBuilder() .startObject() .startObject(defaultHistogramField) - .array("values", new double[]{0.1, 0.2, 0.3, 0.4, 0.5}) - .array("counts", new long[]{3, 7, 23, 12, 6}) + .array("values", new double[]{0.1, 0.2, 0.3}) + .array("counts", new long[]{3, 7, 23}) .endObject() .endObject()) .setRefreshPolicy(RefreshPolicy.IMMEDIATE) @@ -252,7 +426,10 @@ public void testHistogramAggregation() throws Exception { assertSearchResponse(response); Histogram histogram = response.getAggregations().get("histogram_agg"); assertThat(histogram, notNullValue()); - List buckets = histogram.getBuckets(); - assertThat(buckets.size(), greaterThan(0)); + for (Histogram.Bucket bucket : histogram.getBuckets()) { + assertThat(bucket.getDocCount(), greaterThanOrEqualTo(0L)); + } } + + }
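
A note on the weighted math the metric-aggregator changes above rely on (an illustrative, self-contained sketch, not part of the patch; the class name is hypothetical): each histogram entry is treated as count occurrences of value, so sums run Kahan summation over value * count, value_count accumulates the sum of counts, min/max read the first/last entry of the pre-sorted values array, and avg is sum divided by the total count. With the document the tests index (values {0.1, 0.2, 0.3}, counts {3, 7, 23}), the expected figures fall out as follows:

public class HistogramStatsSketch {
    public static void main(String[] args) {
        // Same doc as in HistogramFieldQueryTests: values sorted ascending, counts are per-value weights
        double[] values = { 0.1, 0.2, 0.3 };
        long[] counts = { 3, 7, 23 };

        double sum = 0.0;
        double compensation = 0.0; // Kahan compensation term, mirroring CompensatedSum
        long totalCount = 0;
        for (int i = 0; i < values.length; i++) {
            totalCount += counts[i];
            // Add each entry's weighted contribution once, carrying the rounding error forward
            double corrected = values[i] * counts[i] - compensation;
            double newSum = sum + corrected;
            compensation = (newSum - sum) - corrected;
            sum = newSum;
        }

        double min = values[0];                 // first entry of the sorted array
        double max = values[values.length - 1]; // last entry of the sorted array
        double avg = sum / totalCount;

        // Prints: sum=8.6 count=33 min=0.1 max=0.3 avg=0.2606, matching the test expectations
        System.out.printf("sum=%.1f count=%d min=%.1f max=%.1f avg=%.4f%n", sum, totalCount, min, max, avg);
    }
}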