diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index d0cac82b23f..007ccb051f3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -8,6 +8,8 @@ import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CASCADED_NESTED; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DEEP_NESTED; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS; @@ -24,6 +26,7 @@ import org.junit.Ignore; import org.junit.Test; import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.ExplainIT; public class CalciteExplainIT extends ExplainIT { @@ -42,6 +45,8 @@ public void init() throws Exception { loadIndex(Index.WORKER); loadIndex(Index.WORK_INFORMATION); loadIndex(Index.WEBLOG); + loadIndex(Index.DEEP_NESTED); + loadIndex(Index.CASCADED_NESTED); } @Override @@ -1953,4 +1958,67 @@ public void testDedupTextTypeNotPushdown() throws IOException { assertYamlEqualsIgnoreId( expected, explainQueryYaml(String.format("source=%s | dedup email", TEST_INDEX_BANK))); } + + @Test + public void testFilterOnComputedNestedFields() throws IOException { + assertYamlEqualsIgnoreId( + loadExpectedPlan("filter_computed_nested.yaml"), + explainQueryYaml( + StringUtils.format( + "source=%s | eval proj_name_len=length(projects.name) | fields projects.name," + + " proj_name_len | where proj_name_len > 29", + TEST_INDEX_DEEP_NESTED))); + } + + @Test + public void testFilterOnNestedAndRootFields() throws IOException { + assertYamlEqualsIgnoreId( + loadExpectedPlan("filter_root_and_nested.yaml"), + // city is not in a nested object + explainQueryYaml( + StringUtils.format( + "source=%s | where city.name = 'Seattle' and length(projects.name) > 29", + TEST_INDEX_DEEP_NESTED))); + } + + @Test + public void testFilterOnNestedFields() throws IOException { + assertYamlEqualsIgnoreId( + loadExpectedPlan("filter_nested_term.yaml"), + // address is a nested object + explainQueryYaml( + StringUtils.format( + "source=%s | where address.city = 'New york city'", TEST_INDEX_NESTED_SIMPLE))); + + assertYamlEqualsIgnoreId( + loadExpectedPlan("filter_nested_terms.yaml"), + explainQueryYaml( + StringUtils.format( + "source=%s | where address.city in ('Miami', 'san diego')", + TEST_INDEX_NESTED_SIMPLE))); + } + + @Test + public void testFilterOnMultipleCascadedNestedFields() throws IOException { + // 1. Access two different hierarchies of nested fields, one at author.books.reviews, another at + // author.books + // 2. One is pushed as nested range query, another is pushed as nested filter query. + assertYamlEqualsIgnoreId( + loadExpectedPlan("filter_multiple_nested_cascaded_range.yaml"), + explainQueryYaml( + StringUtils.format( + "source=%s | where author.books.reviews.rating >=4 and author.books.reviews.rating" + + " < 6 and author.books.title = 'The Shining'", + TEST_INDEX_CASCADED_NESTED))); + } + + @Test + public void testAggFilterOnNestedFields() throws IOException { + assertYamlEqualsIgnoreId( + loadExpectedPlan("agg_filter_nested.yaml"), + explainQueryYaml( + StringUtils.format( + "source=%s | stats count(eval(author.name < 'K')) as george_and_jk", + TEST_INDEX_CASCADED_NESTED))); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java index 582ce47000d..42524b49c7e 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteWhereCommandIT.java @@ -5,6 +5,21 @@ package org.opensearch.sql.calcite.remote; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_CASCADED_NESTED; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DEEP_NESTED; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifyErrorMessageContains; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import org.json.JSONObject; +import org.junit.Test; +import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.WhereCommandIT; public class CalciteWhereCommandIT extends WhereCommandIT { @@ -12,6 +27,9 @@ public class CalciteWhereCommandIT extends WhereCommandIT { public void init() throws Exception { super.init(); enableCalcite(); + loadIndex(Index.NESTED_SIMPLE); + loadIndex(Index.DEEP_NESTED); + loadIndex(Index.CASCADED_NESTED); } @Override @@ -19,4 +37,119 @@ protected String getIncompatibleTypeErrMsg() { return "In expression types are incompatible: fields type LONG, values type [INTEGER, INTEGER," + " STRING]"; } + + @Test + public void testFilterOnComputedNestedFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | eval proj_name_len=length(projects.name) | fields projects.name," + + " proj_name_len | where proj_name_len > 29", + TEST_INDEX_DEEP_NESTED)); + verifySchema(result, schema("projects.name", "string"), schema("proj_name_len", "int")); + verifyDataRows(result, rows("AWS Redshift Spectrum querying", 30)); + } + + @Test + public void testFilterOnNestedAndRootFields() throws IOException { + JSONObject result = + executeQuery( + String.format( + "source=%s | where city.name = 'Seattle' and length(projects.name) > 29 | fields" + + " city.name, projects.name", + TEST_INDEX_DEEP_NESTED)); + verifySchema(result, schema("city.name", "string"), schema("projects.name", "string")); + verifyDataRows(result, rows("Seattle", "AWS Redshift Spectrum querying")); + } + + @Test + public void testFilterOnNestedFields() throws IOException { + // address is a nested object + JSONObject result1 = + executeQuery( + String.format( + "source=%s | where address.city = 'New york city' | fields address.city", + TEST_INDEX_NESTED_SIMPLE)); + verifySchema(result1, schema("address.city", "string")); + verifyDataRows(result1, rows("New york city")); + + JSONObject result2 = + executeQuery( + String.format( + "source=%s | where address.city in ('Miami', 'san diego') | fields address.city", + TEST_INDEX_NESTED_SIMPLE)); + verifyDataRows(result2, rows("Miami"), rows("san diego")); + } + + @Test + public void testFilterOnMultipleCascadedNestedFields() throws IOException { + // SQL's static type system does not allow returning list[int] in place of int + enabledOnlyWhenPushdownIsEnabled(); + JSONObject result = + executeQuery( + String.format( + "source=%s | where author.books.reviews.rating >=4 and author.books.reviews.rating" + + " < 6 and author.books.title = 'The Shining' | fields author.books", + TEST_INDEX_CASCADED_NESTED)); + verifySchema(result, schema("author.books", "array")); + verifyDataRows( + result, + rows( + List.of( + Map.of( + "title", + "The Shining", + "reviews", + List.of( + Map.of( + "review_date", + "2022-09-03", + "rating", + 3, + "comment", + "Brilliant but terrifying"), + Map.of( + "review_date", + "2023-04-12", + "rating", + 4, + "comment", + "Psychological horror at its best"), + Map.of( + "review_date", + "2023-10-28", + "rating", + 2, + "comment", + "Too slow in places")))))); + } + + @Test + public void testScriptFilterOnDifferentNestedHierarchyShouldThrow() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + Throwable t = + assertThrows( + Exception.class, + () -> + executeQuery( + String.format( + "source=%s | where author.books.reviews.rating + length(author.books.title)" + + " > 10", + TEST_INDEX_CASCADED_NESTED))); + verifyErrorMessageContains( + t, + "Accessing multiple nested fields under different hierarchies in script is not supported:" + + " [author.books.reviews, author.books]"); + } + + @Test + public void testAggFilterOnNestedFields() throws IOException { + JSONObject result = + executeQuery( + StringUtils.format( + "source=%s | stats count(eval(author.name < 'K')) as george_and_jk", + TEST_INDEX_CASCADED_NESTED)); + verifySchema(result, schema("george_and_jk", "bigint")); + verifyDataRows(result, rows(2)); + } } diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 50ee11b765a..d9b76f757f9 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -704,6 +704,11 @@ public enum Index { "_doc", getDeepNestedIndexMapping(), "src/test/resources/deep_nested_index_data.json"), + CASCADED_NESTED( + TestsConstants.TEST_INDEX_CASCADED_NESTED, + "_doc", + getMappingFile("cascaded_nested_index_mapping.json"), + "src/test/resources/cascaded_nested.json"), TELEMETRY( TestsConstants.TEST_INDEX_TELEMETRY, "_doc", diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 76923dbd984..ad8a232bab3 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -54,6 +54,7 @@ public class TestsConstants { public static final String TEST_INDEX_TIME_DATA = TEST_INDEX + "_time_data"; public static final String TEST_INDEX_DEEP_NESTED = TEST_INDEX + "_deep_nested"; + public static final String TEST_INDEX_CASCADED_NESTED = TEST_INDEX + "_cascaded_nested"; public static final String TEST_INDEX_TELEMETRY = TEST_INDEX + "_telemetry"; public static final String TEST_INDEX_STRINGS = TEST_INDEX + "_strings"; public static final String TEST_INDEX_DATATYPE_NUMERIC = TEST_INDEX + "_datatypes_numeric"; diff --git a/integ-test/src/test/resources/cascaded_nested.json b/integ-test/src/test/resources/cascaded_nested.json new file mode 100644 index 00000000000..2f2436b59e6 --- /dev/null +++ b/integ-test/src/test/resources/cascaded_nested.json @@ -0,0 +1,6 @@ +{"index": {"_id": "1"}} +{"author": {"name": "J.K. Rowling", "books": [{"title": "Harry Potter and the Sorcerer's Stone", "reviews": [{"rating": 5, "comment": "Magical and enchanting!", "review_date": "2023-01-15"}, {"rating": 4, "comment": "Great for kids and adults", "review_date": "2023-06-22"}]}, {"title": "Harry Potter and the Chamber of Secrets", "reviews": [{"rating": 5, "comment": "Even better than the first", "review_date": "2023-02-10"}, {"rating": 4, "comment": "Darker tone emerging", "review_date": "2023-07-18"}]}]}} +{"index": {"_id": "2"}} +{"author": {"name": "George R.R. Martin", "books": [{"title": "A Game of Thrones", "reviews": [{"rating": 4, "comment": "Epic fantasy masterpiece", "review_date": "2022-11-05"}, {"rating": 3, "comment": "Too many characters to track", "review_date": "2023-03-20"}]}, {"title": "A Clash of Kings", "reviews": [{"rating": 2, "comment": "Incredible plot twists", "review_date": "2023-08-14"}]}]}} +{"index": {"_id": "3"}} +{"author": {"name": "Stephen King", "books": [{"title": "The Shining", "reviews": [{"rating": 3, "comment": "Brilliant but terrifying", "review_date": "2022-09-03"}, {"rating": 4, "comment": "Psychological horror at its best", "review_date": "2023-04-12"}, {"rating": 2, "comment": "Too slow in places", "review_date": "2023-10-28"}]}]}} diff --git a/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml new file mode 100644 index 00000000000..c566e7e18f4 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/agg_filter_nested.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{}], george_and_jk=[COUNT($0)]) + LogicalProject($f1=[CASE(<($7, 'K'), 1, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]]) + physical: | + EnumerableLimit(fetch=[10000]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={},george_and_jk=COUNT() FILTER $0)], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"george_and_jk":{"filter":{"nested":{"query":{"range":{"author.name":{"from":null,"to":"K","include_lower":true,"include_upper":false,"boost":1.0}}},"path":"author","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"aggregations":{"george_and_jk":{"value_count":{"field":"_index"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_computed_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_computed_nested.yaml new file mode 100644 index 00000000000..e646df565b6 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/filter_computed_nested.yaml @@ -0,0 +1,9 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalFilter(condition=[>($1, 29)]) + LogicalProject(projects.name=[$3], proj_name_len=[CHAR_LENGTH($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]]) + physical: | + EnumerableCalc(expr#0=[{inputs}], expr#1=[CHAR_LENGTH($t0)], proj#0..1=[{exprs}]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]], PushDownContext=[[PROJECT->[projects.name], SCRIPT->>(CHAR_LENGTH($0), 29), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCIXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI+IiwKICAgICJraW5kIjogIkdSRUFURVJfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["projects.name",29]}},"boost":1.0}},"path":"projects","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["projects.name"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_multiple_nested_cascaded_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_multiple_nested_cascaded_range.yaml new file mode 100644 index 00000000000..ef0cf5d9813 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/filter_multiple_nested_cascaded_range.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(author=[$0]) + LogicalFilter(condition=[AND(SEARCH($4, Sarg[[4..6)]), =($6, 'The Shining'))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]], PushDownContext=[[PROJECT->[author, author.books.reviews.rating, author.books.title], FILTER->AND(SEARCH($1, Sarg[[4..6)]), =($2, 'The Shining')), PROJECT->[author], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"nested":{"query":{"range":{"author.books.reviews.rating":{"from":4.0,"to":6.0,"include_lower":true,"include_upper":false,"boost":1.0}}},"path":"author.books.reviews","ignore_unmapped":false,"score_mode":"none","boost":1.0}},{"nested":{"query":{"term":{"author.books.title":{"value":"The Shining","boost":1.0}}},"path":"author.books","ignore_unmapped":false,"score_mode":"none","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["author"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_term.yaml new file mode 100644 index 00000000000..5b040b26901 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_term.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(name=[$0], address=[$1], id=[$6], age=[$7]) + LogicalFilter(condition=[=($2, 'New york city')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, address.city, id, age], FILTER->=($2, 'New york city'), PROJECT->[name, address, id, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"term":{"address.city.keyword":{"value":"New york city","boost":1.0}}},"path":"address","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["name","address","id","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_terms.yaml new file mode 100644 index 00000000000..09590ff9945 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/filter_nested_terms.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(name=[$0], address=[$1], id=[$6], age=[$7]) + LogicalFilter(condition=[SEARCH($2, Sarg['Miami':VARCHAR, 'san diego':VARCHAR]:VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]], PushDownContext=[[PROJECT->[name, address, address.city, id, age], FILTER->SEARCH($2, Sarg['Miami':VARCHAR, 'san diego':VARCHAR]:VARCHAR), PROJECT->[name, address, id, age], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"terms":{"address.city.keyword":["Miami","san diego"],"boost":1.0}},"path":"address","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["name","address","id","age"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite/filter_root_and_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite/filter_root_and_nested.yaml new file mode 100644 index 00000000000..b3b9366b808 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite/filter_root_and_nested.yaml @@ -0,0 +1,8 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(accounts=[$0], projects=[$2], city=[$4], account=[$8]) + LogicalFilter(condition=[AND(=($7, 'Seattle'), >(CHAR_LENGTH($3), 29))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]]) + physical: | + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]], PushDownContext=[[PROJECT->[accounts, projects, projects.name, city, city.name, account], SCRIPT->AND(=($4, 'Seattle'), >(CHAR_LENGTH($2), 29)), PROJECT->[accounts, projects, city, account], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"bool":{"must":[{"term":{"city.name":{"value":"Seattle","boost":1.0}}},{"nested":{"query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCIXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI+IiwKICAgICJraW5kIjogIkdSRUFURVJfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["projects.name",29]}},"boost":1.0}},"path":"projects","ignore_unmapped":false,"score_mode":"none","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["accounts","projects","city","account"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_filter_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_filter_nested.yaml new file mode 100644 index 00000000000..3c0a1012db7 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/agg_filter_nested.yaml @@ -0,0 +1,11 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalAggregate(group=[{}], george_and_jk=[COUNT($0)]) + LogicalProject($f1=[CASE(<($7, 'K'), 1, null:NULL)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableAggregate(group=[{}], george_and_jk=[COUNT() FILTER $0]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=['K'], expr#15=[<($t7, $t14)], expr#16=[IS TRUE($t15)], $f1=[$t16]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_computed_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_computed_nested.yaml new file mode 100644 index 00000000000..5af4a2aceeb --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_computed_nested.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalFilter(condition=[>($1, 29)]) + LogicalProject(projects.name=[$3], proj_name_len=[CHAR_LENGTH($3)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=[CHAR_LENGTH($t3)], expr#17=[29], expr#18=[>($t16, $t17)], projects.name=[$t3], proj_name_len=[$t16], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_multiple_nested_cascaded_range.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_multiple_nested_cascaded_range.yaml new file mode 100644 index 00000000000..84926b80826 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_multiple_nested_cascaded_range.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(author=[$0]) + LogicalFilter(condition=[AND(SEARCH($4, Sarg[[4..6)]), =($6, 'The Shining'))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[Sarg[[4..6)]], expr#15=[SEARCH($t4, $t14)], expr#16=['The Shining':VARCHAR], expr#17=[=($t6, $t16)], expr#18=[AND($t15, $t17)], author=[$t0], $condition=[$t18]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_cascaded_nested]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_nested_term.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_nested_term.yaml new file mode 100644 index 00000000000..0e43ea85d83 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_nested_term.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(name=[$0], address=[$1], id=[$6], age=[$7]) + LogicalFilter(condition=[=($2, 'New york city')]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=['New york city':VARCHAR], expr#15=[=($t2, $t14)], proj#0..1=[{exprs}], id=[$t6], age=[$t7], $condition=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_nested_terms.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_nested_terms.yaml new file mode 100644 index 00000000000..1db1bc3bc80 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_nested_terms.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(name=[$0], address=[$1], id=[$6], age=[$7]) + LogicalFilter(condition=[SEARCH($2, Sarg['Miami':VARCHAR, 'san diego':VARCHAR]:VARCHAR)]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..13=[{inputs}], expr#14=[Sarg['Miami':VARCHAR, 'san diego':VARCHAR]:VARCHAR], expr#15=[SEARCH($t2, $t14)], proj#0..1=[{exprs}], id=[$t6], age=[$t7], $condition=[$t15]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_nested_simple]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_root_and_nested.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_root_and_nested.yaml new file mode 100644 index 00000000000..d635e8ae8e0 --- /dev/null +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/filter_root_and_nested.yaml @@ -0,0 +1,10 @@ +calcite: + logical: | + LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) + LogicalProject(accounts=[$0], projects=[$2], city=[$4], account=[$8]) + LogicalFilter(condition=[AND(=($7, 'Seattle'), >(CHAR_LENGTH($3), 29))]) + CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]]) + physical: | + EnumerableLimit(fetch=[10000]) + EnumerableCalc(expr#0..15=[{inputs}], expr#16=['Seattle':VARCHAR], expr#17=[=($t7, $t16)], expr#18=[CHAR_LENGTH($t3)], expr#19=[29], expr#20=[>($t18, $t19)], expr#21=[AND($t17, $t20)], accounts=[$t0], projects=[$t2], city=[$t4], account=[$t8], $condition=[$t21]) + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]]) diff --git a/integ-test/src/test/resources/indexDefinitions/cascaded_nested_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/cascaded_nested_index_mapping.json new file mode 100644 index 00000000000..301a26fe662 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/cascaded_nested_index_mapping.json @@ -0,0 +1,42 @@ +{ + "mappings": { + "properties": { + "author": { + "type": "nested", + "properties": { + "name": { + "type": "keyword" + }, + "books": { + "type": "nested", + "properties": { + "title": { + "type": "keyword" + }, + "reviews": { + "type": "nested", + "properties": { + "rating": { + "type": "integer" + }, + "comment": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "review_date": { + "type": "date", + "format": "yyyy-MM-dd" + } + } + } + } + } + } + } + } + } +} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4508.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4508.yml new file mode 100644 index 00000000000..690179dcc2e --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/4508.yml @@ -0,0 +1,110 @@ +setup: + - do: + indices.create: + index: test_nested_eval_filter + body: + mappings: + properties: + "id": + type: keyword + "items": + type: nested + properties: + "name": + type: keyword + - do: + bulk: + index: test_nested_eval_filter + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"id": "order1", "items": [{"name": "apple"}]}' + - '{"index": {"_id": "2"}}' + - '{"id": "order2", "items": [{"name": "banana"}]}' + - '{"index": {"_id": "3"}}' + - '{"id": "order3", "items": [{"name": "orange"}]}' + +--- +"eval on nested field without filter": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_nested_eval_filter | eval NameLen=LENGTH(items.name) | fields id, items.name, NameLen + + - match: { total: 3 } + - match: {"schema": [{"name": "id", "type": "string"}, {"name": "items.name", "type": "string"}, {"name": "NameLen", "type": "int"}]} + - match: {"datarows": [["order1", "apple", 5], ["order2", "banana", 6], ["order3", "orange", 6]]} + +--- +"eval on nested field with filter on computed field": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_nested_eval_filter | eval NameLen=LENGTH(items.name) | fields id, items.name, NameLen | where NameLen > 5 + + - match: { total: 2 } + - match: {"schema": [{"name": "id", "type": "string"}, {"name": "items.name", "type": "string"}, {"name": "NameLen", "type": "int"}]} + - match: {"datarows": [["order2", "banana", 6], ["order3", "orange", 6]]} + +--- +"comparison with regular field - eval and filter works correctly": + - skip: + features: + - headers + - do: + indices.create: + index: test_regular_eval_filter + body: + mappings: + properties: + "id": + type: keyword + "name": + type: keyword + - do: + bulk: + index: test_regular_eval_filter + refresh: true + body: + - '{"index": {"_id": "1"}}' + - '{"id": "order1", "name": "apple"}' + - '{"index": {"_id": "2"}}' + - '{"id": "order2", "name": "banana"}' + - '{"index": {"_id": "3"}}' + - '{"id": "order3", "name": "orange"}' + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_regular_eval_filter | eval NameLen=LENGTH(name) | fields id, name, NameLen | where NameLen > 5 + + - match: { total: 2 } + - match: {"schema": [{"name": "id", "type": "string"}, {"name": "name", "type": "string"}, {"name": "NameLen", "type": "int"}]} + - match: {"datarows": [["order2", "banana", 6], ["order3", "orange", 6]]} + +--- +"filter on both nested and root level fields ": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=test_nested_eval_filter | eval NameLen=LENGTH(items.name) | where NameLen> 5 and id = 'order2' | fields id, items.name, NameLen + + - match: { total: 1 } + - match: {"schema": [{"name": "id", "type": "string"}, {"name": "items.name", "type": "string"}, {"name": "NameLen", "type": "int"}]} + - match: {"datarows": [["order2", "banana", 6]]} diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java index 43d06c5e6b1..b7f1481da67 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java @@ -34,6 +34,7 @@ import static org.opensearch.index.query.QueryBuilders.boolQuery; import static org.opensearch.index.query.QueryBuilders.existsQuery; import static org.opensearch.index.query.QueryBuilders.matchQuery; +import static org.opensearch.index.query.QueryBuilders.nestedQuery; import static org.opensearch.index.query.QueryBuilders.rangeQuery; import static org.opensearch.index.query.QueryBuilders.regexpQuery; import static org.opensearch.index.query.QueryBuilders.termQuery; @@ -44,6 +45,7 @@ import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.SINGLE_FIELD_RELEVANCE_FUNCTION_SET; import static org.opensearch.sql.opensearch.storage.script.CompoundedScriptEngine.COMPOUNDED_LANG_NAME; +import com.google.common.base.Strings; import com.google.common.collect.BoundType; import com.google.common.collect.Range; import java.math.BigDecimal; @@ -56,8 +58,11 @@ import java.util.Locale; import java.util.Map; import java.util.Set; +import java.util.function.Predicate; import java.util.function.Supplier; +import java.util.stream.Collectors; import lombok.Getter; +import lombok.RequiredArgsConstructor; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.type.RelDataType; @@ -78,9 +83,11 @@ import org.apache.calcite.util.NlsString; import org.apache.calcite.util.RangeSets; import org.apache.calcite.util.Sarg; +import org.apache.lucene.search.join.ScoreMode; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; import org.opensearch.index.query.RangeQueryBuilder; import org.opensearch.index.query.ScriptQueryBuilder; import org.opensearch.script.Script; @@ -88,6 +95,7 @@ import org.opensearch.sql.calcite.type.ExprIPType; import org.opensearch.sql.calcite.type.ExprSqlType; import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT; +import org.opensearch.sql.calcite.utils.PlanUtils; import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils; import org.opensearch.sql.data.model.ExprIpValue; import org.opensearch.sql.data.model.ExprTimestampValue; @@ -1202,6 +1210,9 @@ public QueryBuilder builder() { if (builder == null) { throw new IllegalStateException("Builder was not initialized"); } + if (rel != null && !Strings.isNullOrEmpty(rel.nestedPath)) { + return nestedQuery(rel.nestedPath, builder, ScoreMode.None); + } return builder; } @@ -1454,6 +1465,8 @@ public static class ScriptQueryExpression extends QueryExpression { private final Supplier codeGenerator; private String generatedCode; private final ScriptParameterHelper parameterHelper; + private final Map fieldTypes; + private final List referredFields; public ScriptQueryExpression( RexNode rexNode, @@ -1475,6 +1488,12 @@ public ScriptQueryExpression( () -> SerializationWrapper.wrapWithLangType( ScriptEngineType.CALCITE, serializer.serialize(rexNode, parameterHelper)); + this.referredFields = + PlanUtils.getInputRefs(rexNode).stream() + .map(RexInputRef::getIndex) + .map(rowType.getFieldNames()::get) + .toList(); + this.fieldTypes = fieldTypes; } // For filter script, this method will be called after planning phase; @@ -1489,7 +1508,25 @@ private String getOrCreateGeneratedCode() { @Override public QueryBuilder builder() { - return new ScriptQueryBuilder(getScript()); + ScriptQueryBuilder scriptQuery = QueryBuilders.scriptQuery(getScript()); + List nestedPaths = + referredFields.stream() + .map(p -> resolveNestedPath(p, fieldTypes)) + .filter(Predicate.not(Strings::isNullOrEmpty)) + .distinct() + .collect(Collectors.toUnmodifiableList()); + if (nestedPaths.size() > 1) { + throw new UnsupportedScriptException( + String.format( + Locale.ROOT, + "Accessing multiple nested fields under different hierarchies in script is not" + + " supported: %s", + nestedPaths)); + } + if (!nestedPaths.isEmpty()) { + return nestedQuery(nestedPaths.get(0), scriptQuery, ScoreMode.None); + } + return scriptQuery; } public Script getScript() { @@ -1565,25 +1602,22 @@ static boolean isCastExpression(Expression exp) { } /** Used for bind variables. */ + @RequiredArgsConstructor public static final class NamedFieldExpression implements TerminalExpression { private final String name; private final ExprType type; + @Getter private final String nestedPath; public NamedFieldExpression( int refIndex, List schema, Map filedTypes) { this.name = refIndex >= schema.size() ? null : schema.get(refIndex); this.type = filedTypes.get(name); - } - - public NamedFieldExpression(String name, ExprType type) { - this.name = name; - this.type = type; + this.nestedPath = resolveNestedPath(name, filedTypes); } private NamedFieldExpression() { - this.name = null; - this.type = null; + this(null, null, ""); } private NamedFieldExpression( @@ -1591,11 +1625,11 @@ private NamedFieldExpression( this.name = (ref == null || ref.getIndex() >= schema.size()) ? null : schema.get(ref.getIndex()); this.type = filedTypes.get(name); + this.nestedPath = resolveNestedPath(name, filedTypes); } private NamedFieldExpression(RexLiteral literal) { - this.name = literal == null ? null : RexLiteral.stringValue(literal); - this.type = null; + this(literal == null ? null : RexLiteral.stringValue(literal), null, ""); } public String getRootName() { @@ -1802,4 +1836,33 @@ private static void checkForNestedFieldOperands(RexCall call) throws PredicateAn call.getKind())); } } + + /** + * Find the nested path for fields referenced in the expression. If multiple nested paths exist, + * returns the deepest one. + * + * @param name Field name to resolve + * @param fieldTypes Map of field names to their types. It HAS TO contain parent-level mappings + * @return The nested path, or empty string if no nested fields are found + */ + private static String resolveNestedPath(String name, Map fieldTypes) { + if (fieldTypes == null || fieldTypes.isEmpty()) { + return ""; + } + if (name.contains(".")) { + String[] parts = name.split("\\."); + // Check if the field is part of a nested structure + // Start from the deepest parent path and work backwards + // For "a.b.c.d", check "a.b.c" first, then "a.b", then "a" + for (int depth = parts.length - 1; depth > 0; depth--) { + String currentPath = String.join(".", java.util.Arrays.copyOfRange(parts, 0, depth)); + ExprType pathType = fieldTypes.get(currentPath); + // OpenSearchDataType.Nested is mapped to ExprCoreType.ARRAY + if (pathType == ExprCoreType.ARRAY) { + return currentPath; + } + } + } + return ""; + } } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java index 2821aa037da..2cb0afd1e41 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java @@ -153,10 +153,7 @@ public AbstractRelNode pushDownFilter(Filter filter) { try { RelDataType rowType = this.getRowType(); List schema = this.getRowType().getFieldNames(); - Map fieldTypes = - this.osIndex.getAllFieldTypes().entrySet().stream() - .filter(entry -> schema.contains(entry.getKey())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + Map fieldTypes = this.osIndex.getAllFieldTypes(); QueryExpression queryExpression = PredicateAnalyzer.analyzeExpression( filter.getCondition(), schema, fieldTypes, rowType, getCluster());