diff --git a/CMakeLists.txt b/CMakeLists.txt index 294af340..789eba4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,9 +28,15 @@ message(STATUS "BUILD_PYTHON_BINDINGS:${BUILD_PYTHON_BINDINGS}") option(BUILD_TOOLS "Build tools" ON) message(STATUS "BUILD_TOOLS:${BUILD_TOOLS}") +option(BUILD_EXAMPLES "Build examples" ON) +message(STATUS "BUILD_EXAMPLES:${BUILD_EXAMPLES}") + cc_directory(thirdparty) cc_directories(src) cc_directories(tests) +if(BUILD_EXAMPLES) + cc_directories(examples) +endif() if(BUILD_TOOLS) cc_directories(tools) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 00000000..66e943ad --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_subdirectory(c_api) \ No newline at end of file
diff --git a/examples/c_api/CMakeLists.txt b/examples/c_api/CMakeLists.txt
new file mode 100644
index 00000000..759f744f
--- /dev/null
+++ b/examples/c_api/CMakeLists.txt
@@ -0,0 +1,45 @@
+# Copyright 2025-present the zvec project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Every example below is configured identically: it links zvec_c_api, sees the
+# public headers, and lands in one shared output directory.
+if(CMAKE_RUNTIME_OUTPUT_DIRECTORY)
+  set(ZVEC_C_API_EXAMPLES_OUTPUT_DIR
+    "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/examples/c_api")
+else()
+  # Fall back to the conventional bin/ layout when the project does not set a
+  # global runtime output directory (otherwise the path would start at "/").
+  set(ZVEC_C_API_EXAMPLES_OUTPUT_DIR "${CMAKE_BINARY_DIR}/bin/examples/c_api")
+endif()
+
+# Each entry <name> is built from <name>.c into the target c_api_<name>.
+set(ZVEC_C_API_EXAMPLES
+  basic_example
+  collection_schema_example
+  doc_example
+  index_example
+  field_schema_example
+  optimized_example
+)
+
+foreach(example IN LISTS ZVEC_C_API_EXAMPLES)
+  add_executable(c_api_${example} ${example}.c)
+  target_link_libraries(c_api_${example} PRIVATE zvec_c_api)
+  target_include_directories(c_api_${example} PRIVATE
+    ${PROJECT_SOURCE_DIR}/src/include
+  )
+  set_target_properties(c_api_${example} PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY "${ZVEC_C_API_EXAMPLES_OUTPUT_DIR}"
+  )
+endforeach()
diff --git a/examples/c_api/basic_example.c b/examples/c_api/basic_example.c new file mode 100644 index 00000000..081e631a --- /dev/null +++ b/examples/c_api/basic_example.c @@ -0,0 +1,239 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + zvec_free_str(error_msg); + } + return error; +} +
+/**
+ * @brief Create a simple test collection using CollectionSchema
+ *
+ * Builds a three-field schema ("id" and "text" with an inverted index,
+ * "embedding" as a 3-dimensional FP32 vector with an HNSW index) and creates
+ * and opens the collection at ./test_collection.
+ *
+ * @param collection Receives the opened collection on success
+ * @return ZVEC_OK on success, ZVEC_ERROR_INTERNAL_ERROR when a schema, index
+ *         parameter or field object could not be created, otherwise the error
+ *         code returned by the failing API call
+ */
+static ZVecErrorCode create_simple_test_collection(
+    ZVecCollection **collection) {
+  // Handles start out NULL so the single cleanup path below can tell which
+  // ones were actually created.
+  ZVecCollectionSchema *schema = NULL;
+  ZVecInvertIndexParams *invert_params = NULL;
+  ZVecHnswIndexParams *hnsw_params = NULL;
+  ZVecFieldSchema *id_field = NULL;
+  ZVecFieldSchema *text_field = NULL;
+  ZVecFieldSchema *embedding_field = NULL;
+  ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS();
+  // Creation failures carry no API error code of their own.
+  ZVecErrorCode error = ZVEC_ERROR_INTERNAL_ERROR;
+
+  schema = zvec_collection_schema_create("test_collection");
+  if (!schema) {
+    goto cleanup;
+  }
+
+  // Create index parameters
+  invert_params = zvec_index_params_invert_create(true, false);
+  hnsw_params = zvec_index_params_hnsw_create(
+      ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50);
+  if (!invert_params || !hnsw_params) {
+    goto cleanup;
+  }
+
+  // Create and add ID field (primary key)
+  id_field = zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0);
+  if (!id_field) {
+    error = ZVEC_ERROR_INTERNAL_ERROR;
+    goto cleanup;
+  }
+  zvec_field_schema_set_invert_index(id_field, invert_params);
+  error = zvec_collection_schema_add_field(schema, id_field);
+  if (error != ZVEC_OK) {
+    goto cleanup;
+  }
+
+  // Create text field (inverted index)
+  text_field =
+      zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0);
+  if (!text_field) {
+    error = ZVEC_ERROR_INTERNAL_ERROR;
+    goto cleanup;
+  }
+  zvec_field_schema_set_invert_index(text_field, invert_params);
+  error = zvec_collection_schema_add_field(schema, text_field);
+  if (error != ZVEC_OK) {
+    goto cleanup;
+  }
+
+  // Create embedding field (HNSW index)
+  embedding_field = zvec_field_schema_create(
+      "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3);
+  if (!embedding_field) {
+    error = ZVEC_ERROR_INTERNAL_ERROR;
+    goto cleanup;
+  }
+  zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params);
+  error = zvec_collection_schema_add_field(schema, embedding_field);
+  if (error != ZVEC_OK) {
+    goto cleanup;
+  }
+
+  // Create collection with the default options
+  error = zvec_collection_create_and_open("./test_collection", schema, &options,
+                                          collection);
+
+cleanup:
+  // Release only what was created; same order as the success path always used
+  if (schema) {
+    zvec_collection_schema_destroy(schema);
+  }
+  if (invert_params) {
+    zvec_index_params_invert_destroy(invert_params);
+  }
+  if (hnsw_params) {
+    zvec_index_params_hnsw_destroy(hnsw_params);
+  }
+  return error;
+}
+
+/**
+ * @brief Basic C API usage example
+ *
+ * Creates a collection, inserts two documents, flushes, prints statistics and
+ * runs one vector query. Every failure after the collection exists goes
+ * through the single cleanup label so that documents and the collection are
+ * always released.
+ *
+ * @return 0 when the whole example succeeded, 1 otherwise
+ */
+int main() {
+  printf("=== ZVec C API Basic Example ===\n\n");
+
+  // Everything is declared up front (NULL-initialized where it is released at
+  // `cleanup`) so that no goto jumps across an initialization.
+  int exit_code = 1;
+  ZVecErrorCode error;
+  ZVecCollection *collection = NULL;
+  ZVecDoc *docs[2] = {NULL, NULL};
+  ZVecCollectionStats *stats = NULL;
+  ZVecDoc **results = NULL;
+  size_t result_count = 0;
+  size_t success_count = 0;
+  size_t error_count = 0;
+  ZVecVectorQuery query = {0};
+  float vector1[] = {0.1f, 0.2f, 0.3f};
+  float vector2[] = {0.4f, 0.5f, 0.6f};
+
+  // Create collection using simplified function
+  error = create_simple_test_collection(&collection);
+  if (handle_error(error, "creating collection") != ZVEC_OK) {
+    return 1;
+  }
+  printf("✓ Collection created successfully\n");
+
+  // Prepare test documents
+  for (int i = 0; i < 2; ++i) {
+    docs[i] = zvec_doc_create();
+    if (!docs[i]) {
+      fprintf(stderr, "Failed to create document %d\n", i);
+      goto cleanup;
+    }
+  }
+
+  // Manually add fields to document 1
+  zvec_doc_set_pk(docs[0], "doc1");
+  zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1",
+                              strlen("doc1"));
+  zvec_doc_add_field_by_value(docs[0], "text", ZVEC_DATA_TYPE_STRING,
+                              "First document", strlen("First document"));
+  zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              vector1, 3 * sizeof(float));
+
+  // Manually add fields to document 2
+  zvec_doc_set_pk(docs[1], "doc2");
+  zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2",
+                              strlen("doc2"));
+  zvec_doc_add_field_by_value(docs[1], "text", ZVEC_DATA_TYPE_STRING,
+                              "Second document", strlen("Second document"));
+  zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              vector2, 3 * sizeof(float));
+
+  // Insert documents
+  error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2,
+                                 &success_count, &error_count);
+  if (handle_error(error, "inserting documents") != ZVEC_OK) {
+    goto cleanup;
+  }
+  printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count,
+         error_count);
+
+  // The documents are no longer needed once inserted; clearing the slots
+  // keeps the cleanup path from destroying them a second time.
+  for (int i = 0; i < 2; ++i) {
+    zvec_doc_destroy(docs[i]);
+    docs[i] = NULL;
+  }
+
+  // Flush collection (a failed flush is reported but not fatal)
+  error = zvec_collection_flush(collection);
+  if (handle_error(error, "flushing collection") != ZVEC_OK) {
+    printf("Collection flush failed\n");
+  } else {
+    printf("✓ Collection flushed successfully\n");
+  }
+
+  // Get collection statistics
+  error = zvec_collection_get_stats(collection, &stats);
+  if (handle_error(error, "getting collection stats") == ZVEC_OK) {
+    printf("✓ Collection stats - Document count: %llu\n",
+           (unsigned long long)stats->doc_count);
+    // Free statistics memory
+    zvec_collection_stats_destroy(stats);
+  }
+
+  printf("Testing vector query...\n");
+  // Query documents
+  query.field_name =
+      (ZVecString){.data = "embedding", .length = strlen("embedding")};
+  query.query_vector =
+      (ZVecByteArray){.data = (uint8_t *)vector1, .length = 3 * sizeof(float)};
+  query.topk = 10;
+  query.filter = (ZVecString){.data = "", .length = 0};
+  query.include_vector = true;
+  query.include_doc_id = true;
+  query.output_fields = NULL;
+
+  error = zvec_collection_query(collection, &query, &results, &result_count);
+  if (error != ZVEC_OK) {
+    char *error_msg = NULL;
+    zvec_get_last_error(&error_msg);
+    printf("[ERROR] Query failed: %s\n",
+           error_msg ? error_msg : "Unknown error");
+    zvec_free_str(error_msg);
+    goto cleanup;
+  }
+
+  printf("✓ Query successful - Returned %zu results\n", result_count);
+
+  // Process query results (at most the first five)
+  for (size_t i = 0; i < result_count && i < 5; ++i) {
+    const ZVecDoc *doc = results[i];
+    const char *pk = zvec_doc_get_pk_copy(doc);
+
+    printf(" Result %zu: PK=%s, DocID=%llu, Score=%.4f\n", i + 1,
+           pk ? pk : "NULL", (unsigned long long)zvec_doc_get_doc_id(doc),
+           zvec_doc_get_score(doc));
+
+    // free(NULL) is a no-op, so no guard is needed
+    free((void *)pk);
+  }
+
+  // Free query results memory
+  zvec_docs_free(results, result_count);
+  exit_code = 0;
+
+cleanup:
+  // docs[] entries are non-NULL only when we bailed out before releasing them
+  for (int i = 0; i < 2; ++i) {
+    if (docs[i]) {
+      zvec_doc_destroy(docs[i]);
+    }
+  }
+  zvec_collection_destroy(collection);
+  if (exit_code == 0) {
+    printf("✓ Example completed\n");
+  }
+  return exit_code;
+}
\ No newline at end of file diff --git a/examples/c_api/collection_schema_example.c b/examples/c_api/collection_schema_example.c new file mode 100644 index 00000000..af66daa0 --- /dev/null +++ b/examples/c_api/collection_schema_example.c @@ -0,0 +1,252 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + zvec_free_str(error_msg); + } + return error; +} +
+/**
+ * @brief Collection schema creation and management example
+ *
+ * Builds a schema with an ID field, an inverted-index text field and a
+ * 128-dimensional HNSW vector field, creates a collection from it, inserts
+ * two documents and runs one vector query. Every failure jumps to a single
+ * cleanup label that releases whatever has been created so far.
+ *
+ * @return 0 when the whole example succeeded, 1 otherwise
+ */
+int main() {
+  printf("=== ZVec Collection Schema Example ===\n\n");
+
+  // Everything is declared up front (NULL-initialized where it is released at
+  // `cleanup`) so that no goto jumps across an initialization.
+  int exit_code = 1;
+  ZVecErrorCode error;
+  ZVecCollectionSchema *schema = NULL;
+  ZVecInvertIndexParams *invert_params = NULL;
+  ZVecHnswIndexParams *hnsw_params = NULL;
+  ZVecFieldSchema *id_field = NULL;
+  ZVecFieldSchema *text_field = NULL;
+  ZVecFieldSchema *vector_field = NULL;
+  ZVecCollection *collection = NULL;
+  ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS();
+  ZVecDoc *docs[2] = {NULL, NULL};
+  ZVecDoc **results = NULL;
+  size_t result_count = 0;
+  size_t success_count = 0;
+  size_t error_count = 0;
+  ZVecVectorQuery query = {0};
+  float vector1[128];
+  float vector2[128];
+
+  // 1. Create collection schema
+  schema = zvec_collection_schema_create("schema_example_collection");
+  if (!schema) {
+    fprintf(stderr, "Failed to create collection schema\n");
+    goto cleanup;
+  }
+  printf("✓ Collection schema created successfully\n");
+
+  // 2. Set schema properties
+  schema->max_doc_count_per_segment = 1000000;
+  printf("✓ Set max documents per segment: %llu\n",
+         (unsigned long long)schema->max_doc_count_per_segment);
+
+  // 3. Create index parameters
+  invert_params = zvec_index_params_invert_create(true, false);
+  hnsw_params = zvec_index_params_hnsw_create(
+      ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50);
+  if (!invert_params || !hnsw_params) {
+    // Whichever of the two did get created is released at `cleanup`
+    fprintf(stderr, "Failed to create index parameters\n");
+    goto cleanup;
+  }
+
+  // 4. Create and add ID field (primary key)
+  id_field = zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0);
+  if (!id_field) {
+    fprintf(stderr, "Failed to create ID field\n");
+    goto cleanup;
+  }
+  error = zvec_collection_schema_add_field(schema, id_field);
+  if (handle_error(error, "adding ID field") != ZVEC_OK) {
+    goto cleanup;
+  }
+  printf("✓ ID field added successfully\n");
+
+  // 5. Create and add text field with inverted index
+  text_field =
+      zvec_field_schema_create("content", ZVEC_DATA_TYPE_STRING, true, 0);
+  if (!text_field) {
+    fprintf(stderr, "Failed to create text field\n");
+    goto cleanup;
+  }
+  zvec_field_schema_set_invert_index(text_field, invert_params);
+  error = zvec_collection_schema_add_field(schema, text_field);
+  if (handle_error(error, "adding text field") != ZVEC_OK) {
+    goto cleanup;
+  }
+  printf("✓ Text field with inverted index added successfully\n");
+
+  // 6. Create and add vector field with HNSW index
+  vector_field = zvec_field_schema_create(
+      "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128);
+  if (!vector_field) {
+    fprintf(stderr, "Failed to create vector field\n");
+    goto cleanup;
+  }
+  zvec_field_schema_set_hnsw_index(vector_field, hnsw_params);
+  error = zvec_collection_schema_add_field(schema, vector_field);
+  if (handle_error(error, "adding vector field") != ZVEC_OK) {
+    goto cleanup;
+  }
+  printf("✓ Vector field with HNSW index added successfully\n");
+
+  // 7. Check field count
+  // Note: This function may not exist in current API, commenting out for now
+  // size_t field_count = zvec_collection_schema_get_field_count(schema);
+  // printf("✓ Total field count: %zu\n", field_count);
+
+  // 8. Create collection with schema
+  error = zvec_collection_create_and_open("./schema_example_collection", schema,
+                                          &options, &collection);
+  if (handle_error(error, "creating collection with schema") != ZVEC_OK) {
+    goto cleanup;
+  }
+  printf("✓ Collection created successfully with schema\n");
+
+  // 9. Prepare test data
+  for (int i = 0; i < 128; i++) {
+    vector1[i] = (float)(i + 1) / 128.0f;
+    vector2[i] = (float)(i + 2) / 128.0f;
+  }
+
+  // 10. Create documents
+  for (int i = 0; i < 2; i++) {
+    docs[i] = zvec_doc_create();
+    if (!docs[i]) {
+      fprintf(stderr, "Failed to create document %d\n", i);
+      goto cleanup;
+    }
+  }
+
+  // Add fields to document 1
+  zvec_doc_set_pk(docs[0], "doc1");
+  zvec_doc_add_field_by_value(docs[0], "id", ZVEC_DATA_TYPE_STRING, "doc1",
+                              strlen("doc1"));
+  zvec_doc_add_field_by_value(docs[0], "content", ZVEC_DATA_TYPE_STRING,
+                              "First test document",
+                              strlen("First test document"));
+  zvec_doc_add_field_by_value(docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              vector1, 128 * sizeof(float));
+
+  // Add fields to document 2
+  zvec_doc_set_pk(docs[1], "doc2");
+  zvec_doc_add_field_by_value(docs[1], "id", ZVEC_DATA_TYPE_STRING, "doc2",
+                              strlen("doc2"));
+  zvec_doc_add_field_by_value(docs[1], "content", ZVEC_DATA_TYPE_STRING,
+                              "Second test document",
+                              strlen("Second test document"));
+  zvec_doc_add_field_by_value(docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32,
+                              vector2, 128 * sizeof(float));
+
+  // 11. Insert documents
+  error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2,
+                                 &success_count, &error_count);
+  if (handle_error(error, "inserting documents") != ZVEC_OK) {
+    goto cleanup;
+  }
+  printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count,
+         error_count);
+
+  // The documents are no longer needed once inserted; clearing the slots
+  // keeps the cleanup path from destroying them a second time.
+  for (int i = 0; i < 2; i++) {
+    zvec_doc_destroy(docs[i]);
+    docs[i] = NULL;
+  }
+
+  // 12. Flush collection (a failed flush is reported but not fatal)
+  error = zvec_collection_flush(collection);
+  if (handle_error(error, "flushing collection") == ZVEC_OK) {
+    printf("✓ Collection flushed successfully\n");
+  }
+
+  // 13. Query test
+  query.field_name =
+      (ZVecString){.data = "embedding", .length = strlen("embedding")};
+  query.query_vector = (ZVecByteArray){.data = (uint8_t *)vector1,
+                                       .length = 128 * sizeof(float)};
+  query.topk = 5;
+  query.filter = (ZVecString){.data = "", .length = 0};
+  query.include_vector = true;
+  query.include_doc_id = true;
+  query.output_fields = NULL;
+
+  error = zvec_collection_query(collection, &query, &results, &result_count);
+  if (handle_error(error, "querying collection") != ZVEC_OK) {
+    goto cleanup;
+  }
+  printf("✓ Vector query successful - Returned %zu results\n", result_count);
+  zvec_docs_free(results, result_count);
+  exit_code = 0;
+
+cleanup:
+  // 14. Cleanup resources (only what was actually created)
+  for (int i = 0; i < 2; i++) {
+    if (docs[i]) {
+      zvec_doc_destroy(docs[i]);
+    }
+  }
+  if (collection) {
+    zvec_collection_destroy(collection);
+  }
+  if (schema) {
+    zvec_collection_schema_destroy(schema);
+  }
+  if (invert_params) {
+    zvec_index_params_invert_destroy(invert_params);
+  }
+  if (hnsw_params) {
+    zvec_index_params_hnsw_destroy(hnsw_params);
+  }
+  if (exit_code == 0) {
+    printf("✓ Schema example completed\n");
+  }
+
+  return exit_code;
+}
\ No newline at end of file diff --git a/examples/c_api/doc_example.c b/examples/c_api/doc_example.c new file mode 100644 index 00000000..81a74506 --- /dev/null +++ b/examples/c_api/doc_example.c @@ -0,0 +1,520 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + zvec_free_str(error_msg); + } + return error; +} +
+/**
+ * @brief Create a test document with all data types
+ *
+ * Sets the primary key and adds, in this order: "id", "string_field",
+ * "bool_field", "int32_field", "int64_field", "float_field", "double_field",
+ * "vector_fp32" (3 floats) and "large_vector" (16 floats). All values are
+ * derived from doc_index.
+ *
+ * @param doc_index Document index for generating unique data
+ * @return ZVecDoc* Created document pointer, or NULL if creation failed
+ */
+static ZVecDoc *create_full_type_test_doc(int doc_index) {
+  ZVecDoc *new_doc = zvec_doc_create();
+  if (new_doc == NULL) {
+    fprintf(stderr, "Failed to create document\n");
+    return NULL;
+  }
+
+  // Primary key: "doc_<index>"
+  char pk_text[32];
+  snprintf(pk_text, sizeof(pk_text), "doc_%d", doc_index);
+  zvec_doc_set_pk(new_doc, pk_text);
+
+  // "id" field: "id_<index>"
+  char id_text[32];
+  snprintf(id_text, sizeof(id_text), "id_%d", doc_index);
+  zvec_doc_add_field_by_value(new_doc, "id", ZVEC_DATA_TYPE_STRING, id_text,
+                              strlen(id_text));
+
+  // String scalar: "test_string_<index>"
+  char text[64];
+  snprintf(text, sizeof(text), "test_string_%d", doc_index);
+  zvec_doc_add_field_by_value(new_doc, "string_field", ZVEC_DATA_TYPE_STRING,
+                              text, strlen(text));
+
+  // Boolean scalar: true for even indices
+  bool is_even = (doc_index % 2 == 0);
+  zvec_doc_add_field_by_value(new_doc, "bool_field", ZVEC_DATA_TYPE_BOOL,
+                              &is_even, sizeof(is_even));
+
+  // Integer scalars
+  int32_t small_int = doc_index * 1000;
+  zvec_doc_add_field_by_value(new_doc, "int32_field", ZVEC_DATA_TYPE_INT32,
+                              &small_int, sizeof(small_int));
+
+  int64_t big_int = (int64_t)doc_index * 1000000LL;
+  zvec_doc_add_field_by_value(new_doc, "int64_field", ZVEC_DATA_TYPE_INT64,
+                              &big_int, sizeof(big_int));
+
+  // Floating point scalars
+  float single_precision = (float)doc_index * 1.5f;
+  zvec_doc_add_field_by_value(new_doc, "float_field", ZVEC_DATA_TYPE_FLOAT,
+                              &single_precision, sizeof(single_precision));
+
+  double double_precision = (double)doc_index * 2.718281828;
+  zvec_doc_add_field_by_value(new_doc, "double_field", ZVEC_DATA_TYPE_DOUBLE,
+                              &double_precision, sizeof(double_precision));
+
+  // 3-element FP32 vector: {index, 2*index, 3*index}
+  float small_vec[3] = {(float)doc_index, (float)doc_index * 2.0f,
+                        (float)doc_index * 3.0f};
+  zvec_doc_add_field_by_value(new_doc, "vector_fp32",
+                              ZVEC_DATA_TYPE_VECTOR_FP32, small_vec,
+                              sizeof(small_vec));
+
+  // 16-element FP32 vector
+  float big_vec[16];
+  for (int k = 0; k < 16; k++) {
+    big_vec[k] = (float)(doc_index * 16 + k) / 256.0f;
+  }
+  zvec_doc_add_field_by_value(new_doc, "large_vector",
+                              ZVEC_DATA_TYPE_VECTOR_FP32, big_vec,
+                              sizeof(big_vec));
+
+  return new_doc;
+}
+
+/**
+ * @brief Compare two documents for equality
+ *
+ * Only the primary keys are compared at the moment.
+ *
+ * @return true when both documents are non-NULL, both have a primary key and
+ *         the keys are equal; false otherwise
+ */
+static bool compare_documents(const ZVecDoc *doc1, const ZVecDoc *doc2) {
+  if (doc1 == NULL || doc2 == NULL) {
+    return false;
+  }
+
+  const char *lhs_pk = zvec_doc_get_pk_pointer(doc1);
+  const char *rhs_pk = zvec_doc_get_pk_pointer(doc2);
+  if (lhs_pk == NULL || rhs_pk == NULL) {
+    return false;
+  }
+
+  // TODO: Compare other fields and values
+
+  return strcmp(lhs_pk, rhs_pk) == 0;
+}
+
+/**
+ * @brief Print one string field read through the pointer accessor
+ *
+ * Prints nothing when the field cannot be read.
+ */
+static void print_string_field(const ZVecDoc *doc, const char *field_name) {
+  const void *text = NULL;
+  size_t text_len = 0;
+  ZVecErrorCode rc = zvec_doc_get_field_value_pointer(
+      doc, field_name, ZVEC_DATA_TYPE_STRING, &text, &text_len);
+  if (rc != ZVEC_OK || text == NULL) {
+    return;
+  }
+  printf(" %s: %.*s\n", field_name, (int)text_len, (const char *)text);
+}
+
+/**
+ * @brief Print one FP32 vector field read through the copy accessor
+ *
+ * At most the first 10 elements are printed, followed by ", ..." when the
+ * vector is longer. The copy is released with free(). Prints nothing when
+ * the field cannot be read.
+ */
+static void print_fp32_vector_field(const ZVecDoc *doc,
+                                    const char *field_name) {
+  void *copy = NULL;
+  size_t byte_count = 0;
+  ZVecErrorCode rc = zvec_doc_get_field_value_copy(
+      doc, field_name, ZVEC_DATA_TYPE_VECTOR_FP32, &copy, &byte_count);
+  if (rc != ZVEC_OK || copy == NULL) {
+    return;
+  }
+
+  const float *values = (const float *)copy;
+  const size_t dim = byte_count / sizeof(float);
+  const size_t shown = dim < 10 ? dim : 10;
+
+  printf(" %s (%zuD): [", field_name, dim);
+  for (size_t k = 0; k < shown; k++) {
+    // The separator goes in front of every element but the first
+    if (k > 0) {
+      printf(", ");
+    }
+    printf("%.3f", values[k]);
+  }
+  if (dim > 10) {
+    printf(", ...");
+  }
+  printf("]\n");
+
+  free(copy);
+}
+
+/**
+ * @brief Print document fields and their values
+ *
+ * Prints the primary key, document ID and score, then every scalar and
+ * vector field written by create_full_type_test_doc. A field that cannot be
+ * read is skipped silently.
+ *
+ * @param doc The document to print
+ * @param doc_index Document index for identification
+ */
+static void print_doc(const ZVecDoc *doc, int doc_index) {
+  if (doc == NULL) {
+    printf("Document %d: NULL document\n", doc_index);
+    return;
+  }
+
+  printf("\n=== Document %d ===\n", doc_index);
+
+  // Header: primary key, document ID, score
+  const char *primary_key = zvec_doc_get_pk_pointer(doc);
+  printf("Primary Key: %s\n", primary_key ? primary_key : "NULL");
+  printf("Document ID: %llu\n", (unsigned long long)zvec_doc_get_doc_id(doc));
+  printf("Score: %.6f\n", zvec_doc_get_score(doc));
+
+  printf("\nScalar Fields:\n");
+
+  // Strings go through the pointer accessor
+  print_string_field(doc, "id");
+  print_string_field(doc, "string_field");
+
+  // Fixed-size scalars go through the basic accessor
+  bool flag;
+  if (zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL,
+                                     &flag, sizeof(flag)) == ZVEC_OK) {
+    printf(" bool_field: %s\n", flag ? "true" : "false");
+  }
+
+  int32_t small_int;
+  if (zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32,
+                                     &small_int,
+                                     sizeof(small_int)) == ZVEC_OK) {
+    printf(" int32_field: %d\n", small_int);
+  }
+
+  int64_t big_int;
+  if (zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64,
+                                     &big_int, sizeof(big_int)) == ZVEC_OK) {
+    printf(" int64_field: %lld\n", (long long)big_int);
+  }
+
+  float single_precision;
+  if (zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT,
+                                     &single_precision,
+                                     sizeof(single_precision)) == ZVEC_OK) {
+    printf(" float_field: %.6f\n", single_precision);
+  }
+
+  double double_precision;
+  if (zvec_doc_get_field_value_basic(doc, "double_field",
+                                     ZVEC_DATA_TYPE_DOUBLE, &double_precision,
+                                     sizeof(double_precision)) == ZVEC_OK) {
+    printf(" double_field: %.6f\n", double_precision);
+  }
+
+  // Vectors go through the copy accessor
+  printf("\nVector Fields:\n");
+  print_fp32_vector_field(doc, "vector_fp32");
+  print_fp32_vector_field(doc, "large_vector");
+
+  printf("==================\n\n");
+}
+
+/** + * @brief Document creation, manipulation, and query example + */ +int main() { + printf("=== ZVec Document Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema for document testing + ZVecCollectionSchema *schema = + zvec_collection_schema_create("doc_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created\n"); + + // 2. Create index parameters + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + + if (!invert_params || !hnsw_params) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. 
Create fields for all data types + printf("Creating fields for all data types...\n"); + + // Id field with inverted index + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (id_field) { + zvec_field_schema_set_invert_index(id_field, invert_params); + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") == ZVEC_OK) { + printf("✓ ID field with inverted index added\n"); + } + } + + // Scalar fields + ZVecFieldSchema *string_field = + zvec_field_schema_create("string_field", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecFieldSchema *bool_field = + zvec_field_schema_create("bool_field", ZVEC_DATA_TYPE_BOOL, true, 0); + ZVecFieldSchema *int32_field = + zvec_field_schema_create("int32_field", ZVEC_DATA_TYPE_INT32, true, 0); + ZVecFieldSchema *int64_field = + zvec_field_schema_create("int64_field", ZVEC_DATA_TYPE_INT64, true, 0); + ZVecFieldSchema *float_field = + zvec_field_schema_create("float_field", ZVEC_DATA_TYPE_FLOAT, true, 0); + ZVecFieldSchema *double_field = + zvec_field_schema_create("double_field", ZVEC_DATA_TYPE_DOUBLE, true, 0); + + if (string_field) zvec_collection_schema_add_field(schema, string_field); + if (bool_field) zvec_collection_schema_add_field(schema, bool_field); + if (int32_field) zvec_collection_schema_add_field(schema, int32_field); + if (int64_field) zvec_collection_schema_add_field(schema, int64_field); + if (float_field) zvec_collection_schema_add_field(schema, float_field); + if (double_field) zvec_collection_schema_add_field(schema, double_field); + + // Vector fields + ZVecFieldSchema *vector_fp32_field = zvec_field_schema_create( + "vector_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 3); + ZVecFieldSchema *large_vector_field = zvec_field_schema_create( + "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 16); + + if (vector_fp32_field) { + zvec_field_schema_set_hnsw_index(vector_fp32_field, hnsw_params); + error = 
zvec_collection_schema_add_field(schema, vector_fp32_field); + if (handle_error(error, "adding vector FP32 field") == ZVEC_OK) { + printf("✓ Vector FP32 field with HNSW index added\n"); + } + } + + if (large_vector_field) { + zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, large_vector_field); + if (handle_error(error, "adding large vector field") == ZVEC_OK) { + printf("✓ Large vector field with HNSW index added\n"); + } + } + + // 4. Create collection + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./doc_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 5. 
Create and insert multiple test documents + printf("Creating and inserting test documents...\n"); + + const int doc_count = 5; + ZVecDoc *test_docs[doc_count]; + + for (int i = 0; i < doc_count; i++) { + test_docs[i] = create_full_type_test_doc(i); + if (!test_docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(test_docs[j]); + } + goto cleanup; + } + printf("✓ Created document %d with PK: %s\n", i, + zvec_doc_get_pk_pointer(test_docs[i])); + } + + // Print all documents before insertion + printf("\nDocuments before insertion:\n"); + for (int i = 0; i < doc_count; i++) { + print_doc(test_docs[i], i); + } + + // Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)test_docs, + doc_count, &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // 6. 
Flush collection + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") != ZVEC_OK) { + printf("Warning: Collection flush failed\n"); + } else { + printf("✓ Collection flushed successfully\n"); + } + + // Use the first document's vector for querying + float query_vector[] = {0.0f, 0.0f, 0.0f}; + ZVecVectorQuery query = { + .field_name = + (ZVecString){.data = "vector_fp32", .length = strlen("vector_fp32")}, + .query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector, + .length = 3 * sizeof(float)}, + .topk = 5, + .filter = (ZVecString){.data = "", .length = 0}, + .include_vector = true, + .include_doc_id = true, + .output_fields = NULL}; + + ZVecDoc **query_results = NULL; + size_t result_count = 0; + + error = + zvec_collection_query(collection, &query, &query_results, &result_count); + if (handle_error(error, "querying documents") != ZVEC_OK) { + query_results = NULL; + result_count = 0; + } + + printf("Query returned %zu results\n", result_count); + + // Print query results + printf("\nQuery Results:\n"); + for (size_t i = 0; i < result_count; i++) { + print_doc(query_results[i], i); + } + + // Compare query results + for (size_t i = 0; i < result_count && i < doc_count; i++) { + const char *result_pk = zvec_doc_get_pk_pointer(query_results[i]); + printf("Comparing query result[%zu]: %s\n", i, result_pk); + + // Find matching original document + bool found = false; + for (int j = 0; j < doc_count; j++) { + const char *original_pk = zvec_doc_get_pk_pointer(test_docs[j]); + if (strcmp(result_pk, original_pk) == 0) { + if (compare_documents(test_docs[j], query_results[i])) { + printf("✓ Query result %s matches original document\n", result_pk); + } else { + printf("✗ Query result %s does not match original document\n", + result_pk); + } + found = true; + break; + } + } + + if (!found) { + printf("⚠ Original document not found for: %s\n", result_pk); + } + } + + // 7. 
Filter query test + printf("\n=== Filter Query Test ===\n"); + + // Create filtered query + ZVecVectorQuery filtered_query = query; + filtered_query.filter = + (ZVecString){.data = "string_field = 'string_field_0'", + .length = strlen("string_field = 'string_field_0'")}; + + ZVecDoc **filtered_results = NULL; + size_t filtered_count = 0; + + error = zvec_collection_query(collection, &filtered_query, &filtered_results, + &filtered_count); + if (handle_error(error, "filtered querying") == ZVEC_OK) { + printf("Filtered query returned %zu results\n", filtered_count); + + // Verify filter results + bool filter_correct = true; + for (size_t i = 0; i < filtered_count; i++) { + // Note: Field value access may require different API + // For now, we'll just check that we got results + const char *pk = zvec_doc_get_pk_pointer(filtered_results[i]); + if (strstr(pk, "doc_") == NULL) { + filter_correct = false; + break; + } + } + + if (filter_correct) { + printf("✓ Filter query results are correct\n"); + } else { + printf("✗ Filter query results are incorrect\n"); + } + + if (filtered_results) { + zvec_docs_free(filtered_results, filtered_count); + } + } + + // 8. Cleanup query results + if (query_results) { + zvec_docs_free(query_results, result_count); + } + + // 9. Cleanup documents + for (int i = 0; i < doc_count; i++) { + zvec_doc_destroy(test_docs[i]); + } + + // 10. 
Final cleanup +cleanup: + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + + printf("✓ Document example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/examples/c_api/field_schema_example.c b/examples/c_api/field_schema_example.c new file mode 100644 index 00000000..2c1bd9d7 --- /dev/null +++ b/examples/c_api/field_schema_example.c @@ -0,0 +1,281 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + zvec_free_str(error_msg); + } + return error; +} + +/** + * @brief Field schema creation and management example + */ +int main() { + printf("=== ZVec Field Schema Example ===\n\n"); + + ZVecErrorCode error; + + // 1. 
Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("field_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. Create different types of index parameters + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 200, 50); + ZVecFlatIndexParams *flat_params = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + + if (!invert_params || !hnsw_params || !flat_params) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. Create scalar fields with different data types + printf("Creating scalar fields...\n"); + + // String field with inverted index + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + if (name_field) { + zvec_field_schema_set_invert_index(name_field, invert_params); + error = zvec_collection_schema_add_field(schema, name_field); + if (handle_error(error, "adding name field") == ZVEC_OK) { + printf("✓ String field 'name' with inverted index added\n"); + } + } + + // Integer field + ZVecFieldSchema *age_field = + zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, true, 0); + if (age_field) { + error = zvec_collection_schema_add_field(schema, age_field); + if (handle_error(error, "adding age field") == ZVEC_OK) { + printf("✓ Integer field 'age' added\n"); + } + } + + // Float field + ZVecFieldSchema *score_field = + zvec_field_schema_create("score", ZVEC_DATA_TYPE_FLOAT, true, 0); + if (score_field) { + error = zvec_collection_schema_add_field(schema, score_field); + if (handle_error(error, "adding score field") == ZVEC_OK) { + printf("✓ Float field 'score' added\n"); + } + } + + 
// Boolean field + ZVecFieldSchema *active_field = + zvec_field_schema_create("active", ZVEC_DATA_TYPE_BOOL, false, 0); + if (active_field) { + error = zvec_collection_schema_add_field(schema, active_field); + if (handle_error(error, "adding active field") == ZVEC_OK) { + printf("✓ Boolean field 'active' added\n"); + } + } + + // 4. Create vector fields with different dimensions and indexes + printf("Creating vector fields...\n"); + + // Small dimension vector with HNSW index + ZVecFieldSchema *small_vector_field = zvec_field_schema_create( + "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 32); + if (small_vector_field) { + zvec_field_schema_set_hnsw_index(small_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, small_vector_field); + if (handle_error(error, "adding small vector field") == ZVEC_OK) { + printf( + "✓ Small vector field 'small_vector' (32D) with HNSW index added\n"); + } + } + + // Medium dimension vector with Flat index + ZVecFieldSchema *medium_vector_field = zvec_field_schema_create( + "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (medium_vector_field) { + zvec_field_schema_set_flat_index(medium_vector_field, flat_params); + error = zvec_collection_schema_add_field(schema, medium_vector_field); + if (handle_error(error, "adding medium vector field") == ZVEC_OK) { + printf( + "✓ Medium vector field 'medium_vector' (128D) with Flat index " + "added\n"); + } + } + + // Large dimension vector with HNSW index + ZVecFieldSchema *large_vector_field = zvec_field_schema_create( + "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 512); + if (large_vector_field) { + zvec_field_schema_set_hnsw_index(large_vector_field, hnsw_params); + error = zvec_collection_schema_add_field(schema, large_vector_field); + if (handle_error(error, "adding large vector field") == ZVEC_OK) { + printf( + "✓ Large vector field 'large_vector' (512D) with HNSW index added\n"); + } + } + + // 5. 
Create collection with the schema + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./field_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + zvec_index_params_flat_destroy(flat_params); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 6. Create test documents with various field types + printf("Creating test documents...\n"); + + ZVecDoc *doc1 = zvec_doc_create(); + ZVecDoc *doc2 = zvec_doc_create(); + + if (!doc1 || !doc2) { + fprintf(stderr, "Failed to create documents\n"); + goto cleanup; + } + + // Document 1 + zvec_doc_set_pk(doc1, "user1"); + zvec_doc_add_field_by_value(doc1, "name", ZVEC_DATA_TYPE_STRING, + "Alice Johnson", strlen("Alice Johnson")); + int32_t age1 = 28; + zvec_doc_add_field_by_value(doc1, "age", ZVEC_DATA_TYPE_INT32, &age1, + sizeof(age1)); + float score1 = 87.5f; + zvec_doc_add_field_by_value(doc1, "score", ZVEC_DATA_TYPE_FLOAT, &score1, + sizeof(score1)); + bool active1 = true; + zvec_doc_add_field_by_value(doc1, "active", ZVEC_DATA_TYPE_BOOL, &active1, + sizeof(active1)); + + // Add vector data + float small_vec1[32]; + float medium_vec1[128]; + float large_vec1[512]; + + for (int i = 0; i < 32; i++) small_vec1[i] = (float)i / 32.0f; + for (int i = 0; i < 128; i++) medium_vec1[i] = (float)i / 128.0f; + for (int i = 0; i < 512; i++) large_vec1[i] = (float)i / 512.0f; + + zvec_doc_add_field_by_value(doc1, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + small_vec1, 32 * sizeof(float)); + zvec_doc_add_field_by_value(doc1, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + medium_vec1, 128 * sizeof(float)); + zvec_doc_add_field_by_value(doc1, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vec1, 512 * sizeof(float)); + + // 
Document 2 + zvec_doc_set_pk(doc2, "user2"); + zvec_doc_add_field_by_value(doc2, "name", ZVEC_DATA_TYPE_STRING, "Bob Smith", + strlen("Bob Smith")); + int32_t age2 = 35; + zvec_doc_add_field_by_value(doc2, "age", ZVEC_DATA_TYPE_INT32, &age2, + sizeof(age2)); + float score2 = 92.0f; + zvec_doc_add_field_by_value(doc2, "score", ZVEC_DATA_TYPE_FLOAT, &score2, + sizeof(score2)); + bool active2 = false; + zvec_doc_add_field_by_value(doc2, "active", ZVEC_DATA_TYPE_BOOL, &active2, + sizeof(active2)); + + // Add vector data + float small_vec2[32]; + float medium_vec2[128]; + float large_vec2[512]; + + for (int i = 0; i < 32; i++) small_vec2[i] = (float)(32 - i) / 32.0f; + for (int i = 0; i < 128; i++) medium_vec2[i] = (float)(128 - i) / 128.0f; + for (int i = 0; i < 512; i++) large_vec2[i] = (float)(512 - i) / 512.0f; + + zvec_doc_add_field_by_value(doc2, "small_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + small_vec2, 32 * sizeof(float)); + zvec_doc_add_field_by_value(doc2, "medium_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + medium_vec2, 128 * sizeof(float)); + zvec_doc_add_field_by_value(doc2, "large_vector", ZVEC_DATA_TYPE_VECTOR_FP32, + large_vec2, 512 * sizeof(float)); + + // 7. Insert documents + ZVecDoc *docs[] = {doc1, doc2}; + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 2, + &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // 8. 
Flush and test queries + zvec_collection_flush(collection); + printf("✓ Collection flushed\n"); + + // Test vector query on medium vector field + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "medium_vector", .length = strlen("medium_vector")}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)medium_vec1, + .length = 128 * sizeof(float)}; + query.topk = 2; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = false; + query.include_doc_id = true; + query.output_fields = NULL; + + ZVecDoc **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, &query, &results, &result_count); + if (error == ZVEC_OK) { + printf("✓ Vector query successful - Found %zu results\n", result_count); + zvec_docs_free(results, result_count); + } + + // 9. Cleanup +cleanup: + if (doc1) zvec_doc_destroy(doc1); + if (doc2) zvec_doc_destroy(doc2); + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + zvec_index_params_invert_destroy(invert_params); + zvec_index_params_hnsw_destroy(hnsw_params); + zvec_index_params_flat_destroy(flat_params); + + printf("✓ Field schema example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c_api/index_example.c b/examples/c_api/index_example.c new file mode 100644 index 00000000..9e163930 --- /dev/null +++ b/examples/c_api/index_example.c @@ -0,0 +1,328 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + zvec_free_str(error_msg); + } + return error; +} + +/** + * @brief Index creation and management example + */ +int main() { + printf("=== ZVec Index Example ===\n\n"); + + ZVecErrorCode error; + + // 1. Create collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("index_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created successfully\n"); + + // 2. Create different index parameter configurations + printf("Creating index parameters...\n"); + + // Inverted index parameters + ZVecInvertIndexParams *invert_params_standard = + zvec_index_params_invert_create(true, false); + ZVecInvertIndexParams *invert_params_extended = + zvec_index_params_invert_create(true, true); + + // HNSW index parameters with different configurations + ZVecHnswIndexParams *hnsw_params_fast = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + ZVecHnswIndexParams *hnsw_params_balanced = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 32, 200, 100); + ZVecHnswIndexParams *hnsw_params_accurate = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_IP, ZVEC_QUANTIZE_TYPE_UNDEFINED, 64, 400, 200); + + // Flat index parameters + ZVecFlatIndexParams *flat_params_l2 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecFlatIndexParams *flat_params_cosine = 
zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED); + + if (!invert_params_standard || !invert_params_extended || !hnsw_params_fast || + !hnsw_params_balanced || !hnsw_params_accurate || !flat_params_l2 || + !flat_params_cosine) { + fprintf(stderr, "Failed to create index parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. Create fields with different index types + printf("Creating fields with various index types...\n"); + + // Fields with inverted indexes + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + if (id_field) { + zvec_field_schema_set_invert_index(id_field, invert_params_standard); + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") == ZVEC_OK) { + printf("✓ ID field with standard inverted index added\n"); + } + } + + ZVecFieldSchema *category_field = + zvec_field_schema_create("category", ZVEC_DATA_TYPE_STRING, true, 0); + if (category_field) { + zvec_field_schema_set_invert_index(category_field, invert_params_extended); + error = zvec_collection_schema_add_field(schema, category_field); + if (handle_error(error, "adding category field") == ZVEC_OK) { + printf("✓ Category field with extended inverted index added\n"); + } + } + + // Vector fields with HNSW indexes (different configurations) + ZVecFieldSchema *fast_search_field = zvec_field_schema_create( + "fast_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 64); + if (fast_search_field) { + zvec_field_schema_set_hnsw_index(fast_search_field, hnsw_params_fast); + error = zvec_collection_schema_add_field(schema, fast_search_field); + if (handle_error(error, "adding fast search field") == ZVEC_OK) { + printf("✓ Fast search vector field (64D) with HNSW index added\n"); + } + } + + ZVecFieldSchema *balanced_field = zvec_field_schema_create( + "balanced_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (balanced_field) { + 
zvec_field_schema_set_hnsw_index(balanced_field, hnsw_params_balanced); + error = zvec_collection_schema_add_field(schema, balanced_field); + if (handle_error(error, "adding balanced field") == ZVEC_OK) { + printf("✓ Balanced vector field (128D) with HNSW index added\n"); + } + } + + ZVecFieldSchema *accurate_field = zvec_field_schema_create( + "accurate_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 256); + if (accurate_field) { + zvec_field_schema_set_hnsw_index(accurate_field, hnsw_params_accurate); + error = zvec_collection_schema_add_field(schema, accurate_field); + if (handle_error(error, "adding accurate field") == ZVEC_OK) { + printf("✓ Accurate vector field (256D) with HNSW index added\n"); + } + } + + // Vector field with Flat index + ZVecFieldSchema *exact_field = zvec_field_schema_create( + "exact_vector", ZVEC_DATA_TYPE_VECTOR_FP32, false, 32); + if (exact_field) { + zvec_field_schema_set_flat_index(exact_field, flat_params_l2); + error = zvec_collection_schema_add_field(schema, exact_field); + if (handle_error(error, "adding exact field") == ZVEC_OK) { + printf("✓ Exact search vector field (32D) with Flat index added\n"); + } + } + + // 4. Create collection + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + ZVecCollection *collection = NULL; + + error = zvec_collection_create_and_open("./index_example_collection", schema, + &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + zvec_collection_schema_destroy(schema); + // Cleanup index parameters + zvec_index_params_invert_destroy(invert_params_standard); + zvec_index_params_invert_destroy(invert_params_extended); + zvec_index_params_hnsw_destroy(hnsw_params_fast); + zvec_index_params_hnsw_destroy(hnsw_params_balanced); + zvec_index_params_hnsw_destroy(hnsw_params_accurate); + zvec_index_params_flat_destroy(flat_params_l2); + zvec_index_params_flat_destroy(flat_params_cosine); + return -1; + } + printf("✓ Collection created successfully\n"); + + // 5. 
Create test data + printf("Creating test documents...\n"); + + ZVecDoc *docs[3]; + for (int i = 0; i < 3; i++) { + docs[i] = zvec_doc_create(); + if (!docs[i]) { + fprintf(stderr, "Failed to create document %d\n", i); + // Cleanup + for (int j = 0; j < i; j++) { + zvec_doc_destroy(docs[j]); + } + goto cleanup; + } + } + + // Prepare vector data + float fast_vec[3][64]; + float balanced_vec[3][128]; + float accurate_vec[3][256]; + float exact_vec[3][32]; + + // Generate different vector patterns for testing + for (int doc_idx = 0; doc_idx < 3; doc_idx++) { + for (int i = 0; i < 64; i++) { + fast_vec[doc_idx][i] = (float)(doc_idx * 64 + i) / (64.0f * 3.0f); + } + for (int i = 0; i < 128; i++) { + balanced_vec[doc_idx][i] = (float)(doc_idx * 128 + i) / (128.0f * 3.0f); + } + for (int i = 0; i < 256; i++) { + accurate_vec[doc_idx][i] = (float)(doc_idx * 256 + i) / (256.0f * 3.0f); + } + for (int i = 0; i < 32; i++) { + exact_vec[doc_idx][i] = (float)(doc_idx * 32 + i) / (32.0f * 3.0f); + } + } + + // Populate documents + for (int i = 0; i < 3; i++) { + char pk[16]; + snprintf(pk, sizeof(pk), "doc%d", i + 1); + zvec_doc_set_pk(docs[i], pk); + + char id_val[16]; + snprintf(id_val, sizeof(id_val), "ID_%d", i + 1); + zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_STRING, id_val, + strlen(id_val)); + + char category_val[16]; + snprintf(category_val, sizeof(category_val), "cat_%d", (i % 2) + 1); + zvec_doc_add_field_by_value(docs[i], "category", ZVEC_DATA_TYPE_STRING, + category_val, strlen(category_val)); + + zvec_doc_add_field_by_value(docs[i], "fast_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, fast_vec[i], + 64 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "balanced_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, balanced_vec[i], + 128 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "accurate_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, accurate_vec[i], + 256 * sizeof(float)); + zvec_doc_add_field_by_value(docs[i], "exact_vector", + ZVEC_DATA_TYPE_VECTOR_FP32, 
exact_vec[i], + 32 * sizeof(float)); + } + + // 6. Insert documents + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const ZVecDoc **)docs, 3, + &success_count, &error_count); + if (handle_error(error, "inserting documents") == ZVEC_OK) { + printf("✓ Documents inserted - Success: %zu, Failed: %zu\n", success_count, + error_count); + } + + // Cleanup documents + for (int i = 0; i < 3; i++) { + zvec_doc_destroy(docs[i]); + } + + // 7. Flush collection to build indexes + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") == ZVEC_OK) { + printf("✓ Collection flushed - indexes built\n"); + } + + // 8. Test different query types + printf("Testing various index queries...\n"); + + // Test HNSW query (balanced) + ZVecVectorQuery hnsw_query = {0}; + hnsw_query.field_name = (ZVecString){.data = "balanced_vector", + .length = strlen("balanced_vector")}; + hnsw_query.query_vector = (ZVecByteArray){.data = (uint8_t *)balanced_vec[0], + .length = 128 * sizeof(float)}; + hnsw_query.topk = 2; + hnsw_query.filter = (ZVecString){.data = "", .length = 0}; + hnsw_query.include_vector = false; + hnsw_query.include_doc_id = true; + hnsw_query.output_fields = NULL; + + ZVecDoc **hnsw_results = NULL; + size_t hnsw_result_count = 0; + error = zvec_collection_query(collection, &hnsw_query, &hnsw_results, + &hnsw_result_count); + if (error == ZVEC_OK) { + printf("✓ HNSW query successful - Found %zu results\n", hnsw_result_count); + zvec_docs_free(hnsw_results, hnsw_result_count); + } + + // Test Flat query (exact) + ZVecVectorQuery flat_query = {0}; + flat_query.field_name = + (ZVecString){.data = "exact_vector", .length = strlen("exact_vector")}; + flat_query.query_vector = (ZVecByteArray){.data = (uint8_t *)exact_vec[0], + .length = 32 * sizeof(float)}; + flat_query.topk = 2; + flat_query.filter = (ZVecString){.data = "", .length = 0}; + flat_query.include_vector = false; + flat_query.include_doc_id = true; 
+ flat_query.output_fields = NULL; + + ZVecDoc **flat_results = NULL; + size_t flat_result_count = 0; + error = zvec_collection_query(collection, &flat_query, &flat_results, + &flat_result_count); + if (error == ZVEC_OK) { + printf("✓ Flat (exact) query successful - Found %zu results\n", + flat_result_count); + zvec_docs_free(flat_results, flat_result_count); + } + + // 9. Performance comparison information + printf("\nIndex Performance Characteristics:\n"); + printf("- Inverted Index: Fast text search, supports filtering\n"); + printf( + "- HNSW Index: Approximate nearest neighbor search, good balance of " + "speed/accuracy\n"); + printf("- Flat Index: Exact search, slower but 100%% accurate\n"); + printf( + "- Trade-off: Speed vs Accuracy - choose based on your requirements\n"); + + // 10. Cleanup +cleanup: + zvec_collection_destroy(collection); + zvec_collection_schema_destroy(schema); + + // Cleanup index parameters + zvec_index_params_invert_destroy(invert_params_standard); + zvec_index_params_invert_destroy(invert_params_extended); + zvec_index_params_hnsw_destroy(hnsw_params_fast); + zvec_index_params_hnsw_destroy(hnsw_params_balanced); + zvec_index_params_hnsw_destroy(hnsw_params_accurate); + zvec_index_params_flat_destroy(flat_params_l2); + zvec_index_params_flat_destroy(flat_params_cosine); + + printf("✓ Index example completed\n"); + return 0; +} \ No newline at end of file diff --git a/examples/c_api/optimized_example.c b/examples/c_api/optimized_example.c new file mode 100644 index 00000000..dca68305 --- /dev/null +++ b/examples/c_api/optimized_example.c @@ -0,0 +1,301 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include "zvec/c_api.h" + +/** + * @brief Print error message and return error code + */ +static ZVecErrorCode handle_error(ZVecErrorCode error, const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? error_msg : "Unknown error"); + zvec_free_str(error_msg); + } + return error; +} + +/** + * @brief Create test vector data + */ +static float *create_test_vector(size_t dimension) { + float *vector = malloc(dimension * sizeof(float)); + if (!vector) { + return NULL; + } + + for (size_t i = 0; i < dimension; i++) { + vector[i] = (float)rand() / RAND_MAX; + } + + return vector; +} + +/** + * @brief Optimized C API usage example with performance considerations + */ +int main() { + printf("=== ZVec Optimized C API Example ===\n\n"); + + // Get version information + const char *version = zvec_get_version(); + printf("ZVec Version: %s\n\n", version ? version : "Unknown"); + + ZVecErrorCode error; + + // 1. Create optimized collection schema + ZVecCollectionSchema *schema = + zvec_collection_schema_create("optimized_example_collection"); + if (!schema) { + fprintf(stderr, "Failed to create collection schema\n"); + return -1; + } + printf("✓ Collection schema created\n"); + + // 2. 
Create optimized index parameters + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, + 32, // Higher M for better connectivity + 200, // Construction ef for quality + 50 // Search ef for performance + ); + + if (!hnsw_params) { + fprintf(stderr, "Failed to create HNSW parameters\n"); + zvec_collection_schema_destroy(schema); + return -1; + } + + // 3. Create fields with optimized configuration + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *text_field = + zvec_field_schema_create("text", ZVEC_DATA_TYPE_STRING, true, 0); + ZVecFieldSchema *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + + if (!id_field || !text_field || !embedding_field) { + fprintf(stderr, "Failed to create field schemas\n"); + goto cleanup_params; + } + + // Set indexes + zvec_field_schema_set_hnsw_index(embedding_field, hnsw_params); + + // Add fields to schema + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding ID field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, text_field); + if (handle_error(error, "adding text field") != ZVEC_OK) goto cleanup_fields; + + error = zvec_collection_schema_add_field(schema, embedding_field); + if (handle_error(error, "adding embedding field") != ZVEC_OK) + goto cleanup_fields; + + printf("✓ Fields configured with indexes\n"); + + // 4. 
Create collection with optimized options + ZVecCollectionOptions options = ZVEC_DEFAULT_OPTIONS(); + options.enable_mmap = true; // Enable memory mapping for better performance + + ZVecCollection *collection = NULL; + error = zvec_collection_create_and_open("./optimized_example_collection", + schema, &options, &collection); + if (handle_error(error, "creating collection") != ZVEC_OK) { + goto cleanup_fields; + } + printf("✓ Collection created with optimized settings\n"); + + // 5. Bulk insert test data + const size_t DOC_COUNT = 1000; + const size_t BATCH_SIZE = 100; + + printf("Inserting %zu documents in batches of %zu...\n", DOC_COUNT, + BATCH_SIZE); + + clock_t start_time = clock(); + + for (size_t batch_start = 0; batch_start < DOC_COUNT; + batch_start += BATCH_SIZE) { + size_t current_batch_size = (batch_start + BATCH_SIZE > DOC_COUNT) + ? DOC_COUNT - batch_start + : BATCH_SIZE; + + ZVecDoc **batch_docs = malloc(current_batch_size * sizeof(ZVecDoc *)); + if (!batch_docs) { + fprintf(stderr, "Failed to allocate batch documents\n"); + break; + } + + // Create batch documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + if (!batch_docs[i]) { + fprintf(stderr, "Failed to create document\n"); + // Cleanup previous documents in batch + for (size_t j = 0; j < i; j++) { + zvec_doc_destroy(batch_docs[j]); + } + free(batch_docs); + goto cleanup_collection; + } + + size_t doc_id = batch_start + i; + char pk[32]; + snprintf(pk, sizeof(pk), "doc_%zu", doc_id); + zvec_doc_set_pk(batch_docs[i], pk); + + // Add ID field + char id_str[32]; + snprintf(id_str, sizeof(id_str), "ID_%zu", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_STRING, + id_str, strlen(id_str)); + + // Add text field + char text_str[64]; + snprintf(text_str, sizeof(text_str), + "Document number %zu with sample text", doc_id); + zvec_doc_add_field_by_value(batch_docs[i], "text", ZVEC_DATA_TYPE_STRING, + text_str, strlen(text_str)); + + 
// Add vector field + float *vector = create_test_vector(128); + if (vector) { + zvec_doc_add_field_by_value(batch_docs[i], "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vector, + 128 * sizeof(float)); + free(vector); + } + } + + // Insert batch + size_t success_count, error_count; + error = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + if (handle_error(error, "inserting batch") != ZVEC_OK) { + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + goto cleanup_collection; + } + + printf(" Batch %zu-%zu: %zu successful, %zu failed\n", batch_start, + batch_start + current_batch_size - 1, success_count, error_count); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + free(batch_docs); + } + + clock_t insert_end_time = clock(); + double insert_time = + ((double)(insert_end_time - start_time)) / CLOCKS_PER_SEC; + printf("✓ Bulk insertion completed in %.3f seconds (%.0f docs/sec)\n", + insert_time, DOC_COUNT / insert_time); + + // 6. Flush and optimize collection + printf("Flushing and optimizing collection...\n"); + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + printf("✓ Collection optimized\n"); + + // 7. 
Performance query test + printf("Testing query performance...\n"); + + float *query_vector = create_test_vector(128); + if (!query_vector) { + fprintf(stderr, "Failed to create query vector\n"); + goto cleanup_collection; + } + + ZVecVectorQuery query = {0}; + query.field_name = + (ZVecString){.data = "embedding", .length = strlen("embedding")}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vector, + .length = 128 * sizeof(float)}; + query.topk = 10; + query.filter = (ZVecString){.data = "", .length = 0}; + query.include_vector = false; + query.include_doc_id = true; + query.output_fields = NULL; + + const int QUERY_COUNT = 100; + start_time = clock(); + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + error = zvec_collection_query(collection, &query, &results, &result_count); + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + printf("Query %d failed: %s\n", q, + error_msg ? error_msg : "Unknown error"); + zvec_free_str(error_msg); + continue; + } + + if (results) { + zvec_docs_free(results, result_count); + } + } + + clock_t query_end_time = clock(); + double query_time = ((double)(query_end_time - start_time)) / CLOCKS_PER_SEC; + double avg_query_time = (query_time * 1000) / QUERY_COUNT; + + printf("✓ Performance test completed\n"); + printf(" Average query time: %.2f ms\n", avg_query_time); + printf(" Queries per second: %.0f\n", 1000.0 / avg_query_time); + + free(query_vector); + + // 8. Memory usage information + ZVecCollectionStats *stats = NULL; + error = zvec_collection_get_stats(collection, &stats); + if (error == ZVEC_OK && stats) { + printf("Collection Statistics:\n"); + printf(" Document count: %llu\n", (unsigned long long)stats->doc_count); + zvec_collection_stats_destroy(stats); + } + + // 9. 
Cleanup +cleanup_collection: + zvec_collection_destroy(collection); + +cleanup_fields: + // Field schemas are managed by the collection schema, no need to destroy + // individually + +cleanup_params: + zvec_collection_schema_destroy(schema); + zvec_index_params_hnsw_destroy(hnsw_params); + + printf("✓ Optimized example completed\n"); + + return 0; +} \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c516187c..39cc3712 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -8,6 +8,7 @@ git_version(ZVEC_VERSION ${CMAKE_CURRENT_SOURCE_DIR}) cc_directory(ailego) cc_directory(core) cc_directory(db) +cc_directory(c_api) if(BUILD_PYTHON_BINDINGS) cc_directory(binding) endif() diff --git a/src/c_api/CMakeLists.txt b/src/c_api/CMakeLists.txt new file mode 100644 index 00000000..c47fcaf3 --- /dev/null +++ b/src/c_api/CMakeLists.txt @@ -0,0 +1,133 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Build script for the zvec C API: defines the shared library target
# `zvec_c_api` and, when BUILD_STATIC_LIBS is set, the static variant
# `zvec_c_api_static`, together with their install rules.

include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)

# The implementation (c_api.cc) is C++; require C++17 without compiler
# extensions.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# C API library source files
set(ZVEC_C_API_SOURCES
    c_api.cc
)

# C API library header files
set(ZVEC_C_API_HEADERS
    ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h
)

# Create shared library
add_library(zvec_c_api SHARED
    ${ZVEC_C_API_SOURCES}
    ${ZVEC_C_API_HEADERS}
)

# Set library properties
set_target_properties(zvec_c_api PROPERTIES
    VERSION ${PROJECT_VERSION}
    SOVERSION ${PROJECT_VERSION_MAJOR}
    OUTPUT_NAME "zvec_c_api"
)

find_package(Threads REQUIRED)

# Link dependencies - Add force_load flag to ensure static initialization is executed
# Both branches pull zvec_core in whole rather than on demand.
# NOTE(review): the argument after -force_load reads "$" here, which looks
# like a truncated generator expression naming the zvec_core archive - confirm
# against the original file.
if(APPLE)
    target_link_libraries(zvec_c_api
        PRIVATE
            "-Wl,-force_load" "$"
            zvec_db
            Threads::Threads
    )
else()
    target_link_libraries(zvec_c_api
        PRIVATE
            "-Wl,--whole-archive" zvec_core "-Wl,--no-whole-archive"
            zvec_db
            Threads::Threads
    )
endif()

# Include directories
# NOTE(review): the two PUBLIC entries read "$" here; they appear to be
# truncated generator expressions - confirm against the original file.
target_include_directories(zvec_c_api
    PUBLIC
        $
        $
    PRIVATE
        ${PROJECT_SOURCE_DIR}/src
)

# Compile options
# NOTE(review): the conditions inside "$<$:" appear to have been lost; confirm
# which compilers these warning flags are meant for.
target_compile_options(zvec_c_api PRIVATE
    $<$:-Wall -Wextra -Wpedantic>
    $<$:-Wall -Wextra -Wpedantic>
)

# Installation rules
install(TARGETS zvec_c_api
    EXPORT zvecTargets
    LIBRARY DESTINATION lib
    ARCHIVE DESTINATION lib
    RUNTIME DESTINATION bin
    INCLUDES DESTINATION include
)

# The public header is installed next to the library as include/zvec/c_api.h.
install(FILES ${PROJECT_SOURCE_DIR}/src/include/zvec/c_api.h
    DESTINATION include/zvec
)

# Create static library version (optional)
# Built from the same sources; it shares OUTPUT_NAME with the shared library,
# so the two differ only by file extension on disk.
if(BUILD_STATIC_LIBS)
    add_library(zvec_c_api_static STATIC
        ${ZVEC_C_API_SOURCES}
        ${ZVEC_C_API_HEADERS}
    )

    set_target_properties(zvec_c_api_static PROPERTIES
        OUTPUT_NAME "zvec_c_api"
    )

    # Static library also adds force_load flag
    # NOTE(review): same apparently truncated "$" argument as for the shared
    # target above.
    if(APPLE)
        target_link_libraries(zvec_c_api_static
            PRIVATE
                "-Wl,-force_load" "$"
                zvec_db
                Threads::Threads
        )
    else()
        target_link_libraries(zvec_c_api_static
            PRIVATE
                "-Wl,--whole-archive" zvec_core "-Wl,--no-whole-archive"
                zvec_db
                Threads::Threads
        )
    endif()

    # NOTE(review): same apparently truncated "$" entries as for the shared
    # target above.
    target_include_directories(zvec_c_api_static
        PUBLIC
            $
            $
        PRIVATE
            ${PROJECT_SOURCE_DIR}/src
    )

    install(TARGETS zvec_c_api_static
        EXPORT zvecTargets
        ARCHIVE DESTINATION lib
        INCLUDES DESTINATION include
    )
endif()
+ +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Global status flags +static std::atomic g_initialized{false}; +static std::mutex g_init_mutex; + +// Thread-local storage for error information +static thread_local std::string last_error_message; +static thread_local ZVecErrorDetails last_error_details; + +// Helper function: set error information +static void set_last_error(const std::string &msg) { + last_error_message = msg; + + last_error_details.code = ZVEC_ERROR_UNKNOWN; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = nullptr; + last_error_details.line = 0; + last_error_details.function = nullptr; +} + +// Error setting function with detailed information +static void set_last_error_details(ZVecErrorCode code, const std::string &msg, + const char *file = nullptr, int line = 0, + const char *function = nullptr) { + last_error_message = msg; + last_error_details.code = code; + last_error_details.message = last_error_message.c_str(); + last_error_details.file = file; + last_error_details.line = line; + last_error_details.function = function; +} + +// ============================================================================= +// Version information interface implementation +// ============================================================================= + +// Store dynamically generated version information +static std::string g_version_info; +static std::mutex g_version_mutex; + +const char *zvec_get_version(void) { + std::lock_guard lock(g_version_mutex); + + if (g_version_info.empty()) { + try { + std::string version = ZVEC_VERSION_STRING; + + // Try to get Git information + std::string git_info; +#ifdef ZVEC_GIT_DESCRIBE + git_info = ZVEC_GIT_DESCRIBE; +#elif defined(ZVEC_GIT_COMMIT_HASH) + git_info = std::string("g") + ZVEC_GIT_COMMIT_HASH; +#endif + + if 
(!git_info.empty()) { + version += "-" + git_info; + } + + version += " (built " + std::string(__DATE__) + " " + + std::string(__TIME__) + ")"; + + g_version_info = version; + } catch (const std::exception &e) { + // If getting version information fails, fall back to basic version + g_version_info = ZVEC_VERSION_STRING; + } + } + + return g_version_info.c_str(); +} + +bool zvec_check_version(int major, int minor, int patch) { + if (major < 0 || minor < 0 || patch < 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Version numbers must be non-negative", __FILE__, + __LINE__, __FUNCTION__); + return false; + } + + if (ZVEC_VERSION_MAJOR > major) return true; + if (ZVEC_VERSION_MAJOR < major) return false; + + if (ZVEC_VERSION_MINOR > minor) return true; + if (ZVEC_VERSION_MINOR < minor) return false; + + return ZVEC_VERSION_PATCH >= patch; +} + +int zvec_get_version_major(void) { + return ZVEC_VERSION_MAJOR; +} + +int zvec_get_version_minor(void) { + return ZVEC_VERSION_MINOR; +} + +int zvec_get_version_patch(void) { + return ZVEC_VERSION_PATCH; +} + +// ============================================================================= +// String management functions implementation +// ============================================================================= + +ZVecString *zvec_string_create(const char *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + ZVecString *zstr = nullptr; + char *data_buffer = nullptr; + + try { + size_t len = strlen(str); + zstr = new ZVecString(); + data_buffer = new char[len + 1]; + strcpy(const_cast(data_buffer), str); + + zstr->data = data_buffer; + zstr->length = len; + zstr->capacity = len + 1; + + return zstr; + + } catch (const std::exception &e) { + if (data_buffer) { + delete[] data_buffer; + } + if (zstr) { + delete zstr; + } + + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + 
std::string("String creation failed: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } +} + + +ZVecString *zvec_string_create_from_view(const ZVecStringView *view) { + if (!view || !view->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String view or data cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + try { + auto zstr = new ZVecString(); + + zstr->data = new char[view->length + 1]; + memcpy(const_cast(zstr->data), view->data, view->length); + const_cast(zstr->data)[view->length] = '\0'; + zstr->length = view->length; + zstr->capacity = view->length + 1; + + return zstr; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("String creation from view failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("String creation from view failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } +} + +ZVecString *zvec_bin_create(const uint8_t *data, size_t length) { + if (!data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Binary data pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + try { + auto zstr = new ZVecString(); + + zstr->data = new char[length + 1]; + memcpy(const_cast(zstr->data), data, length); + const_cast(zstr->data)[length] = '\0'; // Null terminate for safety + zstr->length = length; + zstr->capacity = length + 1; + + return zstr; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Binary string creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Binary string creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); 
+ return nullptr; + } +} + +ZVecString *zvec_string_copy(const ZVecString *str) { + if (!str || !str->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Source string or data cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + return zvec_string_create(str->data); +} + +const char *zvec_string_c_str(const ZVecString *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + return str->data; +} + +size_t zvec_string_length(const ZVecString *str) { + if (!str) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointer cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return 0; + } + + return str->length; +} + +int zvec_string_compare(const ZVecString *str1, const ZVecString *str2) { + if (!str1 || !str2) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String pointers cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return -1; + } + + if (!str1->data || !str2->data) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "String data cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return -1; + } + + return strcmp(str1->data, str2->data); +} + + +// ============================================================================= +// Configuration-related functions implementation +// ============================================================================= + +ZVecConsoleLogConfig *zvec_config_console_log_create(ZVecLogLevel level) { + try { + auto config = new ZVecConsoleLogConfig(); + config->level = level; + return config; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create console log config: ") + + e.what()); + return nullptr; + } +} + +ZVecFileLogConfig *zvec_config_file_log_create(ZVecLogLevel level, + const char *dir, + const char *basename, + uint32_t file_size, + uint32_t overdue_days) { + try { + auto config = new 
ZVecFileLogConfig(); + config->level = level; + config->dir = *(zvec_string_create(dir)); + config->basename = *(zvec_string_create(basename)); + config->file_size = file_size; + config->overdue_days = overdue_days; + return config; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create file log config: ") + + e.what()); + return nullptr; + } +} + +ZVecLogConfig *zvec_config_log_create(ZVecLogType type, void *config_data) { + try { + auto log_config = new ZVecLogConfig(); + log_config->type = type; + + switch (type) { + case ZVEC_LOG_TYPE_CONSOLE: { + if (config_data) { + auto console_config = + reinterpret_cast(config_data); + log_config->config.console_config = *console_config; + } else { + log_config->config.console_config.level = ZVEC_LOG_LEVEL_WARN; + } + break; + } + case ZVEC_LOG_TYPE_FILE: { + if (config_data) { + auto file_config = reinterpret_cast(config_data); + log_config->config.file_config = *file_config; + } else { + log_config->config.file_config.level = ZVEC_LOG_LEVEL_WARN; + log_config->config.file_config.dir = *zvec_string_create("./log"); + log_config->config.file_config.basename = *zvec_string_create("zvec"); + log_config->config.file_config.file_size = 100; + log_config->config.file_config.overdue_days = 7; + } + break; + } + default: + set_last_error("Invalid log type"); + delete log_config; + return nullptr; + } + + return log_config; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create log config: ") + e.what()); + return nullptr; + } +} + +ZVecConfigData *zvec_config_data_create(void) { + ZVecConfigData *config = nullptr; + ZVecConsoleLogConfig *log_config = nullptr; + ZVecLogConfig *final_log_config = nullptr; + + try { + config = new ZVecConfigData(); + + log_config = zvec_config_console_log_create(ZVEC_LOG_LEVEL_WARN); + if (!log_config) { + throw std::runtime_error("Failed to create console log config"); + } + + final_log_config = + 
zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, log_config); + if (!final_log_config) { + throw std::runtime_error("Failed to create log config"); + } + + config->log_config = final_log_config; + + // Set default values from C++ ConfigData + zvec::GlobalConfig::ConfigData config_data; + config->memory_limit_bytes = config_data.memory_limit_bytes; + config->query_thread_count = config_data.query_thread_count; + config->invert_to_forward_scan_ratio = + config_data.invert_to_forward_scan_ratio; + config->brute_force_by_keys_ratio = config_data.brute_force_by_keys_ratio; + config->optimize_thread_count = config_data.optimize_thread_count; + + zvec_config_console_log_destroy(log_config); + return config; + + } catch (const std::exception &e) { + if (final_log_config) { + zvec_config_log_destroy(final_log_config); + } + if (log_config) { + zvec_config_console_log_destroy(log_config); + } + if (config) { + delete config; + } + + set_last_error(std::string("Failed to create config data: ") + e.what()); + return nullptr; + } +} + +void zvec_config_console_log_destroy(ZVecConsoleLogConfig *config) { + if (config) { + delete config; + } +} + +void zvec_config_file_log_destroy(ZVecFileLogConfig *config) { + if (config) { + if (config->dir.data) zvec_free_str(config->dir.data); + if (config->basename.data) zvec_free_str(config->basename.data); + delete config; + } +} + +void zvec_config_log_destroy(ZVecLogConfig *config) { + if (config) { + delete config; + } +} + +void zvec_config_data_destroy(ZVecConfigData *config) { + if (config) { + delete config; + } +} + +ZVecErrorCode zvec_config_data_set_memory_limit(ZVecConfigData *config, + uint64_t memory_limit_bytes) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->memory_limit_bytes = memory_limit_bytes; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_log_config(ZVecConfigData *config, + ZVecLogConfig *log_config) { + if (!config) { + 
set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->log_config = log_config; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_query_thread_count(ZVecConfigData *config, + uint32_t thread_count) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->query_thread_count = thread_count; + return ZVEC_OK; +} + +ZVecErrorCode zvec_config_data_set_optimize_thread_count( + ZVecConfigData *config, uint32_t thread_count) { + if (!config) { + set_last_error("Config data pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + config->optimize_thread_count = thread_count; + return ZVEC_OK; +} + + +// ============================================================================= +// Initialization and cleanup interface implementation +// ============================================================================= + +ZVecErrorCode zvec_initialize(const ZVecConfigData *config) { + std::lock_guard lock(g_init_mutex); + + if (g_initialized.load()) { + set_last_error_details(ZVEC_ERROR_ALREADY_EXISTS, + "Library already initialized"); + return ZVEC_ERROR_ALREADY_EXISTS; + } + + try { + // Convert to C++ configuration object + if (config) { + zvec::GlobalConfig::ConfigData cpp_config{}; + cpp_config.memory_limit_bytes = config->memory_limit_bytes; + cpp_config.query_thread_count = config->query_thread_count; + cpp_config.invert_to_forward_scan_ratio = + config->invert_to_forward_scan_ratio; + cpp_config.brute_force_by_keys_ratio = config->brute_force_by_keys_ratio; + cpp_config.optimize_thread_count = config->optimize_thread_count; + + // Set log configuration + if (config->log_config) { + std::shared_ptr log_config; + + switch (config->log_config->type) { + case ZVEC_LOG_TYPE_CONSOLE: { + auto console_level = static_cast( + config->log_config->config.console_config.level); + log_config = std::make_shared( + console_level); + break; + } + case 
ZVEC_LOG_TYPE_FILE: { + auto file_level = static_cast( + config->log_config->config.file_config.level); + std::string dir(config->log_config->config.file_config.dir.data, + config->log_config->config.file_config.dir.length); + std::string basename( + config->log_config->config.file_config.basename.data, + config->log_config->config.file_config.basename.length); + log_config = std::make_shared( + file_level, dir, basename); + break; + } + default: + throw std::runtime_error("Unknown log type"); + } + cpp_config.log_config = log_config; + } + // Initialize global configuration + auto status = zvec::GlobalConfig::Instance().Initialize(cpp_config); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } else { + // Initialize with default configuration + zvec::GlobalConfig::ConfigData default_config; + auto status = zvec::GlobalConfig::Instance().Initialize(default_config); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + g_initialized.store(true); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Initialization failed: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_shutdown(void) { + std::lock_guard lock(g_init_mutex); + + if (!g_initialized.load()) { + set_last_error_details(ZVEC_ERROR_FAILED_PRECONDITION, + "Library not initialized"); + return ZVEC_ERROR_FAILED_PRECONDITION; + } + + try { + g_initialized.store(false); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Shutdown failed: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_is_initialized(bool *initialized) { + if (!initialized) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Initialized flag pointer cannot be null", 
// Helper function: copy strings
//
// Returns a NUL-terminated heap copy of `str`, or nullptr when `str` is empty
// or memory cannot be obtained.
// The copy is allocated with malloc() because it is handed across the C
// boundary, where strings are released with free().
static char *copy_string(const std::string &str) {
  if (str.empty()) return nullptr;

  const size_t bytes = str.size() + 1;  // include the terminating NUL
  char *copy = static_cast<char *>(malloc(bytes));
  if (copy == nullptr) return nullptr;

  memcpy(copy, str.c_str(), bytes);
  return copy;
}
+} + +static ZVecDataType convert_zvec_data_type(zvec::DataType cpp_type) { + if (cpp_type < zvec::DataType::UNDEFINED || + cpp_type > zvec::DataType::ARRAY_DOUBLE) { + return ZVEC_DATA_TYPE_UNDEFINED; + } + + return static_cast(cpp_type); +} + +// Helper function: convert metric type +static zvec::MetricType convert_metric_type(ZVecMetricType metric_type) { + if (metric_type < ZVEC_METRIC_TYPE_UNDEFINED || + metric_type > ZVEC_METRIC_TYPE_MIPSL2) { + return zvec::MetricType::UNDEFINED; + } + + return static_cast(metric_type); +} + +// Helper function: convert ZVecIndexType to internal IndexType +static zvec::IndexType convert_index_type(ZVecIndexType zvec_type) { + if (zvec_type < ZVEC_INDEX_TYPE_UNDEFINED || + zvec_type > ZVEC_INDEX_TYPE_INVERT) { + return zvec::IndexType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Helper function: convert ZVecQuantizeType to internal QuantizeType +static zvec::QuantizeType convert_quantize_type(ZVecQuantizeType zvec_type) { + if (zvec_type < ZVEC_QUANTIZE_TYPE_UNDEFINED || + zvec_type > ZVEC_QUANTIZE_TYPE_INT4) { + return zvec::QuantizeType::UNDEFINED; + } + + return static_cast(zvec_type); +} + +// Helper function: set field index params +static zvec::Status set_field_index_params(zvec::FieldSchema::Ptr &field_schema, + const ZVecFieldSchema *zvec_field) { + if (!zvec_field->index_params) { + return zvec::Status::OK(); + } + + switch (zvec_field->index_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + const ZVecHnswIndexParams *params = + &zvec_field->index_params->params.hnsw_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = std::make_shared( + metric, params->m, params->ef_construction, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + const ZVecFlatIndexParams *params = + &zvec_field->index_params->params.flat_params; + auto metric = 
convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_INVERT: { + const ZVecInvertIndexParams *params = + &zvec_field->index_params->params.invert_params; + auto index_params = std::make_shared( + params->enable_range_optimization, params->enable_extended_wildcard); + field_schema->set_index_params(index_params); + break; + } + default: + break; + } + + return zvec::Status::OK(); +} + +// ============================================================================= +// Memory Management interface implementation +// ============================================================================= + +void *zvec_malloc(size_t size) { + if (size == 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Cannot allocate zero bytes", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + try { + return malloc(size); + } catch (const std::bad_alloc &e) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Memory allocation failed: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } +} + +void *zvec_realloc(void *ptr, size_t size) { + if (size == 0 && ptr == nullptr) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Cannot reallocate null pointer to zero size", + __FILE__, __LINE__, __FUNCTION__); + return nullptr; + } + + try { + return realloc(ptr, size); + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Memory reallocation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } +} + +void zvec_free(void *ptr) { + if (ptr) { + free(ptr); + } +} + +void zvec_free_string(ZVecString *str) { + if (str) { + if (str->data) { + delete[] str->data; + } + delete str; + } +} + +void zvec_free_string_array(ZVecStringArray *array) { + 
if (array) { + if (array->strings) { + for (size_t i = 0; i < array->count; ++i) { + zvec_free_string(&array->strings[i]); + } + delete[] array->strings; + } + delete array; + } +} + +void zvec_free_byte_array(ZVecMutableByteArray *array) { + if (array) { + if (array->data) { + delete[] array->data; + } + delete array; + } +} + +void zvec_free_str(char *str) { + if (str) { + free(str); + } +} + +void zvec_free_float_array(float *array) { + if (array) { + free(array); + } +} + +void zvec_free_str_array(char **array, size_t count) { + if (!array) return; + + // If count is 0, only free the string array itself, don't process internal + // strings + if (count == 0) { + free(array); + return; + } + + for (size_t i = 0; i < count; ++i) { + if (array[i]) { // Only free when string pointer is not null + free(array[i]); + } + } + free(array); +} + +ZVecErrorCode zvec_get_last_error(char **error_msg) { + if (!error_msg) { + set_last_error("Invalid argument: error_msg cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + *error_msg = copy_string(last_error_message); + return ZVEC_OK; +} + +void zvec_free_uint8_array(uint8_t *array) { + if (array) { + free(array); + } +} + +void zvec_free_field_schema_array(ZVecFieldSchema **array, size_t count) { + if (!array) return; + + for (size_t i = 0; i < count; ++i) { + zvec_free_field_schema(array[i]); + } + free(array); +} + +void zvec_free_field_schema(ZVecFieldSchema *field_schema) { + if (field_schema) { + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + } + delete field_schema; + } +} + + +// ============================================================================= +// Index parameters management interface implementation +// ============================================================================= + +void zvec_index_params_base_init(ZVecBaseIndexParams *params, + ZVecIndexType index_type) { + if (params) { + params->index_type = index_type; + } +} + +void 
zvec_index_params_invert_init(ZVecInvertIndexParams *params, + bool enable_range_opt, + bool enable_wildcard) { + if (params) { + zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); + params->enable_range_optimization = enable_range_opt; + params->enable_extended_wildcard = enable_wildcard; + } +} + +void zvec_index_params_vector_init(ZVecVectorIndexParams *params, + ZVecIndexType index_type, + ZVecMetricType metric_type, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_base_init(¶ms->base, index_type); + params->metric_type = metric_type; + params->quantize_type = quantize_type; + } +} + +void zvec_index_params_hnsw_init(ZVecHnswIndexParams *params, + ZVecMetricType metric_type, int m, + int ef_construction, int ef_search, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, + metric_type, quantize_type); + params->m = m; + params->ef_construction = ef_construction; + params->ef_search = ef_search; + } +} + +void zvec_index_params_flat_init(ZVecFlatIndexParams *params, + ZVecMetricType metric_type, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, + metric_type, quantize_type); + } +} + +void zvec_index_params_ivf_init(ZVecIVFIndexParams *params, + ZVecMetricType metric_type, int n_list, + int n_iters, bool use_soar, int n_probe, + ZVecQuantizeType quantize_type) { + if (params) { + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, + metric_type, quantize_type); + params->n_list = n_list; + params->n_iters = n_iters; + params->use_soar = use_soar; + params->n_probe = n_probe; + } +} + +void zvec_index_params_init_default(ZVecIndexParams *params, + ZVecIndexType index_type, + ZVecMetricType metric_type) { + if (!params) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Index params pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return; + } + + params->index_type = 
index_type; + + switch (index_type) { + case ZVEC_INDEX_TYPE_INVERT: + zvec_index_params_invert_init(¶ms->params.invert_params, false, + false); + break; + + case ZVEC_INDEX_TYPE_HNSW: + zvec_index_params_hnsw_init(¶ms->params.hnsw_params, metric_type, 16, + 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + break; + + case ZVEC_INDEX_TYPE_FLAT: + zvec_index_params_flat_init(¶ms->params.flat_params, metric_type, + ZVEC_QUANTIZE_TYPE_UNDEFINED); + break; + + case ZVEC_INDEX_TYPE_IVF: + zvec_index_params_ivf_init(¶ms->params.ivf_params, metric_type, 100, + 10, false, 10, ZVEC_QUANTIZE_TYPE_UNDEFINED); + break; + + default: + set_last_error_details(ZVEC_ERROR_NOT_SUPPORTED, "Unsupported index type", + __FILE__, __LINE__, __FUNCTION__); + break; + } +} + +void zvec_index_params_destroy(ZVecIndexParams *params) { + if (params) { + delete params; + } +} + +ZVecInvertIndexParams *zvec_index_params_invert_create(bool enable_range_opt, + bool enable_wildcard) { + try { + auto params = new ZVecInvertIndexParams(); + zvec_index_params_base_init(¶ms->base, ZVEC_INDEX_TYPE_INVERT); + params->enable_range_optimization = enable_range_opt; + params->enable_extended_wildcard = enable_wildcard; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create invert index params: ") + + e.what()); + return nullptr; + } +} + +ZVecVectorIndexParams *zvec_index_params_vector_create( + ZVecIndexType index_type, ZVecMetricType metric_type, + ZVecQuantizeType quantize_type) { + try { + auto params = new ZVecVectorIndexParams(); + zvec_index_params_base_init(¶ms->base, index_type); + params->metric_type = metric_type; + params->quantize_type = quantize_type; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create vector index params: ") + + e.what()); + return nullptr; + } +} + +ZVecHnswIndexParams *zvec_index_params_hnsw_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int m, + int 
ef_construction, int ef_search) { + try { + auto params = new ZVecHnswIndexParams(); + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_HNSW, + metric_type, quantize_type); + params->m = m; + params->ef_construction = ef_construction; + params->ef_search = ef_search; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create HNSW index params: ") + + e.what()); + return nullptr; + } +} + +ZVecFlatIndexParams *zvec_index_params_flat_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type) { + try { + auto params = new ZVecFlatIndexParams(); + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_FLAT, + metric_type, quantize_type); + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create Flat index params: ") + + e.what()); + return nullptr; + } +} + +ZVecIVFIndexParams *zvec_index_params_ivf_create(ZVecMetricType metric_type, + ZVecQuantizeType quantize_type, + int n_list, int n_iters, + bool use_soar, int n_probe) { + try { + auto params = new ZVecIVFIndexParams(); + zvec_index_params_vector_init(¶ms->base, ZVEC_INDEX_TYPE_IVF, + metric_type, quantize_type); + params->n_list = n_list; + params->n_iters = n_iters; + params->use_soar = use_soar; + params->n_probe = n_probe; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create IVF index params: ") + + e.what()); + return nullptr; + } +} + +void zvec_index_params_invert_destroy(ZVecInvertIndexParams *params) { + if (params) { + delete params; + } +} + +void zvec_index_params_vector_destroy(ZVecVectorIndexParams *params) { + if (params) { + delete params; + } +} + +void zvec_index_params_hnsw_destroy(ZVecHnswIndexParams *params) { + if (params) { + delete params; + } +} + +void zvec_index_params_flat_destroy(ZVecFlatIndexParams *params) { + if (params) { + delete params; + } +} + +void zvec_index_params_ivf_destroy(ZVecIVFIndexParams *params) { + if (params) { 
+ delete params; + } +} + +// ============================================================================= +// FieldSchema management interface implementation +// ============================================================================= + +ZVecFieldSchema *zvec_field_schema_create(const char *name, + ZVecDataType data_type, bool nullable, + uint32_t dimension) { + if (!name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + try { + auto schema = new ZVecFieldSchema(); + + schema->name = zvec_string_create(name); + if (!schema->name) { + delete schema; + return nullptr; + } + + schema->data_type = data_type; + schema->nullable = nullable; + schema->dimension = dimension; + schema->index_params = nullptr; + + return schema; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Field schema creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Field schema creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } +} + +void zvec_field_schema_destroy(ZVecFieldSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + schema->index_params = nullptr; + } + delete schema; + } +} + +ZVecErrorCode zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!index_params) { + if (schema->index_params) { + zvec_index_params_destroy(schema->index_params); + delete schema->index_params; + schema->index_params = nullptr; 
+ } + return ZVEC_OK; + } + + try { + if (!schema->index_params) { + schema->index_params = new ZVecIndexParams(); + } + + *schema->index_params = *index_params; + + return ZVEC_OK; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Failed to set index params: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to set index params: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +void zvec_field_schema_set_invert_index( + ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params) { + if (field_schema && invert_params) { + if (!field_schema->index_params) { + field_schema->index_params = new ZVecIndexParams(); + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_INVERT; + field_schema->index_params->params.invert_params = *invert_params; + } +} + +void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, + const ZVecHnswIndexParams *hnsw_params) { + if (field_schema && hnsw_params) { + if (!field_schema->index_params) { + field_schema->index_params = new ZVecIndexParams(); + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_HNSW; + field_schema->index_params->params.hnsw_params = *hnsw_params; + } +} + +void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, + const ZVecFlatIndexParams *flat_params) { + if (field_schema && flat_params) { + if (!field_schema->index_params) { + field_schema->index_params = new ZVecIndexParams(); + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_FLAT; + field_schema->index_params->params.flat_params = *flat_params; + } +} + +void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, + const ZVecIVFIndexParams *ivf_params) { + if (field_schema && ivf_params) { + if 
(!field_schema->index_params) { + field_schema->index_params = new ZVecIndexParams(); + } + + field_schema->index_params->index_type = ZVEC_INDEX_TYPE_IVF; + field_schema->index_params->params.ivf_params = *ivf_params; + } +} + +static void zvec_field_schema_cleanup(ZVecFieldSchema *field_schema) { + if (!field_schema) return; + + if (field_schema->index_params) { + zvec_index_params_destroy(field_schema->index_params); + delete field_schema->index_params; + field_schema->index_params = nullptr; + } + + zvec_free_string(field_schema->name); + field_schema->name = nullptr; +} + + +// ============================================================================= +// CollectionOptions management interface implementation +// ============================================================================= + +void zvec_collection_options_init_default(ZVecCollectionOptions *options) { + if (!options) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection options pointer cannot be null", + __FILE__, __LINE__, __FUNCTION__); + return; + } + + options->enable_mmap = true; + options->max_buffer_size = zvec::DEFAULT_MAX_BUFFER_SIZE; + options->read_only = false; + options->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; +} + +// ============================================================================= +// CollectionSchema management interface implementation +// ============================================================================= + +ZVecCollectionSchema *zvec_collection_schema_create(const char *name) { + if (!name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + try { + auto schema = new ZVecCollectionSchema(); + + schema->name = zvec_string_create(name); + if (!schema->name) { + delete schema; + return nullptr; + } + + schema->fields = nullptr; + schema->field_count = 0; + schema->field_capacity = 0; + 
schema->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return schema; + } catch (const std::bad_alloc &e) { + set_last_error_details( + ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Collection schema creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Collection schema creation failed: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } +} + +void zvec_collection_schema_destroy(ZVecCollectionSchema *schema) { + if (schema) { + zvec_free_string(schema->name); + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_destroy(schema->fields[i]); + } + delete[] schema->fields; + } + + delete schema; + } +} + +ZVecErrorCode zvec_collection_schema_add_field(ZVecCollectionSchema *schema, + ZVecFieldSchema *field) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field || !field->name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field or field name cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && field->name && + zvec_string_compare(schema->fields[i]->name, field->name) == 0) { + set_last_error_details( + ZVEC_ERROR_ALREADY_EXISTS, + std::string("Field '") + field->name->data + "' already exists", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_ALREADY_EXISTS; + } + } + + if (schema->field_count >= schema->field_capacity) { + size_t new_capacity = + schema->field_capacity == 0 ? 
8 : schema->field_capacity * 2; + auto new_fields = new ZVecFieldSchema *[new_capacity]; + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + delete[] schema->fields; + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + schema->fields[schema->field_count] = field; + schema->field_count++; + + return ZVEC_OK; + } catch (const std::bad_alloc &e) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Failed to add field: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to add field: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_schema_add_fields(ZVecCollectionSchema *schema, + const ZVecFieldSchema *fields, + size_t field_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!fields && field_count > 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Fields array cannot be null when field_count > 0", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + try { + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &field = fields[i]; + if (!field.name || !field.name->data || field.name->length == 0) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field at index ") + + std::to_string(i) + " has invalid name", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + size_t total_needed = schema->field_count + field_count; + if (total_needed > schema->field_capacity) { + size_t new_capacity = schema->field_capacity; + while (new_capacity < 
total_needed) { + new_capacity = new_capacity == 0 ? 8 : new_capacity * 2; + } + + auto new_fields = new ZVecFieldSchema *[new_capacity]; + + for (size_t i = 0; i < schema->field_count; ++i) { + new_fields[i] = schema->fields[i]; + } + + delete[] schema->fields; + schema->fields = new_fields; + schema->field_capacity = new_capacity; + } + + for (size_t i = 0; i < field_count; ++i) { + const ZVecFieldSchema &src_field = fields[i]; + + ZVecFieldSchema *new_field = new ZVecFieldSchema(); + + new_field->name = zvec_string_copy(src_field.name); + + new_field->data_type = src_field.data_type; + new_field->nullable = src_field.nullable; + new_field->dimension = src_field.dimension; + + if (src_field.index_params) { + new_field->index_params = new ZVecIndexParams(); + *(new_field->index_params) = *(src_field.index_params); + } else { + new_field->index_params = nullptr; + } + + schema->fields[schema->field_count] = new_field; + schema->field_count++; + } + + return ZVEC_OK; + } catch (const std::bad_alloc &e) { + set_last_error_details(ZVEC_ERROR_RESOURCE_EXHAUSTED, + std::string("Failed to add fields: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to add fields: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_schema_remove_field(ZVecCollectionSchema *schema, + const char *field_name) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_name) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name cannot be null", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + for (size_t i = 0; i < schema->field_count; ++i) { + if 
(schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + zvec_field_schema_destroy(schema->fields[i]); + + for (size_t j = i; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + return ZVEC_OK; + } + } + + set_last_error_details(ZVEC_ERROR_NOT_FOUND, + std::string("Field '") + field_name + "' not found", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_NOT_FOUND; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to remove field: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field_names && field_count > 0) { + set_last_error_details( + ZVEC_ERROR_INVALID_ARGUMENT, + "Field names array cannot be null when field_count > 0", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (field_count == 0) { + return ZVEC_OK; + } + + try { + for (size_t i = 0; i < field_count; ++i) { + if (!field_names[i]) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + std::string("Field name at index ") + + std::to_string(i) + " is null", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + std::vector remove_indices; + std::vector not_found_fields; + + for (size_t field_idx = 0; field_idx < field_count; ++field_idx) { + std::string target_name(field_names[field_idx]); + bool found = false; + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, target_name.c_str()) == 0) { + 
remove_indices.push_back(i); + found = true; + break; + } + } + + if (!found) { + not_found_fields.push_back(target_name); + } + } + + if (!not_found_fields.empty()) { + std::string error_msg = "Fields not found: "; + for (size_t i = 0; i < not_found_fields.size(); ++i) { + error_msg += "'" + not_found_fields[i] + "'"; + if (i < not_found_fields.size() - 1) { + error_msg += ", "; + } + } + set_last_error_details(ZVEC_ERROR_NOT_FOUND, error_msg, __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_NOT_FOUND; + } + + std::sort(remove_indices.begin(), remove_indices.end(), + std::greater()); + + for (size_t remove_index : remove_indices) { + zvec_field_schema_destroy(schema->fields[remove_index]); + + for (size_t j = remove_index; j < schema->field_count - 1; ++j) { + schema->fields[j] = schema->fields[j + 1]; + } + + schema->field_count--; + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error_details(ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to remove fields: ") + e.what(), + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecFieldSchema *zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name) { + if (!schema || !field_name) { + return nullptr; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + if (schema->fields[i]->name && + strcmp(schema->fields[i]->name->data, field_name) == 0) { + return schema->fields[i]; + } + } + + return nullptr; +} + +size_t zvec_collection_schema_get_field_count( + const ZVecCollectionSchema *schema) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return 0; + } + + return schema->field_count; +} + +ZVecFieldSchema *zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be 
null", __FILE__, + __LINE__, __FUNCTION__); + return nullptr; + } + + if (index >= schema->field_count) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field index out of bounds", __FILE__, __LINE__, + __FUNCTION__); + return nullptr; + } + + return schema->fields[index]; +} + +ZVecErrorCode zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + schema->max_doc_count_per_segment = max_doc_count; + return ZVEC_OK; +} + +uint64_t zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema) { + if (!schema) return 0; + return schema->max_doc_count_per_segment; +} + + +ZVecErrorCode zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg) { + if (!schema) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection schema pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (error_msg) { + *error_msg = nullptr; + } + + if (!schema->name) { + if (error_msg) { + *error_msg = zvec_string_create("Collection name is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Collection name is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (schema->field_count == 0) { + if (error_msg) { + *error_msg = zvec_string_create("At least one field is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "At least one field is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + for (size_t i = 0; i < schema->field_count; ++i) { + auto field = schema->fields[i]; + if (!field) { + if (error_msg) { + *error_msg = zvec_string_create("Null field found"); + } + 
set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, "Null field found", + __FILE__, __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + if (!field->name) { + if (error_msg) { + *error_msg = zvec_string_create("Field name is required"); + } + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Field name is required", __FILE__, __LINE__, + __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; +} + +void zvec_collection_schema_cleanup(ZVecCollectionSchema *schema) { + if (!schema) return; + + try { + if (schema->name) { + zvec_free_string(schema->name); + } + + if (schema->fields) { + for (size_t i = 0; i < schema->field_count; ++i) { + zvec_field_schema_cleanup(schema->fields[i]); + } + delete[] schema->fields; + schema->fields = nullptr; + schema->field_count = 0; + } + + schema->max_doc_count_per_segment = 0; + } catch (const std::exception &e) { + fprintf(stderr, + "Warning: Exception in zvec_collection_schema_cleanup: %s\n", + e.what()); + } +} + + +// ============================================================================= +// Helper functions +// ============================================================================= + +const char *zvec_error_code_to_string(ZVecErrorCode error_code) { + switch (error_code) { + case ZVEC_OK: + return "OK"; + case ZVEC_ERROR_NOT_FOUND: + return "NOT_FOUND"; + case ZVEC_ERROR_ALREADY_EXISTS: + return "ALREADY_EXISTS"; + case ZVEC_ERROR_INVALID_ARGUMENT: + return "INVALID_ARGUMENT"; + case ZVEC_ERROR_PERMISSION_DENIED: + return "PERMISSION_DENIED"; + case ZVEC_ERROR_FAILED_PRECONDITION: + return "FAILED_PRECONDITION"; + case ZVEC_ERROR_RESOURCE_EXHAUSTED: + return "RESOURCE_EXHAUSTED"; + case ZVEC_ERROR_UNAVAILABLE: + return "UNAVAILABLE"; + case ZVEC_ERROR_INTERNAL_ERROR: + return "INTERNAL_ERROR"; + case ZVEC_ERROR_NOT_SUPPORTED: + return "NOT_SUPPORTED"; + case ZVEC_ERROR_UNKNOWN: + return "UNKNOWN"; + default: + return "UNKNOWN_ERROR_CODE"; + } +} + +const char 
*zvec_data_type_to_string(ZVecDataType data_type) { + switch (data_type) { + case ZVEC_DATA_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_DATA_TYPE_BINARY: + return "BINARY"; + case ZVEC_DATA_TYPE_STRING: + return "STRING"; + case ZVEC_DATA_TYPE_BOOL: + return "BOOL"; + case ZVEC_DATA_TYPE_INT32: + return "INT32"; + case ZVEC_DATA_TYPE_INT64: + return "INT64"; + case ZVEC_DATA_TYPE_UINT32: + return "UINT32"; + case ZVEC_DATA_TYPE_UINT64: + return "UINT64"; + case ZVEC_DATA_TYPE_FLOAT: + return "FLOAT"; + case ZVEC_DATA_TYPE_DOUBLE: + return "DOUBLE"; + case ZVEC_DATA_TYPE_VECTOR_BINARY32: + return "VECTOR_BINARY32"; + case ZVEC_DATA_TYPE_VECTOR_BINARY64: + return "VECTOR_BINARY64"; + case ZVEC_DATA_TYPE_VECTOR_FP16: + return "VECTOR_FP16"; + case ZVEC_DATA_TYPE_VECTOR_FP32: + return "VECTOR_FP32"; + case ZVEC_DATA_TYPE_VECTOR_FP64: + return "VECTOR_FP64"; + case ZVEC_DATA_TYPE_VECTOR_INT4: + return "VECTOR_INT4"; + case ZVEC_DATA_TYPE_VECTOR_INT8: + return "VECTOR_INT8"; + case ZVEC_DATA_TYPE_VECTOR_INT16: + return "VECTOR_INT16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: + return "SPARSE_VECTOR_FP16"; + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: + return "SPARSE_VECTOR_FP32"; + case ZVEC_DATA_TYPE_ARRAY_BINARY: + return "ARRAY_BINARY"; + case ZVEC_DATA_TYPE_ARRAY_STRING: + return "ARRAY_STRING"; + case ZVEC_DATA_TYPE_ARRAY_BOOL: + return "ARRAY_BOOL"; + case ZVEC_DATA_TYPE_ARRAY_INT32: + return "ARRAY_INT32"; + case ZVEC_DATA_TYPE_ARRAY_INT64: + return "ARRAY_INT64"; + case ZVEC_DATA_TYPE_ARRAY_UINT32: + return "ARRAY_UINT32"; + case ZVEC_DATA_TYPE_ARRAY_UINT64: + return "ARRAY_UINT64"; + case ZVEC_DATA_TYPE_ARRAY_FLOAT: + return "ARRAY_FLOAT"; + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: + return "ARRAY_DOUBLE"; + default: + return "UNKNOWN_DATA_TYPE"; + } +} + +const char *zvec_index_type_to_string(ZVecIndexType index_type) { + switch (index_type) { + case ZVEC_INDEX_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_INDEX_TYPE_HNSW: + return "HNSW"; + case 
ZVEC_INDEX_TYPE_IVF: + return "IVF"; + case ZVEC_INDEX_TYPE_FLAT: + return "FLAT"; + case ZVEC_INDEX_TYPE_INVERT: + return "INVERT"; + default: + return "UNKNOWN_INDEX_TYPE"; + } +} + +const char *zvec_metric_type_to_string(ZVecMetricType metric_type) { + switch (metric_type) { + case ZVEC_METRIC_TYPE_UNDEFINED: + return "UNDEFINED"; + case ZVEC_METRIC_TYPE_L2: + return "L2"; + case ZVEC_METRIC_TYPE_IP: + return "IP"; + case ZVEC_METRIC_TYPE_COSINE: + return "COSINE"; + case ZVEC_METRIC_TYPE_MIPSL2: + return "MIPSL2"; + default: + return "UNKNOWN_METRIC_TYPE"; + } +} + +ZVecErrorCode zvec_get_system_info(ZVecString **info_json) { + if (!info_json) { + set_last_error_details(ZVEC_ERROR_INVALID_ARGUMENT, + "Info JSON pointer cannot be null", __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + std::ostringstream oss; + oss << "{"; + oss << "\"version\":\"" << ZVEC_VERSION_STRING << "\","; + oss << "\"platform\":\"" + << +#ifdef _WIN32 + "Windows" +#elif __APPLE__ + "macOS" +#elif __linux__ + "Linux" +#else + "Unknown" +#endif + << "\","; + oss << "\"architecture\":\"" + << +#ifdef __x86_64__ + "x86_64" +#elif __aarch64__ + "ARM64" +#elif __arm__ + "ARM" +#else + "Unknown" +#endif + << "\","; + oss << "\"compiler\":\"" + << +#ifdef __GNUC__ + "GCC " << __GNUC__ << "." << __GNUC_MINOR__ +#elif _MSC_VER + "MSVC " << _MSC_VER +#elif __clang__ + "Clang " << __clang_major__ << "." 
<< __clang_minor__ +#else + "Unknown" +#endif + << "\""; + oss << "}"; + + *info_json = zvec_string_create(oss.str().c_str()); + if (!*info_json) { + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error_details( + ZVEC_ERROR_INTERNAL_ERROR, + std::string("Failed to get system info: ") + e.what(), __FILE__, + __LINE__, __FUNCTION__); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +bool check_is_vector_field(const ZVecFieldSchema &zvec_field) { + bool is_vector_field = + (zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP64 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP16 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY32 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_BINARY64 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT4 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT8 || + zvec_field.data_type == ZVEC_DATA_TYPE_VECTOR_INT16 || + zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 || + zvec_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16); + return is_vector_field; +} + +// ============================================================================= +// Doc functions implementation +// ============================================================================= + +ZVecDoc *zvec_doc_create(void) { + try { + auto doc_ptr = + new std::shared_ptr(std::make_shared()); + return reinterpret_cast(doc_ptr); + + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create document: ") + e.what()); + return nullptr; + } +} + +void zvec_doc_destroy(ZVecDoc *doc) { + if (doc) { + delete reinterpret_cast *>(doc); + } +} + +void zvec_doc_clear(ZVecDoc *doc) { + if (doc) { + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->clear(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to cleanup document: ") + e.what()); + } + } +} + +void zvec_docs_free(ZVecDoc 
**docs, size_t count) { + if (!docs) return; + + for (size_t i = 0; i < count; ++i) { + zvec_doc_destroy(docs[i]); + } + + free(docs); +} + +void zvec_doc_set_pk(ZVecDoc *doc, const char *pk) { + if (!doc || !pk) return; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_pk(std::string(pk)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to set document PK: ") + e.what()); + } +} + +void zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id) { + if (!doc) return; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_doc_id(doc_id); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to set document id: ") + e.what()); + } +} + + +void zvec_doc_set_score(ZVecDoc *doc, float score) { + if (!doc) return; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_score(score); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to set document score: ") + e.what()); + } +} + +void zvec_doc_set_operator(ZVecDoc *doc, ZVecDocOperator op) { + if (!doc) return; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->set_operator(static_cast(op)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to set document operator: ") + e.what()); + } +} + +// ============================================================================= +// Document interface implementation +// ============================================================================= + +// Helper function to extract scalar values from raw data +template +T extract_scalar_value(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size != sizeof(T)) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return T{}; + } + return *static_cast(value); +} + +// Helper function to extract vector values from raw data +template +std::vector extract_vector_values(const void *value, size_t value_size, + ZVecErrorCode 
*error_code) { + if (value_size % sizeof(T) != 0) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::vector(); + } + size_t count = value_size / sizeof(T); + const T *vals = static_cast(value); + return std::vector(vals, vals + count); +} + +// Helper function to extract array values from raw data +template +std::vector extract_array_values(const void *value, size_t value_size, + ZVecErrorCode *error_code) { + if (value_size % sizeof(T) != 0) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::vector(); + } + size_t count = value_size / sizeof(T); + const T *vals = static_cast(value); + return std::vector(vals, vals + count); +} + +// Helper function to handle sparse vector extraction +template +std::pair, std::vector> extract_sparse_vector( + const void *value, size_t value_size, ZVecErrorCode *error_code) { + if (value_size < sizeof(uint32_t)) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::make_pair(std::vector(), std::vector()); + } + + const uint32_t *data = static_cast(value); + uint32_t nnz = data[0]; + + size_t required_size = + sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(T)); + if (value_size < required_size) { + if (error_code) { + *error_code = ZVEC_ERROR_INVALID_ARGUMENT; + } + return std::make_pair(std::vector(), std::vector()); + } + + const uint32_t *indices = data + 1; + const T *values = reinterpret_cast(indices + nnz); + + std::vector index_vec(indices, indices + nnz); + std::vector value_vec(values, values + nnz); + + return std::make_pair(std::move(index_vec), std::move(value_vec)); +} + +// Helper function to extract string array from raw data +std::vector extract_string_array(const void *value, + size_t value_size) { + std::vector string_array; + const char *data = static_cast(value); + size_t pos = 0; + + while (pos < value_size) { + size_t str_len = strlen(data + pos); + if (pos + str_len >= value_size) { + break; + } + 
string_array.emplace_back(data + pos, str_len); + pos += str_len + 1; + } + return string_array; +} + +// Helper function to extract binary array from raw data +std::vector extract_binary_array(const void *value, + size_t value_size) { + std::vector binary_array; + const char *data = static_cast(value); + size_t pos = 0; + + while (pos < value_size) { + if (pos + sizeof(uint32_t) > value_size) { + break; + } + uint32_t bin_len = *reinterpret_cast(data + pos); + pos += sizeof(uint32_t); + + if (pos + bin_len > value_size) { + break; + } + binary_array.emplace_back(data + pos, bin_len); + pos += bin_len; + } + return binary_array; +} + +static std::vector convert_zvec_docs_to_internal( + const ZVecDoc **zvec_docs, size_t doc_count) { + std::vector docs; + docs.reserve(doc_count); + + for (size_t i = 0; i < doc_count; ++i) { + docs.push_back( + *(*reinterpret_cast *>(zvec_docs[i]))); + } + + return docs; +} + + +static zvec::Status convert_zvec_collection_schema_to_internal( + const ZVecCollectionSchema *schema, + zvec::CollectionSchema::Ptr &collection_schema) { + std::string coll_name(schema->name->data, schema->name->length); + collection_schema = std::make_shared(coll_name); + collection_schema->set_max_doc_count_per_segment( + schema->max_doc_count_per_segment); + + for (size_t i = 0; i < schema->field_count; ++i) { + const ZVecFieldSchema &zvec_field = *schema->fields[i]; + zvec::DataType data_type = convert_data_type(zvec_field.data_type); + std::string field_name = + std::string(zvec_field.name->data, zvec_field.name->length); + zvec::FieldSchema::Ptr field_schema; + + bool is_vector_field = check_is_vector_field(zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared( + field_name, data_type, zvec_field.dimension, zvec_field.nullable); + } else { + field_schema = std::make_shared(field_name, data_type, + zvec_field.nullable); + } + + if (zvec_field.index_params != nullptr) { + zvec::Status status = set_field_index_params(field_schema, 
&zvec_field); + if (!status.ok()) { + return status; + } + } + + zvec::Status status = collection_schema->add_field(field_schema); + if (!status.ok()) { + return status; + } + } + + return zvec::Status::OK(); +} + +static zvec::Status convert_zvec_field_schema_to_internal( + const ZVecFieldSchema &zvec_field, zvec::FieldSchema::Ptr &field_schema) { + // Validate input + if (!zvec_field.name) { + return zvec::Status::InvalidArgument("Field name cannot be null"); + } + + zvec::DataType data_type = convert_data_type(zvec_field.data_type); + if (data_type == zvec::DataType::UNDEFINED) { + return zvec::Status::InvalidArgument("Invalid data type"); + } + + std::string field_name(zvec_field.name->data, zvec_field.name->length); + bool is_vector_field = check_is_vector_field(zvec_field); + + if (is_vector_field) { + field_schema = std::make_shared( + field_name, data_type, zvec_field.dimension, zvec_field.nullable); + + if (zvec_field.index_params != nullptr) { + switch (zvec_field.index_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto *params = &zvec_field.index_params->params.hnsw_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = std::make_shared( + metric, params->m, params->ef_construction, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto *params = &zvec_field.index_params->params.flat_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = + std::make_shared(metric, quantize); + field_schema->set_index_params(index_params); + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto *params = &zvec_field.index_params->params.ivf_params; + auto metric = convert_metric_type(params->base.metric_type); + auto quantize = convert_quantize_type(params->base.quantize_type); + auto index_params = 
std::make_shared( + metric, params->n_list, params->n_iters, params->use_soar, + quantize); + field_schema->set_index_params(index_params); + break; + } + default: + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + break; + } + } else { + field_schema->set_index_params( + std::make_shared(zvec::MetricType::L2)); + } + } else { + field_schema = std::make_shared(field_name, data_type, + zvec_field.nullable); + + if (zvec_field.index_params != nullptr && + zvec_field.index_params->index_type == ZVEC_INDEX_TYPE_INVERT) { + auto *params = &zvec_field.index_params->params.invert_params; + auto index_params = std::make_shared( + params->enable_range_optimization, params->enable_extended_wildcard); + field_schema->set_index_params(index_params); + } + } + + return zvec::Status::OK(); +} + +ZVecErrorCode zvec_doc_add_field_by_value(ZVecDoc *doc, const char *field_name, + ZVecDataType data_type, + const void *value, + size_t value_size) { + if (!doc || !field_name || !value) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + std::string name(field_name); + ZVecErrorCode error_code = ZVEC_OK; + + switch (data_type) { + // Scalar types + case ZVEC_DATA_TYPE_BOOL: { + bool val = extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for bool type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT32: { + int32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int32 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_INT64: { + int64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for int64 type"); + return error_code; + } + 
(*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + uint32_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + uint64_t val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + float val = extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for float type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + double val = + extract_scalar_value(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for double type"); + return error_code; + } + (*doc_ptr)->set(name, val); + break; + } + + // String and binary types + case ZVEC_DATA_TYPE_STRING: + case ZVEC_DATA_TYPE_BINARY: { + std::string val(static_cast(value), value_size); + (*doc_ptr)->set(name, val); + break; + } + + // Vector types + case ZVEC_DATA_TYPE_VECTOR_FP32: { + auto vec = extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + auto vec = extract_vector_values(value, value_size, + &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_fp16 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for 
vector_fp64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int8 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_int16 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors are packed - each byte contains 2 int4 values + size_t count = value_size * 2; + const int8_t *packed_vals = static_cast(value); + std::vector vec; + vec.reserve(count); + + // Unpack int4 values + for (size_t i = 0; i < value_size; ++i) { + int8_t byte_val = packed_vals[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract upper 4 bits + vec.push_back((byte_val >> 4) & 0x0F); + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + auto vec = + extract_vector_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for vector_binary64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + + // Sparse vector types + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + auto sparse_vec = + extract_sparse_vector(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return error_code; + } + (*doc_ptr)->set(name, sparse_vec); + break; + } + case 
ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + auto sparse_vec = extract_sparse_vector( + value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid sparse vector data size"); + return error_code; + } + (*doc_ptr)->set(name, sparse_vec); + break; + } + + // Array types + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + auto vec = extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_bool type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_int32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_int64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint32 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto vec = + extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_uint64 type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto vec = extract_array_values(value, value_size, &error_code); + if (error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_float type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto vec = extract_array_values(value, value_size, &error_code); + if 
(error_code != ZVEC_OK) { + set_last_error("Invalid value size for array_double type"); + return error_code; + } + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + auto string_array = extract_string_array(value, value_size); + (*doc_ptr)->set(name, string_array); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + auto binary_array = extract_binary_array(value, value_size); + (*doc_ptr)->set(name, binary_array); + break; + } + + default: + set_last_error("Unsupported data type: " + std::to_string(data_type)); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to add field: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_add_field_by_struct(ZVecDoc *doc, + const ZVecDocField *field) { + if (!doc || !field) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + + std::string name(field->name.data, field->name.length); + + switch (field->data_type) { + // Scalar basic types + case ZVEC_DATA_TYPE_BOOL: { + (*doc_ptr)->set(name, field->value.bool_value); + break; + } + case ZVEC_DATA_TYPE_INT32: { + (*doc_ptr)->set(name, field->value.int32_value); + break; + } + case ZVEC_DATA_TYPE_INT64: { + (*doc_ptr)->set(name, field->value.int64_value); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + (*doc_ptr)->set(name, field->value.uint32_value); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + (*doc_ptr)->set(name, field->value.uint64_value); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + (*doc_ptr)->set(name, field->value.float_value); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + (*doc_ptr)->set(name, field->value.double_value); + break; + } + + // String and binary types + case ZVEC_DATA_TYPE_STRING: { + std::string val(field->value.string_value.data, + field->value.string_value.length); + (*doc_ptr)->set(name, val); + break; + } 
+ case ZVEC_DATA_TYPE_BINARY: { + std::string val( + reinterpret_cast(field->value.binary_value.data), + field->value.binary_value.length); + (*doc_ptr)->set(name, val); + break; + } + + // Vector types + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + std::vector vec( + field->value.vector_value.data, + field->value.vector_value.data + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + size_t byte_count = (field->value.vector_value.length + 1) / 2; + const int8_t *packed_data = + reinterpret_cast(field->value.vector_value.data); + std::vector vec; + vec.reserve(field->value.vector_value.length); + + for (size_t i = 0; + i < byte_count && vec.size() < field->value.vector_value.length; + ++i) { + int8_t byte_val = packed_data[i]; + // Extract lower 4 bits + vec.push_back(byte_val & 0x0F); + // Extract upper 4 bits + if (vec.size() < field->value.vector_value.length) { + vec.push_back((byte_val >> 4) & 0x0F); + } + } + (*doc_ptr)->set(name, vec); + 
break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + std::vector vec( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + + // Sparse vector types + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + std::vector vec( + reinterpret_cast( + field->value.vector_value.data), + reinterpret_cast( + field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + std::vector vec( + field->value.vector_value.data, + field->value.vector_value.data + field->value.vector_value.length); + (*doc_ptr)->set(name, vec); + break; + } + + // Array types + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + std::vector array_values; + const uint8_t *data_ptr = field->value.binary_value.data; + size_t total_length = field->value.binary_value.length; + size_t offset = 0; + + while (offset + sizeof(uint32_t) <= total_length) { + uint32_t elem_length = + *reinterpret_cast(data_ptr + offset); + offset += sizeof(uint32_t); + + if (offset + elem_length <= total_length) { + std::string elem(reinterpret_cast(data_ptr + offset), + elem_length); + array_values.push_back(elem); + offset += elem_length; + } else { + break; + } + } + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + std::vector array_values; + const char *data_ptr = field->value.string_value.data; + size_t total_length = field->value.string_value.length; + size_t offset = 0; + + while (offset < total_length) { + size_t str_len = strlen(data_ptr + offset); + if (str_len > 0 && offset + str_len <= total_length) { + array_values.emplace_back(data_ptr + 
offset, str_len); + offset += str_len + 1; + } else { + break; + } + } + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + std::vector array_values( + reinterpret_cast(field->value.binary_value.data), + reinterpret_cast(field->value.binary_value.data) + + field->value.binary_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + std::vector array_values( + field->value.vector_value.data, + field->value.vector_value.data + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + std::vector array_values( + reinterpret_cast(field->value.vector_value.data), + reinterpret_cast(field->value.vector_value.data) + + field->value.vector_value.length); + (*doc_ptr)->set(name, array_values); + break; + } + + default: + set_last_error("Unsupported data type: " + + std::to_string(field->data_type)); + 
return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to add field: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +const char *zvec_doc_get_pk_pointer(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->pk_ref().data(); +} + +const char *zvec_doc_get_pk_copy(const ZVecDoc *doc) { + if (!doc) return nullptr; + auto doc_ptr = reinterpret_cast *>(doc); + const std::string &pk = (*doc_ptr)->pk_ref(); + if (pk.empty()) return nullptr; + + char *result = new char[pk.length() + 1]; + strcpy(result, pk.c_str()); + return result; +} + +uint64_t zvec_doc_get_doc_id(const ZVecDoc *doc) { + if (!doc) return 0; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->doc_id(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get document ID: ") + e.what()); + return 0; + } +} + +float zvec_doc_get_score(const ZVecDoc *doc) { + if (!doc) return 0.0f; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->score(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get document score: ") + e.what()); + return 0.0f; + } +} + +ZVecDocOperator zvec_doc_get_operator(const ZVecDoc *doc) { + if (!doc) return ZVEC_DOC_OP_INSERT; // default + try { + auto doc_ptr = reinterpret_cast *>(doc); + zvec::Operator op = (*doc_ptr)->get_operator(); + return static_cast(op); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get document operator: ") + e.what()); + return ZVEC_DOC_OP_INSERT; + } +} + +size_t zvec_doc_get_field_count(const ZVecDoc *doc) { + if (!doc) return 0; + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->field_names().size(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get field count: ") + e.what()); + return 0; + } +} + +ZVecErrorCode 
zvec_doc_get_field_value_basic(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + void *value_buffer, + size_t buffer_size) { + if (!doc || !field_name || !value_buffer) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle basic data types that return values directly + switch (field_type) { + case ZVEC_DATA_TYPE_BOOL: { + if (buffer_size < sizeof(bool)) { + set_last_error("Buffer too small for bool value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const bool val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_INT32: { + if (buffer_size < sizeof(int32_t)) { + set_last_error("Buffer too small for int32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int32_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_INT64: { + if (buffer_size < sizeof(int64_t)) { + set_last_error("Buffer too small for int64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const int64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT32: { + if (buffer_size < sizeof(uint32_t)) { + set_last_error("Buffer too small for uint32 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint32_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_UINT64: { + if (buffer_size < sizeof(uint64_t)) { + set_last_error("Buffer too small for uint64 value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const uint64_t val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + if (buffer_size < 
sizeof(float)) { + set_last_error("Buffer too small for float value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const float val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + if (buffer_size < sizeof(double)) { + set_last_error("Buffer too small for double value"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + const double val = (*doc_ptr)->get_ref(field_name); + *static_cast(value_buffer) = val; + break; + } + default: { + set_last_error("Data type not supported for basic value return"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_get_field_value_copy(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + void **value, size_t *value_size) { + if (!doc || !field_name || !value || !value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Handle copy-returning data types (allocate new memory) + switch (field_type) { + // Basic types - copy the actual values + case ZVEC_DATA_TYPE_BOOL: { + const bool val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(bool)); + if (!buffer) { + set_last_error("Memory allocation failed for bool"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + 
*static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(int64_t)); + if (!buffer) { + set_last_error("Memory allocation failed for int64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint32_t)); + if (!buffer) { + set_last_error("Memory allocation failed for uint32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const uint64_t val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(uint64_t)); + if (!buffer) { + set_last_error("Memory allocation failed for uint64"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(float)); + if (!buffer) { + set_last_error("Memory allocation failed for float"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(sizeof(double)); + if (!buffer) { + set_last_error("Memory allocation failed for double"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + *static_cast(buffer) = val; + *value = buffer; + *value_size = sizeof(double); + break; + } + + // String and binary types - copy the data + case ZVEC_DATA_TYPE_BINARY: + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + void *buffer = malloc(val.length()); + if (!buffer) 
{ + set_last_error("Memory allocation failed for string/binary"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), val.length()); + *value = buffer; + *value_size = val.length(); + break; + } + + // Vector types - copy the data + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(zvec::float16_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(float); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp32 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + 
(*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for fp64 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int8_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int8 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + size_t total_size = val.size() * sizeof(int16_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int16 vector"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + memcpy(buffer, val.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + + // Sparse vector types - create flattened representation + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + using SparseVecFP16 = + std::pair, std::vector>; + const SparseVecFP16 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(zvec::float16_t)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP16"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = 
sparse_vec.second[i]; + ptr += sizeof(zvec::float16_t); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + using SparseVecFP32 = + std::pair, std::vector>; + const SparseVecFP32 &sparse_vec = + (*doc_ptr)->get_ref(field_name); + size_t nnz = sparse_vec.first.size(); + size_t total_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for sparse vector FP32"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + *reinterpret_cast(ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.first[i]; + ptr += sizeof(uint32_t); + } + for (size_t i = 0; i < nnz; ++i) { + *reinterpret_cast(ptr) = sparse_vec.second[i]; + ptr += sizeof(float); + } + + *value = buffer; + *value_size = total_size; + break; + } + + // Array types - create serialized representations + case ZVEC_DATA_TYPE_ARRAY_BINARY: { + using BinaryArray = std::vector; + const BinaryArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &bin_val : array_vals) { + total_size += bin_val.length(); + } + + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for binary array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &bin_val : array_vals) { + memcpy(ptr, bin_val.data(), bin_val.length()); + ptr += bin_val.length(); + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + using StringArray = std::vector; + const StringArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = 0; + for (const auto &str_val : array_vals) { + total_size += str_val.length() + 1; // +1 for null terminator + } + + void *buffer = malloc(total_size); + if (!buffer) { + 
set_last_error("Memory allocation failed for string array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + char *ptr = static_cast(buffer); + for (const auto &str_val : array_vals) { + memcpy(ptr, str_val.c_str(), str_val.length()); + ptr += str_val.length(); + *ptr = '\0'; + ptr++; + } + + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + using BoolArray = std::vector; + const BoolArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t byte_count = (array_vals.size() + 7) / 8; + void *buffer = malloc(byte_count); + if (!buffer) { + set_last_error("Memory allocation failed for bool array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + uint8_t *bytes = static_cast(buffer); + memset(bytes, 0, byte_count); + + for (size_t i = 0; i < array_vals.size(); ++i) { + if (array_vals[i]) { + bytes[i / 8] |= (1 << (i % 8)); + } + } + + *value = buffer; + *value_size = byte_count; + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + using Int32Array = std::vector; + const Int32Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + using Int64Array = std::vector; + const Int64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(int64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for int64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + using UInt32Array = std::vector; + const UInt32Array &array_vals = + 
(*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint32_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint32 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + using UInt64Array = std::vector; + const UInt64Array &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(uint64_t); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for uint64 array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + using FloatArray = std::vector; + const FloatArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(float); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for float array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + using DoubleArray = std::vector; + const DoubleArray &array_vals = + (*doc_ptr)->get_ref(field_name); + size_t total_size = array_vals.size() * sizeof(double); + void *buffer = malloc(total_size); + if (!buffer) { + set_last_error("Memory allocation failed for double array"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(buffer, array_vals.data(), total_size); + *value = buffer; + *value_size = total_size; + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return 
ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_get_field_value_pointer(const ZVecDoc *doc, + const char *field_name, + ZVecDataType field_type, + const void **value, + size_t *value_size) { + if (!doc || !field_name || !value || !value_size) { + set_last_error("Invalid arguments: null pointer"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + + // Check if field exists + if (!(*doc_ptr)->has(field_name)) { + set_last_error("Field not found in document"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + // Get field value based on data type + switch (field_type) { + case ZVEC_DATA_TYPE_BINARY: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.data(); + *value_size = val.length(); + break; + } + case ZVEC_DATA_TYPE_STRING: { + const std::string &val = (*doc_ptr)->get_ref(field_name); + *value = val.c_str(); + *value_size = val.length(); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + const bool val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(bool); + break; + } + case ZVEC_DATA_TYPE_INT32: { + const int32_t val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_INT64: { + const int64_t val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + const uint32_t val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + const uint64_t val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + const float val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(float); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + const double val = (*doc_ptr)->get_ref(field_name); + *value = &val; + *value_size = sizeof(double); + break; + } + case 
ZVEC_DATA_TYPE_VECTOR_BINARY32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + // FP16 vectors typically stored as uint16_t + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(zvec::float16_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(double); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT4: { + // INT4 vectors typically stored as int8_t with 2 values per byte + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int8_t); + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + const std::vector &val = + (*doc_ptr)->get_ref>(field_name); + *value = val.data(); + *value_size = val.size() * sizeof(int16_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(int64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + 
auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint32_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + auto &array_vals = + (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(uint64_t); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + auto &array_vals = (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(float); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + auto &array_vals = (*doc_ptr)->get_ref>(field_name); + *value = array_vals.data(); + *value_size = array_vals.size() * sizeof(double); + break; + } + default: { + set_last_error("Unknown data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +bool zvec_doc_is_empty(const ZVecDoc *doc) { + if (!doc) { + set_last_error("Document pointer is null"); + return true; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_empty(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to check if document is empty: ") + + e.what()); + return true; + } +} + +ZVecErrorCode zvec_doc_remove_field(ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + (*doc_ptr)->remove(std::string(field_name)); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to remove field: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + + +bool zvec_doc_has_field(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return 
false; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->has(std::string(field_name)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to check field existence: ") + e.what()); + return false; + } +} + +bool zvec_doc_has_field_value(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->has_value(std::string(field_name)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to check field value existence: ") + + e.what()); + return false; + } +} + +bool zvec_doc_is_field_null(const ZVecDoc *doc, const char *field_name) { + if (!doc || !field_name) { + set_last_error("Document pointer or field name is null"); + return false; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->is_null(std::string(field_name)); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to check if field is null: ") + + e.what()); + return false; + } +} + +ZVecErrorCode zvec_doc_get_field_names(const ZVecDoc *doc, char ***field_names, + size_t *count) { + if (!doc || !field_names || !count) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + std::vector names = (*doc_ptr)->field_names(); + + *count = names.size(); + if (*count == 0) { + *field_names = nullptr; + return ZVEC_OK; + } + + *field_names = static_cast(malloc(*count * sizeof(char *))); + if (!*field_names) { + set_last_error("Failed to allocate memory for field names"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *count; ++i) { + (*field_names)[i] = copy_string(names[i]); + if (!(*field_names)[i]) { + for (size_t j = 0; j < i; ++j) { + free((*field_names)[j]); + } + free(*field_names); + *field_names = nullptr; + 
set_last_error("Failed to copy field name"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + } + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get field names: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_serialize(const ZVecDoc *doc, uint8_t **data, + size_t *size) { + if (!doc || !data || !size) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + std::vector serialized_data = (*doc_ptr)->serialize(); + + *size = serialized_data.size(); + if (*size == 0) { + *data = nullptr; + return ZVEC_OK; + } + + *data = static_cast(malloc(*size)); + if (!*data) { + set_last_error("Failed to allocate memory for serialized data"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + memcpy(*data, serialized_data.data(), *size); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to serialize document: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_deserialize(const uint8_t *data, size_t size, + ZVecDoc **doc) { + if (!data || !doc || size == 0) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto deserialized_doc = zvec::Doc::deserialize(data, size); + if (!deserialized_doc) { + set_last_error("Failed to deserialize document"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + auto doc_ptr = new std::shared_ptr(deserialized_doc); + *doc = reinterpret_cast(doc_ptr); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to deserialize document: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +void zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other) { + if (!doc || !other) { + set_last_error("Document pointers are null"); + return; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + auto other_ptr = + reinterpret_cast *>(other); + 
(*doc_ptr)->merge(**other_ptr); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to merge documents: ") + e.what()); + } +} + +size_t zvec_doc_memory_usage(const ZVecDoc *doc) { + if (!doc) { + set_last_error("Document pointer is null"); + return 0; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + return (*doc_ptr)->memory_usage(); + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get document memory usage: ") + + e.what()); + return 0; + } +} + +ZVecErrorCode zvec_doc_validate(const ZVecDoc *doc, + const ZVecCollectionSchema *schema, + bool is_update, char **error_msg) { + if (!doc || !schema) { + set_last_error("Document or schema pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + std::shared_ptr schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + auto doc_ptr = reinterpret_cast *>(doc); + status = (*doc_ptr)->validate(schema_ptr, is_update); + if (!status.ok()) { + if (error_msg) { + *error_msg = copy_string(status.message()); + } + return status_to_error_code(status); + } + + if (error_msg) { + *error_msg = nullptr; + } + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to validate document: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str) { + if (!doc || !detail_str) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto doc_ptr = reinterpret_cast *>(doc); + std::string detail = (*doc_ptr)->to_detail_string(); + *detail_str = copy_string(detail); + + if (!*detail_str && !detail.empty()) { + set_last_error("Failed to copy detail string"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + return ZVEC_OK; + } catch (const 
std::exception &e) { + set_last_error(std::string("Failed to get document detail string: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +// ============================================================================= +// Collection functions implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection) { + try { + if (!path || !schema || !collection) { + set_last_error("Path, schema, or collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::shared_ptr schema_ptr = nullptr; + auto status = + convert_zvec_collection_schema_to_internal(schema, schema_ptr); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + zvec::CollectionOptions collection_options; + if (options) { + collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = + zvec::Collection::CreateAndOpen(path, *schema_ptr, collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code; + } catch (const std::exception &e) { + set_last_error( + std::string( + "Exception in zvec_collection_create_and_open_with_schema: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_open(const char *path, + const ZVecCollectionOptions *options, + ZVecCollection **collection) { + if (!path || !collection) { + set_last_error("Invalid arguments: path and collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + zvec::CollectionOptions collection_options; + if (options) { 
+ collection_options.enable_mmap_ = options->enable_mmap; + collection_options.max_buffer_size_ = options->max_buffer_size; + collection_options.read_only_ = options->read_only; + } + + auto result = zvec::Collection::Open(path, collection_options); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *collection = reinterpret_cast( + new std::shared_ptr(std::move(result.value()))); + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_close(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + delete reinterpret_cast *>(collection); + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_destroy(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Destroy(); + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_flush(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = + *reinterpret_cast *>(collection); + zvec::Status status = coll->Flush(); + + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const 
std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_path(const ZVecCollection *collection, + char **path) { + if (!collection || !path) { + set_last_error("Invalid arguments: collection and path cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = *reinterpret_cast *>( + collection); + auto result = coll->Path(); + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + *path = copy_string(result.value()); + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_name(const ZVecCollection *collection, + char **name) { + if (!collection || !name) { + set_last_error("Invalid arguments: collection and name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = *reinterpret_cast *>( + collection); + auto result = coll->Schema(); + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + *name = copy_string(result.value().name()); + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_schema(const ZVecCollection *collection, + ZVecCollectionSchema **schema) { + if (!collection || !schema) { + set_last_error("Invalid arguments: collection and schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto &coll = *reinterpret_cast *>( + collection); + auto result = coll->Schema(); + + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + const auto &cpp_schema = result.value(); + + // Create new schema structure + ZVecCollectionSchema *c_schema = new 
ZVecCollectionSchema(); + if (!c_schema) { + set_last_error("Failed to allocate memory for schema"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize the schema structure + c_schema->name = nullptr; + c_schema->fields = nullptr; + c_schema->field_count = 0; + c_schema->field_capacity = 0; + c_schema->max_doc_count_per_segment = + cpp_schema.max_doc_count_per_segment(); + + // Set collection name + c_schema->name = zvec_string_create(cpp_schema.name().c_str()); + if (!c_schema->name) { + delete c_schema; + set_last_error("Failed to allocate memory for collection name"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Convert and copy fields + const auto &cpp_fields = cpp_schema.fields(); + c_schema->field_count = cpp_fields.size(); + c_schema->field_capacity = cpp_fields.size(); + + if (c_schema->field_count > 0) { + // Allocate array of field pointers + c_schema->fields = new ZVecFieldSchema *[c_schema->field_count]; + if (!c_schema->fields) { + zvec_collection_schema_destroy(c_schema); + set_last_error("Failed to allocate memory for fields"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + // Initialize all field pointers to nullptr + for (size_t i = 0; i < c_schema->field_count; ++i) { + c_schema->fields[i] = nullptr; + } + + size_t i = 0; + for (const auto &cpp_field : cpp_fields) { + try { + // Create new field schema + c_schema->fields[i] = new ZVecFieldSchema(); + + // Copy field name using zvec_string_create + c_schema->fields[i]->name = + zvec_string_create(cpp_field->name().c_str()); + if (!c_schema->fields[i]->name) { + throw std::bad_alloc(); + } + + // Convert data type + c_schema->fields[i]->data_type = + convert_zvec_data_type(cpp_field->data_type()); + + // Copy dimension for vector fields + c_schema->fields[i]->dimension = cpp_field->dimension(); + + // Copy nullable flag + c_schema->fields[i]->nullable = cpp_field->nullable(); + + // Initialize index parameters + c_schema->fields[i]->index_params = nullptr; + + // Convert index 
parameters based on the actual type + auto index_params = cpp_field->index_params(); + if (index_params) { + switch (index_params->type()) { + case zvec::IndexType::HNSW: { + // Cast to HnswIndexParams and convert + auto hnsw_params = + std::dynamic_pointer_cast( + index_params); + if (hnsw_params) { + auto c_hnsw_params = new ZVecHnswIndexParams(); + if (!c_hnsw_params) { + throw std::bad_alloc(); + } + + // Initialize the base vector index parameters + c_hnsw_params->base.base.index_type = ZVEC_INDEX_TYPE_HNSW; + c_hnsw_params->base.metric_type = + static_cast(hnsw_params->metric_type()); + c_hnsw_params->base.quantize_type = + static_cast( + hnsw_params->quantize_type()); + + // Set HNSW-specific parameters + c_hnsw_params->m = hnsw_params->m(); + c_hnsw_params->ef_construction = + hnsw_params->ef_construction(); + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_hnsw_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_HNSW; + } + break; + } + + case zvec::IndexType::IVF: { + // Cast to IVFIndexParams and convert + auto ivf_params = + std::dynamic_pointer_cast( + index_params); + if (ivf_params) { + auto c_ivf_params = new ZVecIVFIndexParams(); + if (!c_ivf_params) { + throw std::bad_alloc(); + } + + // Initialize the base vector index parameters + c_ivf_params->base.base.index_type = ZVEC_INDEX_TYPE_IVF; + c_ivf_params->base.metric_type = + static_cast(ivf_params->metric_type()); + c_ivf_params->base.quantize_type = + static_cast( + ivf_params->quantize_type()); + + // Set IVF-specific parameters + c_ivf_params->n_list = ivf_params->n_list(); + c_ivf_params->n_iters = ivf_params->n_iters(); + c_ivf_params->use_soar = ivf_params->use_soar(); + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_ivf_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_IVF; + } + break; + } + + case 
zvec::IndexType::FLAT: { + // Cast to FlatIndexParams and convert + auto flat_params = + std::dynamic_pointer_cast( + index_params); + if (flat_params) { + auto c_flat_params = new ZVecFlatIndexParams(); + if (!c_flat_params) { + throw std::bad_alloc(); + } + + // Initialize the base vector index parameters + c_flat_params->base.base.index_type = ZVEC_INDEX_TYPE_FLAT; + c_flat_params->base.metric_type = + static_cast(flat_params->metric_type()); + c_flat_params->base.quantize_type = + static_cast( + flat_params->quantize_type()); + + // Flat index has no additional parameters + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_flat_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_FLAT; + } + break; + } + + case zvec::IndexType::INVERT: { + // Cast to InvertIndexParams and convert + auto invert_params = + std::dynamic_pointer_cast( + index_params); + if (invert_params) { + auto c_invert_params = new ZVecInvertIndexParams(); + if (!c_invert_params) { + throw std::bad_alloc(); + } + + // Initialize the base index parameters + c_invert_params->base.index_type = ZVEC_INDEX_TYPE_INVERT; + + // Set Invert-specific parameters + c_invert_params->enable_range_optimization = + invert_params->enable_range_optimization(); + c_invert_params->enable_extended_wildcard = + invert_params->enable_extended_wildcard(); + + // Assign to field schema (using pointer assignment) + c_schema->fields[i]->index_params = + reinterpret_cast(c_invert_params); + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_INVERT; + } + break; + } + + default: + // For undefined or unsupported index types, set to NULL + c_schema->fields[i]->index_params = nullptr; + c_schema->fields[i]->index_params->index_type = + ZVEC_INDEX_TYPE_UNDEFINED; + break; + } + } else { + // No index parameters, set to NULL + c_schema->fields[i]->index_params = nullptr; + } + } catch (const std::bad_alloc &) { + // Clean up 
already allocated fields + for (size_t j = 0; j <= i; ++j) { + if (c_schema->fields[j]) { + zvec_field_schema_destroy(c_schema->fields[j]); + } + } + delete[] c_schema->fields; + zvec_free_string(c_schema->name); + delete c_schema; + set_last_error("Failed to allocate memory for field"); + return ZVEC_ERROR_RESOURCE_EXHAUSTED; + } + + ++i; + } + } + + *schema = c_schema; + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_options(const ZVecCollection *collection, + ZVecCollectionOptions **options) { + if (!collection || !options) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto collection_ptr = + reinterpret_cast *>(collection); + auto result = (*collection_ptr)->Options(); + + if (!result.has_value()) { + set_last_error("Failed to get collection option: " + + result.error().message()); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // 创建并初始化选项结构体 + *options = new ZVecCollectionOptions(); + + (*options)->enable_mmap = result.value().enable_mmap_; + (*options)->max_buffer_size = result.value().max_buffer_size_; + (*options)->read_only = result.value().read_only_; + (*options)->max_doc_count_per_segment = zvec::MAX_DOC_COUNT_PER_SEGMENT; + + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get collection options: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_get_stats(const ZVecCollection *collection, + ZVecCollectionStats **stats) { + if (!collection || !stats) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto collection_ptr = + reinterpret_cast *>(collection); + auto result = (*collection_ptr)->Stats(); + + if (!result.has_value()) { + set_last_error("Failed to get collection stats: " + + result.error().message()); + 
return ZVEC_ERROR_INTERNAL_ERROR; + } + + *stats = new ZVecCollectionStats(); + ZVecErrorCode error_code = handle_expected_result(result); + if (error_code == ZVEC_OK) { + (*stats)->doc_count = result.value().doc_count; + (*stats)->index_count = result.value().index_completeness.size(); + if ((*stats)->index_count > 0) { + (*stats)->index_completeness = + static_cast(malloc((*stats)->index_count * sizeof(float))); + (*stats)->index_names = static_cast( + malloc((*stats)->index_count * sizeof(ZVecString *))); + int i = 0; + for (auto &[name, completeness] : result.value().index_completeness) { + (*stats)->index_completeness[i] = completeness; + (*stats)->index_names[i] = zvec_string_create(name.c_str()); + i++; + } + } + } else { + (*stats)->index_completeness = nullptr; + *(*stats)->index_names = nullptr; + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to get detailed collection stats: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +void zvec_collection_stats_destroy(ZVecCollectionStats *stats) { + if (stats) { + if (stats->index_names) { + for (size_t i = 0; i < stats->index_count; ++i) { + zvec_free_string(stats->index_names[i]); + } + free(stats->index_names); + } + + if (stats->index_completeness) { + free(stats->index_completeness); + } + + delete stats; + } +} + +// ============================================================================= +// QueryParams functions implementation +// ============================================================================= + +ZVecQueryParams *zvec_query_params_create(ZVecIndexType index_type) { + try { + auto params = new ZVecQueryParams(); + params->index_type = index_type; + params->radius = 0.0f; + params->is_linear = false; + params->is_using_refiner = false; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create query params: ") + e.what()); + return nullptr; + } +} + +ZVecHnswQueryParams 
*zvec_query_params_hnsw_create(ZVecIndexType index_type, + int ef, float radius, + bool is_linear, + bool is_using_refiner) { + try { + auto params = new ZVecHnswQueryParams(); + params->base.index_type = index_type; + params->base.radius = radius; + params->base.is_linear = is_linear; + params->base.is_using_refiner = is_using_refiner; + params->ef = ef; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create HNSW query params: ") + + e.what()); + return nullptr; + } +} + +ZVecIVFQueryParams *zvec_query_params_ivf_create(ZVecIndexType index_type, + int nprobe, + bool is_using_refiner, + float scale_factor) { + try { + auto params = new ZVecIVFQueryParams(); + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->nprobe = nprobe; + params->scale_factor = scale_factor; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create IVF query params: ") + + e.what()); + return nullptr; + } +} + +ZVecFlatQueryParams *zvec_query_params_flat_create(ZVecIndexType index_type, + bool is_using_refiner, + float scale_factor) { + try { + auto params = new ZVecFlatQueryParams(); + params->base.index_type = index_type; + params->base.is_using_refiner = is_using_refiner; + params->scale_factor = scale_factor; + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create Flat query params: ") + + e.what()); + return nullptr; + } +} + +ZVecQueryParamsUnion *zvec_query_params_union_create(ZVecIndexType index_type) { + try { + auto params = new ZVecQueryParamsUnion(); + params->index_type = index_type; + + switch (index_type) { + case ZVEC_INDEX_TYPE_HNSW: + params->params.hnsw_params.base.index_type = index_type; + params->params.hnsw_params.ef = + zvec::core_interface::kDefaultHnswEfSearch; + break; + case ZVEC_INDEX_TYPE_IVF: + params->params.ivf_params.base.index_type = index_type; + 
params->params.ivf_params.nprobe = 10; + params->params.ivf_params.scale_factor = 10.0f; + break; + case ZVEC_INDEX_TYPE_FLAT: + params->params.flat_params.base.index_type = index_type; + params->params.flat_params.scale_factor = 10.0f; + break; + default: + params->params.base_params.index_type = index_type; + break; + } + + return params; + } catch (const std::exception &e) { + set_last_error(std::string("Failed to create query params union: ") + + e.what()); + return nullptr; + } +} + +void zvec_query_params_destroy(ZVecQueryParams *params) { + if (params) { + delete params; + } +} + +void zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params) { + if (params) { + delete params; + } +} + +void zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params) { + if (params) { + delete params; + } +} + +void zvec_query_params_flat_destroy(ZVecFlatQueryParams *params) { + if (params) { + delete params; + } +} + +void zvec_query_params_union_destroy(ZVecQueryParamsUnion *params) { + if (params) { + delete params; + } +} + +ZVecErrorCode zvec_query_params_set_index_type(ZVecQueryParams *params, + ZVecIndexType index_type) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->index_type = index_type; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_radius(ZVecQueryParams *params, + float radius) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->radius = radius; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_is_linear(ZVecQueryParams *params, + bool is_linear) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->is_linear = is_linear; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_set_is_using_refiner(ZVecQueryParams *params, + bool is_using_refiner) { + if (!params) { + set_last_error("Query params pointer is null"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->is_using_refiner = is_using_refiner; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, + int ef) { + if (!params) { + set_last_error("HNSW query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->ef = ef; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, + int nprobe) { + if (!params) { + set_last_error("IVF query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->nprobe = nprobe; + return ZVEC_OK; +} + +ZVecErrorCode zvec_query_params_ivf_set_scale_factor(ZVecIVFQueryParams *params, + float scale_factor) { + if (!params) { + set_last_error("Query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + params->scale_factor = scale_factor; + return ZVEC_OK; +} + + +// ============================================================================= +// Index Interface Implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_create_index( + ZVecCollection *collection, const char *column_name, + const ZVecIndexParams *index_params) { + if (!collection || !column_name || !index_params) { + set_last_error( + "Invalid arguments: collection, column_name, and index_params cannot " + "be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + std::string field_name_str(column_name); + + switch (index_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: { + const ZVecInvertIndexParams *invert_params = + &index_params->params.invert_params; + auto cpp_params = std::make_shared( + invert_params->enable_range_optimization, + invert_params->enable_extended_wildcard); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_HNSW: { + const ZVecHnswIndexParams 
*hnsw_params = + &index_params->params.hnsw_params; + auto metric = convert_metric_type(hnsw_params->base.metric_type); + auto quantize = convert_quantize_type(hnsw_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, hnsw_params->m, hnsw_params->ef_construction, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_FLAT: { + const ZVecFlatIndexParams *flat_params = + &index_params->params.flat_params; + auto metric = convert_metric_type(flat_params->base.metric_type); + auto quantize = convert_quantize_type(flat_params->base.quantize_type); + auto cpp_params = + std::make_shared(metric, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_IVF: { + const ZVecIVFIndexParams *ivf_params = &index_params->params.ivf_params; + auto metric = convert_metric_type(ivf_params->base.metric_type); + auto quantize = convert_quantize_type(ivf_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, ivf_params->n_list, ivf_params->n_iters, + ivf_params->use_soar, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + default: { + set_last_error("Unsupported index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + } catch (const std::exception &e) { + set_last_error(std::string("Exception in zvec_collection_create_index: ") + + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_create_index_with_params( + ZVecCollection *collection, const ZVecString *field_name, + const void *index_params) { + if (!collection || !field_name || !index_params) { + set_last_error("Invalid arguments"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + auto coll_ptr = + reinterpret_cast *>(collection); + std::string field_name_str(field_name->data, 
field_name->length); + + const ZVecBaseIndexParams *base_params = + static_cast(index_params); + + try { + switch (base_params->index_type) { + case ZVEC_INDEX_TYPE_INVERT: { + const ZVecInvertIndexParams *invert_params = + static_cast(index_params); + auto cpp_params = std::make_shared( + invert_params->enable_range_optimization, + invert_params->enable_extended_wildcard); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_HNSW: { + const ZVecHnswIndexParams *hnsw_params = + static_cast(index_params); + auto metric = convert_metric_type(hnsw_params->base.metric_type); + auto quantize = convert_quantize_type(hnsw_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, hnsw_params->m, hnsw_params->ef_construction, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_FLAT: { + const ZVecFlatIndexParams *flat_params = + static_cast(index_params); + auto metric = convert_metric_type(flat_params->base.metric_type); + auto quantize = convert_quantize_type(flat_params->base.quantize_type); + auto cpp_params = + std::make_shared(metric, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + case ZVEC_INDEX_TYPE_IVF: { + const ZVecIVFIndexParams *ivf_params = + static_cast(index_params); + auto metric = convert_metric_type(ivf_params->base.metric_type); + auto quantize = convert_quantize_type(ivf_params->base.quantize_type); + auto cpp_params = std::make_shared( + metric, ivf_params->n_list, ivf_params->n_iters, + ivf_params->use_soar, quantize); + auto status = (*coll_ptr)->CreateIndex(field_name_str, cpp_params); + return status_to_error_code(status); + } + + default: { + set_last_error("Unsupported index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + } catch (const std::exception &e) { + 
set_last_error(e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_create_hnsw_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecHnswIndexParams *hnsw_params) { + if (!hnsw_params) { + set_last_error("Invalid HNSW parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + hnsw_params); +} + +ZVecErrorCode zvec_collection_create_flat_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecFlatIndexParams *flat_params) { + if (!flat_params) { + set_last_error("Invalid Flat parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + flat_params); +} + +ZVecErrorCode zvec_collection_create_ivf_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecIVFIndexParams *ivf_params) { + if (!ivf_params) { + set_last_error("Invalid IVF parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + ivf_params); +} + +ZVecErrorCode zvec_collection_create_invert_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecInvertIndexParams *invert_params) { + if (!invert_params) { + set_last_error("Invalid Invert parameters"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + return zvec_collection_create_index_with_params(collection, field_name, + invert_params); +} + +ZVecErrorCode zvec_collection_drop_index(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropIndex(column_name); + if (!status.ok()) { + set_last_error(status.message()); + } + + return 
status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_optimize(ZVecCollection *collection) { + if (!collection) { + set_last_error("Invalid argument: collection cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->Optimize(); + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + + +// ============================================================================= +// Column Interface Implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_add_column(ZVecCollection *collection, + const ZVecFieldSchema *field_schema, + const char *expression) { + if (!collection || !field_schema) { + set_last_error( + "Invalid arguments: collection and field_schema cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + zvec::DataType data_type = convert_data_type(field_schema->data_type); + if (data_type == zvec::DataType::UNDEFINED) { + set_last_error("Invalid data type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + std::string field_name(field_schema->name->data, + field_schema->name->length); + bool is_vector_field = check_is_vector_field(*field_schema); + zvec::FieldSchema::Ptr schema; + if (is_vector_field) { + schema = std::make_shared(field_name, data_type, + field_schema->dimension, + field_schema->nullable); + } else { + schema = std::make_shared(field_name, data_type, + field_schema->nullable); + } + + std::string expr = expression ? 
expression : ""; + zvec::Status status = (*coll_ptr)->AddColumn(schema, expr); + + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_drop_column(ZVecCollection *collection, + const char *column_name) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + zvec::Status status = (*coll_ptr)->DropColumn(column_name); + + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_alter_column(ZVecCollection *collection, + const char *column_name, + const char *new_name, + const ZVecFieldSchema *new_schema) { + if (!collection || !column_name) { + set_last_error( + "Invalid arguments: collection and column_name cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + std::string rename = new_name ? 
new_name : ""; + + zvec::FieldSchema::Ptr schema = nullptr; + if (new_schema) { + auto status = convert_zvec_field_schema_to_internal(*new_schema, schema); + if (!status.ok()) { + set_last_error(status.message()); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + } + + zvec::Status status = (*coll_ptr)->AlterColumn(column_name, rename, schema); + if (!status.ok()) { + set_last_error(status.message()); + } + + return status_to_error_code(status); + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +// ============================================================================= +// DML Interface Implementation +// ============================================================================= + +ZVecErrorCode zvec_collection_insert(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Insert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } else { + *success_count = 0; + *error_count = doc_count; + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception in zvec_collection_insert_docs: ") + + e.what()); + *success_count = 0; + *error_count = doc_count; + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode 
zvec_collection_update(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Update(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + + +ZVecErrorCode zvec_collection_upsert(ZVecCollection *collection, + const ZVecDoc **docs, size_t doc_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !docs || doc_count == 0 || !success_count || + !error_count) { + set_last_error( + "Invalid arguments: collection, docs, doc_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector internal_docs = + convert_zvec_docs_to_internal(docs, doc_count); + + auto result = (*coll_ptr)->Upsert(internal_docs); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code; + } catch (const std::exception &e) { + 
set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_delete(ZVecCollection *collection, + const char *const *pks, size_t pk_count, + size_t *success_count, + size_t *error_count) { + if (!collection || !pks || pk_count == 0 || !success_count || !error_count) { + set_last_error( + "Invalid arguments: collection, pks, pk_count, success_count and " + "error_count cannot be null/zero"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + std::vector primary_keys; + primary_keys.reserve(pk_count); + for (size_t i = 0; i < pk_count; ++i) { + if (pks[i]) { + primary_keys.emplace_back(pks[i]); + } + } + + auto result = (*coll_ptr)->Delete(primary_keys); + ZVecErrorCode error_code = handle_expected_result(result); + + if (error_code == ZVEC_OK) { + *success_count = 0; + *error_count = 0; + for (const auto &status : result.value()) { + if (status.ok()) { + (*success_count)++; + } else { + (*error_count)++; + } + } + } + + return error_code; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + +ZVecErrorCode zvec_collection_delete_by_filter(ZVecCollection *collection, + const char *filter) { + if (!collection || !filter) { + set_last_error("Invalid arguments: collection,filter cannot be null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + try { + auto coll_ptr = + reinterpret_cast *>(collection); + + auto status = (*coll_ptr)->DeleteByFilter(filter); + if (!status.ok()) { + set_last_error(status.message()); + return status_to_error_code(status); + } + return ZVEC_OK; + } catch (const std::exception &e) { + set_last_error(std::string("Exception occurred: ") + e.what()); + return ZVEC_ERROR_INTERNAL_ERROR; + } +} + + +// ============================================================================= +// Data query interface implementation +// 
============================================================================= + + +// Helper function to convert common query parameters +void convert_common_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + internal_query.topk_ = query->topk; + internal_query.field_name_ = + std::string(query->field_name.data, query->field_name.length); + internal_query.filter_ = + std::string(query->filter.data, query->filter.length); + internal_query.include_vector_ = query->include_vector; + internal_query.include_doc_id_ = query->include_doc_id; + + // Binary data conversion (query_vector) + if (query->query_vector.data && query->query_vector.length > 0) { + internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + // Sparse vector data conversion + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + // Output fields conversion + if (query->output_fields && query->output_fields->count > 0) { + internal_query.output_fields_ = std::vector(); + for (size_t i = 0; i < query->output_fields->count; ++i) { + internal_query.output_fields_->emplace_back( + query->output_fields->strings[i].data, + query->output_fields->strings[i].length); + } + } +} + +// Helper function to convert query parameters +void convert_query_params(zvec::VectorQuery &internal_query, + const ZVecVectorQuery *query) { + convert_common_query_params(internal_query, query); + + // QueryParams conversion + if (query->query_params) { + auto query_params = std::make_shared( + static_cast(query->query_params->index_type)); 
+ + switch (query->query_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = std::make_shared( + query->query_params->params.hnsw_params.ef, + query->query_params->params.hnsw_params.base.radius, + query->query_params->params.hnsw_params.base.is_linear, + query->query_params->params.hnsw_params.base.is_using_refiner); + internal_query.query_params_ = hnsw_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = std::make_shared( + query->query_params->params.ivf_params.nprobe, + query->query_params->params.ivf_params.base.is_using_refiner, + query->query_params->params.ivf_params.scale_factor); + internal_query.query_params_ = ivf_params; + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = std::make_shared( + query->query_params->params.flat_params.base.is_using_refiner, + query->query_params->params.flat_params.scale_factor); + internal_query.query_params_ = flat_params; + break; + } + default: { + query_params->set_radius( + query->query_params->params.base_params.radius); + query_params->set_is_linear( + query->query_params->params.base_params.is_linear); + query_params->set_is_using_refiner( + query->query_params->params.base_params.is_using_refiner); + internal_query.query_params_ = query_params; + break; + } + } + } +} + +// Helper function to convert group by query parameters +void convert_groupby_query_params(zvec::GroupByVectorQuery &internal_query, + const ZVecGroupByVectorQuery *query) { + internal_query.field_name_ = + std::string(query->field_name.data, query->field_name.length); + internal_query.filter_ = + std::string(query->filter.data, query->filter.length); + internal_query.include_vector_ = query->include_vector; + internal_query.group_by_field_name_ = std::string( + query->group_by_field_name.data, query->group_by_field_name.length); + internal_query.group_count_ = query->group_count; + internal_query.group_topk_ = query->group_topk; + + if (query->query_vector.data && query->query_vector.length > 
0) { + internal_query.query_vector_.assign( + reinterpret_cast(query->query_vector.data), + query->query_vector.length); + } + + if (query->query_sparse_indices.data && + query->query_sparse_indices.length > 0) { + internal_query.query_sparse_indices_.assign( + reinterpret_cast(query->query_sparse_indices.data), + query->query_sparse_indices.length); + } + + if (query->query_sparse_values.data && + query->query_sparse_values.length > 0) { + internal_query.query_sparse_values_.assign( + reinterpret_cast(query->query_sparse_values.data), + query->query_sparse_values.length); + } + + if (query->output_fields && query->output_fields->count > 0) { + if (!internal_query.output_fields_.has_value()) { + internal_query.output_fields_ = std::vector(); + } + for (size_t i = 0; i < query->output_fields->count; ++i) { + internal_query.output_fields_->push_back( + std::string(query->output_fields->strings[i].data, + query->output_fields->strings[i].length)); + } + } + + if (query->query_params) { + auto query_params = std::make_shared( + static_cast(query->query_params->index_type)); + + switch (query->query_params->index_type) { + case ZVEC_INDEX_TYPE_HNSW: { + auto hnsw_params = std::make_shared( + query->query_params->params.hnsw_params.ef, + query->query_params->params.hnsw_params.base.radius, + query->query_params->params.hnsw_params.base.is_linear, + query->query_params->params.hnsw_params.base.is_using_refiner); + internal_query.query_params_ = hnsw_params; + break; + } + case ZVEC_INDEX_TYPE_IVF: { + auto ivf_params = std::make_shared( + query->query_params->params.ivf_params.nprobe, + query->query_params->params.ivf_params.base.is_using_refiner, + query->query_params->params.ivf_params.scale_factor); + internal_query.query_params_ = ivf_params; + break; + } + case ZVEC_INDEX_TYPE_FLAT: { + auto flat_params = std::make_shared( + query->query_params->params.flat_params.base.is_using_refiner, + query->query_params->params.flat_params.scale_factor); + 
internal_query.query_params_ = flat_params; + break; + } + default: { + query_params->set_radius( + query->query_params->params.base_params.radius); + query_params->set_is_linear( + query->query_params->params.base_params.is_linear); + query_params->set_is_using_refiner( + query->query_params->params.base_params.is_using_refiner); + internal_query.query_params_ = query_params; + break; + } + } + } +} + +// Helper function to convert document results to C API format +ZVecErrorCode convert_document_results( + const std::vector> &query_results, + ZVecDoc ***results, size_t *result_count) { + *result_count = query_results.size(); + *results = static_cast(malloc(*result_count * sizeof(ZVecDoc *))); + + if (!*results) { + set_last_error("Failed to allocate memory for query results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + for (size_t i = 0; i < *result_count; ++i) { + const auto &internal_doc = query_results[i]; + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < i; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto doc_ptr = reinterpret_cast *>(c_doc); + *(*doc_ptr) = *internal_doc; // Copy assignment + (*results)[i] = c_doc; // Store the pointer, not dereference + } + + return ZVEC_OK; +} + +// Helper function to convert grouped document results to C API format +ZVecErrorCode convert_grouped_document_results( + const std::vector &group_results, ZVecDoc ***results, + ZVecString ***group_by_values, size_t *result_count) { + // Calculate total document count across all groups + size_t total_docs = 0; + for (const auto &group_result : group_results) { + total_docs += group_result.docs_.size(); + } + + // Allocate memory for document pointers and group by values + 
*result_count = total_docs; + *results = static_cast(malloc(*result_count * sizeof(ZVecDoc *))); + *group_by_values = static_cast( + malloc(group_results.size() * sizeof(ZVecString *))); + + if (!*results) { + set_last_error("Failed to allocate memory for query results"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Convert C++ grouped results to C API format + size_t doc_index = 0; + for (const auto &group_result : group_results) { + for (const auto &internal_doc : group_result.docs_) { + if (doc_index >= *result_count) { + break; + } + + // Create new document wrapper + ZVecDoc *c_doc = zvec_doc_create(); + if (!c_doc) { + // Clean up previously allocated documents + for (size_t j = 0; j < doc_index; ++j) { + zvec_doc_destroy((*results)[j]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create document wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + // Copy the C++ document to our wrapper + auto doc_ptr = + reinterpret_cast *>(c_doc); + *(*doc_ptr) = internal_doc; // Copy assignment + + ZVecString *c_group_value = + zvec_string_create(group_result.group_by_value_.c_str()); + if (!c_group_value) { + for (size_t j = 0; j < doc_index; ++j) { + zvec_doc_destroy((*results)[j]); + zvec_free_string((*group_by_values)[doc_index]); + } + free(*results); + *results = nullptr; + *result_count = 0; + set_last_error("Failed to create string wrapper"); + return ZVEC_ERROR_INTERNAL_ERROR; + } + + (*group_by_values)[doc_index] = c_group_value; + (*results)[doc_index] = c_doc; + ++doc_index; + } + } + + return ZVEC_OK; +} + +// Helper function to convert fetched document results to C API format +ZVecErrorCode convert_fetched_document_results(const zvec::DocPtrMap &doc_map, + ZVecDoc ***results, + size_t *doc_count) { + // Calculate actual document count (some PKs might not exist) + size_t actual_count = 0; + for (const auto &[pk, doc_ptr] : doc_map) { + if (doc_ptr) { + actual_count++; + } + } + + // Allocate memory for 
document pointers
+  *doc_count = actual_count;
+  if (*doc_count == 0) {
+    *results = nullptr;
+    return ZVEC_OK;
+  }
+
+  *results = static_cast(malloc(*doc_count * sizeof(ZVecDoc *)));
+  if (!*results) {
+    set_last_error("Failed to allocate memory for document pointers");
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  }
+
+  // Convert C++ DocPtrMap to C ZVecDoc pointer array
+  size_t index = 0;
+  for (const auto &[pk, doc_ptr] : doc_map) {
+    if (doc_ptr && index < *doc_count) {
+      // Create new document wrapper
+      ZVecDoc *c_doc = zvec_doc_create();
+      if (!c_doc) {
+        // Clean up previously allocated documents
+        for (size_t j = 0; j < index; ++j) {
+          zvec_doc_destroy((*results)[j]);
+        }
+        free(*results);
+        *results = nullptr;
+        *doc_count = 0;
+        set_last_error("Failed to create document wrapper");
+        return ZVEC_ERROR_INTERNAL_ERROR;
+      }
+
+      // Copy the C++ document to our wrapper
+      auto cpp_doc_ptr = reinterpret_cast *>(c_doc);
+      *(*cpp_doc_ptr) = *doc_ptr;  // Copy assignment
+
+      // Set the primary key explicitly
+      zvec_doc_set_pk(c_doc, pk.c_str());
+
+      (*results)[index] = c_doc;
+      ++index;
+    }
+  }
+
+  return ZVEC_OK;
+}
+
+// Runs a vector query against `collection` and returns the hits as an array
+// of ZVecDoc wrappers.
+//
+// @param collection         Collection handle (must not be null).
+// @param query              Query description (must not be null).
+// @param[out] results       Receives the array built by
+//                           convert_document_results().
+// @param[out] result_count  Receives the number of entries in *results.
+// @return ZVEC_ERROR_INVALID_ARGUMENT if any pointer argument is null (the
+//         outputs are not touched in that case); the code reported by
+//         handle_expected_result() if the query fails;
+//         ZVEC_ERROR_INTERNAL_ERROR if a std::exception is caught;
+//         ZVEC_ERROR_UNKNOWN for any other exception; otherwise the result of
+//         the conversion step.
+ZVecErrorCode zvec_collection_query(const ZVecCollection *collection,
+                                    const ZVecVectorQuery *query,
+                                    ZVecDoc ***results, size_t *result_count) {
+  if (!collection || !query || !results || !result_count) {
+    set_last_error(
+        "Invalid arguments: collection, query, results and result_count cannot "
+        "be null");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+
+  try {
+    // NOTE(review): the cast target was lost in this copy of the patch and is
+    // reconstructed from usage -- confirm against the original source.
+    auto coll_ptr =
+        reinterpret_cast<const std::shared_ptr<zvec::Collection> *>(collection);
+
+    // Convert query parameters using helper function
+    zvec::VectorQuery internal_query;
+    convert_query_params(internal_query, query);
+
+    auto result = (*coll_ptr)->Query(internal_query);
+    ZVecErrorCode error_code = handle_expected_result(result);
+
+    if (error_code == ZVEC_OK) {
+      const auto &query_results = result.value();
+      error_code =
+          convert_document_results(query_results, results, result_count);
+    } else {
+      *results = nullptr;
+      *result_count = 0;
+    }
+
+    return error_code;
+  } catch (const std::exception &e) {
+    set_last_error(std::string("Exception occurred: ") + e.what());
+    *results = nullptr;
+    *result_count = 0;
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  } catch (...) {
+    // This function is called from C code, so no exception of any type may
+    // propagate out of it.
+    set_last_error("Unknown exception occurred");
+    *results = nullptr;
+    *result_count = 0;
+    return ZVEC_ERROR_UNKNOWN;
+  }
+}
+
+// Runs a group-by vector query against `collection` and returns the hits
+// together with the group-by value belonging to each hit.
+//
+// @param collection            Collection handle (must not be null).
+// @param query                 Group-by query description (must not be null).
+// @param[out] results          Receives the array built by
+//                              convert_grouped_document_results().
+// @param[out] group_by_values  Receives the group-by value array built by
+//                              the same helper.
+// @param[out] result_count     Receives the count reported by that helper.
+// @return ZVEC_ERROR_INVALID_ARGUMENT if any pointer argument is null (the
+//         outputs are not touched in that case); the code reported by
+//         handle_expected_result() if the query fails;
+//         ZVEC_ERROR_INTERNAL_ERROR if a std::exception is caught;
+//         ZVEC_ERROR_UNKNOWN for any other exception; otherwise the result of
+//         the conversion step.
+ZVecErrorCode zvec_collection_query_by_group(
+    const ZVecCollection *collection, const ZVecGroupByVectorQuery *query,
+    ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count) {
+  if (!collection || !query || !results || !group_by_values || !result_count) {
+    set_last_error(
+        "Invalid arguments: collection, query, results, group_by_values and "
+        "result_count cannot "
+        "be null");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+
+  try {
+    // NOTE(review): the cast target was lost in this copy of the patch and is
+    // reconstructed from usage -- confirm against the original source.
+    auto coll_ptr =
+        reinterpret_cast<const std::shared_ptr<zvec::Collection> *>(collection);
+
+    zvec::GroupByVectorQuery internal_query;
+    convert_groupby_query_params(internal_query, query);
+
+    auto result = (*coll_ptr)->GroupByQuery(internal_query);
+    ZVecErrorCode error_code = handle_expected_result(result);
+
+    if (error_code == ZVEC_OK) {
+      const auto &group_results = result.value();
+      error_code = convert_grouped_document_results(
+          group_results, results, group_by_values, result_count);
+    } else {
+      *results = nullptr;
+      *group_by_values = nullptr;
+      *result_count = 0;
+    }
+
+    return error_code;
+  } catch (const std::exception &e) {
+    set_last_error(std::string("Exception occurred: ") + e.what());
+    *results = nullptr;
+    *group_by_values = nullptr;
+    *result_count = 0;
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  } catch (...) {
+    // This function is called from C code, so no exception of any type may
+    // propagate out of it.
+    set_last_error("Unknown exception occurred");
+    *results = nullptr;
+    *group_by_values = nullptr;
+    *result_count = 0;
+    return ZVEC_ERROR_UNKNOWN;
+  }
+}
+
+ZVecErrorCode zvec_collection_fetch(ZVecCollection *collection,
+                                    const char *const *pks, size_t pk_count,
+                                    ZVecDoc ***results, size_t *doc_count) {
+  if (!collection || !pks || !results || !doc_count) {
+    set_last_error(
+        "Invalid arguments: collection, pks, results and doc_count cannot "
+        "be null");
+    return ZVEC_ERROR_INVALID_ARGUMENT;
+  }
+
+  // Handle empty case
+  if (pk_count == 0) {
+    *results = nullptr;
+    *doc_count = 0;
+    return ZVEC_OK;
+  }
+
+  try {
+    auto coll_ptr =
+        reinterpret_cast
*>(collection);
+
+    // Convert C array to C++ vector
+    std::vector<std::string> pk_vector;
+    pk_vector.reserve(pk_count);
+    for (size_t i = 0; i < pk_count; ++i) {
+      if (pks[i]) {
+        pk_vector.emplace_back(pks[i]);
+      } else {
+        set_last_error("Null primary key at index " + std::to_string(i));
+        // Leave the outputs in the same defined state as the exception path
+        // so a caller that inspects them after an error sees null/0.
+        *results = nullptr;
+        *doc_count = 0;
+        return ZVEC_ERROR_INVALID_ARGUMENT;
+      }
+    }
+
+    // Call C++ fetch method
+    auto result = (*coll_ptr)->Fetch(pk_vector);
+    if (!result.has_value()) {
+      set_last_error("Failed to fetch documents: " + result.error().message());
+      *results = nullptr;
+      *doc_count = 0;
+      return ZVEC_ERROR_INTERNAL_ERROR;
+    }
+
+    const auto &doc_map = result.value();
+    return convert_fetched_document_results(doc_map, results, doc_count);
+
+  } catch (const std::exception &e) {
+    set_last_error(std::string("Exception in zvec_collection_fetch: ") +
+                   e.what());
+    *results = nullptr;
+    *doc_count = 0;
+    return ZVEC_ERROR_INTERNAL_ERROR;
+  } catch (...) {
+    // This function is called from C code, so no exception of any type may
+    // propagate out of it.
+    set_last_error("Unknown exception in zvec_collection_fetch");
+    *results = nullptr;
+    *doc_count = 0;
+    return ZVEC_ERROR_UNKNOWN;
+  }
+}
diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h
new file mode 100644
index 00000000..5695711a
--- /dev/null
+++ b/src/include/zvec/c_api.h
@@ -0,0 +1,2516 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ZVEC_C_API_H
+#define ZVEC_C_API_H
+
+// NOTE(review): the header names were lost in this copy of the patch; they
+// are restored from the types this header uses (bool, size_t, fixed-width
+// integers) -- confirm against the original source.
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+// =============================================================================
+// API Export Control
+// =============================================================================
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#ifdef ZVEC_BUILD_SHARED
+#define ZVEC_EXPORT __declspec(dllexport)
+#elif defined(ZVEC_USE_SHARED)
+#define ZVEC_EXPORT __declspec(dllimport)
+#else
+#define ZVEC_EXPORT
+#endif
+#define ZVEC_CALL __cdecl
+#else
+// Test that __GNUC__ is defined before comparing it, so compilers that do not
+// define it take the empty fallback without an undefined-macro warning.
+#if defined(__GNUC__) && __GNUC__ >= 4
+#define ZVEC_EXPORT __attribute__((visibility("default")))
+#else
+#define ZVEC_EXPORT
+#endif
+#define ZVEC_CALL
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// =============================================================================
+// Version Information
+// =============================================================================
+
+/** @brief Major version number */
+#define ZVEC_VERSION_MAJOR 0
+
+/** @brief Minor version number */
+#define ZVEC_VERSION_MINOR 3
+
+/** @brief Patch version number */
+#define ZVEC_VERSION_PATCH 0
+
+/** @brief Full version string */
+#define ZVEC_VERSION_STRING "0.3.0"
+
+/**
+ * @brief Get library version information
+ *
+ * Return format: "{base_version}[-{git_info}] (built {build_time})"
+ * Example: "0.3.0-g3f8a2b1 (built 2025-05-13 10:30:45)"
+ *
+ * @return const char* Version string, managed internally by the library, caller
+ * should not free
+ */
+ZVEC_EXPORT const char *ZVEC_CALL zvec_get_version(void);
+
+/**
+ * @brief Check API version compatibility
+ *
+ * Check if the current library version meets the specified minimum version
+ * requirements. Versions follow the semantic versioning specification:
+ * MAJOR.MINOR.PATCH
+ *
+ * @param major Required major version number
+ * @param minor Required minor version number
+ * @param patch Required patch version number
+ * @return bool Returns true if compatible, false otherwise
+ */
+ZVEC_EXPORT bool ZVEC_CALL
zvec_check_version(int major, int minor, int patch); + +/** + * @brief Get major version number + * + * @return int Major version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_major(void); + +/** + * @brief Get minor version number + * + * @return int Minor version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_minor(void); + + +/** + * @brief Get patch version number + * + * @return int Patch version number + */ +ZVEC_EXPORT int ZVEC_CALL zvec_get_version_patch(void); + + +// ============================================================================= +// Error Code Definitions +// ============================================================================= + +/** + * @brief ZVec C API error code enumeration + */ +typedef enum { + ZVEC_OK = 0, /**< Success */ + ZVEC_ERROR_NOT_FOUND = 1, /**< Resource not found */ + ZVEC_ERROR_ALREADY_EXISTS = 2, /**< Resource already exists */ + ZVEC_ERROR_INVALID_ARGUMENT = 3, /**< Invalid argument */ + ZVEC_ERROR_PERMISSION_DENIED = 4, /**< Permission denied */ + ZVEC_ERROR_FAILED_PRECONDITION = 5, /**< Failed precondition */ + ZVEC_ERROR_RESOURCE_EXHAUSTED = 6, /**< Resource exhausted */ + ZVEC_ERROR_UNAVAILABLE = 7, /**< Unavailable */ + ZVEC_ERROR_INTERNAL_ERROR = 8, /**< Internal error */ + ZVEC_ERROR_NOT_SUPPORTED = 9, /**< Unsupported operation */ + ZVEC_ERROR_UNKNOWN = 10 /**< Unknown error */ +} ZVecErrorCode; + +/** + * @brief Error details structure + */ +typedef struct { + ZVecErrorCode code; /**< Error code */ + const char *message; /**< Error message */ + const char *file; /**< File where error occurred */ + int line; /**< Line number where error occurred */ + const char *function; /**< Function where error occurred */ +} ZVecErrorDetails; + +/** + * @brief Get detailed information of the last error + * @param[out] error_details Pointer to error details structure + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_get_last_error_details(ZVecErrorDetails 
*error_details); + +/** + * @brief Get last error message + * @param[out] error_msg Returned error message string (needs to be freed by + * calling zvec_free) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_get_last_error(char **error_msg); + +/** + * @brief Clear error status + */ +ZVEC_EXPORT void ZVEC_CALL zvec_clear_error(void); + + +// ============================================================================= +// Basic Data Structures +// ============================================================================= + +/** + * @brief String view structure (does not own memory) + */ +typedef struct { + const char *data; /**< String data pointer */ + size_t length; /**< String length */ +} ZVecStringView; + +/** + * @brief Mutable string structure (owns memory) + */ +typedef struct { + char *data; /**< String data pointer */ + size_t length; /**< String length */ + size_t capacity; /**< Allocated capacity */ +} ZVecString; + +/** + * @brief String array structure + */ +typedef struct { + ZVecString *strings; /**< String array */ + size_t count; /**< String count */ +} ZVecStringArray; + +/** + * @brief Float array structure + */ +typedef struct { + const float *data; + size_t length; +} ZVecFloatArray; + +/** + * @brief Integer array structure + */ +typedef struct { + const int64_t *data; + size_t length; +} ZVecInt64Array; + +/** + * @brief Byte array structure + */ +typedef struct { + const uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Array length */ +} ZVecByteArray; + +/** + * @brief Mutable byte array structure + */ +typedef struct { + uint8_t *data; /**< Byte data pointer */ + size_t length; /**< Current length */ + size_t capacity; /**< Allocated capacity */ +} ZVecMutableByteArray; + +// ============================================================================= +// String management functions +// ============================================================================= + +/** + * @brief 
Create string from C string + * @param str C string + * @return ZVecString* Pointer to the newly created string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_create(const char *str); + +/** + * @brief Create string from string view + * + * Creates a new ZVecString by copying data from a ZVecStringView. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param view Pointer to source string view (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL +zvec_string_create_from_view(const ZVecStringView *view); + +/** + * @brief Create binary-safe string from raw data + * + * Creates a new ZVecString from raw binary data that may contain null bytes. + * Unlike zvec_string_create(), this function takes explicit length parameter + * and doesn't rely on null-termination. + * The created string owns its memory and must be freed with zvec_free_string(). + * + * @param data Raw binary data pointer (must not be NULL) + * @param length Length of data in bytes + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + * @note This function is suitable for binary data containing null bytes + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_bin_create(const uint8_t *data, + size_t length); + +/** + * @brief Copy string + * + * Creates a new ZVecString by copying an existing string. + * The created string owns its memory and must be freed with zvec_free_string(). 
+ * + * @param str Pointer to source string (must not be NULL) + * @return ZVecString* New string instance on success, NULL on error + * @note Caller is responsible for freeing the returned string + */ +ZVEC_EXPORT ZVecString *ZVEC_CALL zvec_string_copy(const ZVecString *str); + +/** + * @brief Get C string from ZVecString + * @param str ZVecString pointer + * @return const char* C string + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_string_c_str(const ZVecString *str); + +/** + * @brief Get string length + * @param str ZVecString pointer + * @return size_t String length + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_string_length(const ZVecString *str); + +/** + * @brief Compare two strings + * @param str1 First string + * @param str2 Second string + * @return int Comparison result (-1, 0, or 1) + */ +ZVEC_EXPORT int ZVEC_CALL zvec_string_compare(const ZVecString *str1, + const ZVecString *str2); + + +// ============================================================================= +// Configuration and Options Structures +// ============================================================================= + +/** + * @brief Log level enumeration + */ +typedef enum { + ZVEC_LOG_LEVEL_DEBUG = 0, + ZVEC_LOG_LEVEL_INFO = 1, + ZVEC_LOG_LEVEL_WARN = 2, + ZVEC_LOG_LEVEL_ERROR = 3, + ZVEC_LOG_LEVEL_FATAL = 4 +} ZVecLogLevel; + +/** + * @brief Log type enumeration + */ +typedef enum { ZVEC_LOG_TYPE_CONSOLE = 0, ZVEC_LOG_TYPE_FILE = 1 } ZVecLogType; + +/** + * @brief Console log configuration structure + */ +typedef struct { + ZVecLogLevel level; /**< Log level */ +} ZVecConsoleLogConfig; + +/** + * @brief File log configuration structure + */ +typedef struct { + ZVecLogLevel level; /**< Log level */ + ZVecString dir; /**< Log directory */ + ZVecString basename; /**< Log file base name */ + uint32_t file_size; /**< Log file size (MB) */ + uint32_t overdue_days; /**< Log expiration days */ +} ZVecFileLogConfig; + +/** + * @brief Log configuration union + */ +typedef struct { + 
ZVecLogType type; /**< Log type */ + union { + ZVecConsoleLogConfig console_config; /**< Console log configuration */ + ZVecFileLogConfig file_config; /**< File log configuration */ + } config; +} ZVecLogConfig; + +/** + * @brief ZVec configuration data structure (corresponds to zvec::ConfigData) + */ +typedef struct { + uint64_t memory_limit_bytes; /**< Memory limit in bytes */ + + // log + ZVecLogConfig *log_config; /**< Log configuration (optional, NULL means using + default configuration) */ + + // query + uint32_t query_thread_count; /**< Query thread count */ + float invert_to_forward_scan_ratio; /**< Inverted to forward scan ratio */ + float brute_force_by_keys_ratio; /**< Brute force by keys ratio */ + + // optimize + uint32_t optimize_thread_count; /**< Optimize thread count */ +} ZVecConfigData; + +/** + * @brief Create console log configuration + * @param level Log level + * @return ZVecConsoleLogConfig* Pointer to the newly created console log + * configuration + */ +ZVEC_EXPORT ZVecConsoleLogConfig *ZVEC_CALL +zvec_config_console_log_create(ZVecLogLevel level); + +/** + * @brief Create file log configuration + * @param level Log level + * @param dir Log directory + * @param basename Log file base name + * @param file_size Log file size (MB) + * @param overdue_days Log expiration days + * @return ZVecFileLogConfig* Pointer to the newly created file log + * configuration + */ +ZVEC_EXPORT ZVecFileLogConfig *ZVEC_CALL zvec_config_file_log_create( + ZVecLogLevel level, const char *dir, const char *basename, + uint32_t file_size, uint32_t overdue_days); + +/** + * @brief Create log configuration + * @param type Log type + * @param config_data Configuration data (specific to log type) + * @return ZVecLogConfig* Pointer to the newly created log configuration + */ +ZVEC_EXPORT ZVecLogConfig *ZVEC_CALL zvec_config_log_create(ZVecLogType type, + void *config_data); + +/** + * @brief Destroy console log configuration + * @param config Console log configuration 
pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_console_log_destroy(ZVecConsoleLogConfig *config); + +/** + * @brief Destroy file log configuration + * @param config File log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_config_file_log_destroy(ZVecFileLogConfig *config); + +/** + * @brief Destroy log configuration + * @param config Log configuration pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_config_log_destroy(ZVecLogConfig *config); + +/** + * @brief Create configuration data + * @return ZVecConfigData* Pointer to the newly created configuration data + */ +ZVEC_EXPORT ZVecConfigData *ZVEC_CALL zvec_config_data_create(void); + +/** + * @brief Destroy configuration data + * @param config Configuration data pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_config_data_destroy(ZVecConfigData *config); + +/** + * @brief Set memory limit in configuration data + * @param config Configuration data pointer + * @param memory_limit_bytes Memory limit in bytes + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_memory_limit( + ZVecConfigData *config, uint64_t memory_limit_bytes); + +/** + * @brief Set log configuration in configuration data + * @param config Configuration data pointer + * @param log_config Log configuration pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_log_config( + ZVecConfigData *config, ZVecLogConfig *log_config); + +/** + * @brief Set query thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Query thread count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_query_thread_count( + ZVecConfigData *config, uint32_t thread_count); + +/** + * @brief Set optimize thread count in configuration data + * @param config Configuration data pointer + * @param thread_count Optimize thread count + * @return ZVecErrorCode Error code + */ 
+ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_config_data_set_optimize_thread_count(
+    ZVecConfigData *config, uint32_t thread_count);
+
+// zvec_config_log_destroy() is declared once, earlier in this header, with
+// ZVEC_EXPORT and ZVEC_CALL. A second declaration without those attributes
+// used to sit here; it is dropped so the function has a single, consistently
+// attributed declaration.
+
+// =============================================================================
+// Initialization and Cleanup Interface
+// =============================================================================
+
+/**
+ * @brief Initialize ZVec library
+ * @param config Configuration data (optional, NULL means using default
+ * configuration)
+ * @return ZVecErrorCode Error code
+ */
+ZVEC_EXPORT ZVecErrorCode ZVEC_CALL
+zvec_initialize(const ZVecConfigData *config);
+
+/**
+ * @brief Clean up ZVec library resources
+ * @return ZVecErrorCode Error code
+ */
+ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_shutdown(void);
+
+/**
+ * @brief Check if library is initialized
+ * @param[out] initialized Whether initialized
+ * @return ZVecErrorCode Error code
+ */
+ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_is_initialized(bool *initialized);
+
+// =============================================================================
+// Data Type Enumerations
+// =============================================================================
+
+/**
+ * @brief Data type enumeration
+ */
+typedef enum {
+  ZVEC_DATA_TYPE_UNDEFINED = 0,
+
+  ZVEC_DATA_TYPE_BINARY = 1,
+  ZVEC_DATA_TYPE_STRING = 2,
+  ZVEC_DATA_TYPE_BOOL = 3,
+  ZVEC_DATA_TYPE_INT32 = 4,
+  ZVEC_DATA_TYPE_INT64 = 5,
+  ZVEC_DATA_TYPE_UINT32 = 6,
+  ZVEC_DATA_TYPE_UINT64 = 7,
+  ZVEC_DATA_TYPE_FLOAT = 8,
+  ZVEC_DATA_TYPE_DOUBLE = 9,
+
+  ZVEC_DATA_TYPE_VECTOR_BINARY32 = 20,
+  ZVEC_DATA_TYPE_VECTOR_BINARY64 = 21,
+  ZVEC_DATA_TYPE_VECTOR_FP16 = 22,
+  ZVEC_DATA_TYPE_VECTOR_FP32 = 23,
+  ZVEC_DATA_TYPE_VECTOR_FP64 = 24,
+  ZVEC_DATA_TYPE_VECTOR_INT4 = 25,
+  ZVEC_DATA_TYPE_VECTOR_INT8 = 26,
+  ZVEC_DATA_TYPE_VECTOR_INT16 = 27,
+
+  ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16 =
30, + ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 = 31, + + ZVEC_DATA_TYPE_ARRAY_BINARY = 40, + ZVEC_DATA_TYPE_ARRAY_STRING = 41, + ZVEC_DATA_TYPE_ARRAY_BOOL = 42, + ZVEC_DATA_TYPE_ARRAY_INT32 = 43, + ZVEC_DATA_TYPE_ARRAY_INT64 = 44, + ZVEC_DATA_TYPE_ARRAY_UINT32 = 45, + ZVEC_DATA_TYPE_ARRAY_UINT64 = 46, + ZVEC_DATA_TYPE_ARRAY_FLOAT = 47, + ZVEC_DATA_TYPE_ARRAY_DOUBLE = 48 +} ZVecDataType; + +/** + * @brief Index type enumeration + */ +typedef enum { + ZVEC_INDEX_TYPE_UNDEFINED = 0, + ZVEC_INDEX_TYPE_HNSW = 1, + ZVEC_INDEX_TYPE_IVF = 3, + ZVEC_INDEX_TYPE_FLAT = 4, + ZVEC_INDEX_TYPE_INVERT = 10 +} ZVecIndexType; + +/** + * @brief Distance metric type enumeration + */ +typedef enum { + ZVEC_METRIC_TYPE_UNDEFINED = 0, + ZVEC_METRIC_TYPE_L2 = 1, + ZVEC_METRIC_TYPE_IP = 2, + ZVEC_METRIC_TYPE_COSINE = 3, + ZVEC_METRIC_TYPE_MIPSL2 = 4 +} ZVecMetricType; + +/** + * @brief Quantization type enumeration + */ +typedef enum { + ZVEC_QUANTIZE_TYPE_UNDEFINED = 0, + ZVEC_QUANTIZE_TYPE_FP16 = 1, + ZVEC_QUANTIZE_TYPE_INT8 = 2, + ZVEC_QUANTIZE_TYPE_INT4 = 3 +} ZVecQuantizeType; + +// ============================================================================= +// Forward Declarations +// ============================================================================= + +typedef struct ZVecCollection ZVecCollection; + +// ============================================================================= +// Index Parameters Structures +// ============================================================================= + +/** + * @brief Base index parameters structure + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ +} ZVecBaseIndexParams; + +/** + * @brief Scalar index parameters structure + */ +typedef struct { + ZVecBaseIndexParams base; /**< Inherit base parameters */ + bool enable_range_optimization; /**< Whether to enable range optimization */ + bool enable_extended_wildcard; /**< Whether to enable extended wildcard */ +} ZVecInvertIndexParams; + +/** + * @brief Vector index 
base parameters structure + */ +typedef struct { + ZVecBaseIndexParams base; /**< Inherit base parameters */ + ZVecMetricType metric_type; /**< Distance metric type */ + ZVecQuantizeType quantize_type; /**< Quantization type */ +} ZVecVectorIndexParams; + +/** + * @brief HNSW index parameters structure + */ +typedef struct { + ZVecVectorIndexParams base; /**< Inherit vector index parameters */ + int m; /**< Graph connectivity parameter */ + int ef_construction; /**< Exploration factor during construction */ + int ef_search; /**< Exploration factor during search */ +} ZVecHnswIndexParams; + +/** + * @brief Flat index parameters structure + */ +typedef struct { + ZVecVectorIndexParams base; /**< Inherit vector index parameters */ + // Flat index has no additional parameters +} ZVecFlatIndexParams; + +/** + * @brief IVF index parameters structure + */ +typedef struct { + ZVecVectorIndexParams base; /**< Inherit vector index parameters */ + int n_list; /**< Number of cluster centers */ + int n_iters; /**< Number of iterations */ + bool use_soar; /**< Whether to use SOAR algorithm */ + int n_probe; /**< Number of clusters to probe during search */ +} ZVecIVFIndexParams; + +/** + * @brief Generic index parameters union + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ + union { + ZVecInvertIndexParams invert_params; /**< Scalar index parameters */ + ZVecHnswIndexParams hnsw_params; /**< HNSW index parameters */ + ZVecFlatIndexParams flat_params; /**< Flat index parameters */ + ZVecIVFIndexParams ivf_params; /**< IVF index parameters */ + } params; +} ZVecIndexParams; + +// ============================================================================= +// Field Schema Structures +// ============================================================================= + +/** + * @brief Field schema structure + */ +typedef struct { + ZVecString *name; /**< Field name */ + ZVecDataType data_type; /**< Data type */ + bool nullable; /**< Whether nullable */ + 
uint32_t dimension; /**< Vector dimension (only used for vector fields) */ + ZVecIndexParams *index_params; /**< Index parameters, NULL means no index */ +} ZVecFieldSchema; + + +// ============================================================================= +// Index Parameters Creation and Destruction Interface +// ============================================================================= + +/** + * @brief Initialize base index parameters + * @param params Base index parameters structure pointer + * @param index_type Index type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_base_init( + ZVecBaseIndexParams *params, ZVecIndexType index_type); + +/** + * @brief Initialize scalar index parameters + * @param params Scalar index parameters structure pointer + * @param enable_range_opt Whether to enable range optimization + * @param enable_wildcard Whether to enable wildcard expansion + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_invert_init( + ZVecInvertIndexParams *params, bool enable_range_opt, bool enable_wildcard); + +/** + * @brief Initialize vector index parameters + * @param params Vector index parameters structure pointer + * @param index_type Index type + * @param metric_type Metric type + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_vector_init( + ZVecVectorIndexParams *params, ZVecIndexType index_type, + ZVecMetricType metric_type, ZVecQuantizeType quantize_type); + +/** + * @brief Initialize HNSW index parameters + * @param params HNSW index parameters structure pointer + * @param metric_type Metric type + * @param m Connectivity parameter + * @param ef_construction Construction exploration factor + * @param ef_search Search exploration factor + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_hnsw_init( + ZVecHnswIndexParams *params, ZVecMetricType metric_type, int m, + int ef_construction, int ef_search, ZVecQuantizeType quantize_type); + +/** + * 
@brief Initialize Flat index parameters + * @param params Flat index parameters structure pointer + * @param metric_type Metric type + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_flat_init( + ZVecFlatIndexParams *params, ZVecMetricType metric_type, + ZVecQuantizeType quantize_type); + +/** + * @brief Initialize IVF index parameters + * @param params IVF index parameters structure pointer + * @param metric_type Metric type + * @param n_list Number of cluster centers + * @param n_iters Number of iterations + * @param use_soar Whether to use SOAR algorithm + * @param n_probe Search probe count + * @param quantize_type Quantization type + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_ivf_init( + ZVecIVFIndexParams *params, ZVecMetricType metric_type, int n_list, + int n_iters, bool use_soar, int n_probe, ZVecQuantizeType quantize_type); + +/** + * @brief Initialize generic index parameters + * @param params Generic index parameters structure pointer + * @param index_type Index type + * @param metric_type Metric type (only valid for vector indexes) + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_init_default( + ZVecIndexParams *params, ZVecIndexType index_type, + ZVecMetricType metric_type); + +/** + * @brief Destroy index parameters (free internal dynamically allocated memory) + * @param params Index parameters structure pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_index_params_destroy(ZVecIndexParams *params); + + +/** + * @brief Create inverted index parameters + * @param enable_range_opt Whether to enable range optimization + * @param enable_wildcard Whether to enable extended wildcard + * @return ZVecInvertIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecInvertIndexParams *ZVEC_CALL +zvec_index_params_invert_create(bool enable_range_opt, bool enable_wildcard); + +/** + * @brief Create vector index base parameters + * @param index_type Index type + * @param metric_type Metric type 
+ * @param quantize_type Quantization type + * @return ZVecVectorIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecVectorIndexParams *ZVEC_CALL zvec_index_params_vector_create( + ZVecIndexType index_type, ZVecMetricType metric_type, + ZVecQuantizeType quantize_type); + +/** + * @brief Create HNSW index parameters + * @param metric_type Metric type + * @param quantize_type Quantization type + * @param m Graph degree parameter + * @param ef_construction Exploration factor during construction + * @param ef_search Exploration factor during search + + * @return ZVecHnswIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecHnswIndexParams *ZVEC_CALL zvec_index_params_hnsw_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int m, + int ef_construction, int ef_search); + +/** + * @brief Create Flat index parameters + * @param metric_type Metric type + * @param quantize_type Quantization type + * @return ZVecFlatIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecFlatIndexParams *ZVEC_CALL zvec_index_params_flat_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type); + +/** + * @brief Create IVF index parameters + * @param metric_type Metric type + * @param n_list Number of cluster centers + * @param n_iters Number of iterations + * @param use_soar Whether to use SOAR algorithm + * @param n_probe Number of clusters to probe during search + * @param quantize_type Quantization type + * @return ZVecIVFIndexParams* Pointer to the newly created index parameters + */ +ZVEC_EXPORT ZVecIVFIndexParams *ZVEC_CALL zvec_index_params_ivf_create( + ZVecMetricType metric_type, ZVecQuantizeType quantize_type, int n_list, + int n_iters, bool use_soar, int n_probe); + + +/** + * @brief Destroy inverted index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_invert_destroy(ZVecInvertIndexParams *params); + +/** + * 
@brief Destroy vector index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_vector_destroy(ZVecVectorIndexParams *params); + +/** + * @brief Destroy HNSW index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_hnsw_destroy(ZVecHnswIndexParams *params); + +/** + * @brief Destroy Flat index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_flat_destroy(ZVecFlatIndexParams *params); + +/** + * @brief Destroy IVF index parameters + * @param params Index parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_index_params_ivf_destroy(ZVecIVFIndexParams *params); + + +// ============================================================================= +// Query Parameters Structures +// ============================================================================= + +/** + * @brief Base query parameters structure (corresponds to zvec::QueryParams) + */ +typedef struct { + ZVecIndexType index_type; /**< Index type */ + float radius; /**< Search radius */ + bool is_linear; /**< Whether linear search */ + bool is_using_refiner; /**< Whether using refiner */ +} ZVecQueryParams; + +/** + * @brief HNSW query parameters structure (corresponds to zvec::HnswQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + int ef; /**< Exploration factor during search */ +} ZVecHnswQueryParams; + +/** + * @brief IVF query parameters structure (corresponds to zvec::IVFQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + int nprobe; /**< Number of clusters to probe during search */ + float scale_factor; /**< Scale factor */ +} ZVecIVFQueryParams; + +/** + * @brief Flat query parameters structure (corresponds to zvec::FlatQueryParams) + */ +typedef struct { + ZVecQueryParams base; /**< Inherit base query parameters */ + float scale_factor; 
/**< Scale factor */ +} ZVecFlatQueryParams; + +/** + * @brief Query parameters union (supports query parameters for different index + * types) + */ +typedef struct { + ZVecIndexType index_type; /**< Index type, used to distinguish the parameter + type stored in the union */ + union { + ZVecQueryParams base_params; /**< Base query parameters */ + ZVecHnswQueryParams hnsw_params; /**< HNSW query parameters */ + ZVecIVFQueryParams ivf_params; /**< IVF query parameters */ + ZVecFlatQueryParams flat_params; /**< Flat query parameters */ + } params; +} ZVecQueryParamsUnion; + +// ============================================================================= +// Query Structures (Updated Version, Including QueryParams) +// ============================================================================= + +/** + * @brief Vector query structure (aligned with zvec::VectorQuery, includes + * QueryParams) + */ +typedef struct { + int topk; /**< Number of results to return */ + ZVecString field_name; /**< Query field name */ + ZVecByteArray query_vector; /**< Query vector (binary data) */ + ZVecByteArray + query_sparse_indices; /**< Sparse vector indices (binary data) */ + ZVecByteArray query_sparse_values; /**< Sparse vector values (binary data) */ + ZVecString filter; /**< Filter expression */ + bool include_vector; /**< Whether to include vector data */ + bool include_doc_id; /**< Whether to include document ID */ + ZVecStringArray *output_fields; /**< Output field list (NULL means all) */ + ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL + means using default parameters) */ +} ZVecVectorQuery; + +/** + * @brief Grouped vector query structure (aligned with zvec::GroupByVectorQuery, + * includes QueryParams) + */ +typedef struct { + ZVecString field_name; /**< Query field name */ + ZVecByteArray query_vector; /**< Query vector (binary data) */ + ZVecByteArray + query_sparse_indices; /**< Sparse vector indices (binary data) */ + ZVecByteArray 
query_sparse_values; /**< Sparse vector values (binary data) */ + ZVecString filter; /**< Filter expression */ + bool include_vector; /**< Whether to include vector data */ + ZVecStringArray *output_fields; /**< Output field list */ + ZVecString group_by_field_name; /**< Group by field name */ + uint32_t group_count; /**< Number of groups */ + uint32_t group_topk; /**< Number of results to return per group */ + ZVecQueryParamsUnion *query_params; /**< Query parameters (optional, NULL + means using default parameters) */ +} ZVecGroupByVectorQuery; + + +// ============================================================================= +// Query Parameters Management Functions +// ============================================================================= + +/** + * @brief Create base query parameters + * @param index_type Index type + * @return ZVecQueryParams* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecQueryParams *ZVEC_CALL +zvec_query_params_create(ZVecIndexType index_type); + +/** + * @brief Create HNSW query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_HNSW) + * @param ef Exploration factor during search + * @param radius Search radius + * @param is_linear Whether linear search + * @param is_using_refiner Whether using refiner + * @return ZVecHnswQueryParams* Pointer to the newly created HNSW query + * parameters + */ +ZVEC_EXPORT ZVecHnswQueryParams *ZVEC_CALL +zvec_query_params_hnsw_create(ZVecIndexType index_type, int ef, float radius, + bool is_linear, bool is_using_refiner); + +/** + * @brief Create IVF query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_IVF) + * @param nprobe Number of clusters to probe during search + * @param is_using_refiner Whether using refiner + * @param scale_factor Scale factor + * @return ZVecIVFQueryParams* Pointer to the newly created IVF query parameters + */ +ZVEC_EXPORT ZVecIVFQueryParams *ZVEC_CALL +zvec_query_params_ivf_create(ZVecIndexType 
index_type, int nprobe, + bool is_using_refiner, float scale_factor); + +/** + * @brief Create Flat query parameters + * @param index_type Index type (should be ZVEC_INDEX_TYPE_FLAT) + * @param is_using_refiner Whether using refiner + * @param scale_factor Scale factor + * @return ZVecFlatQueryParams* Pointer to the newly created Flat query + * parameters + */ +ZVEC_EXPORT ZVecFlatQueryParams *ZVEC_CALL zvec_query_params_flat_create( + ZVecIndexType index_type, bool is_using_refiner, float scale_factor); + +/** + * @brief Create query parameters union + * @param index_type Index type + * @return ZVecQueryParamsUnion* Pointer to the newly created query parameters + * union + */ +ZVEC_EXPORT ZVecQueryParamsUnion *ZVEC_CALL +zvec_query_params_union_create(ZVecIndexType index_type); + + +/** + * @brief Destroy base query parameters + * @param params Base query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_query_params_destroy(ZVecQueryParams *params); + +/** + * @brief Destroy HNSW query parameters + * @param params HNSW query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_hnsw_destroy(ZVecHnswQueryParams *params); + +/** + * @brief Destroy IVF query parameters + * @param params IVF query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_ivf_destroy(ZVecIVFQueryParams *params); + +/** + * @brief Destroy Flat query parameters + * @param params Flat query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_flat_destroy(ZVecFlatQueryParams *params); + +/** + * @brief Destroy query parameters union + * @param params Query parameters union pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_query_params_union_destroy(ZVecQueryParamsUnion *params); + +/** + * @brief Set query parameters index type + * @param params Query parameters pointer + * @param index_type Index type + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_index_type( + ZVecQueryParams 
*params, ZVecIndexType index_type); + +/** + * @brief Set search radius for query parameters + * @param params Query parameters pointer + * @param radius Search radius + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_radius(ZVecQueryParams *params, float radius); + +/** + * @brief Set whether to use linear search for query parameters + * @param params Query parameters pointer + * @param is_linear Whether to use linear search + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_set_is_linear(ZVecQueryParams *params, bool is_linear); + +/** + * @brief Set whether to use refiner for query parameters + * @param params Query parameters pointer + * @param is_using_refiner Whether to use refiner + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_set_is_using_refiner( + ZVecQueryParams *params, bool is_using_refiner); + +/** + * @brief Set exploration factor for HNSW query parameters + * @param params HNSW query parameters pointer + * @param ef Exploration factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_hnsw_set_ef(ZVecHnswQueryParams *params, int ef); + +/** + * @brief Set number of probe clusters for IVF query parameters + * @param params IVF query parameters pointer + * @param nprobe Number of probe clusters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_query_params_ivf_set_nprobe(ZVecIVFQueryParams *params, int nprobe); + +/** + * @brief Set scale factor for IVF query parameters + * @param params IVF query parameters pointer (not a ZVecFlatQueryParams) + * @param scale_factor Scale factor + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_query_params_ivf_set_scale_factor( + ZVecIVFQueryParams *params, float scale_factor); + +/** + * @brief Collection options structure + */ +typedef struct { + bool enable_mmap; /**< Whether to enable memory 
mapping */ + size_t max_buffer_size; /**< Maximum buffer size */ + bool read_only; /**< Whether read-only mode */ + uint64_t max_doc_count_per_segment; /**< Maximum document count per segment */ +} ZVecCollectionOptions; + + +/** + * @brief Collection statistics structure + */ +typedef struct { + uint64_t doc_count; /**< Total document count */ + ZVecString **index_names; /**< Index name array */ + float *index_completeness; /**< Index completeness array */ + size_t index_count; /**< Index name count */ +} ZVecCollectionStats; + + +/** + * @brief Create field schema + * @param name Field name + * @param data_type Data type + * @param nullable Whether nullable + * @param dimension Vector dimension + * @return ZVecFieldSchema* Pointer to the newly created field schema + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL +zvec_field_schema_create(const char *name, ZVecDataType data_type, + bool nullable, uint32_t dimension); + +/** + * @brief Destroy field schema + * @param schema Field schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_field_schema_destroy(ZVecFieldSchema *schema); + +/** + * @brief Set index parameters for field + * @param schema Field schema pointer + * @param index_params Index parameters pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_field_schema_set_index_params( + ZVecFieldSchema *schema, const ZVecIndexParams *index_params); + + +/** + * @brief Set inverted index parameters for field schema + * @param field_schema Field schema pointer + * @param invert_params Inverted index parameters pointer + */ +void zvec_field_schema_set_invert_index( + ZVecFieldSchema *field_schema, const ZVecInvertIndexParams *invert_params); + +/** + * @brief Set HNSW index parameters for field schema + * @param field_schema Field schema pointer + * @param hnsw_params HNSW index parameters pointer + */ +void zvec_field_schema_set_hnsw_index(ZVecFieldSchema *field_schema, + const ZVecHnswIndexParams *hnsw_params); + +/** + * @brief 
Set Flat index parameters for field schema + * @param field_schema Field schema pointer + * @param flat_params Flat index parameters pointer + */ +void zvec_field_schema_set_flat_index(ZVecFieldSchema *field_schema, + const ZVecFlatIndexParams *flat_params); + +/** + * @brief Set IVF index parameters for field schema + * @param field_schema Field schema pointer + * @param ivf_params IVF index parameters pointer + */ +void zvec_field_schema_set_ivf_index(ZVecFieldSchema *field_schema, + const ZVecIVFIndexParams *ivf_params); + + +// ============================================================================= +// Collection Schema Structures +// ============================================================================= + +/** + * @brief Collection schema structure + */ +typedef struct { + ZVecString *name; /**< Collection name */ + ZVecFieldSchema **fields; /**< Field array */ + size_t field_count; /**< Field count */ + size_t field_capacity; /**< Field array capacity */ + uint64_t max_doc_count_per_segment; /**< Maximum document count per segment */ +} ZVecCollectionSchema; + +/** + * @brief Create collection schema + * @param name Collection name + * @return ZVecCollectionSchema* Pointer to the newly created collection schema + */ +ZVEC_EXPORT ZVecCollectionSchema *ZVEC_CALL +zvec_collection_schema_create(const char *name); + +/** + * @brief Destroy collection schema + * @param schema Collection schema pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_schema_destroy(ZVecCollectionSchema *schema); + +/** + * @brief Add field to collection schema + * @param schema Collection schema pointer + * @param field Field schema pointer (function takes ownership) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_field( + ZVecCollectionSchema *schema, ZVecFieldSchema *field); + +/** + * @brief Add multiple fields to collection schema at once + * + * @param schema Collection schema pointer + * @param fields 
Array of fields to add + * @param field_count Number of fields to add + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_add_fields( + ZVecCollectionSchema *schema, const ZVecFieldSchema *fields, + size_t field_count); + +/** + * @brief Remove field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_remove_field( + ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Remove multiple fields from collection schema at once + * + * @param schema Collection schema pointer + * @param field_names Array of field names to remove + * @param field_count Number of fields to remove + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_remove_fields( + ZVecCollectionSchema *schema, const char *const *field_names, + size_t field_count); + +/** + * @brief Get field count + * + * @param schema Collection schema pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL +zvec_collection_schema_get_field_count(const ZVecCollectionSchema *schema); + +/** + * @brief Find field + * @param schema Collection schema pointer + * @param field_name Field name + * @return ZVecFieldSchema* Field schema pointer, returns NULL if not found + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_find_field( + const ZVecCollectionSchema *schema, const char *field_name); + +/** + * @brief Validate collection schema + * @param schema Collection schema pointer + * @param[out] error_msg Error message (needs to be freed by calling + * zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_schema_validate( + const ZVecCollectionSchema *schema, ZVecString **error_msg); + + +/** + * @brief Get field by index + * @param schema Collection schema pointer + * @param index Field index + 
* @return ZVecFieldSchema* Field schema pointer + */ +ZVEC_EXPORT ZVecFieldSchema *ZVEC_CALL zvec_collection_schema_get_field( + const ZVecCollectionSchema *schema, size_t index); + +/** + * @brief Set maximum document count per segment + * @param schema Collection schema pointer + * @param max_doc_count Maximum document count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_schema_set_max_doc_count_per_segment( + ZVecCollectionSchema *schema, uint64_t max_doc_count); + +/** + * @brief Get maximum document count per segment of collection schema + * + * @param schema Collection schema pointer + * @return uint64_t Maximum document count per segment + */ +ZVEC_EXPORT uint64_t ZVEC_CALL +zvec_collection_schema_get_max_doc_count_per_segment( + const ZVecCollectionSchema *schema); + + +// ============================================================================= +// Collection Management Functions +// ============================================================================= + +/** + * @brief Create and open collection + * @param path Collection path + * @param schema Collection schema pointer + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_and_open( + const char *path, const ZVecCollectionSchema *schema, + const ZVecCollectionOptions *options, ZVecCollection **collection); + + +/** + * @brief Open existing collection + * @param path Collection path + * @param options Collection options pointer (NULL uses default options) + * @param[out] collection Returned collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_open(const char *path, const ZVecCollectionOptions *options, + ZVecCollection **collection); + + +/** + * @brief Close collection + * @param collection Collection handle + * 
@return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_close(ZVecCollection *collection); + + +/** + * @brief Destroy collection + * + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_destroy(ZVecCollection *collection); + +/** + * @brief Flush collection data to disk + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_flush(ZVecCollection *collection); + +/** + * @brief Get collection path + * @param collection Collection handle + * @param[out] path Returned path string (needs to be freed by calling + * zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_get_path(const ZVecCollection *collection, ZVecString **path); + + +/** + * @brief Get collection name + * @param collection Collection handle + * @param[out] name Returned collection name (needs to be freed by calling + * zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_get_name(const ZVecCollection *collection, ZVecString **name); + +/** + * @brief Get collection schema + * @param collection Collection handle + * @param[out] schema + * Returned collection schema pointer (needs to be freed by calling + * zvec_collection_schema_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_schema( + const ZVecCollection *collection, ZVecCollectionSchema **schema); + + +/** + * @brief Initialize default collection options + * @param options Collection options structure pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_options_init_default(ZVecCollectionOptions *options); + +/** + * @brief Get collection options + * @param collection Collection handle + * @param[out] options + * Returned collection options pointer (needs to be freed by 
calling + * zvec_collection_options_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_options( + const ZVecCollection *collection, ZVecCollectionOptions **options); + +/** + * @brief Get collection statistics + * @param collection Collection handle + * @param[out] stats + * Returned statistics pointer (needs to be freed by calling + * zvec_collection_stats_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_stats( + const ZVecCollection *collection, ZVecCollectionStats **stats); + +/** + * @brief Destroy collection statistics + * @param stats Statistics pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_collection_stats_destroy(ZVecCollectionStats *stats); + + +/** + * @brief Free field schema array memory + * + * @param array Field schema array pointer + * @param count Array element count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_field_schema_array(ZVecFieldSchema **array, + size_t count); + +/** + * @brief Check if collection has specified field + * @param collection Collection handle + * @param field_name Field name + * @param[out] exists Whether exists + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_has_field( + const ZVecCollection *collection, const char *field_name, bool *exists); + +/** + * @brief Get field information + * @param collection Collection handle + * @param field_name Field name + * @param[out] field_schema + * Returned field schema pointer (needs to be freed by calling + * zvec_field_schema_destroy) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_field_info( + const ZVecCollection *collection, const char *field_name, + ZVecFieldSchema **field_schema); + +/** + * @brief Free field schema memory + * + * @param field_schema Field schema pointer to be freed + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_free_field_schema(ZVecFieldSchema 
*field_schema); + + +// ============================================================================= +// Index Management Interface +// ============================================================================= + +/** + * @brief Create index + * + * @param collection Collection handle + * @param column_name Column name + * @param index_params Index parameters + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index( + ZVecCollection *collection, const char *column_name, + const ZVecIndexParams *index_params); + +/** + * @brief Create index for collection field (using specific type parameters) + * @param collection Collection handle + * @param field_name Field name + * @param index_params Index parameters (select appropriate structure based on + * index type) + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_index_with_params( + ZVecCollection *collection, const ZVecString *field_name, + const void + *index_params); // Determine specific type based on index_type field + +/** + * @brief Create HNSW index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param hnsw_params HNSW index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_hnsw_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecHnswIndexParams *hnsw_params); + +/** + * @brief Create Flat index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param flat_params Flat index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_flat_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecFlatIndexParams *flat_params); + +/** + * @brief Create IVF index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param ivf_params IVF index 
parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_ivf_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecIVFIndexParams *ivf_params); + +/** + * @brief Create scalar index for collection field + * @param collection Collection handle + * @param field_name Field name + * @param invert_params Scalar index parameters + * @return Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_create_invert_index( + ZVecCollection *collection, const ZVecString *field_name, + const ZVecInvertIndexParams *invert_params); + +/** + * @brief Drop index + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_drop_index(ZVecCollection *collection, const char *field_name); + +/** + * @brief Optimize collection (rebuild indexes, merge segments, etc.) + * @param collection Collection handle + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_optimize(ZVecCollection *collection); + +/** + * @brief Get index statistics + * @param collection Collection handle + * @param field_name Field name + * @param[out] completeness Index completeness (0.0-1.0) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_get_index_stats(const ZVecCollection *collection, + const char *field_name, float *completeness); + + +/* NOTE(review): orphaned doc, no declaration follows it - add one or drop it. + * Compact collection (reclaim space) + * param collection: Collection handle + * return: ZVecErrorCode Error code */ + +/** + * @brief Get detailed information of the last error + * @param[out] error_details Pointer to error details structure + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_get_last_error_details(ZVecErrorDetails *error_details); + +/** + * @brief Clear error status + */ +ZVEC_EXPORT void ZVEC_CALL zvec_clear_error(void); + + +// 
============================================================================= +// Field Management Interface (DDL) +// ============================================================================= + +/** + * @brief Add field + * @param collection Collection handle + * @param field_schema Field schema pointer + * @param default_expression Default value expression (can be NULL) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_add_field( + ZVecCollection *collection, const ZVecFieldSchema *field_schema, + const char *default_expression); + +/** + * @brief Drop field + * @param collection Collection handle + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_collection_drop_field(ZVecCollection *collection, const char *field_name); + +/** + * @brief Alter field + * @param collection Collection handle + * @param old_name Original field name + * @param new_name New field name (can be NULL to indicate no renaming) + * @param new_schema New field schema (can be NULL to indicate no schema + * modification) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_alter_field( + ZVecCollection *collection, const char *old_name, const char *new_name, + const ZVecFieldSchema *new_schema); + + +/** + * @brief Document structure (opaque pointer mode) + * Internal implementation details are not visible to the outside, and + * operations are performed through API functions + */ +typedef struct ZVecDoc ZVecDoc; + +// ============================================================================= +// Data Manipulation Interface (DML) +// ============================================================================= + +/** + * @brief Insert documents into collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully inserted documents 
+ * @param[out] error_count Number of failed insertions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_insert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Update documents in collection + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successfully updated documents + * @param[out] error_count Number of failed updates + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_update( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Insert or update documents in collection (upsert operation) + * @param collection Collection handle + * @param docs Document array + * @param doc_count Document count + * @param[out] success_count Number of successful operations + * @param[out] error_count Number of failed operations + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_upsert( + ZVecCollection *collection, const ZVecDoc **docs, size_t doc_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Delete documents from collection + * @param collection Collection handle + * @param pks Primary key array + * @param pk_count Primary key count + * @param[out] success_count Number of successfully deleted documents + * @param[out] error_count Number of failed deletions + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete( + ZVecCollection *collection, const char *const *pks, size_t pk_count, + size_t *success_count, size_t *error_count); + +/** + * @brief Delete documents by filter condition + * @param collection Collection handle + * @param filter Filter expression + * @note No deleted-document count is returned (there is no output parameter) 
* @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_delete_by_filter( + ZVecCollection *collection, const char *filter); + +// ============================================================================= +// Data Query Interface (DQL) +// ============================================================================= + +/** + * @brief Vector similarity search + * @param collection Collection handle + * @param query Query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query( + const ZVecCollection *collection, const ZVecVectorQuery *query, + ZVecDoc ***results, size_t *result_count); + +/** + * @brief Grouped vector similarity search + * @param collection Collection handle + * @param query Grouped query parameters pointer + * @param[out] results Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] group_by_values Returned group by field values array (needs to be + * freed by calling zvec_free_string_array) + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_group( + const ZVecCollection *collection, const ZVecGroupByVectorQuery *query, + ZVecDoc ***results, ZVecString ***group_by_values, size_t *result_count); + +/** + * @brief Get documents by primary keys + * @param collection Collection handle + * @param primary_keys Primary key array + * @param count Number of primary keys + * @param[out] documents Returned document array (needs to be freed by calling + * zvec_docs_free) + * @param[out] found_count Number of found documents + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_get_by_primary_keys( + ZVecCollection 
*collection, const char *const *primary_keys, size_t count, + ZVecDoc ***documents, size_t *found_count); + +/** + * @brief Query documents by filter condition + * @param collection Collection handle + * @param filter_expression Filter expression + * @param limit Result limit + * @param offset Offset + * @param[out] documents Returned document array + * @param[out] result_count Number of returned results + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_collection_query_by_filter( + const ZVecCollection *collection, const char *filter_expression, + size_t limit, size_t offset, ZVecDoc ***documents, size_t *result_count); + +// ============================================================================= +// Document Related Structures +// ============================================================================= + +/** + * @brief Document field value union + */ +typedef union { + bool bool_value; + int32_t int32_value; + int64_t int64_value; + uint32_t uint32_value; + uint64_t uint64_value; + float float_value; + double double_value; + ZVecString string_value; + ZVecFloatArray vector_value; + ZVecByteArray binary_value; /**< Binary data value */ +} ZVecFieldValue; + +/** + * @brief Document field structure + */ +typedef struct { + ZVecString name; ///< Field name + ZVecDataType data_type; ///< Data type + ZVecFieldValue value; ///< Field value +} ZVecDocField; + +/** + * @brief Document operator enumeration + */ +typedef enum { + ZVEC_DOC_OP_INSERT = 0, ///< Insert operation + ZVEC_DOC_OP_UPDATE = 1, ///< Update operation + ZVEC_DOC_OP_UPSERT = 2, ///< Insert or update operation + ZVEC_DOC_OP_DELETE = 3 ///< Delete operation +} ZVecDocOperator; + + +// ============================================================================= +// Data Manipulation Interface (DML) +// ============================================================================= + +/** + * @brief Create a new document object + * + * @return ZVecDoc* Pointer to 
the newly created document object, returns NULL + * on failure + */ +ZVEC_EXPORT ZVecDoc *ZVEC_CALL zvec_doc_create(void); + +/** + * @brief Destroy the document object and release all resources + * + * @param doc Pointer to the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_destroy(ZVecDoc *doc); + +/** + * @brief Clear the document object + * + * @param doc Pointer to the document object + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_clear(ZVecDoc *doc); + +/** + * @brief Add field to document by value + * + * @param doc Document object pointer + * @param field_name Field name + * @param data_type Data type + * @param value Value pointer + * @param value_size Value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_add_field_by_value( + ZVecDoc *doc, const char *field_name, ZVecDataType data_type, + const void *value, size_t value_size); + +/** + * @brief Add field to document by structure + * + * @param doc Document object pointer + * @param field Field structure pointer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_add_field_by_struct(ZVecDoc *doc, const ZVecDocField *field); + +/** + * @brief Remove field from document + * + * @param doc Document structure pointer + * @param field_name Field name + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_remove_field(ZVecDoc *doc, const char *field_name); + + +/** + * @brief Batch release document array + * + * @param documents Document pointer array + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **documents, size_t count); + +/** + * @brief Set document primary key + * + * @param doc Pointer to the document structure + * @param pk Primary key string + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_pk(ZVecDoc *doc, const char *pk); + +/** + * @brief Set document ID + * + * @param doc Document structure pointer + * @param doc_id Document ID + */ 
+ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_doc_id(ZVecDoc *doc, uint64_t doc_id); + +/** + * @brief Set document score + * + * @param doc Document structure pointer + * @param score Score value + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_score(ZVecDoc *doc, float score); + +/** + * @brief Set document operator + * + * @param doc Document structure pointer + * @param op Operator + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_set_operator(ZVecDoc *doc, + ZVecDocOperator op); + +/** + * @brief Get document ID + * + * @param doc Document structure pointer + * @return uint64_t Document ID + */ +ZVEC_EXPORT uint64_t ZVEC_CALL zvec_doc_get_doc_id(const ZVecDoc *doc); + +/** + * @brief Get document score + * + * @param doc Document structure pointer + * @return float Score value + */ +ZVEC_EXPORT float ZVEC_CALL zvec_doc_get_score(const ZVecDoc *doc); + +/** + * @brief Get document operator + * + * @param doc Document structure pointer + * @return ZVecDocOperator Operator + */ +ZVEC_EXPORT ZVecDocOperator ZVEC_CALL zvec_doc_get_operator(const ZVecDoc *doc); + +/** + * @brief Get document field count + * + * @param doc Document structure pointer + * @return size_t Field count + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_get_field_count(const ZVecDoc *doc); + + +/** + * @brief Get document primary key pointer (no copy) + * + * @param doc Document object pointer + * @return const char* Primary key string pointer, returns NULL if not set + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_pointer(const ZVecDoc *doc); + +/** + * @brief Get document primary key copy (needs manual release) + * + * @param doc Document object pointer + * @return const char* Primary key string copy, needs to call free() to release, + * returns NULL if not set + */ +ZVEC_EXPORT const char *ZVEC_CALL zvec_doc_get_pk_copy(const ZVecDoc *doc); + +/** + * @brief Get field value (basic type returned directly) + * + * Supports basic numeric data types: BOOL, INT32, INT64, UINT32, UINT64, + * FLOAT, DOUBLE. 
The value is copied directly into the provided buffer. + * For STRING, BINARY, and VECTOR types, use zvec_doc_get_field_value_copy + * or zvec_doc_get_field_value_pointer instead. + * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type (must be a basic numeric type) + * @param value_buffer Output buffer to receive the value + * @param buffer_size Size of the output buffer + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_basic( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void *value_buffer, size_t buffer_size); + +/** + * @brief Get field value copy (allocate new memory) + * + * Supports all data types including: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING, BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Sparse vector types: SPARSE_VECTOR_FP32, SPARSE_VECTOR_FP16 + * - Array types: ARRAY_STRING, ARRAY_BINARY, ARRAY_BOOL, ARRAY_INT32, + * ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned value pointer must be manually freed using appropriate + * deallocation functions (free() for basic types and strings, + * zvec_free_uint8_array() for binary data). 
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (needs manual release) + * @param[out] value_size Returned value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_copy( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + void **value, size_t *value_size); + +/** + * @brief Get field value pointer (data remains in document) + * + * Supports data types where direct pointer access is safe: + * - Basic types: BOOL, INT32, INT64, UINT32, UINT64, FLOAT, DOUBLE + * - String types: STRING (returns null-terminated C string), BINARY + * - Vector types: VECTOR_FP32, VECTOR_FP64, VECTOR_FP16, VECTOR_INT4, + * VECTOR_INT8, VECTOR_INT16, VECTOR_BINARY32, VECTOR_BINARY64 + * - Array types: ARRAY_INT32, ARRAY_INT64, ARRAY_UINT32, ARRAY_UINT64, + * ARRAY_FLOAT, ARRAY_DOUBLE + * + * The returned pointer points to data within the document object and + * does not require manual memory management. The pointer remains valid + * as long as the document exists. 
+ * + * @param doc Document object pointer + * @param field_name Field name + * @param field_type Field type + * @param[out] value Returned value pointer (points to document-internal data) + * @param[out] value_size Returned value size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_value_pointer( + const ZVecDoc *doc, const char *field_name, ZVecDataType field_type, + const void **value, size_t *value_size); + +/** + * @brief Check if document is empty + * + * @param doc Document object pointer + * @return bool Returns true if document is empty, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_empty(const ZVecDoc *doc); + +/** + * @brief Check if document contains specified field + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field exists, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field has value + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field has value, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_has_field_value(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Check if document field is null + * + * @param doc Document object pointer + * @param field_name Field name + * @return bool Returns true if field is null, otherwise returns false + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_doc_is_field_null(const ZVecDoc *doc, + const char *field_name); + +/** + * @brief Get all field names of document + * + * @param doc Document object pointer + * @param[out] field_names + * Returned field name array (needs to call zvec_free_str_array to release) + * @param[out] count Returned field count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_get_field_names( + const ZVecDoc *doc, char 
***field_names, size_t *count); + +/** + * @brief Release string array memory + * + * @param array String array pointer + * @param count Array element count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_str_array(char **array, size_t count); + +/** + * @brief Serialize document + * + * @param doc Document object pointer + * @param[out] data Returned serialized data (needs to call + * zvec_free_uint8_array to release) + * @param[out] size Returned data size + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_serialize(const ZVecDoc *doc, + uint8_t **data, + size_t *size); + +/** + * @brief Deserialize document + * + * @param data Serialized data + * @param size Data size + * @param[out] doc Returned document object pointer (needs to call + * zvec_doc_destroy to release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_doc_deserialize(const uint8_t *data, + size_t size, + ZVecDoc **doc); + +/** + * @brief Merge two documents + * + * @param doc Target document object pointer + * @param other Source document object pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_doc_merge(ZVecDoc *doc, const ZVecDoc *other); + +/** + * @brief Get document memory usage + * + * @param doc Document object pointer + * @return size_t Memory usage (bytes) + */ +ZVEC_EXPORT size_t ZVEC_CALL zvec_doc_memory_usage(const ZVecDoc *doc); + +/** + * @brief Validate document against Schema + * + * @param doc Document object pointer + * @param schema Schema object pointer + * @param is_update Whether it's an update operation + * @param[out] error_msg Error message (needs manual release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_validate(const ZVecDoc *doc, const ZVecCollectionSchema *schema, + bool is_update, char **error_msg); + +/** + * @brief Get detailed string representation of document + * + * @param doc Document object pointer + * @param[out] detail_str Returned detailed string 
(needs manual release) + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL +zvec_doc_to_detail_string(const ZVecDoc *doc, char **detail_str); + +/** + * @brief Free docs array memory + * @param docs Document array pointer + * @param count Document count + */ +ZVEC_EXPORT void ZVEC_CALL zvec_docs_free(ZVecDoc **docs, size_t count); + + +// ============================================================================= +// Query Parameter Constructor Functions +// ============================================================================= + +/** + * @brief Create vector query parameters + * @param field_name Query field name + * @param query_data Query vector data + * @param query_length Query vector length + * @param top_k Number of results to return + * @return ZVecVectorQuery* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecVectorQuery *ZVEC_CALL +zvec_vector_query_create(const char *field_name, const float *query_data, + size_t query_length, int top_k); + +/** + * @brief Destroy vector query parameters + * @param query Query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_vector_query_destroy(ZVecVectorQuery *query); + +/** + * @brief Set query filter condition + * @param query Query parameters pointer + * @param filter_expression Filter expression + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_filter( + ZVecVectorQuery *query, const char *filter_expression); + +/** + * @brief Set output fields + * @param query Query parameters pointer + * @param field_names Field name array + * @param count Field count + * @return ZVecErrorCode Error code + */ +ZVEC_EXPORT ZVecErrorCode ZVEC_CALL zvec_vector_query_set_output_fields( + ZVecVectorQuery *query, const char *const *field_names, size_t count); + +/** + * @brief Set timeout + * @param query Query parameters pointer + * @param timeout_ms Timeout in milliseconds + */ +ZVEC_EXPORT void ZVEC_CALL 
zvec_vector_query_set_timeout(ZVecVectorQuery *query, + int timeout_ms); + +/** + * @brief Create grouped vector query parameters + * @param field_name Query field name + * @param query_data Query vector data + * @param query_length Query vector length + * @param group_by_field Group by field name + * @param group_count Number of groups + * @param group_top_k Number of results to return per group + * @return ZVecGroupByVectorQuery* Pointer to the newly created query parameters + */ +ZVEC_EXPORT ZVecGroupByVectorQuery *ZVEC_CALL zvec_grouped_vector_query_create( + const char *field_name, const float *query_data, size_t query_length, + const char *group_by_field, uint32_t group_count, uint32_t group_top_k); + +/** + * @brief Destroy grouped vector query parameters + * @param query Query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL +zvec_grouped_vector_query_destroy(ZVecGroupByVectorQuery *query); + + +// ============================================================================= +// Utility Functions +// ============================================================================= + +/** + * @brief Convert error code to description string + * @param error_code Error code + * @return const char* Error description string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_error_code_to_string(ZVecErrorCode error_code); + +/** + * @brief Convert data type to string + * @param data_type Data type + * @return const char* Data type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_data_type_to_string(ZVecDataType data_type); + +/** + * @brief Convert index type to string + * @param index_type Index type + * @return const char* Index type string + */ +ZVEC_EXPORT const char *ZVEC_CALL +zvec_index_type_to_string(ZVecIndexType index_type); + +/** + * @brief Convert metric type to string + * @param metric_type Metric type + * @return const char* Metric type string + */ +const char *zvec_metric_type_to_string(ZVecMetricType metric_type); + +/** + * @brief Get system 
information + * @param[out] info_json System information JSON string (needs to be freed by + * calling zvec_free_string) + * @return ZVecErrorCode Error code + */ +ZVecErrorCode zvec_get_system_info(ZVecString **info_json); + +// ============================================================================= +// Memory Management Interface +// ============================================================================= + +/** + * @brief Allocate memory + * @param size Number of bytes to allocate + * @return void* Allocated memory pointer, returns NULL on failure + */ +ZVEC_EXPORT void *ZVEC_CALL zvec_malloc(size_t size); + +/** + * @brief Reallocate memory + * @param ptr Original memory pointer + * @param size New number of bytes + * @return void* Reallocation memory pointer, returns NULL on failure + */ +ZVEC_EXPORT void *ZVEC_CALL zvec_realloc(void *ptr, size_t size); + +/** + * @brief Free memory + * @param ptr Memory pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free(void *ptr); + +/** + * @brief Free string memory + * @param str String pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_string(ZVecString *str); + +/** + * @brief Free string array memory + * @param array String array pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_string_array(ZVecStringArray *array); + +/** + * @brief Free byte array memory + * @param array Byte array pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_byte_array(ZVecMutableByteArray *array); + +/** + * @brief Free string memory + * @param str String pointer to free + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_str(char *str); + +/** + * @brief Release uint8_t array memory + * + * @param array uint8_t array pointer + */ +ZVEC_EXPORT void ZVEC_CALL zvec_free_uint8_array(uint8_t *array); + + +// ============================================================================= +// Helper Functions +// ============================================================================= + +/** + * @brief 
Simplified HNSW index parameters initialization macro + * @param metric Distance metric type + * @param m_ Connectivity parameter + * @param ef_construction Exploration factor during construction + * @param ef_search Exploration factor during search + * @param quant Quantization type + * + * Usage example: + * ZVecHnswIndexParams params = ZVEC_HNSW_PARAMS(ZVEC_METRIC_TYPE_COSINE, 16, + * 200, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + */ +#define ZVEC_HNSW_PARAMS(metric, m_, ef_construction, ef_search, quant) \ + (ZVecHnswIndexParams) { \ + .base.base.index_type = ZVEC_INDEX_TYPE_HNSW, .base.metric_type = metric, \ + .base.quantize_type = quant, .m = m_, .ef_construction = ef_construction, \ + .ef_search = ef_search \ + } + +/** + * @brief Simplified inverted index parameters initialization macro + * @param range_opt Whether to enable range optimization + * @param wildcard Whether to enable wildcard expansion + * + * Usage example: + * ZVecInvertIndexParams params = ZVEC_INVERT_PARAMS(true, false); + */ +#define ZVEC_INVERT_PARAMS(range_opt, wildcard) \ + (ZVecInvertIndexParams) { \ + .base.index_type = ZVEC_INDEX_TYPE_INVERT, \ + .enable_range_optimization = range_opt, \ + .enable_extended_wildcard = wildcard \ + } + +/** + * @brief Simplified Flat index parameters initialization macro + * @param metric Distance metric type + * @param quant Quantization type + */ +#define ZVEC_FLAT_PARAMS(metric, quant) \ + (ZVecFlatIndexParams) { \ + .base.index_type = ZVEC_INDEX_TYPE_FLAT, .base.metric_type = metric, \ + .base.quantize_type = quant \ + } + +/** + * @brief Simplified IVF index parameters initialization macro + * @param metric Distance metric type + * @param nlist Number of cluster centers + * @param niters Number of iterations + * @param soar Whether to use SOAR algorithm + * @param nprobe Number of clusters to probe during search + * @param quant Quantization type + */ +#define ZVEC_IVF_PARAMS(metric, nlist, niters, soar, nprobe, quant) \ + (ZVecIVFIndexParams) { \ + 
.base.index_type = ZVEC_INDEX_TYPE_IVF, .base.metric_type = metric, \ + .base.quantize_type = quant, .n_list = nlist, .n_iters = niters, \ + .use_soar = soar, .n_probe = nprobe \ + } + +/** + * @brief Simplified string view initialization macro + * @param str String content + * + * Usage example: + * ZVecStringView name = ZVEC_STRING_VIEW("my_collection"); + */ +#define ZVEC_STRING_VIEW(str) \ + (ZVecStringView) { \ + .data = str, .length = strlen(str) \ + } + +// Has been replaced by the new ZVEC_STRING_VIEW macro + +/** + * @brief Simplified float array initialization macro + * @param data_ptr Float array pointer + * @param len Array length + * + * Usage example: + * float vectors[] = {0.1f, 0.2f, 0.3f}; + * ZVecFloatArray vec_array = ZVEC_FLOAT_ARRAY(vectors, 3); + */ +#define ZVEC_FLOAT_ARRAY(data_ptr, len) \ + (ZVecFloatArray) { \ + .data = data_ptr, .length = len \ + } + +/** + * @brief Simplified integer array initialization macro + * @param data_ptr Integer array pointer + * @param len Array length + */ +#define ZVEC_INT64_ARRAY(data_ptr, len) \ + (ZVecInt64Array) { \ + .data = data_ptr, .length = len \ + } + + +/** + * @brief Simplified inverted index parameters initialization macro + * @param range_opt Whether to enable range optimization + * @param wildcard Whether to enable wildcard expansion + * + * Usage example: + * ZVecInvertIndexParams params = ZVEC_INVERT_PARAMS(true, false); + */ +#define ZVEC_INVERT_PARAMS(range_opt, wildcard) \ + (ZVecInvertIndexParams) { \ + .base.index_type = ZVEC_INDEX_TYPE_INVERT, \ + .enable_range_optimization = range_opt, \ + .enable_extended_wildcard = wildcard \ + } + + +/** + * @brief Simplified collection options initialization macro (using default + * values) + * + * Usage example: + * ZVecCollectionOptions opts = ZVEC_DEFAULT_OPTIONS(); + */ +#define ZVEC_DEFAULT_OPTIONS() \ + (ZVecCollectionOptions){.enable_mmap = true, \ + .max_buffer_size = 1048576, \ + .read_only = false, \ + .max_doc_count_per_segment = 
1000000} + +/** + * @brief Simplified vector query initialization macro + * @param field_name_str Query field name + * @param query_vec Query vector array + * @param top_k Number of results to return + * @param filter_str Filter condition string + * + * Usage example: + * ZVecVectorQuery query = ZVEC_VECTOR_QUERY("embedding", query_vectors, 10, + * ""); + */ +#define ZVEC_VECTOR_QUERY(field_name_str, query_vec, top_k, filter_str) \ + (ZVecVectorQuery){.field_name = ZVEC_STRING(field_name_str), \ + .query_vector = query_vec, \ + .topk = top_k, \ + .filter = ZVEC_STRING(filter_str), \ + .include_vector = 1, \ + .include_doc_id = 1} + +/** + * @brief Simplified document field initialization macro + * @param name_str Field name + * @param type Data type + * @param value_union Field value union + * + * Usage example: + * ZVecDocField field = ZVEC_DOC_FIELD("id", ZVEC_DATA_TYPE_STRING, + * {.string_value = ZVEC_STRING("doc1")}); + */ +#define ZVEC_DOC_FIELD(name_str, type, value_union) \ + (ZVecDocField) { \ + .name = ZVEC_STRING(name_str), .data_type = type, .value = value_union \ + } + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // ZVEC_C_API_H diff --git a/src/include/zvec/db/doc.h b/src/include/zvec/db/doc.h index 5f927fa1..e14886ca 100644 --- a/src/include/zvec/db/doc.h +++ b/src/include/zvec/db/doc.h @@ -68,6 +68,10 @@ class Doc { return pk_; } + const std::string &pk_ref() const { + return pk_; + } + void set_score(float score) { score_ = score; } @@ -103,6 +107,10 @@ class Doc { return op_; } + Operator get_operator() const { + return op_; + } + // Set field value template bool set(const std::string &field_name, T value) { @@ -232,6 +240,26 @@ class Doc { return std::nullopt; } + // Get field value as const reference, throws exception if field doesn't exist + // or type mismatches + template + const T &get_ref(const std::string &field_name) const { + auto it = fields_.find(field_name); + if (it == fields_.end()) { + throw std::runtime_error("Field '" + 
field_name + "' not found"); + } + + if (std::holds_alternative(it->second)) { + throw std::runtime_error("Field '" + field_name + "' is null"); + } + + try { + return std::get(it->second); + } catch (const std::bad_variant_access &) { + throw std::runtime_error("Field '" + field_name + "' type mismatch"); + } + } + void remove(const std::string &field_name) { fields_.erase(field_name); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 0d2b9532..0abd6e20 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,3 +4,4 @@ include(${CMAKE_SOURCE_DIR}/cmake/option.cmake) cc_directories(ailego) cc_directories(db) cc_directories(core) +cc_directories(c_api) \ No newline at end of file diff --git a/tests/c_api/CMakeLists.txt b/tests/c_api/CMakeLists.txt new file mode 100644 index 00000000..ad2f62e1 --- /dev/null +++ b/tests/c_api/CMakeLists.txt @@ -0,0 +1,28 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include(${CMAKE_SOURCE_DIR}/cmake/bazel.cmake) + +file(GLOB_RECURSE ALL_TEST_SRCS *_test.c) + +foreach(CC_SRCS ${ALL_TEST_SRCS}) + get_filename_component(CC_TARGET ${CC_SRCS} NAME_WE) + cc_gtest( + NAME ${CC_TARGET} + STRICT + LIBS zvec_c_api + SRCS ${CC_SRCS} utils.c + INCS . .. 
../../src + ) +endforeach() diff --git a/tests/c_api/c_api_test.c b/tests/c_api/c_api_test.c new file mode 100644 index 00000000..5abcb533 --- /dev/null +++ b/tests/c_api/c_api_test.c @@ -0,0 +1,2350 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "zvec/c_api.h" +#include +#include +#include +#include +#include +#include +#ifdef _POSIX_C_SOURCE +#include +#endif +#include +#include "utils.h" + +// ============================================================================= +// Test helper macro definitions +// ============================================================================= + +static int test_count = 0; +static int passed_count = 0; +static int current_test_passed = 1; // Track if current test function passes + +#define TEST_START() \ + do { \ + printf("Running test: %s\n", __func__); \ + test_count++; \ + current_test_passed = 1; \ + } while (0) + +#define TEST_ASSERT(condition) \ + do { \ + if (condition) { \ + printf(" ✓ PASS\n"); \ + } else { \ + printf(" ✗ FAIL at line %d\n", __LINE__); \ + current_test_passed = 0; \ + } \ + } while (0) + +#define TEST_END() \ + do { \ + if (current_test_passed) { \ + passed_count++; \ + } \ + } while (0) + +// ============================================================================= +// Helper functions tests +// ============================================================================= + +void test_version_functions(void) { + 
TEST_START(); + + // Test version retrieval functions + const char *version = zvec_get_version(); + TEST_ASSERT(version != NULL); + + // Test version component retrieval + int major = zvec_get_version_major(); + int minor = zvec_get_version_minor(); + int patch = zvec_get_version_patch(); + + TEST_ASSERT(major >= 0); + TEST_ASSERT(minor >= 0); + TEST_ASSERT(patch >= 0); + + TEST_ASSERT(zvec_check_version(major, minor, patch)); + + // Test version checking functions + bool compatible = zvec_check_version(0, 3, 0); + TEST_ASSERT(compatible == true); + + bool not_compatible = zvec_check_version(99, 99, 99); + TEST_ASSERT(not_compatible == false); + + TEST_END(); +} + +void test_error_handling_functions(void) { + TEST_START(); + + char *error_msg = NULL; + ZVecErrorCode err = zvec_get_last_error(&error_msg); + TEST_ASSERT(err == ZVEC_OK); + + if (error_msg) { + zvec_free_str(error_msg); + } + + // Test error clearing + zvec_clear_error(); + + // Test error details retrieval + ZVecErrorDetails error_details = {0}; + err = zvec_get_last_error_details(&error_details); + TEST_ASSERT(err == ZVEC_OK); + + TEST_END(); +} + +void test_zvec_config() { + TEST_START(); + + // Test 1: Console log config creation and destruction + ZVecConsoleLogConfig *console_config = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(console_config != NULL); + if (console_config) { + TEST_ASSERT(console_config->level == ZVEC_LOG_LEVEL_INFO); + zvec_config_console_log_destroy(console_config); + } + + // Test 2: File log config creation and destruction + ZVecFileLogConfig *file_config = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_WARN, "./logs", "test_log", 100, 7); + TEST_ASSERT(file_config != NULL); + if (file_config) { + TEST_ASSERT(file_config->level == ZVEC_LOG_LEVEL_WARN); + TEST_ASSERT(strcmp(file_config->dir.data, "./logs") == 0); + TEST_ASSERT(strcmp(file_config->basename.data, "test_log") == 0); + TEST_ASSERT(file_config->file_size == 100); + 
TEST_ASSERT(file_config->overdue_days == 7); + zvec_config_file_log_destroy(file_config); + } + + // Test 3: File log config edge cases + ZVecFileLogConfig *empty_file_config = + zvec_config_file_log_create(ZVEC_LOG_LEVEL_INFO, "", "", 0, 0); + TEST_ASSERT(empty_file_config != NULL); + if (empty_file_config) { + TEST_ASSERT(empty_file_config->level == ZVEC_LOG_LEVEL_INFO); + TEST_ASSERT(strcmp(empty_file_config->dir.data, "") == 0); + TEST_ASSERT(strcmp(empty_file_config->basename.data, "") == 0); + TEST_ASSERT(empty_file_config->file_size == 0); + TEST_ASSERT(empty_file_config->overdue_days == 0); + zvec_config_file_log_destroy(empty_file_config); + } + + // Test 4: Log config creation with console type + ZVecConsoleLogConfig *temp_console = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_ERROR); + ZVecLogConfig *log_config_console = + zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, temp_console); + TEST_ASSERT(log_config_console != NULL); + if (log_config_console) { + TEST_ASSERT(log_config_console->type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(log_config_console->config.console_config.level == + ZVEC_LOG_LEVEL_ERROR); + zvec_config_log_destroy(log_config_console); + } + if (temp_console) { + zvec_config_console_log_destroy(temp_console); + } + + // Test 5: Log config creation with file type + ZVecFileLogConfig *temp_file = zvec_config_file_log_create( + ZVEC_LOG_LEVEL_DEBUG, "./logs", "app", 50, 30); + ZVecLogConfig *log_config_file = + zvec_config_log_create(ZVEC_LOG_TYPE_FILE, temp_file); + TEST_ASSERT(log_config_file != NULL); + if (log_config_file) { + TEST_ASSERT(log_config_file->type == ZVEC_LOG_TYPE_FILE); + TEST_ASSERT(log_config_file->config.file_config.level == + ZVEC_LOG_LEVEL_DEBUG); + TEST_ASSERT( + strcmp(log_config_file->config.file_config.dir.data, "./logs") == 0); + TEST_ASSERT( + strcmp(log_config_file->config.file_config.basename.data, "app") == 0); + zvec_config_log_destroy(log_config_file); + } + if (temp_file) { + 
zvec_config_file_log_destroy(temp_file); + } + + // Test 6: Log config with NULL config data (should use defaults) + ZVecLogConfig *log_config_default = + zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, NULL); + TEST_ASSERT(log_config_default != NULL); + if (log_config_default) { + TEST_ASSERT(log_config_default->type == ZVEC_LOG_TYPE_CONSOLE); + TEST_ASSERT(log_config_default->config.console_config.level == + ZVEC_LOG_LEVEL_WARN); + zvec_config_log_destroy(log_config_default); + } + + // Test 7: Config data creation and basic operations + ZVecConfigData *config_data = zvec_config_data_create(); + TEST_ASSERT(config_data != NULL); + if (config_data) { + // Test initial values + TEST_ASSERT(config_data->log_config != NULL); + TEST_ASSERT(config_data->log_config->type == ZVEC_LOG_TYPE_CONSOLE); + + // Test memory limit setting + ZVecErrorCode err = + zvec_config_data_set_memory_limit(config_data, 1024 * 1024 * 1024); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->memory_limit_bytes == 1024 * 1024 * 1024); + + // Test thread count settings + err = zvec_config_data_set_query_thread_count(config_data, 8); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->query_thread_count == 8); + + err = zvec_config_data_set_optimize_thread_count(config_data, 4); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->optimize_thread_count == 4); + + // Test log config replacement + ZVecConsoleLogConfig *new_console = + zvec_config_console_log_create(ZVEC_LOG_LEVEL_DEBUG); + ZVecLogConfig *new_log_config = + zvec_config_log_create(ZVEC_LOG_TYPE_CONSOLE, new_console); + if (new_log_config) { + err = zvec_config_data_set_log_config(config_data, new_log_config); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(config_data->log_config == new_log_config); + } + + zvec_config_data_destroy(config_data); + if (new_console) zvec_config_console_log_destroy(new_console); + if (new_log_config) zvec_config_log_destroy(new_log_config); + } + + // Test 8: Edge cases and error 
conditions + // Test NULL pointer handling + ZVecErrorCode err = zvec_config_data_set_memory_limit(NULL, 1024); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_log_config(NULL, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_query_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_config_data_set_optimize_thread_count(NULL, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test boundary values + ZVecConfigData *boundary_config = zvec_config_data_create(); + if (boundary_config) { + // Test zero values + err = zvec_config_data_set_memory_limit(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->memory_limit_bytes == 0); + + // Test maximum values + err = zvec_config_data_set_memory_limit(boundary_config, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->memory_limit_bytes == UINT64_MAX); + + // Test zero thread counts + err = zvec_config_data_set_query_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->query_thread_count == 0); + + err = zvec_config_data_set_optimize_thread_count(boundary_config, 0); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(boundary_config->optimize_thread_count == 0); + + zvec_config_data_destroy(boundary_config); + } + + // Test 9: Memory leak prevention - double destroy safety + ZVecConfigData *double_destroy_test = zvec_config_data_create(); + if (double_destroy_test) { + zvec_config_data_destroy(double_destroy_test); + } + + TEST_END(); +} + +void test_zvec_initialize() { + TEST_START(); + + ZVecConfigData *config = zvec_config_data_create(); + TEST_ASSERT(config != NULL); + if (config) { + TEST_ASSERT(config->log_config != NULL); + TEST_ASSERT(config->log_config->type == ZVEC_LOG_TYPE_CONSOLE); + } + ZVecErrorCode err = zvec_initialize(config); + TEST_ASSERT(err == ZVEC_OK); + bool is_initialized = false; + 
zvec_is_initialized(&is_initialized); + TEST_ASSERT(is_initialized); + + TEST_END(); +} + +// ============================================================================= +// Schema-related tests +// ============================================================================= + +void test_schema_basic_operations(void) { + TEST_START(); + + // Test 1: Basic Schema creation and destruction + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + TEST_ASSERT(schema != NULL); + TEST_ASSERT(schema->name != NULL); + TEST_ASSERT(strcmp(schema->name->data, "demo") == 0); + TEST_ASSERT(schema->field_count == 0); + TEST_ASSERT(schema->fields == NULL); + TEST_ASSERT(schema->max_doc_count_per_segment > 0); + + // Test 2: Schema field count operations + size_t initial_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 0); + + // Test 3: Adding fields to schema + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecErrorCode err = zvec_collection_schema_add_field(schema, id_field); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_add == 1); + + // Test 4: Finding fields in schema + const ZVecFieldSchema *found_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(found_field != NULL); + TEST_ASSERT(strcmp(found_field->name->data, "id") == 0); + TEST_ASSERT(found_field->data_type == ZVEC_DATA_TYPE_INT64); + + // Test 5: Getting field by index + ZVecFieldSchema *indexed_field = zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(indexed_field != NULL); + TEST_ASSERT(strcmp(indexed_field->name->data, "id") == 0); + + // Test 6: Adding multiple fields + ZVecFieldSchema fields_to_add[2]; + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + ZVecFieldSchema *age_field = + zvec_field_schema_create("age", ZVEC_DATA_TYPE_INT32, 
true, 0); + + fields_to_add[0] = *name_field; + fields_to_add[1] = *age_field; + + err = zvec_collection_schema_add_fields(schema, fields_to_add, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_multi_add = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_multi_add == 3); + + // Test 7: Finding newly added fields + const ZVecFieldSchema *name_found = + zvec_collection_schema_find_field(schema, "name"); + TEST_ASSERT(name_found != NULL); + TEST_ASSERT(strcmp(name_found->name->data, "name") == 0); + + const ZVecFieldSchema *age_found = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(age_found != NULL); + TEST_ASSERT(strcmp(age_found->name->data, "age") == 0); + + // Test 8: Setting and getting max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 10000); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 10000); + + // Test 9: Schema validation + ZVecString *validation_error = NULL; + err = zvec_collection_schema_validate(schema, &validation_error); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(validation_error == NULL); + + // Test 10: Removing single field + err = zvec_collection_schema_remove_field(schema, "age"); + TEST_ASSERT(err == ZVEC_OK); + + size_t count_after_remove = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(count_after_remove == 2); + + const ZVecFieldSchema *removed_field = + zvec_collection_schema_find_field(schema, "age"); + TEST_ASSERT(removed_field == NULL); + + // Test 11: Removing multiple fields + const char *fields_to_remove[] = {"name", "id"}; + err = zvec_collection_schema_remove_fields(schema, fields_to_remove, 2); + TEST_ASSERT(err == ZVEC_OK); + + size_t final_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(final_count == 0); + + // Test 12: Schema cleanup + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + 
+void test_schema_edge_cases(void) { + TEST_START(); + + // Test 1: NULL parameter handling for schema creation + ZVecCollectionSchema *null_schema = zvec_collection_schema_create(NULL); + TEST_ASSERT(null_schema == NULL); + + // Test 2: Empty string schema name + ZVecCollectionSchema *empty_schema = zvec_collection_schema_create(""); + TEST_ASSERT(empty_schema != NULL); + TEST_ASSERT(empty_schema->name != NULL); + TEST_ASSERT(strcmp(empty_schema->name->data, "") == 0); + zvec_collection_schema_destroy(empty_schema); + + // Test 3: Very long schema name + char long_name[1024]; + memset(long_name, 'a', 1023); + long_name[1023] = '\0'; + ZVecCollectionSchema *long_schema = zvec_collection_schema_create(long_name); + TEST_ASSERT(long_schema != NULL); + TEST_ASSERT(long_schema->name != NULL); + TEST_ASSERT(strlen(long_schema->name->data) == 1023); + zvec_collection_schema_destroy(long_schema); + + // Test 4: NULL schema parameter handling for all functions + ZVecErrorCode err; + size_t count = zvec_collection_schema_get_field_count(NULL); + TEST_ASSERT(count == 0); + + const ZVecFieldSchema *null_field = + zvec_collection_schema_find_field(NULL, "test"); + TEST_ASSERT(null_field == NULL); + + ZVecFieldSchema *null_indexed_field = + zvec_collection_schema_get_field(NULL, 0); + TEST_ASSERT(null_indexed_field == NULL); + + uint64_t null_max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(NULL); + TEST_ASSERT(null_max_doc_count == 0); + + err = zvec_collection_schema_set_max_doc_count_per_segment(NULL, 1000); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + ZVecString *null_validation_error = NULL; + err = zvec_collection_schema_validate(NULL, &null_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(null_validation_error == NULL); + + err = zvec_collection_schema_add_field(NULL, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_collection_schema_add_fields(NULL, NULL, 0); + TEST_ASSERT(err == 
ZVEC_ERROR_INVALID_ARGUMENT); + + err = zvec_collection_schema_remove_field(NULL, "test"); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + const char *null_field_names[] = {NULL}; + err = zvec_collection_schema_remove_fields(NULL, null_field_names, 1); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 5: Working with valid schema for edge cases + ZVecCollectionSchema *schema = zvec_collection_schema_create("edge_test"); + TEST_ASSERT(schema != NULL); + + // Test 6: Adding NULL field to schema + err = zvec_collection_schema_add_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 7: Adding fields with NULL array + err = zvec_collection_schema_add_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 8: Adding zero fields + err = zvec_collection_schema_add_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 9: Finding field with NULL name + const ZVecFieldSchema *null_name_field = + zvec_collection_schema_find_field(schema, NULL); + TEST_ASSERT(null_name_field == NULL); + + // Test 10: Finding non-existent field + const ZVecFieldSchema *nonexistent_field = + zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent_field == NULL); + + // Test 11: Getting field with invalid index + ZVecFieldSchema *invalid_index_field = + zvec_collection_schema_get_field(schema, 1000); + TEST_ASSERT(invalid_index_field == NULL); + + // Test 12: Getting field from empty schema with index 0 + ZVecFieldSchema *zero_index_field = + zvec_collection_schema_get_field(schema, 0); + TEST_ASSERT(zero_index_field == NULL); + + // Test 13: Removing field with NULL name + err = zvec_collection_schema_remove_field(schema, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 14: Removing non-existent field + err = zvec_collection_schema_remove_field(schema, "nonexistent"); + TEST_ASSERT(err == ZVEC_ERROR_NOT_FOUND); + + // Test 15: Removing fields with NULL 
array + err = zvec_collection_schema_remove_fields(schema, NULL, 5); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 16: Removing zero fields + err = zvec_collection_schema_remove_fields(schema, NULL, 0); + TEST_ASSERT(err == ZVEC_OK); + + // Test 17: Setting extremely large max doc count + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + uint64_t retrieved_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(retrieved_max_count == UINT64_MAX); + + // Test 18: Setting zero max doc count + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + uint64_t zero_max_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(zero_max_count == 0); + + // Test 19: Schema validation with empty schema + ZVecString *empty_validation_error = NULL; + err = zvec_collection_schema_validate(schema, &empty_validation_error); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // Test 20: Add duplicate field names + ZVecFieldSchema *first_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_INT64, false, 0); + ZVecFieldSchema *second_id = + zvec_field_schema_create("duplicate_id", ZVEC_DATA_TYPE_STRING, false, 0); + + err = zvec_collection_schema_add_field(schema, first_id); + TEST_ASSERT(err == ZVEC_OK); + + err = zvec_collection_schema_add_field(schema, second_id); + TEST_ASSERT(err == ZVEC_ERROR_ALREADY_EXISTS); + zvec_field_schema_destroy(second_id); + + // Verify fields + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count == 1); + + // Test 21: Cleanup + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +void test_schema_field_operations(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Test field count + size_t initial_count = 
zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(initial_count == 5); + + // Test finding non-existent field + const ZVecFieldSchema *nonexistent = + zvec_collection_schema_find_field(schema, "nonexistent"); + TEST_ASSERT(nonexistent == NULL); + + // Test finding existing field + const ZVecFieldSchema *id_field = + zvec_collection_schema_find_field(schema, "id"); + TEST_ASSERT(id_field != NULL); + if (id_field) { + TEST_ASSERT(strcmp(id_field->name->data, "id") == 0); + TEST_ASSERT(id_field->data_type == ZVEC_DATA_TYPE_INT64); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_normal_schema_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(false, "test_normal", NULL, NULL, 1000); + TEST_ASSERT(schema != NULL); + + if (schema) { + TEST_ASSERT(strcmp(schema->name->data, "test_normal") == 0); + + // Verify field count + size_t field_count = zvec_collection_schema_get_field_count(schema); + TEST_ASSERT(field_count > 0); + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_schema_with_indexes(void) { + TEST_START(); + + // Test Schema with scalar index + ZVecCollectionSchema *scalar_index_schema = + zvec_test_create_schema_with_scalar_index(true, true, + "scalar_index_test"); + TEST_ASSERT(scalar_index_schema != NULL); + if (scalar_index_schema) { + zvec_collection_schema_destroy(scalar_index_schema); + } + + // Test Schema with vector index + ZVecCollectionSchema *vector_index_schema = + zvec_test_create_schema_with_vector_index(false, "vector_index_test", + NULL); + TEST_ASSERT(vector_index_schema != NULL); + if (vector_index_schema) { + zvec_collection_schema_destroy(vector_index_schema); + } + + TEST_END(); +} + +void test_schema_max_doc_count(void) { + TEST_START(); + + // Test 1: Setting max doc count to a valid value + ZVecCollectionSchema *schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + 
ZVecErrorCode err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, 1000); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t max_doc_count = + zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 1000); + + zvec_collection_schema_destroy(schema); + + // Test 2: Setting max doc count to zero + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = zvec_collection_schema_set_max_doc_count_per_segment(schema, 0); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == 0); + + zvec_collection_schema_destroy(schema); + + // Test 3: Setting max doc count to maximum value + schema = zvec_collection_schema_create("max_doc_test"); + TEST_ASSERT(schema != NULL); + + err = + zvec_collection_schema_set_max_doc_count_per_segment(schema, UINT64_MAX); + TEST_ASSERT(err == ZVEC_OK); + + max_doc_count = zvec_collection_schema_get_max_doc_count_per_segment(schema); + TEST_ASSERT(max_doc_count == UINT64_MAX); + + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +// ============================================================================= +// Collection-related tests +// ============================================================================= + +void test_collection_basic_operations(void) { + TEST_START(); + + // Create temporary directory + char temp_dir[] = "/tmp/zvec_test_collection_basic_operations"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test collection operations + ZVecDoc *doc1 = zvec_test_create_doc(1, schema, NULL); + ZVecDoc *doc2 = zvec_test_create_doc(2, schema, NULL); + ZVecDoc 
*doc3 = zvec_test_create_doc(3, schema, NULL); + + TEST_ASSERT(doc1 != NULL); + TEST_ASSERT(doc2 != NULL); + TEST_ASSERT(doc3 != NULL); + + if (doc1 && doc2 && doc3) { + ZVecDoc *docs[] = {doc1, doc2, doc3}; + size_t success_count, error_count; + + // Test insert operation + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 3, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 3); + TEST_ASSERT(error_count == 0); + + // Test update operation + zvec_doc_set_score(doc1, 0.95f); + ZVecDoc *update_docs[] = {doc1}; + err = zvec_collection_update(collection, (const ZVecDoc **)update_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test upsert operation + zvec_doc_set_pk(doc3, "pk_3_modified"); + ZVecDoc *upsert_docs[] = {doc3}; + err = zvec_collection_upsert(collection, (const ZVecDoc **)upsert_docs, + 1, &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 1); + TEST_ASSERT(error_count == 0); + + // Test delete operation by primary keys + const char *pks[] = {"pk_1", "pk_2"}; + err = zvec_collection_delete(collection, pks, 2, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 2); + TEST_ASSERT(error_count == 0); + + // Test delete by filter + err = zvec_collection_delete_by_filter(collection, "id > 0"); + TEST_ASSERT(err == ZVEC_OK); + + // Clean up documents + zvec_doc_destroy(doc1); + zvec_doc_destroy(doc2); + zvec_doc_destroy(doc3); + } + + // Test collection flush + err = zvec_collection_flush(collection); + TEST_ASSERT(err == ZVEC_OK); + + // Test collection optimization + err = zvec_collection_optimize(collection); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf 
%s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_edge_cases(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_edge_cases"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + + // Test empty name collection + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test long name collection + char long_name[256]; + memset(long_name, 'a', 255); + long_name[255] = '\0'; + + char long_path[512]; + snprintf(long_path, sizeof(long_path), "%s/%s", temp_dir, + "very_long_collection_name_that_tests_path_limits"); + + err = zvec_collection_create_and_open(long_path, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + if (collection) { + zvec_collection_destroy(collection); + collection = NULL; + } + + // Test NULL name集合 + err = zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err != ZVEC_OK); + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_delete_by_filter(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_delete_by_filter"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + // Test normal deletion filtering + err = zvec_collection_delete_by_filter(collection, "id > 1"); + TEST_ASSERT(err == ZVEC_OK); + + // Test NULL filter + err = zvec_collection_delete_by_filter(collection, NULL); + 
TEST_ASSERT(err != ZVEC_OK); + + // Test empty string filter + err = zvec_collection_delete_by_filter(collection, ""); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_stats(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_stats"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + ZVecCollectionStats *stats = NULL; + err = zvec_collection_get_stats(collection, &stats); + TEST_ASSERT(err == ZVEC_OK); + + if (stats) { + // Basic validation of statistics + TEST_ASSERT(stats->doc_count == + 0); // New collection should have no documents + zvec_collection_stats_destroy(stats); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Field-related tests +// ============================================================================= + +void test_field_schema_functions(void) { + TEST_START(); + + // Test scalar field creation + ZVecFieldSchema scalar_field = {0}; + ZVecString name1 = {0}; + name1.data = "test_field"; + name1.length = 10; + scalar_field.name = &name1; + scalar_field.data_type = ZVEC_DATA_TYPE_STRING; + scalar_field.nullable = true; + scalar_field.dimension = 0; + + TEST_ASSERT(strcmp(scalar_field.name->data, "test_field") == 0); + TEST_ASSERT(scalar_field.data_type == 
ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(scalar_field.nullable == true); + + // Test vector field creation + ZVecFieldSchema vector_field = {0}; + ZVecString name2 = {0}; + name2.data = "vec_field"; + name2.length = 9; + vector_field.name = &name2; + vector_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + vector_field.nullable = false; + vector_field.dimension = 128; + + TEST_ASSERT(strcmp(vector_field.name->data, "vec_field") == 0); + TEST_ASSERT(vector_field.data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(vector_field.dimension == 128); + + // Test sparse vector field creation + ZVecFieldSchema sparse_field = {0}; + ZVecString name3 = {0}; + name3.data = "sparse_field"; + name3.length = 12; + sparse_field.name = &name3; + sparse_field.data_type = ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32; + sparse_field.nullable = false; + sparse_field.dimension = 0; + + TEST_ASSERT(strcmp(sparse_field.name->data, "sparse_field") == 0); + TEST_ASSERT(sparse_field.data_type == ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32); + + TEST_END(); +} + +void test_field_helper_functions(void) { + TEST_START(); + + // Test scalar field helper functions + ZVecInvertIndexParams *invert_params = + zvec_test_create_default_invert_params(true); + ZVecFieldSchema *scalar_field = zvec_test_create_scalar_field( + "test_scalar", ZVEC_DATA_TYPE_INT32, true, invert_params); + TEST_ASSERT(scalar_field != NULL); + if (scalar_field) { + TEST_ASSERT(strcmp(scalar_field->name->data, "test_scalar") == 0); + TEST_ASSERT(scalar_field->data_type == ZVEC_DATA_TYPE_INT32); + free(scalar_field); + } + if (invert_params) { + free(invert_params); + } + + // Test vector field helper functions + ZVecHnswIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + ZVecFieldSchema *vector_field = zvec_test_create_vector_field( + "test_vector", ZVEC_DATA_TYPE_VECTOR_FP32, 128, false, hnsw_params); + TEST_ASSERT(vector_field != NULL); + if (vector_field) { + TEST_ASSERT(strcmp(vector_field->name->data, "test_vector") == 0); + 
TEST_ASSERT(vector_field->data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + TEST_ASSERT(vector_field->dimension == 128); + free(vector_field); + } + if (hnsw_params) { + free(hnsw_params); + } + + TEST_END(); +} + +// ============================================================================= +// Document-related tests +// ============================================================================= + +void test_doc_creation(void) { + TEST_START(); + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Test complete document creation + ZVecDoc *doc = zvec_test_create_doc(1, schema, NULL); + TEST_ASSERT(doc != NULL); + if (doc) { + zvec_doc_destroy(doc); + } + + // Test null value document creation + ZVecDoc *null_doc = zvec_test_create_doc_null(2, schema, NULL); + TEST_ASSERT(null_doc != NULL); + if (null_doc) { + zvec_doc_destroy(null_doc); + } + + zvec_collection_schema_destroy(schema); + } + + TEST_END(); +} + +void test_doc_primary_key(void) { + TEST_START(); + + // Test primary key generation + char *pk = zvec_test_make_pk(12345); + TEST_ASSERT(pk != NULL); + if (pk) { + TEST_ASSERT(strcmp(pk, "pk_12345") == 0); + free(pk); + } + + TEST_END(); +} + +void test_doc_functions(void) { + TEST_START(); + + // Create test document using utility function + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + ZVecDoc *doc = zvec_doc_create(); + TEST_ASSERT(doc != NULL); + + // Test primary key operations + zvec_doc_set_pk(doc, "test_doc_complete"); + const char *pk = zvec_doc_get_pk_pointer(doc); + TEST_ASSERT(pk != NULL); + TEST_ASSERT(strcmp(pk, "test_doc_complete") == 0); + + // Test document ID and score operations + zvec_doc_set_doc_id(doc, 99999); + uint64_t doc_id = zvec_doc_get_doc_id(doc); + TEST_ASSERT(doc_id == 99999); + + zvec_doc_set_score(doc, 0.95f); + float score = zvec_doc_get_score(doc); + TEST_ASSERT(score == 0.95f); + + // Test operator 
operations + zvec_doc_set_operator(doc, ZVEC_DOC_OP_INSERT); + ZVecDocOperator op = zvec_doc_get_operator(doc); + TEST_ASSERT(op == ZVEC_DOC_OP_INSERT); + + ZVecErrorCode err; + + // ==================== COMPREHENSIVE DATA TYPE TESTING ==================== + + printf( + "=== Testing zvec_doc_get_field_value_basic with all supported types " + "===\n"); + + // Test all basic numeric types that zvec_doc_get_field_value_basic supports + // BOOL type + ZVecDocField bool_field; + bool_field.name.data = "bool_field"; + bool_field.name.length = strlen("bool_field"); + bool_field.data_type = ZVEC_DATA_TYPE_BOOL; + bool_field.value.bool_value = true; + err = zvec_doc_add_field_by_struct(doc, &bool_field); + TEST_ASSERT(err == ZVEC_OK); + + bool bool_result; + err = zvec_doc_get_field_value_basic(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_result, sizeof(bool_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_result == true); + + // INT32 type + ZVecDocField int32_field; + int32_field.name.data = "int32_field"; + int32_field.name.length = strlen("int32_field"); + int32_field.data_type = ZVEC_DATA_TYPE_INT32; + int32_field.value.int32_value = -2147483648; // Min int32 + err = zvec_doc_add_field_by_struct(doc, &int32_field); + TEST_ASSERT(err == ZVEC_OK); + + int32_t int32_result; + err = zvec_doc_get_field_value_basic(doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_result == -2147483648); + + // INT64 type + ZVecDocField int64_field; + int64_field.name.data = "int64_field"; + int64_field.name.length = strlen("int64_field"); + int64_field.data_type = ZVEC_DATA_TYPE_INT64; + int64_field.value.int64_value = 9223372036854775807LL; // Max int64 + err = zvec_doc_add_field_by_struct(doc, &int64_field); + TEST_ASSERT(err == ZVEC_OK); + + int64_t int64_result; + err = zvec_doc_get_field_value_basic(doc, "int64_field", ZVEC_DATA_TYPE_INT64, + &int64_result, sizeof(int64_result)); + 
TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int64_result == 9223372036854775807LL); + + // UINT32 type + ZVecDocField uint32_field; + uint32_field.name.data = "uint32_field"; + uint32_field.name.length = strlen("uint32_field"); + uint32_field.data_type = ZVEC_DATA_TYPE_UINT32; + uint32_field.value.uint32_value = 4294967295U; // Max uint32 + err = zvec_doc_add_field_by_struct(doc, &uint32_field); + TEST_ASSERT(err == ZVEC_OK); + + uint32_t uint32_result; + err = + zvec_doc_get_field_value_basic(doc, "uint32_field", ZVEC_DATA_TYPE_UINT32, + &uint32_result, sizeof(uint32_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint32_result == 4294967295U); + + // UINT64 type + ZVecDocField uint64_field; + uint64_field.name.data = "uint64_field"; + uint64_field.name.length = strlen("uint64_field"); + uint64_field.data_type = ZVEC_DATA_TYPE_UINT64; + uint64_field.value.uint64_value = 18446744073709551615ULL; // Max uint64 + err = zvec_doc_add_field_by_struct(doc, &uint64_field); + TEST_ASSERT(err == ZVEC_OK); + + uint64_t uint64_result; + err = + zvec_doc_get_field_value_basic(doc, "uint64_field", ZVEC_DATA_TYPE_UINT64, + &uint64_result, sizeof(uint64_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(uint64_result == 18446744073709551615ULL); + + // FLOAT type + ZVecDocField float_field; + float_field.name.data = "float_field"; + float_field.name.length = strlen("float_field"); + float_field.data_type = ZVEC_DATA_TYPE_FLOAT; + float_field.value.float_value = 3.14159265359f; + err = zvec_doc_add_field_by_struct(doc, &float_field); + TEST_ASSERT(err == ZVEC_OK); + + float float_result; + err = zvec_doc_get_field_value_basic(doc, "float_field", ZVEC_DATA_TYPE_FLOAT, + &float_result, sizeof(float_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fabsf(float_result - 3.14159265359f) < 1e-6f); + + // DOUBLE type + ZVecDocField double_field; + double_field.name.data = "double_field"; + double_field.name.length = strlen("double_field"); + double_field.data_type = 
ZVEC_DATA_TYPE_DOUBLE; + double_field.value.double_value = 2.71828182845904523536; + err = zvec_doc_add_field_by_struct(doc, &double_field); + TEST_ASSERT(err == ZVEC_OK); + + double double_result; + err = + zvec_doc_get_field_value_basic(doc, "double_field", ZVEC_DATA_TYPE_DOUBLE, + &double_result, sizeof(double_result)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fabs(double_result - 2.71828182845904523536) < 1e-15); + + printf( + "=== Testing zvec_doc_get_field_value_copy with all supported types " + "===\n"); + + // Test STRING type with zvec_doc_get_field_value_copy + ZVecDocField string_field; + string_field.name.data = "string_field"; + string_field.name.length = strlen("string_field"); + string_field.data_type = ZVEC_DATA_TYPE_STRING; + string_field.value.string_value = *zvec_string_create("Hello, 世界!"); + err = zvec_doc_add_field_by_struct(doc, &string_field); + TEST_ASSERT(err == ZVEC_OK); + + void *string_result; + size_t string_size; + err = zvec_doc_get_field_value_copy( + doc, "string_field", ZVEC_DATA_TYPE_STRING, &string_result, &string_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(string_result != NULL); + TEST_ASSERT(string_size == strlen("Hello, 世界!")); + TEST_ASSERT(memcmp(string_result, "Hello, 世界!", string_size) == 0); + free(string_result); + + // Test BINARY type with zvec_doc_get_field_value_copy + ZVecDocField binary_field; + binary_field.name.data = "binary_field"; + binary_field.name.length = strlen("binary_field"); + binary_field.data_type = ZVEC_DATA_TYPE_BINARY; + uint8_t binary_data[] = {0x00, 0x01, 0x02, 0xFF, 0xFE, 0xFD}; + binary_field.value.string_value = + *zvec_bin_create(binary_data, sizeof(binary_data)); + err = zvec_doc_add_field_by_struct(doc, &binary_field); + TEST_ASSERT(err == ZVEC_OK); + + void *binary_result; + size_t binary_size; + err = zvec_doc_get_field_value_copy( + doc, "binary_field", ZVEC_DATA_TYPE_BINARY, &binary_result, &binary_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(binary_result != 
NULL); + TEST_ASSERT(binary_size == 6); + TEST_ASSERT(memcmp(binary_result, "\x00\x01\x02\xFF\xFE\xFD", binary_size) == + 0); + free(binary_result); + + // Test VECTOR_FP32 type with zvec_doc_get_field_value_copy + float test_vector[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f}; + ZVecDocField fp32_vec_field; + fp32_vec_field.name.data = "fp32_vec_field"; + fp32_vec_field.name.length = strlen("fp32_vec_field"); + fp32_vec_field.data_type = ZVEC_DATA_TYPE_VECTOR_FP32; + fp32_vec_field.value.vector_value.data = test_vector; + fp32_vec_field.value.vector_value.length = 5; + err = zvec_doc_add_field_by_struct(doc, &fp32_vec_field); + TEST_ASSERT(err == ZVEC_OK); + + void *fp32_vec_result; + size_t fp32_vec_size; + err = zvec_doc_get_field_value_copy(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, + &fp32_vec_result, &fp32_vec_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp32_vec_result != NULL); + TEST_ASSERT(fp32_vec_size == 5 * sizeof(float)); + TEST_ASSERT(memcmp(fp32_vec_result, test_vector, fp32_vec_size) == 0); + free(fp32_vec_result); + + + printf( + "=== Testing zvec_doc_get_field_value_pointer with all supported types " + "===\n"); + + // Test pointer access to basic types + const void *bool_ptr; + size_t bool_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "bool_field", ZVEC_DATA_TYPE_BOOL, + &bool_ptr, &bool_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(bool_ptr != NULL); + TEST_ASSERT(bool_ptr_size == sizeof(bool)); + TEST_ASSERT(*(const bool *)bool_ptr == true); + + const void *int32_ptr; + size_t int32_ptr_size; + err = zvec_doc_get_field_value_pointer( + doc, "int32_field", ZVEC_DATA_TYPE_INT32, &int32_ptr, &int32_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(int32_ptr != NULL); + TEST_ASSERT(int32_ptr_size == sizeof(int32_t)); + TEST_ASSERT(*(const int32_t *)int32_ptr == -2147483648); + + // Test pointer access to STRING (should return null-terminated C string) + const void *string_ptr; + size_t string_ptr_size; + err = 
zvec_doc_get_field_value_pointer(doc, "string_field", + ZVEC_DATA_TYPE_STRING, &string_ptr, + &string_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(string_ptr != NULL); + TEST_ASSERT(string_ptr_size == strlen("Hello, 世界!")); + TEST_ASSERT(memcmp(string_ptr, "Hello, 世界!", string_ptr_size) == 0); + + // Test pointer access to BINARY + const void *binary_ptr; + size_t binary_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "binary_field", + ZVEC_DATA_TYPE_BINARY, &binary_ptr, + &binary_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(binary_ptr != NULL); + TEST_ASSERT(binary_ptr_size == 6); + TEST_ASSERT(memcmp(binary_ptr, "\x00\x01\x02\xFF\xFE\xFD", binary_ptr_size) == + 0); + + // Test pointer access to VECTOR_FP32 + const void *fp32_vec_ptr; + size_t fp32_vec_ptr_size; + err = zvec_doc_get_field_value_pointer(doc, "fp32_vec_field", + ZVEC_DATA_TYPE_VECTOR_FP32, + &fp32_vec_ptr, &fp32_vec_ptr_size); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(fp32_vec_ptr != NULL); + TEST_ASSERT(fp32_vec_ptr_size == 5 * sizeof(float)); + TEST_ASSERT(memcmp(fp32_vec_ptr, test_vector, fp32_vec_ptr_size) == 0); + + // Declare dummy variables for error testing + const void *dummy_ptr; + size_t dummy_ptr_size; + + // ==================== FIELD OPERATIONS TESTING ==================== + + // Test field operations + size_t field_count = zvec_doc_get_field_count(doc); + TEST_ASSERT(field_count >= 10); // All the fields we've added + + // Test field existence checks + TEST_ASSERT(zvec_doc_has_field(doc, "bool_field") == true); + TEST_ASSERT(zvec_doc_has_field(doc, "int32_field") == true); + TEST_ASSERT(zvec_doc_has_field(doc, "string_field") == true); + TEST_ASSERT(zvec_doc_has_field(doc, "nonexistent") == false); + + TEST_ASSERT(zvec_doc_has_field_value(doc, "bool_field") == true); + TEST_ASSERT(zvec_doc_is_field_null(doc, "bool_field") == false); + TEST_ASSERT(zvec_doc_is_field_null(doc, "nonexistent") == false); + + // Test field names retrieval + char 
**field_names; + size_t name_count; + err = zvec_doc_get_field_names(doc, &field_names, &name_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(name_count >= 10); + TEST_ASSERT(field_names != NULL); + + // Verify some expected fields are present + bool found_key_fields = false; + for (size_t i = 0; i < name_count; i++) { + if (strcmp(field_names[i], "bool_field") == 0 || + strcmp(field_names[i], "int32_field") == 0 || + strcmp(field_names[i], "string_field") == 0) { + found_key_fields = true; + break; + } + } + TEST_ASSERT(found_key_fields == true); + + zvec_free_str_array(field_names, name_count); + + // ==================== ERROR CONDITION TESTING ==================== + + printf("=== Testing error conditions ===\n"); + + // Test non-existent field + err = + zvec_doc_get_field_value_basic(doc, "missing_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err != ZVEC_OK); + + err = + zvec_doc_get_field_value_copy(doc, "missing_field", ZVEC_DATA_TYPE_STRING, + &string_result, &string_size); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_doc_get_field_value_pointer( + doc, "missing_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size); + TEST_ASSERT(err != ZVEC_OK); + + // Test wrong data type access + err = + zvec_doc_get_field_value_basic(doc, "string_field", ZVEC_DATA_TYPE_INT32, + &int32_result, sizeof(int32_result)); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_doc_get_field_value_copy(doc, "int32_field", ZVEC_DATA_TYPE_STRING, + &string_result, &string_size); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_doc_get_field_value_pointer( + doc, "bool_field", ZVEC_DATA_TYPE_FLOAT, &dummy_ptr, &dummy_ptr_size); + TEST_ASSERT(err != ZVEC_OK); + + // ==================== DOCUMENT SERIALIZATION TESTING ==================== + + printf("=== Testing document serialization ===\n"); + + uint8_t *serialized_data; + size_t data_size; + err = zvec_doc_serialize(doc, &serialized_data, &data_size); + TEST_ASSERT(err == ZVEC_OK); + 
TEST_ASSERT(serialized_data != NULL); + TEST_ASSERT(data_size > 0); + + ZVecDoc *deserialized_doc; + err = zvec_doc_deserialize(serialized_data, data_size, &deserialized_doc); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(deserialized_doc != NULL); + + // Verify deserialized document has same field count + size_t deserialized_field_count = zvec_doc_get_field_count(deserialized_doc); + TEST_ASSERT(deserialized_field_count == field_count); + + // Test a field from deserialized document + int32_t deserialized_int32; + err = zvec_doc_get_field_value_basic( + deserialized_doc, "int32_field", ZVEC_DATA_TYPE_INT32, + &deserialized_int32, sizeof(deserialized_int32)); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(deserialized_int32 == -2147483648); + + // ==================== CLEANUP ==================== + + zvec_doc_destroy(deserialized_doc); + zvec_free_uint8_array(serialized_data); + zvec_free_str(string_field.value.string_value.data); + zvec_free_str(binary_field.value.string_value.data); + zvec_doc_destroy(doc); + zvec_collection_schema_destroy(schema); + + TEST_END(); +} + +// ============================================================================= +// Index parameter tests +// ============================================================================= + +void test_index_params(void) { + TEST_START(); + + // Test HNSW parameter creation + ZVecHnswIndexParams *hnsw_params = zvec_test_create_default_hnsw_params(); + TEST_ASSERT(hnsw_params != NULL); + if (hnsw_params) { + free(hnsw_params); + } + + // Test Flat parameter creation + ZVecFlatIndexParams *flat_params = zvec_test_create_default_flat_params(); + TEST_ASSERT(flat_params != NULL); + if (flat_params) { + free(flat_params); + } + + // Test scalar index parameter creation + ZVecInvertIndexParams *invert_params = + zvec_test_create_default_invert_params(true); + TEST_ASSERT(invert_params != NULL); + if (invert_params) { + free(invert_params); + } + + TEST_END(); +} + +// 
============================================================================= +// Memory management tests +// ============================================================================= +void test_zvec_string_functions(void) { + TEST_START(); + + // Test string creation and basic operations + ZVecString *str1 = zvec_string_create("Hello World"); + TEST_ASSERT(str1 != NULL); + TEST_ASSERT(zvec_string_length(str1) == 11); + TEST_ASSERT(strcmp(zvec_string_c_str(str1), "Hello World") == 0); + + // Test string copy + ZVecString *str2 = zvec_string_copy(str1); + TEST_ASSERT(str2 != NULL); + TEST_ASSERT(zvec_string_length(str2) == 11); + TEST_ASSERT(strcmp(zvec_string_c_str(str2), "Hello World") == 0); + + // Test string comparison + int cmp_result = zvec_string_compare(str1, str2); + TEST_ASSERT(cmp_result == 0); + + ZVecString *str3 = zvec_string_create("Hello"); + TEST_ASSERT(zvec_string_compare(str1, str3) > 0); + + // Test string creation from view + ZVecStringView view = {"Hello View", 10}; + ZVecString *str4 = zvec_string_create_from_view(&view); + TEST_ASSERT(str4 != NULL); + TEST_ASSERT(zvec_string_length(str4) == 10); + TEST_ASSERT(strcmp(zvec_string_c_str(str4), "Hello View") == 0); + + // Test string view with embedded null bytes + char binary_data[] = {'H', 'e', 'l', 'l', 'o', '\0', 'W', 'o', 'r', 'l', 'd'}; + ZVecStringView binary_view = {binary_data, 11}; + ZVecString *str5 = zvec_string_create_from_view(&binary_view); + TEST_ASSERT(str5 != NULL); + TEST_ASSERT(zvec_string_length(str5) == 11); + // Note: strcmp will stop at first null byte, so we need to compare manually + TEST_ASSERT(memcmp(zvec_string_c_str(str5), binary_data, 11) == 0); + + // Cleanup + zvec_free_string(str1); + zvec_free_string(str2); + zvec_free_string(str3); + zvec_free_string(str4); + zvec_free_string(str5); + + TEST_END(); +} + +void test_index_params_functions(void) { + TEST_START(); + + // Test base index params + ZVecBaseIndexParams base_params; + 
zvec_index_params_base_init(&base_params, ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(base_params.index_type == ZVEC_INDEX_TYPE_HNSW); + + // Test invert index params + ZVecInvertIndexParams invert_params; + zvec_index_params_invert_init(&invert_params, true, false); + TEST_ASSERT(invert_params.base.index_type == ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(invert_params.enable_range_optimization == true); + TEST_ASSERT(invert_params.enable_extended_wildcard == false); + + // Test vector index params + ZVecVectorIndexParams vector_params; + zvec_index_params_vector_init(&vector_params, ZVEC_INDEX_TYPE_HNSW, + ZVEC_METRIC_TYPE_L2, + ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(vector_params.base.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(vector_params.metric_type == ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(vector_params.quantize_type == ZVEC_QUANTIZE_TYPE_UNDEFINED); + + // Test HNSW index params + ZVecHnswIndexParams hnsw_params; + zvec_index_params_hnsw_init(&hnsw_params, ZVEC_METRIC_TYPE_COSINE, 16, 200, + 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(hnsw_params.base.base.index_type == ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw_params.base.metric_type == ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(hnsw_params.m == 16); + TEST_ASSERT(hnsw_params.ef_construction == 200); + TEST_ASSERT(hnsw_params.ef_search == 50); + + // Test Flat index params + ZVecFlatIndexParams flat_params; + zvec_index_params_flat_init(&flat_params, ZVEC_METRIC_TYPE_IP, + ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(flat_params.base.base.index_type == ZVEC_INDEX_TYPE_FLAT); + TEST_ASSERT(flat_params.base.metric_type == ZVEC_METRIC_TYPE_IP); + + // Test IVF index params + ZVecIVFIndexParams ivf_params; + zvec_index_params_ivf_init(&ivf_params, ZVEC_METRIC_TYPE_L2, 100, 10, true, 5, + ZVEC_QUANTIZE_TYPE_UNDEFINED); + TEST_ASSERT(ivf_params.base.base.index_type == ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(ivf_params.n_list == 100); + TEST_ASSERT(ivf_params.n_iters == 10); + TEST_ASSERT(ivf_params.use_soar == 
true); + TEST_ASSERT(ivf_params.n_probe == 5); + + TEST_END(); +} + +void test_utility_functions(void) { + TEST_START(); + + // Test error code to string conversion + const char *error_str = zvec_error_code_to_string(ZVEC_OK); + TEST_ASSERT(error_str != NULL); + TEST_ASSERT(strlen(error_str) > 0); + + error_str = zvec_error_code_to_string(ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(error_str != NULL); + + // Test data type to string conversion + const char *data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_INT32); + TEST_ASSERT(data_type_str != NULL); + TEST_ASSERT(strlen(data_type_str) > 0); + + data_type_str = zvec_data_type_to_string(ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(data_type_str != NULL); + + // Test index type to string conversion + const char *index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(index_type_str != NULL); + TEST_ASSERT(strlen(index_type_str) > 0); + + index_type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_INVERT); + TEST_ASSERT(index_type_str != NULL); + + TEST_END(); +} + +void test_memory_management_functions(void) { + TEST_START(); + + // Test basic memory allocation + void *ptr = zvec_malloc(1024); + TEST_ASSERT(ptr != NULL); + + // Test memory reallocation + void *new_ptr = zvec_realloc(ptr, 2048); + TEST_ASSERT(new_ptr != NULL); + + // Test memory deallocation + zvec_free(new_ptr); + + // Test string allocation and deallocation + ZVecString *str = zvec_string_create("Test String"); + TEST_ASSERT(str != NULL); + zvec_free_string(str); + + TEST_END(); +} + +void test_query_params_functions(void) { + TEST_START(); + + // Test basic query parameters creation and destruction + ZVecQueryParams *base_params = zvec_query_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(base_params != NULL); + + // Test union query parameters + ZVecQueryParamsUnion *union_params = + zvec_query_params_union_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(union_params != NULL); + + // Test HNSW query parameters + 
ZVecHnswQueryParams *hnsw_params = zvec_query_params_hnsw_create( + ZVEC_INDEX_TYPE_HNSW, 50, 0.5f, false, true); + TEST_ASSERT(hnsw_params != NULL); + + // Test IVF query parameters + ZVecIVFQueryParams *ivf_params = + zvec_query_params_ivf_create(ZVEC_INDEX_TYPE_IVF, 10, true, 1.5f); + TEST_ASSERT(ivf_params != NULL); + + // Test Flat query parameters + ZVecFlatQueryParams *flat_params = + zvec_query_params_flat_create(ZVEC_INDEX_TYPE_FLAT, false, 2.0f); + TEST_ASSERT(flat_params != NULL); + + // Test setting various parameters on base query params + ZVecErrorCode err; + + // Test index type setting + err = zvec_query_params_set_index_type(base_params, ZVEC_INDEX_TYPE_IVF); + TEST_ASSERT(err == ZVEC_OK); + + // Test radius setting + err = zvec_query_params_set_radius(base_params, 0.8f); + TEST_ASSERT(err == ZVEC_OK); + + // Test linear search setting + err = zvec_query_params_set_is_linear(base_params, false); + TEST_ASSERT(err == ZVEC_OK); + + // Test refiner setting + err = zvec_query_params_set_is_using_refiner(base_params, true); + TEST_ASSERT(err == ZVEC_OK); + + // Test HNSW-specific parameters + err = zvec_query_params_hnsw_set_ef(hnsw_params, 75); + TEST_ASSERT(err == ZVEC_OK); + + // Test IVF-specific parameters + err = zvec_query_params_ivf_set_nprobe(ivf_params, 15); + TEST_ASSERT(err == ZVEC_OK); + + // Test IVF scale factor setting + err = zvec_query_params_ivf_set_scale_factor(ivf_params, 2.5f); + TEST_ASSERT(err == ZVEC_OK); + + // Test destruction of valid parameters + zvec_query_params_destroy(base_params); + zvec_query_params_hnsw_destroy(hnsw_params); + zvec_query_params_ivf_destroy(ivf_params); + zvec_query_params_flat_destroy(flat_params); + zvec_query_params_union_destroy(union_params); + + + // Test boundary cases - null pointer handling + zvec_query_params_hnsw_destroy(NULL); + zvec_query_params_ivf_destroy(NULL); + zvec_query_params_flat_destroy(NULL); + zvec_query_params_union_destroy(NULL); + + + TEST_END(); +} + +void 
test_collection_stats_functions(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_stats_functions"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + if (collection) { + ZVecCollectionStats *stats = NULL; + + // Test normal statistics retrieval + err = zvec_collection_get_stats(collection, &stats); + TEST_ASSERT(err == ZVEC_OK); + + if (stats) { + TEST_ASSERT(stats->doc_count == 0); + zvec_collection_stats_destroy(stats); + } + + // Test NULL parameters + err = zvec_collection_get_stats(NULL, &stats); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_get_stats(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + // Test statistics destruction boundary cases + zvec_collection_stats_destroy(NULL); + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_dml_functions(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_collection_dml"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test insertion function boundary cases + size_t success_count, error_count; + + // Test NULL collection + err = zvec_collection_insert(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test NULL document array + err = zvec_collection_insert(collection, NULL, 0, &success_count, + &error_count); + 
TEST_ASSERT(err != ZVEC_OK); + + // Test zero document count + ZVecDoc *empty_docs[1]; + err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 0, + &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test NULL count pointer + err = zvec_collection_insert(collection, (const ZVecDoc **)empty_docs, 1, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test update function boundary cases + err = zvec_collection_update(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_update(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_update(collection, (const ZVecDoc **)empty_docs, 0, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test upsert function boundary cases + err = zvec_collection_upsert(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_upsert(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_upsert(collection, (const ZVecDoc **)empty_docs, 0, + NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test deletion function boundary cases + const char *pks[1]; + err = zvec_collection_delete(NULL, NULL, 0, &success_count, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete(collection, NULL, 0, &success_count, + &error_count); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete(collection, pks, 0, NULL, &error_count); + TEST_ASSERT(err != ZVEC_OK); + + // Test deletion by filter boundary cases + err = zvec_collection_delete_by_filter(NULL, NULL); + TEST_ASSERT(err != ZVEC_OK); + + err = zvec_collection_delete_by_filter(collection, NULL); + TEST_ASSERT(err != ZVEC_OK); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up temporary directory + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", 
temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Actual Query Execution Tests +// ============================================================================= + +void test_actual_vector_queries(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_actual_queries"; + + // Create schema with vector field + ZVecCollectionSchema *schema = zvec_collection_schema_create("query_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Add ID field + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_collection_schema_add_field(schema, id_field); + + // Add vector field with HNSW index + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + ZVecFieldSchema *vec_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, 4); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Insert test documents + float vec1[] = {1.0f, 0.0f, 0.0f, 0.0f}; + float vec2[] = {0.0f, 1.0f, 0.0f, 0.0f}; + float vec3[] = {0.0f, 0.0f, 1.0f, 0.0f}; + float vec4[] = {0.7f, 0.7f, 0.0f, 0.0f}; // Similar to vec1 and vec2 + + ZVecDoc *docs[4]; + for (int i = 0; i < 4; i++) { + docs[i] = zvec_doc_create(); + zvec_doc_set_pk(docs[i], zvec_test_make_pk(i + 1)); + zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_INT64, + &(int64_t){i + 1}, sizeof(int64_t)); + } + + zvec_doc_add_field_by_value( + docs[0], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec1, sizeof(vec1)); + zvec_doc_add_field_by_value( + docs[1], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec2, sizeof(vec2)); 
+ zvec_doc_add_field_by_value( + docs[2], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec3, sizeof(vec3)); + zvec_doc_add_field_by_value( + docs[3], "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, vec4, sizeof(vec4)); + + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)docs, 4, + &success_count, &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == 4); + TEST_ASSERT(error_count == 0); + + // Flush collection to build index + zvec_collection_flush(collection); + + // Test 1: Basic vector search + ZVecVectorQuery query1 = {0}; + query1.field_name = (ZVecString){.data = "embedding", .length = 9}; + query1.query_vector = + (ZVecByteArray){.data = (uint8_t *)vec1, .length = sizeof(vec1)}; + query1.topk = 3; + query1.include_vector = true; + query1.include_doc_id = true; + + ZVecDoc **results = NULL; + size_t result_count = 0; + err = zvec_collection_query(collection, &query1, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count > 0); + TEST_ASSERT(results != NULL); + + // First result should be vec1 itself (distance ~0) + if (result_count > 0) { + float score = zvec_doc_get_score(results[0]); + TEST_ASSERT(score < 0.001f); // Very small distance + } + + zvec_docs_free(results, result_count); + + // Test 2: Search with filter + ZVecVectorQuery query2 = query1; + query2.filter = (ZVecString){.data = "id > 2", .length = 6}; + + err = zvec_collection_query(collection, &query2, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + + // Should only return documents with id > 2 + for (size_t i = 0; i < result_count; i++) { + int64_t id; + zvec_doc_get_field_value_basic(results[i], "id", ZVEC_DATA_TYPE_INT64, + &id, sizeof(id)); + TEST_ASSERT(id > 2); + } + + zvec_docs_free(results, result_count); + + // Cleanup documents + for (int i = 0; i < 4; i++) { + zvec_doc_destroy(docs[i]); + } + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + 
zvec_index_params_hnsw_destroy(hnsw_params); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_index_creation_and_management(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_index_management"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test 1: Create HNSW index + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_COSINE, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + TEST_ASSERT(hnsw_params != NULL); + + err = zvec_collection_create_hnsw_index( + collection, &(ZVecString){.data = "dense", .length = 5}, hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test 2: Create scalar index + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, false); + TEST_ASSERT(invert_params != NULL); + + err = zvec_collection_create_invert_index( + collection, &(ZVecString){.data = "name", .length = 4}, + invert_params); + TEST_ASSERT(err == ZVEC_OK); + + // Note: Index statistics and drop functionality not yet implemented in C + // API These would require zvec_collection_get_index_stats() and + // zvec_collection_drop_index() + + // Test 3: Optimize collection + err = zvec_collection_optimize(collection); + TEST_ASSERT(err == ZVEC_OK); + + zvec_collection_destroy(collection); + zvec_index_params_hnsw_destroy(hnsw_params); + zvec_index_params_invert_destroy(invert_params); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_collection_ddl_operations(void) { + TEST_START(); + + char temp_dir[] = 
"/tmp/zvec_test_collection_ddl"; + + ZVecCollectionSchema *schema = zvec_test_create_temp_schema(); + TEST_ASSERT(schema != NULL); + + if (schema) { + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(collection != NULL); + + if (collection) { + // Test 1: Add new field (using schema modification before opening) + ZVecFieldSchema *new_field = + zvec_field_schema_create("new_field", ZVEC_DATA_TYPE_STRING, true, 0); + TEST_ASSERT(new_field != NULL); + + // Note: Runtime field addition not yet implemented in C API + // This would require zvec_collection_add_field() which is not implemented + + // Test 2: Get collection schema + ZVecCollectionSchema *retrieved_schema = NULL; + err = zvec_collection_get_schema(collection, &retrieved_schema); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(retrieved_schema != NULL); + + size_t field_count = + zvec_collection_schema_get_field_count(retrieved_schema); + TEST_ASSERT(field_count > 0); + + zvec_collection_schema_destroy(retrieved_schema); + + zvec_collection_destroy(collection); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +void test_field_ddl_operations(void) { + TEST_START(); + + // Test field schema creation with various configurations + ZVecFieldSchema *field1 = + zvec_field_schema_create("test_field1", ZVEC_DATA_TYPE_STRING, false, 0); + TEST_ASSERT(field1 != NULL); + TEST_ASSERT(strcmp(field1->name->data, "test_field1") == 0); + TEST_ASSERT(field1->data_type == ZVEC_DATA_TYPE_STRING); + TEST_ASSERT(field1->nullable == false); + TEST_ASSERT(field1->dimension == 0); + + ZVecFieldSchema *field2 = zvec_field_schema_create( + "test_field2", ZVEC_DATA_TYPE_VECTOR_FP32, true, 128); + TEST_ASSERT(field2 != NULL); + TEST_ASSERT(field2->data_type == ZVEC_DATA_TYPE_VECTOR_FP32); + 
TEST_ASSERT(field2->nullable == true); + TEST_ASSERT(field2->dimension == 128); + + // Test index parameter setting + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + TEST_ASSERT(hnsw_params != NULL); + + ZVecErrorCode err = zvec_field_schema_set_index_params( + field2, (ZVecIndexParams *)hnsw_params); + TEST_ASSERT(err == ZVEC_OK); + + // Test field operations + // (Field validation function doesn't exist in current API) + + // Cleanup + zvec_field_schema_destroy(field1); + zvec_field_schema_destroy(field2); + zvec_index_params_hnsw_destroy(hnsw_params); + + TEST_END(); +} + +void test_performance_benchmarks(void) { + TEST_START(); + + char temp_dir[] = "/tmp/zvec_test_performance"; + + ZVecCollectionSchema *schema = zvec_collection_schema_create("perf_test"); + TEST_ASSERT(schema != NULL); + + if (schema) { + // Create simple schema for performance testing + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_collection_schema_add_field(schema, id_field); + + ZVecFieldSchema *vec_field = + zvec_field_schema_create("vec", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + ZVecHnswIndexParams *hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED, 16, 100, 50); + zvec_field_schema_set_hnsw_index(vec_field, hnsw_params); + zvec_collection_schema_add_field(schema, vec_field); + + ZVecCollection *collection = NULL; + ZVecErrorCode err = + zvec_collection_create_and_open(temp_dir, schema, NULL, &collection); + TEST_ASSERT(err == ZVEC_OK); + + TEST_ASSERT(collection != NULL); + + if (collection) { + const size_t BATCH_SIZE = 1000; + const size_t TOTAL_DOCS = 10000; + + // Test bulk insertion performance +#ifdef _POSIX_C_SOURCE + struct timeval start_time, end_time; + gettimeofday(&start_time, NULL); +#else + clock_t start_clock = clock(); +#endif + + for (size_t batch_start = 0; batch_start < 
TOTAL_DOCS; + batch_start += BATCH_SIZE) { + ZVecDoc *batch_docs[BATCH_SIZE]; + size_t current_batch_size = (batch_start + BATCH_SIZE > TOTAL_DOCS) + ? TOTAL_DOCS - batch_start + : BATCH_SIZE; + + // Create batch of documents + for (size_t i = 0; i < current_batch_size; i++) { + batch_docs[i] = zvec_doc_create(); + zvec_doc_set_pk(batch_docs[i], zvec_test_make_pk(batch_start + i)); + + int64_t id = batch_start + i; + zvec_doc_add_field_by_value(batch_docs[i], "id", ZVEC_DATA_TYPE_INT64, + &id, sizeof(id)); + + // Create random vector + float vec[128]; + for (int j = 0; j < 128; j++) { + vec[j] = (float)rand() / RAND_MAX; + } + zvec_doc_add_field_by_value(batch_docs[i], "vec", + ZVEC_DATA_TYPE_VECTOR_FP32, vec, + sizeof(vec)); + } + + // Insert batch + size_t success_count, error_count; + err = zvec_collection_insert(collection, (const ZVecDoc **)batch_docs, + current_batch_size, &success_count, + &error_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(success_count == current_batch_size); + TEST_ASSERT(error_count == 0); + + // Cleanup batch documents + for (size_t i = 0; i < current_batch_size; i++) { + zvec_doc_destroy(batch_docs[i]); + } + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&end_time, NULL); + double insert_time = (end_time.tv_sec - start_time.tv_sec) + + (end_time.tv_usec - start_time.tv_usec) / 1000000.0; +#else + clock_t end_clock = clock(); + double insert_time = ((double)(end_clock - start_clock)) / CLOCKS_PER_SEC; +#endif + printf(" Inserted %zu documents in %.3f seconds (%.0f docs/sec)\n", + TOTAL_DOCS, insert_time, TOTAL_DOCS / insert_time); + + // Flush and optimize + zvec_collection_flush(collection); + zvec_collection_optimize(collection); + + // Test query performance + float query_vec[128]; + for (int i = 0; i < 128; i++) { + query_vec[i] = (float)rand() / RAND_MAX; + } + + ZVecVectorQuery query = {0}; + query.field_name = (ZVecString){.data = "vec", .length = 3}; + query.query_vector = (ZVecByteArray){.data = (uint8_t *)query_vec, + 
.length = sizeof(query_vec)}; + query.topk = 10; + query.include_vector = false; + query.include_doc_id = true; + + const int QUERY_COUNT = 100; +#ifdef _POSIX_C_SOURCE + struct timeval query_start_time, query_end_time; + gettimeofday(&query_start_time, NULL); +#else + clock_t query_start_clock = clock(); +#endif + + for (int q = 0; q < QUERY_COUNT; q++) { + ZVecDoc **results = NULL; + size_t result_count = 0; + + err = + zvec_collection_query(collection, &query, &results, &result_count); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(result_count <= 10); + + zvec_docs_free(results, result_count); + } + +#ifdef _POSIX_C_SOURCE + gettimeofday(&query_end_time, NULL); + double query_time = (query_end_time.tv_sec - query_start_time.tv_sec) + + (query_end_time.tv_usec - query_start_time.tv_usec) / 1000000.0; +#else + clock_t query_end_clock = clock(); + double query_time = ((double)(query_end_clock - query_start_clock)) / CLOCKS_PER_SEC; +#endif + double avg_query_time = + (query_time * 1000) / QUERY_COUNT; // ms per query + printf(" Average query time: %.2f ms\n", avg_query_time); + + zvec_collection_destroy(collection); + zvec_index_params_hnsw_destroy(hnsw_params); + } + + zvec_collection_schema_destroy(schema); + } + + // Clean up + char cmd[256]; + snprintf(cmd, sizeof(cmd), "rm -rf %s", temp_dir); + system(cmd); + + TEST_END(); +} + +// ============================================================================= +// Main function +// ============================================================================= + +int main(void) { + printf("Starting comprehensive C API tests...\n\n"); + + // Clean up previous test directories + printf("Cleaning up previous test directories...\n"); + system("rm -rf /tmp/zvec_test_*"); + printf("Cleanup completed.\n\n"); + + test_version_functions(); + test_error_handling_functions(); + test_zvec_config(); + test_zvec_initialize(); + test_zvec_string_functions(); + + // Schema-related tests + test_schema_basic_operations(); + 
test_schema_edge_cases(); + test_schema_field_operations(); + test_normal_schema_creation(); + test_schema_with_indexes(); + test_schema_max_doc_count(); + + // Field-related tests + test_field_schema_functions(); + test_field_helper_functions(); + test_field_ddl_operations(); + + // Collection-related tests + test_collection_basic_operations(); + test_collection_edge_cases(); + test_collection_delete_by_filter(); + test_collection_stats(); + test_collection_stats_functions(); + test_collection_dml_functions(); + test_collection_ddl_operations(); + + // Doc-related tests + test_doc_creation(); + test_doc_primary_key(); + test_doc_functions(); + + // Index tests + test_index_params(); + test_index_params_functions(); + test_index_creation_and_management(); + + // Query tests + test_query_params_functions(); + test_actual_vector_queries(); + + // Performance tests + // test_performance_benchmarks(); + + // Utility function tests + test_utility_functions(); + + // Memory management tests + test_memory_management_functions(); + + printf("\n=== Comprehensive Test Summary ===\n"); + printf("Total tests: %d\n", test_count); + printf("Passed: %d\n", passed_count); + printf("Failed: %d\n", test_count - passed_count); + + return test_count == passed_count ? 0 : 1; +} diff --git a/tests/c_api/utils.c b/tests/c_api/utils.c new file mode 100644 index 00000000..66c932a4 --- /dev/null +++ b/tests/c_api/utils.c @@ -0,0 +1,940 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "utils.h" +#include +#include +#include +#include + +// ============================================================================= +// Internal Helper Functions +// ============================================================================= + +static char *strdup_safe(const char *str) { + if (!str) return NULL; + size_t len = strlen(str) + 1; + char *copy = (char *)malloc(len); + if (copy) { + memcpy(copy, str, len); + } + return copy; +} + +// ============================================================================= +// Schema Creation Helper Functions Implementation +// ============================================================================= + +ZVecCollectionSchema *zvec_test_create_temp_schema(void) { + // Create collection schema using C API + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + schema->max_doc_count_per_segment = 1000; + + // Create index parameters using C API + ZVecInvertIndexParams *invert_params = + zvec_index_params_invert_create(true, true); + ZVecHnswIndexParams *dense_hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_L2, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + ZVecHnswIndexParams *sparse_hnsw_params = zvec_index_params_hnsw_create( + ZVEC_METRIC_TYPE_IP, 16, 100, 50, ZVEC_QUANTIZE_TYPE_UNDEFINED); + + + // Create and add fields + ZVecFieldSchema *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_INT64, false, 0); + zvec_field_schema_set_invert_index(id_field, invert_params); + zvec_collection_schema_add_field(schema, id_field); + + // Create name field (inverted index without optimization) + ZVecInvertIndexParams *name_invert_params = + zvec_index_params_invert_create(false, false); + ZVecFieldSchema *name_field = + zvec_field_schema_create("name", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_field_schema_set_invert_index(name_field, name_invert_params); + 
zvec_collection_schema_add_field(schema, name_field); + + // Create weight field (no index) + ZVecFieldSchema *weight_field = + zvec_field_schema_create("weight", ZVEC_DATA_TYPE_FLOAT, true, 0); + zvec_collection_schema_add_field(schema, weight_field); + + // Create dense field (HNSW index) + ZVecFieldSchema *dense_field = + zvec_field_schema_create("dense", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + zvec_field_schema_set_hnsw_index(dense_field, dense_hnsw_params); + zvec_collection_schema_add_field(schema, dense_field); + + // Create sparse field (HNSW index) + ZVecFieldSchema *sparse_field = zvec_field_schema_create( + "sparse", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + zvec_field_schema_set_hnsw_index(sparse_field, sparse_hnsw_params); + zvec_collection_schema_add_field(schema, sparse_field); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_scalar_schema(void) { + // Create collection schema using C API + ZVecCollectionSchema *schema = zvec_collection_schema_create("demo"); + + // Create fields + ZVecFieldSchema *int32_field = + zvec_field_schema_create("int32", ZVEC_DATA_TYPE_INT32, false, 0); + zvec_collection_schema_add_field(schema, int32_field); + + ZVecFieldSchema *string_field = + zvec_field_schema_create("string", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_collection_schema_add_field(schema, string_field); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_normal_schema( + bool nullable, const char *name, + const ZVecInvertIndexParams *scalar_index_params, + const ZVecHnswIndexParams *vector_index_params, uint64_t max_doc_count) { + // Create collection schema using C API + ZVecCollectionSchema *schema = + zvec_collection_schema_create(name ? 
name : "demo"); + schema->max_doc_count_per_segment = max_doc_count; + + // Create scalar fields (8) + const char *scalar_names[] = {"int32", "string", "uint32", "bool", + "float", "double", "int64", "uint64"}; + ZVecDataType scalar_types[] = {ZVEC_DATA_TYPE_INT32, ZVEC_DATA_TYPE_STRING, + ZVEC_DATA_TYPE_UINT32, ZVEC_DATA_TYPE_BOOL, + ZVEC_DATA_TYPE_FLOAT, ZVEC_DATA_TYPE_DOUBLE, + ZVEC_DATA_TYPE_INT64, ZVEC_DATA_TYPE_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(scalar_names[i], scalar_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index( + field, (ZVecInvertIndexParams *)scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create array fields (8) + const char *array_names[] = {"array_int32", "array_string", "array_uint32", + "array_bool", "array_float", "array_double", + "array_int64", "array_uint64"}; + ZVecDataType array_types[] = { + ZVEC_DATA_TYPE_ARRAY_INT32, ZVEC_DATA_TYPE_ARRAY_STRING, + ZVEC_DATA_TYPE_ARRAY_UINT32, ZVEC_DATA_TYPE_ARRAY_BOOL, + ZVEC_DATA_TYPE_ARRAY_FLOAT, ZVEC_DATA_TYPE_ARRAY_DOUBLE, + ZVEC_DATA_TYPE_ARRAY_INT64, ZVEC_DATA_TYPE_ARRAY_UINT64}; + + for (int i = 0; i < 8; i++) { + ZVecFieldSchema *field = + zvec_field_schema_create(array_names[i], array_types[i], nullable, 0); + if (scalar_index_params) { + zvec_field_schema_set_invert_index( + field, (ZVecInvertIndexParams *)scalar_index_params); + } + zvec_collection_schema_add_field(schema, field); + } + + // Create vector fields (5) + // dense vectors + ZVecFieldSchema *dense_fp32 = zvec_field_schema_create( + "dense_fp32", ZVEC_DATA_TYPE_VECTOR_FP32, false, 128); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index( + dense_fp32, (ZVecHnswIndexParams *)vector_index_params); + } + zvec_collection_schema_add_field(schema, dense_fp32); + + ZVecFieldSchema *dense_fp16 = zvec_field_schema_create( + "dense_fp16", ZVEC_DATA_TYPE_VECTOR_FP16, false, 128); + 
ZVecFlatIndexParams *flat_params1 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_fp16, flat_params1); + zvec_collection_schema_add_field(schema, dense_fp16); + + ZVecFieldSchema *dense_int8 = zvec_field_schema_create( + "dense_int8", ZVEC_DATA_TYPE_VECTOR_INT8, false, 128); + ZVecFlatIndexParams *flat_params2 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(dense_int8, flat_params2); + zvec_collection_schema_add_field(schema, dense_int8); + + // sparse vectors + ZVecFieldSchema *sparse_fp32 = zvec_field_schema_create( + "sparse_fp32", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32, false, 0); + if (vector_index_params) { + zvec_field_schema_set_hnsw_index( + sparse_fp32, (ZVecHnswIndexParams *)vector_index_params); + } + zvec_collection_schema_add_field(schema, sparse_fp32); + + ZVecFieldSchema *sparse_fp16 = zvec_field_schema_create( + "sparse_fp16", ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16, false, 0); + ZVecFlatIndexParams *flat_params3 = zvec_index_params_flat_create( + ZVEC_METRIC_TYPE_L2, ZVEC_QUANTIZE_TYPE_UNDEFINED); + zvec_field_schema_set_flat_index(sparse_fp16, flat_params3); + zvec_collection_schema_add_field(schema, sparse_fp16); + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index( + bool nullable, bool enable_optimize, const char *name) { + ZVecInvertIndexParams *invert_params = + zvec_test_create_default_invert_params(enable_optimize); + ZVecCollectionSchema *schema = + zvec_test_create_normal_schema(nullable, name, invert_params, NULL, 1000); + free(invert_params); + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_vector_index( + bool nullable, const char *name, + const ZVecHnswIndexParams *vector_index_params) { + ZVecHnswIndexParams *default_params = NULL; + if (!vector_index_params) { + default_params = zvec_test_create_default_hnsw_params(); + } + + 
ZVecCollectionSchema *schema = zvec_test_create_normal_schema( + nullable, name, NULL, + vector_index_params ? vector_index_params : default_params, 1000); + + if (default_params) { + free(default_params); + } + + return schema; +} + +ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count( + uint64_t doc_count) { + return zvec_test_create_normal_schema(false, "demo", NULL, NULL, doc_count); +} + +// ============================================================================= +// Document Creation Helper Functions Implementation +// ============================================================================= + +char *zvec_test_make_pk(uint64_t doc_id) { + char *pk = (char *)malloc(32); // Sufficiently large buffer + if (pk) { + snprintf(pk, 32, "pk_%llu", (unsigned long long)doc_id); + } + return pk; +} + +uint64_t zvec_test_extract_doc_id(const char *pk) { + if (!pk || strlen(pk) < 4) return 0; + return strtoull(pk + 3, NULL, 10); +} + +ZVecDoc *zvec_test_create_doc(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk) { + if (!schema) return NULL; + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Create test data for each field + for (size_t i = 0; i < schema->field_count; i++) { + // Fix type mismatch issue - remove address operator + const ZVecFieldSchema *field = schema->fields[i]; + // Remove unused variable + // ZVecErrorCode err = ZVEC_OK; + + switch (field->data_type) { + case ZVEC_DATA_TYPE_BINARY: { + char binary_str[32]; + snprintf(binary_str, sizeof(binary_str), "binary_%llu", + (unsigned long long)doc_id); + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + binary_str, strlen(binary_str)); + break; + } + case ZVEC_DATA_TYPE_BOOL: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(bool){doc_id % 10 == 0}, sizeof(bool)); + break; + } + case ZVEC_DATA_TYPE_INT32: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(int32_t){(int32_t)doc_id}, + sizeof(int32_t)); + break; + } + case ZVEC_DATA_TYPE_INT64: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(int64_t){(int64_t)doc_id}, + sizeof(int64_t)); + break; + } + case ZVEC_DATA_TYPE_UINT32: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(uint32_t){(uint32_t)doc_id}, + sizeof(uint32_t)); + break; + } + case ZVEC_DATA_TYPE_UINT64: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(uint64_t){(uint64_t)doc_id}, + sizeof(uint64_t)); + break; + } + case ZVEC_DATA_TYPE_FLOAT: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(float){(float)doc_id}, sizeof(float)); + break; + } + case ZVEC_DATA_TYPE_DOUBLE: { + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + &(double){(double)doc_id}, sizeof(double)); + break; + } + case ZVEC_DATA_TYPE_STRING: { + char string_val[64]; + snprintf(string_val, sizeof(string_val), "value_%llu", + (unsigned long long)doc_id); + 
zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + string_val, strlen(string_val)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_BOOL: { + bool bool_array[10]; + for (int j = 0; j < 10; j++) { + bool_array[j] = (doc_id + j) % 2 == 0; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + bool_array, sizeof(bool_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT32: { + int32_t int32_array[10]; + for (int j = 0; j < 10; j++) { + int32_array[j] = (int32_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + int32_array, sizeof(int32_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_INT64: { + int64_t int64_array[10]; + for (int j = 0; j < 10; j++) { + int64_array[j] = (int64_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + int64_array, sizeof(int64_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT32: { + uint32_t uint32_array[10]; + for (int j = 0; j < 10; j++) { + uint32_array[j] = (uint32_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + uint32_array, sizeof(uint32_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_UINT64: { + uint64_t uint64_array[10]; + for (int j = 0; j < 10; j++) { + uint64_array[j] = (uint64_t)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + uint64_array, sizeof(uint64_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_FLOAT: { + float float_array[10]; + for (int j = 0; j < 10; j++) { + float_array[j] = (float)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + float_array, sizeof(float_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_DOUBLE: { + double double_array[10]; + for (int j = 0; j < 10; j++) { + double_array[j] = (double)doc_id; + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + double_array, sizeof(double_array)); + break; + } + case ZVEC_DATA_TYPE_ARRAY_STRING: { + // String arrays 
need special handling + char string_data[256]; + size_t offset = 0; + for (int j = 0; j < 10; j++) { + char temp_str[32]; + snprintf(temp_str, sizeof(temp_str), "value_%llu_%d", + (unsigned long long)doc_id, j); + size_t len = strlen(temp_str); + if (offset + len + 1 < sizeof(string_data)) { + strcpy(string_data + offset, temp_str); + offset += len + 1; + } + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + string_data, offset); + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY32: { + uint32_t *vector_data = + (uint32_t *)malloc(field->dimension * sizeof(uint32_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (uint32_t)(doc_id + j); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(uint32_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_BINARY64: { + uint64_t *vector_data = + (uint64_t *)malloc(field->dimension * sizeof(uint64_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (uint64_t)(doc_id + j); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(uint64_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + double *vector_data = + (double *)malloc(field->dimension * sizeof(double)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (double)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, 
field->data_type, + vector_data, + field->dimension * sizeof(double)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + // FP16 needs special handling, simplified to FP32 here + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + int8_t *vector_data = + (int8_t *)malloc(field->dimension * sizeof(int8_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int8_t)((doc_id + j) % 256); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(int8_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + int16_t *vector_data = + (int16_t *)malloc(field->dimension * sizeof(int16_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int16_t)((doc_id + j) % 65536); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + vector_data, + field->dimension * sizeof(int16_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + // Sparse vectors need special handling + uint32_t nnz = field->dimension > 0 + ? 
field->dimension / 10 + : 10; // Number of non-zero elements + size_t sparse_size = + sizeof(uint32_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + void *sparse_data = malloc(sparse_size); + if (sparse_data) { + uint32_t *data_ptr = (uint32_t *)sparse_data; + *data_ptr = nnz; // Set number of non-zero elements + uint32_t *indices = data_ptr + 1; + float *values = (float *)(indices + nnz); + for (uint32_t j = 0; j < nnz; j++) { + indices[j] = j * 10; // Index + values[j] = (float)(doc_id + j * 0.1); // Value + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + sparse_data, sparse_size); + free(sparse_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: { + // Sparse FP16 vectors, simplified handling + uint32_t nnz = field->dimension > 0 ? field->dimension / 10 : 10; + size_t sparse_size = + sizeof(uint32_t) + + nnz * (sizeof(uint32_t) + + sizeof(float)); // Still use float for storage + void *sparse_data = malloc(sparse_size); + if (sparse_data) { + uint32_t *data_ptr = (uint32_t *)sparse_data; + *data_ptr = nnz; + uint32_t *indices = data_ptr + 1; + float *values = (float *)(indices + nnz); + for (uint32_t j = 0; j < nnz; j++) { + indices[j] = j * 10; + values[j] = (float)(doc_id + j * 0.1); + } + zvec_doc_add_field_by_value(doc, field->name->data, field->data_type, + sparse_data, sparse_size); + free(sparse_data); + } + break; + } + + default: + // Unsupported data type + break; + } + + // Remove reference to removed variable err + /* + if (err != ZVEC_OK) { + // Error handling: continue processing other fields + } + */ + } + + return doc; +} + +ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id, + const ZVecCollectionSchema *schema, + const char *pk) { + // Reuse create_doc function, but only process vector fields + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Only create data for vector fields + for (size_t i = 0; i < schema->field_count; i++) { + const ZVecFieldSchema *field = schema->fields[i]; + + // Only process specific vector type fields + if (field->data_type != ZVEC_DATA_TYPE_VECTOR_FP32 && + field->data_type != ZVEC_DATA_TYPE_VECTOR_FP16 && + field->data_type != ZVEC_DATA_TYPE_VECTOR_INT8 && + field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32 && + field->data_type != ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16) { + continue; + } + + ZVecErrorCode err = ZVEC_OK; + + switch (field->data_type) { + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP64: { + double *vector_data = + (double *)malloc(field->dimension * sizeof(double)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (double)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(double)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_FP16: { + float *vector_data = (float *)malloc(field->dimension * sizeof(float)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(float)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT8: { + int8_t *vector_data = + (int8_t *)malloc(field->dimension * 
sizeof(int8_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int8_t)(doc_id % 128); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(int8_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_VECTOR_INT16: { + int16_t *vector_data = + (int16_t *)malloc(field->dimension * sizeof(int16_t)); + if (vector_data) { + for (uint32_t j = 0; j < field->dimension; j++) { + vector_data[j] = (int16_t)(doc_id % 32768); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, vector_data, + field->dimension * sizeof(int16_t)); + free(vector_data); + } + break; + } + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP16: + case ZVEC_DATA_TYPE_SPARSE_VECTOR_FP32: { + const size_t nnz = 100; + size_t sparse_size = + sizeof(size_t) + nnz * (sizeof(uint32_t) + sizeof(float)); + char *sparse_data = (char *)malloc(sparse_size); + if (sparse_data) { + char *ptr = sparse_data; + *((size_t *)ptr) = nnz; + ptr += sizeof(size_t); + + for (size_t j = 0; j < nnz; j++) { + *((uint32_t *)ptr) = (uint32_t)j; + ptr += sizeof(uint32_t); + *((float *)ptr) = (float)(doc_id + j * 0.1); + ptr += sizeof(float); + } + err = zvec_doc_add_field_by_value(doc, field->name->data, + field->data_type, sparse_data, + sparse_size); + free(sparse_data); + } + break; + } + default: + break; + } + + + if (err != ZVEC_OK) { + zvec_doc_destroy(doc); + return NULL; + } + } + + return doc; +} + +ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id, + const char **field_names, + const ZVecDataType *field_types, + size_t field_count, const char *pk) { + ZVecDoc *doc = zvec_doc_create(); + if (!doc) return NULL; + + // Set primary key + char *primary_key = pk ? 
strdup_safe(pk) : zvec_test_make_pk(doc_id); + if (primary_key) { + zvec_doc_set_pk(doc, primary_key); + free(primary_key); + } + + // Create data for specified fields + for (size_t i = 0; i < field_count; i++) { + ZVecErrorCode err = ZVEC_OK; + + switch (field_types[i]) { + case ZVEC_DATA_TYPE_INT32: + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + &(int32_t){(int32_t)doc_id}, + sizeof(int32_t)); + break; + case ZVEC_DATA_TYPE_STRING: { + char string_val[64]; + snprintf(string_val, sizeof(string_val), "value_%llu", + (unsigned long long)doc_id); + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + string_val, strlen(string_val)); + break; + } + case ZVEC_DATA_TYPE_FLOAT: + err = + zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + &(float){(float)doc_id}, sizeof(float)); + break; + case ZVEC_DATA_TYPE_VECTOR_FP32: { + float vector_data[128]; + for (int j = 0; j < 128; j++) { + vector_data[j] = (float)(doc_id + j * 0.1); + } + err = zvec_doc_add_field_by_value(doc, field_names[i], field_types[i], + vector_data, sizeof(vector_data)); + break; + } + default: + // Other types can be added here + break; + } + + if (err != ZVEC_OK) { + zvec_doc_destroy(doc); + return NULL; + } + } + + return doc; +} + +// ============================================================================= +// Index Parameter Creation Helper Functions Implementation +// ============================================================================= + +ZVecHnswIndexParams *zvec_test_create_default_hnsw_params(void) { + ZVecHnswIndexParams *params = + (ZVecHnswIndexParams *)malloc(sizeof(ZVecHnswIndexParams)); + if (!params) return NULL; + + params->base.base.index_type = ZVEC_INDEX_TYPE_HNSW; + params->base.metric_type = ZVEC_METRIC_TYPE_IP; + params->base.quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + params->m = 16; + params->ef_construction = 100; + + return params; +} + +ZVecFlatIndexParams 
*zvec_test_create_default_flat_params(void) { + ZVecFlatIndexParams *params = + (ZVecFlatIndexParams *)malloc(sizeof(ZVecFlatIndexParams)); + if (!params) return NULL; + + params->base.base.index_type = ZVEC_INDEX_TYPE_FLAT; + params->base.metric_type = ZVEC_METRIC_TYPE_IP; + params->base.quantize_type = ZVEC_QUANTIZE_TYPE_UNDEFINED; + + return params; +} + +ZVecInvertIndexParams *zvec_test_create_default_invert_params( + bool enable_optimize) { + ZVecInvertIndexParams *params = + (ZVecInvertIndexParams *)malloc(sizeof(ZVecInvertIndexParams)); + if (!params) return NULL; + + params->base.index_type = ZVEC_INDEX_TYPE_INVERT; + params->enable_range_optimization = enable_optimize; + params->enable_extended_wildcard = enable_optimize; + + return params; +} + +// ============================================================================= +// Field Schema Creation Helper Functions Implementation +// ============================================================================= + +ZVecFieldSchema *zvec_test_create_scalar_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecInvertIndexParams *invert_params) { + ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + if (!field) return NULL; + + field->name = (ZVecString *)malloc(sizeof(ZVecString)); + if (!field->name) { + free(field); + return NULL; + } + // Fix const qualifier issue - create string copy + field->name->data = name ? strdup(name) : NULL; + field->name->length = name ? strlen(name) : 0; + field->name->capacity = name ? strlen(name) + 1 : 0; + field->data_type = data_type; + field->nullable = nullable; + field->dimension = 0; + field->index_params = invert_params ? 
(ZVecIndexParams *)invert_params : NULL; + + return field; +} + +ZVecFieldSchema *zvec_test_create_vector_field( + const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable, + const ZVecHnswIndexParams *vector_index_params) { + ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + if (!field) return NULL; + + field->name = (ZVecString *)malloc(sizeof(ZVecString)); + if (!field->name) { + free(field); + return NULL; + } + // Fix const qualifier issue - create string copy + field->name->data = name ? strdup(name) : NULL; + field->name->length = name ? strlen(name) : 0; + field->name->capacity = name ? strlen(name) + 1 : 0; + field->data_type = data_type; + field->nullable = nullable; + field->dimension = dimension; + field->index_params = + vector_index_params ? (ZVecIndexParams *)vector_index_params : NULL; + + return field; +} + +ZVecFieldSchema *zvec_test_create_sparse_vector_field( + const char *name, ZVecDataType data_type, bool nullable, + const ZVecHnswIndexParams *vector_index_params) { + ZVecFieldSchema *field = (ZVecFieldSchema *)malloc(sizeof(ZVecFieldSchema)); + if (!field) return NULL; + + field->name = (ZVecString *)malloc(sizeof(ZVecString)); + if (!field->name) { + free(field); + return NULL; + } + // Fix const qualifier issue - create string copy + field->name->data = name ? strdup(name) : NULL; + field->name->length = name ? strlen(name) : 0; + field->name->capacity = name ? strlen(name) + 1 : 0; + field->data_type = data_type; + field->nullable = nullable; + field->dimension = 0; // Sparse vectors don't need fixed dimension + field->index_params = + vector_index_params ? 
(ZVecIndexParams *)vector_index_params : NULL; + + return field; +} + +// ============================================================================= +// Memory Management Helper Functions Implementation +// ============================================================================= + +void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count) { + if (!fields) return; + + for (size_t i = 0; i < count; i++) { + if (fields[i].name) { + // Free string memory allocated by strdup + if (fields[i].name->data) { + free(fields[i].name->data); + } + free(fields[i].name); + } + // Free index parameter memory + if (fields[i].index_params) { + zvec_index_params_destroy(fields[i].index_params); + free(fields[i].index_params); + } + } + free(fields); +} + +void zvec_test_free_strings(char **strings, size_t count) { + if (!strings) return; + + for (size_t i = 0; i < count; i++) { + if (strings[i]) { + free(strings[i]); + } + } + + free(strings); +} + +// ============================================================================= +// File System Helper Functions Implementation +// ============================================================================= + +/** + * @brief Delete directory and all its contents (wrapper function) + * + * @param dir_path Directory path + * @return int 0 for success, -1 for failure + */ +int zvec_test_delete_dir(const char *dir_path) { + if (!dir_path) { + return -1; + } + +#ifdef _WIN32 + // Windows platform implementation + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "rd /s /q \"%s\" >nul 2>&1", dir_path); + int result = system(cmd); + return (result == 0) ? 0 : -1; +#else + // Unix/Linux/macOS platform implementation + char cmd[1024]; + snprintf(cmd, sizeof(cmd), "rm -rf \"%s\" 2>/dev/null", dir_path); + int result = system(cmd); + return (result == 0) ? 
0 : -1;
+#endif
+}
diff --git a/tests/c_api/utils.h b/tests/c_api/utils.h
new file mode 100644
index 00000000..63e5e314
--- /dev/null
+++ b/tests/c_api/utils.h
@@ -0,0 +1,260 @@
+// Copyright 2025-present the zvec project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ZVEC_TESTS_C_API_UTILS_H
+#define ZVEC_TESTS_C_API_UTILS_H
+
+#include <stdbool.h>  // bool
+#include <stddef.h>   // size_t
+#include <stdint.h>   // uint32_t, uint64_t
+#include "zvec/c_api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =============================================================================
+// Schema Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Create temporary test schema
+ * Contains basic scalar fields and vector fields
+ *
+ * @return ZVecCollectionSchema* Created schema pointer, needs to be released by
+ * calling zvec_collection_schema_cleanup
+ */
+ZVecCollectionSchema *zvec_test_create_temp_schema(void);
+
+/**
+ * @brief Create pure scalar schema
+ * Contains only scalar fields (int32, string)
+ *
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_scalar_schema(void);
+
+/**
+ * @brief Create full-featured schema
+ * Contains all supported data type fields
+ *
+ * @param nullable Whether to allow null values
+ * @param name Schema name
+ * @param scalar_index_params Scalar index parameters (can be NULL)
+ * @param vector_index_params Vector index parameters (can be NULL)
+ * @param max_doc_count Maximum documents per segment
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_normal_schema(
+    bool nullable, const char *name,
+    const ZVecInvertIndexParams *scalar_index_params,
+    const ZVecHnswIndexParams *vector_index_params, uint64_t max_doc_count);
+
+/**
+ * @brief Create schema with scalar index
+ *
+ * @param nullable Whether to allow null values
+ * @param enable_optimize Whether to enable optimization
+ * @param name Schema name
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_schema_with_scalar_index(
+    bool nullable, bool enable_optimize, const char *name);
+
+/**
+ * @brief Create schema with vector index
+ *
+ * @param nullable Whether to allow null values
+ * @param name Schema name
+ * @param vector_index_params Vector index parameters (can be NULL, uses default
+ * HNSW parameters)
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_schema_with_vector_index(
+    bool nullable, const char *name,
+    const ZVecHnswIndexParams *vector_index_params);
+
+/**
+ * @brief Create schema with specified maximum document count
+ *
+ * @param doc_count Maximum documents per segment
+ * @return ZVecCollectionSchema* Created schema pointer
+ */
+ZVecCollectionSchema *zvec_test_create_schema_with_max_doc_count(
+    uint64_t doc_count);
+
+// =============================================================================
+// Document Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Generate primary key based on document ID
+ *
+ * @param doc_id Document ID
+ * @return char* Generated primary key string, needs to be released by calling
+ * free()
+ */
+char *zvec_test_make_pk(uint64_t doc_id);
+
+/**
+ * @brief Create complete document
+ * Create corresponding test data for each field according to schema
+ *
+ * @param doc_id Document ID
+ * @param schema Schema pointer
+ * @param pk Primary key (can be NULL, auto-generated)
+ * @return ZVecDoc* Created document pointer, needs to be released by calling
+ * zvec_doc_destroy
+ */
+ZVecDoc *zvec_test_create_doc(uint64_t doc_id,
+                              const ZVecCollectionSchema *schema,
+                              const char *pk);
+
+/**
+ * @brief Create partial null document
+ * Only set values for vector fields, keep scalar fields as null
+ *
+ * @param doc_id Document ID
+ * @param schema Schema pointer
+ * @param pk Primary key (can be NULL, auto-generated)
+ * @return ZVecDoc* Created document pointer
+ */
+ZVecDoc *zvec_test_create_doc_null(uint64_t doc_id,
+                                   const ZVecCollectionSchema *schema,
+                                   const char *pk);
+
+/**
+ * @brief Create document with specified fields
+ * Only create data for specified fields
+ *
+ * @param doc_id Document ID
+ * @param field_names Field name array
+ * @param field_types Field type array
+ * @param field_count Number of fields
+ * @param pk Primary key (can be NULL, auto-generated)
+ * @return ZVecDoc* Created document pointer
+ */
+ZVecDoc *zvec_test_create_doc_with_fields(uint64_t doc_id,
+                                          const char **field_names,
+                                          const ZVecDataType *field_types,
+                                          size_t field_count, const char *pk);
+
+// =============================================================================
+// Index Parameter Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Create default HNSW index parameters
+ *
+ * @return ZVecHnswIndexParams* Created parameter pointer, needs to be released
+ * by calling free()
+ */
+ZVecHnswIndexParams *zvec_test_create_default_hnsw_params(void);
+
+/**
+ * @brief Create default Flat index parameters
+ *
+ * @return ZVecFlatIndexParams* Created parameter pointer, needs to be released
+ * by calling free()
+ */
+ZVecFlatIndexParams *zvec_test_create_default_flat_params(void);
+
+/**
+ * @brief Create default scalar index parameters
+ *
+ * @param enable_optimize Whether to enable optimization
+ * @return ZVecInvertIndexParams* Created parameter pointer, needs to be
+ * released by calling free()
+ */
+ZVecInvertIndexParams *zvec_test_create_default_invert_params(
+    bool enable_optimize);
+
+// =============================================================================
+// Field Schema Creation Helper Functions
+// =============================================================================
+
+/**
+ * @brief Create scalar field schema
+ *
+ * @param name Field name
+ * @param data_type Data type
+ * @param nullable Whether to allow null values
+ * @param invert_params Scalar index parameters (can be NULL)
+ * @return ZVecFieldSchema* Created field schema pointer, needs to be released
+ * by calling free()
+ */
+ZVecFieldSchema *zvec_test_create_scalar_field(
+    const char *name, ZVecDataType data_type, bool nullable,
+    const ZVecInvertIndexParams *invert_params);
+
+/**
+ * @brief Create vector field schema
+ *
+ * @param name Field name
+ * @param data_type Data type
+ * @param dimension Vector dimension
+ * @param nullable Whether to allow null values
+ * @param vector_index_params Vector index parameters (can be NULL)
+ * @return ZVecFieldSchema* Created field schema pointer
+ */
+ZVecFieldSchema *zvec_test_create_vector_field(
+    const char *name, ZVecDataType data_type, uint32_t dimension, bool nullable,
+    const ZVecHnswIndexParams *vector_index_params);
+
+/**
+ * @brief Create sparse vector field schema
+ *
+ * @param name Field name
+ * @param data_type Data type
+ * @param nullable Whether to allow null values
+ * @param vector_index_params Vector index parameters (can be NULL)
+ * @return ZVecFieldSchema* Created field schema pointer
+ */
+ZVecFieldSchema *zvec_test_create_sparse_vector_field(
+    const char *name, ZVecDataType data_type, bool nullable,
+    const ZVecHnswIndexParams *vector_index_params);
+
+// =============================================================================
+// Memory Management Helper Functions
+// =============================================================================
+
+/**
+ * @brief Free field schema array
+ *
+ * @param fields Field array pointer
+ * @param count Number of fields
+ */
+void zvec_test_free_field_schemas(ZVecFieldSchema *fields, size_t count);
+
+/**
+ * @brief Free string array
+ *
+ * @param strings String array pointer
+ * @param count Number of strings
+ */
+void zvec_test_free_strings(char **strings, size_t count);
+
+/**
+ * @brief Delete directory and all its contents
+ *
+ * @param dir_path Directory path
+ * @return int 0 for success, -1 for failure
+ */
+int zvec_test_delete_dir(const char *dir_path);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // ZVEC_TESTS_C_API_UTILS_H
\ No newline at end of file