From a407e48f5431670aef42cae45c8f85c866f79fea Mon Sep 17 00:00:00 2001 From: Disha Prakash Date: Tue, 17 Feb 2026 11:56:48 +0000 Subject: [PATCH] chore: Update vectorstore documentation --- docs/vector_store.ipynb | 92 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 5 deletions(-) diff --git a/docs/vector_store.ipynb b/docs/vector_store.ipynb index ddc5ce30..6420d40f 100644 --- a/docs/vector_store.ipynb +++ b/docs/vector_store.ipynb @@ -422,7 +422,40 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Delete texts" + "### Get documents\n", + "\n", + "Get documents from the vector store using filters and parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "documents_with_apple = await store.aget(\n", + " where_document={\"$ilike\": \"%apple%\"}, include=\"documents\"\n", + ")\n", + "paginated_ids = await store.aget(limit=3, offset=3)\n", + "\n", + "print(documents_with_apple[\"documents\"])\n", + "print(paginated_ids[\"ids\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Delete documents\n", + "\n", + "Documents can be deleted using IDs or metadata filters." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Delete by IDs" ] }, { @@ -434,6 +467,46 @@ "await store.adelete([ids[1]])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Delete by metadata filter\n", + "You can delete documents based on metadata filters. This is useful for bulk deletion operations."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete all documents with a specific metadata value\n", + "await store.adelete(filter={\"source\": \"documentation\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete documents matching complex filter criteria\n", + "await store.adelete(\n", + " filter={\"$and\": [{\"category\": \"obsolete\"}, {\"year\": {\"$lt\": 2020}}]}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Delete by both IDs and filter (must match both criteria)\n", + "await store.adelete(ids=[\"id1\", \"id2\"], filter={\"status\": \"archived\"})" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -573,6 +646,15 @@ "### Search for documents with metadata filter" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### For v0.16.0+\n", + "\n", + "Metadata filtering on the `metadata_json_column` is now supported in the `AlloyDBVectorStore`." + ] + }, { "cell_type": "code", "execution_count": null, @@ -592,9 +674,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### For v0.15.0+\n", - "\n", - "**Important Update:** Support for string filters has been deprecated. Please use dictionaries to add filters." + "**Important Update:** From v0.15.0, support for string filters has been deprecated. Please use dictionaries to add filters." ] }, { @@ -722,7 +802,9 @@ "\n", "- **`metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"]`**: These columns are treated as metadata for each product. Metadata provides additional information about a product, such as its name, category, price, quantity available, SKU (Stock Keeping Unit), and an image URL. 
This information is useful for displaying product details in search results or for filtering and categorization.\n", "\n", - "- **`metadata_json_column=\"metadata\"`**: The `metadata` column can store any additional information about the products in a flexible JSON format. This allows for storing varied and complex data that doesn't fit into the standard columns.\n" + "- **`metadata_json_column=\"metadata\"`**: The `metadata` column can store any additional information about the products in a flexible JSON format. This allows for storing varied and complex data that doesn't fit into the standard columns.\n", + "Note that filtering on fields stored only in the JSON column (i.e., not listed in `metadata_columns`) is less efficient than filtering on dedicated metadata columns.\n", + "\n" ] }, {