diff --git "a/Master RAG with LangChain A Practical Guide/RAG_using_LangChain_A_complete_Hands_On\302\240Tutorial_Blog_Code.ipynb" "b/Master RAG with LangChain A Practical Guide/RAG_using_LangChain_A_complete_Hands_On\302\240Tutorial_Blog_Code.ipynb" new file mode 100644 index 0000000..2461100 --- /dev/null +++ "b/Master RAG with LangChain A Practical Guide/RAG_using_LangChain_A_complete_Hands_On\302\240Tutorial_Blog_Code.ipynb" @@ -0,0 +1,1038 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j8FUEDqezdn8", + "outputId": "fac20372-097e-4014-f7de-73c7f27ba513" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m308.5/308.5 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m320.6/320.6 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.8/122.8 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.0/53.0 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m6.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m142.5/142.5 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ], + "source": [ + "!pip install -qU langchain-openai" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -qU langchain" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jVDW0k_o3YVe", + "outputId": "2642377d-d783-4121-95ec-a5ad6a5e757b" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/973.5 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m204.8/973.5 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m573.4/973.5 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m \u001b[32m972.8/973.5 kB\u001b[0m \u001b[31m10.0 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m973.5/973.5 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -qU langchain_chroma" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hLoFSyaM3nyx", + "outputId": "9c2ed32a-3d96-409f-9495-630a29db207d" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.8/526.8 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.0/92.0 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.3/41.3 kB\u001b[0m \u001b[31m5.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m38.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.1/60.1 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.1/106.1 kB\u001b[0m \u001b[31m15.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m283.7/283.7 kB\u001b[0m \u001b[31m29.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m52.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.6/67.6 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.6/53.6 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m37.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m47.2/47.2 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m341.4/341.4 kB\u001b[0m \u001b[31m37.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.4/3.4 MB\u001b[0m \u001b[31m62.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m69.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m130.2/130.2 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Building wheel for pypika (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "spacy 3.7.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\n", + "weasel 0.3.4 requires typer<0.10.0,>=0.3.0, but you have typer 0.12.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -qU langchain_community" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0ktoJO-6344a", + "outputId": "091088be-a115-4183-ffc8-604349c4a8d0" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m13.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.3/49.3 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install langchainhub" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "collapsed": true, + "id": "2yPL-bwt4MXp", + "outputId": "21c3760b-604a-4e63-833a-18fbfc522dbf" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting langchainhub\n", + " Downloading langchainhub-0.1.16-py3-none-any.whl (4.8 kB)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchainhub) (2.31.0)\n", + "Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)\n", + " Downloading types_requests-2.32.0.20240523-py3-none-any.whl (15 kB)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchainhub) (2024.2.2)\n", + "Installing collected packages: types-requests, langchainhub\n", + "Successfully installed langchainhub-0.1.16 types-requests-2.32.0.20240523\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\")" + ], + "metadata": { + "id": "iXDmCsWr2xsf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "ee4c26a7-99fd-484a-a08a-5b9e114076f4" + }, + "execution_count": 6, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "··········\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import bs4\n", + "from langchain import hub\n", + "from langchain_chroma import Chroma\n", + "from langchain_community.document_loaders import WebBaseLoader\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "from langchain_openai import OpenAIEmbeddings\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "\n", + "# Load, chunk and index the contents of the blog.\n", + "loader = WebBaseLoader(\n", + " web_paths=(\"https://lilianweng.github.io/posts/2017-06-21-overview/\",),\n", + " bs_kwargs=dict(\n", + " parse_only=bs4.SoupStrainer(\n", + " class_=(\"post-content\", \"post-title\", \"post-header\")\n", + " )\n", + " ),\n", + ")\n", + "docs = loader.load()\n", + "\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", + "splits = text_splitter.split_documents(docs)\n", + "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", + "\n", + "# Retrieve and generate using the relevant snippets of the blog.\n", + "retriever = vectorstore.as_retriever()\n", + "prompt = hub.pull(\"rlm/rag-prompt\")\n", + "\n", + "\n", + "def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)\n", + "\n", + "\n", + "rag_chain = (\n", + " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "rag_chain.invoke(\"What is Convolutional Neural Networks?\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "id": "QGfbGM3o24wV", + "outputId": "b5d58d10-84d5-492c-e3f3-dbfdc689a32a" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'Convolutional Neural Networks, or CNNs, are a type of feed-forward artificial neural networks inspired by the visual cortex system. They are designed for tasks like edge detection and object recognition through feature extraction and processing. The convolutional and pooling layers in CNNs perform similar functions to the visual cortex units V1, V2, and V4.'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 7 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Using your own documents" + ], + "metadata": { + "id": "7D5KPIxkC6yZ" + } + }, + { + "cell_type": "code", + "source": [ + "!pip install -q pypdf" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b_fEOzn93VRX", + "outputId": "4c55f277-5dcb-4565-f637-63c7156abd38" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/290.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m174.1/290.4 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m290.4/290.4 kB\u001b[0m \u001b[31m4.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install -q pymupdf" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9xWq5ZLpHJ9c", + "outputId": "4358a7a1-8558-4c0d-96c9-67c38c335072" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.5/3.5 MB\u001b[0m \u001b[31m15.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m15.8/15.8 MB\u001b[0m \u001b[31m50.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from langchain_community.document_loaders import PyMuPDFLoader\n", + "import bs4\n", + "from langchain import hub\n", + "from langchain_chroma import Chroma\n", + "from langchain_community.document_loaders import WebBaseLoader\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "from langchain_openai import OpenAIEmbeddings\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter" + ], + "metadata": { + "id": "shwhctHJG50x" + }, + "execution_count": 19, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)\n" + ], + "metadata": { + "id": "ZX2v9y-TyIxJ" + }, + "execution_count": 23, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "file_path = '/content/d and f block.pdf'\n", + "loader = PyMuPDFLoader(file_path)\n", + "documents = loader.load()" + ], + "metadata": { + "id": "dJHX2p9BC9Y8" + }, + "execution_count": 10, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print(documents)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "dIK0XdMEF3OE", + "outputId": "467d9a35-ee50-4271-8c2a-7afde772f41e" + }, + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nCBSE Class-12 Chemistry Quick Revision Notes \\nChapter-08: The D and F-Block Elements \\n \\n \\n• The d -Block elements: \\na) The elements lying in the middle of periodic table belonging to groups 3 to 12 are \\nknown as d – block elements. \\nb) Their general electronic configuration is (n – 1)d1-10 ns1-2 where (n – 1) stands for \\npenultimate (last but one) shell. \\n• Transition element: \\na) A transition element is defined as the one which has incompletely filled d orbitals in \\nits ground state or in any one of its oxidation states. \\nb) Zinc, cadmium, mercury are not regarded as transition metals due to completely \\nfilled d – orbital. \\n•\\nThe f-Block elements: \\nThe elements constituting the f -block are those in which the 4 f and 5 f orbitals are \\nprogressively filled in the latter two long periods. \\n•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71) \\nare called lanthanoids. They belong to first inner transition series. Lanthanum (57) has \\nsimilar properties. Therefore, it is studied along with lanthanoids. \\n•\\nActinoids: \\nThe 14 elements immediately following actinium (89), with atomic numbers 90 \\n(Thorium) to 103 (Lawrencium) are called actinoids. They belong to second inner \\ntransition series. Actinium (89) has similar properties. Therefore, it is studied along with \\nactinoids. \\n•\\nFour transition series: \\na) 3d – transition series. The transition elements with atomic number 21(Sc) to 30(Zn) and \\nhaving incomplete 3d orbitals is called the first transition series. \\nb) 4d – transition series. It consists of elements with atomic number 39(Y) to 48 (Cd) and \\nhaving incomplete 4d orbitals. It is called second transition series. \\nc) 5d – transition series. It consists of elements with atomic number 57(La), 72(Hf) to \\n80(Hg) having incomplete 5d orbitals. It is called third transition series. \\nd) 6d – transition series. It consists of elements with atomic number 89(Ac), 104(Rf) to \\n112(Uub) having incomplete 6d orbitals. It is called fourth transition series. \\n•\\nGeneral Characteristics of transition elements: \\na) Metallic character: \\nAll transition elements are metallic in nature, i.e. they have strong metallic bonds. \\nThis is because of presence of unpaired electrons. This gives rise to properties like \\nhigh density, high enthalpies of atomization, and high melting and boiling points. \\nb) Atomic radii: \\n', metadata={'source': '/content/d and f block.pdf', 'file_path': '/content/d and f block.pdf', 'page': 0, 'total_pages': 4, 'format': 'PDF 1.4', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'author': 'Elpis', 'subject': '', 'keywords': '', 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'producer': 'GPL Ghostscript 9.05', 'creationDate': \"D:20141218110508+05'30'\", 'modDate': 'D:20171202153403', 'trapped': ''}), Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nThe atomic radii decrease from Sc to Cr because the effective nuclear charge \\nincreases. The atomic size of Fe, Co, Ni is almost same because the attraction due to \\nincrease in nuclear charge is cancelled by the repulsion because of increase in \\nshielding effect. Cu and Zn have bigger size because the shielding effect increases and \\nelectron electron repulsions repulsion increases. \\nc) Lanthanoid Contraction: \\nThe steady decrease in the atomic and ionic radii of the transition metals as the \\natomic number increases. This is because of filling of 4f orbitals before the 5d \\norbitals. This contraction is size is quite regular. This is called lanthanoid contraction. \\nIt is because of lanthanoid contraction that the atomic radii of the second row of \\ntransition elements are almost similar to those of the third row of transition \\nelements. \\nd) Ionisation enthalpy: \\nThere is slight and irregular variation in ionization energies of transition metals due \\nto irregular variation of atomic size. The I.E. of 5d transition series is higher than 3d \\nand 4d transition series because of Lanthanoid Contraction. \\ne) Oxidation state: \\nTransition metals show variable oxidation states due to tendency of (n-1)d as well as \\nns electrons to take part in bond formation. \\nf)\\n Magnetic properties: \\nMost of transition metals are paramagnetic in nature due to presence of unpaired \\nelectrons. It increase s from Sc to Cr and then decreases because number of unpaired \\nand then decrease because number of unpaired electrons increases from Sc to Cr and \\nthen decreases. \\ng) Catalytic properties: \\nMost of transition metals are used as catalyst because of (i) presence of incomplete \\nor empty d – orbitals, (ii) large surface area, (iii) varuable oxidation state, (iv) ability \\nto form complexes, e.g., Fe, Ni, V2O3, Pt, Mo, Co and used as catalyst. \\nh) Formation of coloured compounds: \\nThey form coloured ions due to presence of incompletely filled d – orbitals and \\nunpaired electrons, they can undergo d – d transition by absorbing colour from \\nvisible region and radiating complementary colour. \\ni)\\nFormation of complexes: \\nTransition metals form complexes due to (i) presence of vacant d – orbitals of \\nsuitable energy (ii) smaller size (iii) higher charge on cations. \\nj)\\nInterstitial compounds: \\nTransition metals have voids or interstitials in which C, H, N, B etc. can fit into \\nresulting in formation of interstitial compounds. They are non – stoichiometric, i.e., \\ntheir composition is not fixed, e.g., steel. They are harder and less malleable and \\nductile. \\nk) Alloys formation: \\n', metadata={'source': '/content/d and f block.pdf', 'file_path': '/content/d and f block.pdf', 'page': 1, 'total_pages': 4, 'format': 'PDF 1.4', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'author': 'Elpis', 'subject': '', 'keywords': '', 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'producer': 'GPL Ghostscript 9.05', 'creationDate': \"D:20141218110508+05'30'\", 'modDate': 'D:20171202153403', 'trapped': ''}), Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nThey form alloys due to similar ionic size. Metals can replace each other in crystal \\nlattice, e.g., brass, bronze, steel etc. \\n•\\nPreparation of Potassium dichromate (K2Cr2O7): \\nIt is prepared by fusion of chromate ore (FeCr2O4) with sodium carbonate in excess of \\nair. \\n2\\n4\\n2\\n3\\n2\\n2\\n4\\n2\\n3\\n2\\n2\\n4\\n2\\n4\\n2\\n2\\n7\\n2\\n2\\n4\\n Chromate\\n Dichromate\\n2\\n2\\n7\\n2\\n2\\n7\\n4\\n8\\n7\\n8\\n2\\n8\\n2\\n2\\n2\\nSodium\\nSodium\\nFeCr O\\nNa CO\\nO\\nNa CrO\\nFe O\\nCO\\nNa CrO\\nH SO\\nNa Cr O\\nH O\\nNa SO\\nNa Cr O\\nKCl\\nK Cr O\\nNaCl\\n+\\n+\\n→\\n+\\n+\\n+\\n→\\n+\\n+\\n+\\n→\\n+\\n \\n•\\nEffect of pH on chromate and dichromate ions: \\nThe chromates and dichromates are inter-convertible in aqueous solution depending \\nupon pH of the solution. The oxidation state of chromium in chromate and dichromate is \\nthe same. \\n2\\n2\\n4\\n2\\n7\\n2\\n2\\n2\\n2\\n7\\n4\\n2\\n2\\n2\\n2\\n2\\nCrO\\nH\\nCr O\\nH O\\nCr O\\nOH\\nCrO\\nH O\\n−\\n+\\n−\\n−\\n−\\n−\\n+\\n→\\n+\\n+\\n→\\n+\\n \\n• Potassium dichromate acts as a strong oxidizing agent in acidic medium: \\n2\\n3\\n2\\n7\\n2\\n14\\n6\\n2\\n7\\nCr O\\nH\\ne\\nCr\\nH O\\n−\\n+\\n−\\n+\\n+\\n+\\n→\\n+\\n \\n• Preparation of Potassium permanganate (KMnO4): \\na) Potassium permanganate is prepared by fusion of MnO4 with alkali metal hydroxide \\n(KOH) in presence of O2 or oxidising agent like KNO3. It produces dark green K2MnO4\\nwhich undergoes oxidation as well as reduction in neutral or acidic solution to give \\npermanganate. \\n2\\n2\\n2\\n4\\n2\\n2\\n4\\n4\\n2\\n2\\n2\\n4\\n2\\n2\\n4\\n3\\n2\\n2\\nMnO\\nKOH\\nO\\nK MnO\\nH O\\nH\\nMnO\\nMnO\\nMnO\\nH O\\n+\\n−\\n−\\n+\\n+\\n→\\n+\\n+\\n→\\n+\\n+\\n \\nb) Commercially, it is prepared by the alkaline oxidative fusion of MnO2 followed by the \\nelectrolytic oxidation of manganate (Vl). \\n2\\n3\\n with KOH in the presence of O or KNO\\n2\\n2\\n4 (\\n ions)\\nfused\\nMnO\\nMnO\\nmanganate\\n−\\n\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\n→\\n oxidation in alkaline medium\\n2\\n4\\n4 (\\n)\\nelectrolytic\\nGreen\\nMnO\\nMnO\\nPurple\\n−\\n−\\n\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\uf8e7\\n→\\n \\nc) In laboratory, Mn²+ salt can be oxidized by peroxodisulphate ion to permanganate \\nion. \\nIn acidic medium: \\n2\\n4\\n2\\n8\\n5\\n4\\nMnO\\nH\\ne\\nMn\\nH O\\n−\\n+\\n−\\n+\\n+\\n+\\n→\\n+\\n \\nIn neutral or faintly basic medium: \\n4\\n2\\n2\\n3\\n2\\n4\\nMnO\\ne\\nH O\\nMnO\\nOH\\n−\\n−\\n−\\n+\\n+\\n→\\n+\\n \\n•\\nProperties of Lanthanoids: \\na) +3 oxidation state is most common along with +2 and +4. \\nb) Except Promethium, they are non – radioactive. \\nc) The magnetic properties of lanthanoids are less complex than actinoids. \\n•\\nProperties of Actinoids: \\na) Actinoids also show higher oxidation states such as +4, +5, +6 and +7. \\nb) They are radioactive. \\n', metadata={'source': '/content/d and f block.pdf', 'file_path': '/content/d and f block.pdf', 'page': 2, 'total_pages': 4, 'format': 'PDF 1.4', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'author': 'Elpis', 'subject': '', 'keywords': '', 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'producer': 'GPL Ghostscript 9.05', 'creationDate': \"D:20141218110508+05'30'\", 'modDate': 'D:20171202153403', 'trapped': ''}), Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nc) The magnetic properties of the actinoids are more complex than those of the \\nlanthanoids. \\nd) They are more reactive. \\n• Mischmetall \\na) It is a well-known alloy which consists of a lanthanoid metal (~ 95%) and iron (~ \\n5%) and traces of S, C, Ca and Al. \\nb) A good deal of mischmetall is used in Mg-based alloy to produce bullets, shell and \\nlighter flint. \\n \\n', metadata={'source': '/content/d and f block.pdf', 'file_path': '/content/d and f block.pdf', 'page': 3, 'total_pages': 4, 'format': 'PDF 1.4', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'author': 'Elpis', 'subject': '', 'keywords': '', 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'producer': 'GPL Ghostscript 9.05', 'creationDate': \"D:20141218110508+05'30'\", 'modDate': 'D:20171202153403', 'trapped': ''})]\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(documents[0])" + ], + "metadata": { + "id": "WaC4WB4WHtVw", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "0efbde99-0012-4e29-ef0b-6729c92d07d6" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nCBSE Class-12 Chemistry Quick Revision Notes \\nChapter-08: The D and F-Block Elements \\n \\n \\n• The d -Block elements: \\na) The elements lying in the middle of periodic table belonging to groups 3 to 12 are \\nknown as d – block elements. \\nb) Their general electronic configuration is (n – 1)d1-10 ns1-2 where (n – 1) stands for \\npenultimate (last but one) shell. \\n• Transition element: \\na) A transition element is defined as the one which has incompletely filled d orbitals in \\nits ground state or in any one of its oxidation states. \\nb) Zinc, cadmium, mercury are not regarded as transition metals due to completely \\nfilled d – orbital. \\n•\\nThe f-Block elements: \\nThe elements constituting the f -block are those in which the 4 f and 5 f orbitals are \\nprogressively filled in the latter two long periods. \\n•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71) \\nare called lanthanoids. They belong to first inner transition series. Lanthanum (57) has \\nsimilar properties. Therefore, it is studied along with lanthanoids. \\n•\\nActinoids: \\nThe 14 elements immediately following actinium (89), with atomic numbers 90 \\n(Thorium) to 103 (Lawrencium) are called actinoids. They belong to second inner \\ntransition series. Actinium (89) has similar properties. Therefore, it is studied along with \\nactinoids. \\n•\\nFour transition series: \\na) 3d – transition series. The transition elements with atomic number 21(Sc) to 30(Zn) and \\nhaving incomplete 3d orbitals is called the first transition series. \\nb) 4d – transition series. It consists of elements with atomic number 39(Y) to 48 (Cd) and \\nhaving incomplete 4d orbitals. It is called second transition series. \\nc) 5d – transition series. It consists of elements with atomic number 57(La), 72(Hf) to \\n80(Hg) having incomplete 5d orbitals. It is called third transition series. \\nd) 6d – transition series. It consists of elements with atomic number 89(Ac), 104(Rf) to \\n112(Uub) having incomplete 6d orbitals. It is called fourth transition series. \\n•\\nGeneral Characteristics of transition elements: \\na) Metallic character: \\nAll transition elements are metallic in nature, i.e. they have strong metallic bonds. \\nThis is because of presence of unpaired electrons. This gives rise to properties like \\nhigh density, high enthalpies of atomization, and high melting and boiling points. \\nb) Atomic radii: \\n' metadata={'source': '/content/d and f block.pdf', 'file_path': '/content/d and f block.pdf', 'page': 0, 'total_pages': 4, 'format': 'PDF 1.4', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'author': 'Elpis', 'subject': '', 'keywords': '', 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'producer': 'GPL Ghostscript 9.05', 'creationDate': \"D:20141218110508+05'30'\", 'modDate': 'D:20171202153403', 'trapped': ''}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)\n", + "docs = text_splitter.split_documents(documents)" + ], + "metadata": { + "id": "4gSbyfYdDWsV" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from langchain_openai import OpenAIEmbeddings" + ], + "metadata": { + "id": "HGe-ESmXD_68" + }, + "execution_count": 14, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "openai_api_key = os.environ[\"OPENAI_API_KEY\"]" + ], + "metadata": { + "id": "Rmf0fkQjEKNM" + }, + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model_name = 'text-embedding-ada-002'\n", + "\n", + "embeddings = OpenAIEmbeddings(\n", + " model=model_name,\n", + " openai_api_key=openai_api_key\n", + ")" + ], + "metadata": { + "id": "qhF03tRjEDoM" + }, + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "vectorstore_2 = Chroma.from_documents(documents=docs, embedding=embeddings)" + ], + "metadata": { + "id": "8TrbYjmVESiz" + }, + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "retriever_2 = vectorstore_2.as_retriever(k=2)\n", + "prompt = hub.pull(\"rlm/rag-prompt\")" + ], + "metadata": { + "id": "5AR-6yKMEn-s" + }, + "execution_count": 21, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "rag_chain = (\n", + " {\"context\": retriever_2 | format_docs, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "rag_chain.invoke(\"What is d block\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 53 + }, + "id": "W8oYS1vKEygj", + "outputId": "26c0e654-21c6-41ac-ee62-7d971379a68d" + }, + "execution_count": 24, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'The d-block elements are those in groups 3 to 12 in the periodic table with general electronic configuration (n – 1)d1-10 ns1-2. Transition elements have incompletely filled d orbitals in their ground state or oxidation states. Lanthanoids and actinoids are inner transition series following lanthanum and actinium, respectively.'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 24 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from langchain_core.runnables import RunnableParallel" + ], + "metadata": { + "id": "6N5YoAU9yLo9" + }, + "execution_count": 25, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "rag_chain_from_docs = (\n", + " RunnablePassthrough.assign(context=(lambda x: format_docs(x[\"context\"])))\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")\n", + "\n", + "rag_chain_with_source = RunnableParallel(\n", + " {\"context\": retriever_2, \"question\": RunnablePassthrough()}\n", + ").assign(answer=rag_chain_from_docs)\n", + "\n", + "rag_chain_with_source.invoke(\"What is d-block?\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mz_8W1cDyNgg", + "outputId": "123c693e-c0cc-44eb-aef4-3d580e105f76" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'context': [Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nCBSE Class-12 Chemistry Quick Revision Notes \\nChapter-08: The D and F-Block Elements \\n \\n \\n• The d -Block elements: \\na) The elements lying in the middle of periodic table belonging to groups 3 to 12 are \\nknown as d – block elements. \\nb) Their general electronic configuration is (n – 1)d1-10 ns1-2 where (n – 1) stands for \\npenultimate (last but one) shell. \\n• Transition element: \\na) A transition element is defined as the one which has incompletely filled d orbitals in \\nits ground state or in any one of its oxidation states. \\nb) Zinc, cadmium, mercury are not regarded as transition metals due to completely \\nfilled d – orbital. \\n•\\nThe f-Block elements: \\nThe elements constituting the f -block are those in which the 4 f and 5 f orbitals are \\nprogressively filled in the latter two long periods. \\n•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71)', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='c) 5d – transition series. It consists of elements with atomic number 57(La), 72(Hf) to \\n80(Hg) having incomplete 5d orbitals. It is called third transition series. \\nd) 6d – transition series. It consists of elements with atomic number 89(Ac), 104(Rf) to \\n112(Uub) having incomplete 6d orbitals. It is called fourth transition series. \\n•\\nGeneral Characteristics of transition elements: \\na) Metallic character: \\nAll transition elements are metallic in nature, i.e. they have strong metallic bonds. \\nThis is because of presence of unpaired electrons. This gives rise to properties like \\nhigh density, high enthalpies of atomization, and high melting and boiling points. \\nb) Atomic radii:', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71) \\nare called lanthanoids. They belong to first inner transition series. Lanthanum (57) has \\nsimilar properties. Therefore, it is studied along with lanthanoids. \\n•\\nActinoids: \\nThe 14 elements immediately following actinium (89), with atomic numbers 90 \\n(Thorium) to 103 (Lawrencium) are called actinoids. They belong to second inner \\ntransition series. Actinium (89) has similar properties. Therefore, it is studied along with \\nactinoids. \\n•\\nFour transition series: \\na) 3d – transition series. The transition elements with atomic number 21(Sc) to 30(Zn) and \\nhaving incomplete 3d orbitals is called the first transition series. \\nb) 4d – transition series. It consists of elements with atomic number 39(Y) to 48 (Cd) and \\nhaving incomplete 4d orbitals. It is called second transition series. \\nc) 5d – transition series. It consists of elements with atomic number 57(La), 72(Hf) to', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='transition elements are almost similar to those of the third row of transition \\nelements. \\nd) Ionisation enthalpy: \\nThere is slight and irregular variation in ionization energies of transition metals due \\nto irregular variation of atomic size. The I.E. of 5d transition series is higher than 3d \\nand 4d transition series because of Lanthanoid Contraction. \\ne) Oxidation state: \\nTransition metals show variable oxidation states due to tendency of (n-1)d as well as \\nns electrons to take part in bond formation. \\nf)\\n Magnetic properties: \\nMost of transition metals are paramagnetic in nature due to presence of unpaired \\nelectrons. It increase s from Sc to Cr and then decreases because number of unpaired \\nand then decrease because number of unpaired electrons increases from Sc to Cr and \\nthen decreases. \\ng) Catalytic properties: \\nMost of transition metals are used as catalyst because of (i) presence of incomplete', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 1, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''})],\n", + " 'question': 'What is d-block?',\n", + " 'answer': 'The d-block elements are those lying in the middle of the periodic table belonging to groups 3 to 12 with a general electronic configuration of (n – 1)d1-10 ns1-2. Transition elements are defined as having incompletely filled d orbitals in their ground state or oxidation states. Lanthanoids and actinoids are inner transition elements that follow lanthanum and actinium, respectively.'}" + ] + }, + "metadata": {}, + "execution_count": 27 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Refine Query (Memory Alternative)" + ], + "metadata": { + "id": "zODSNWj6IG6X" + } + }, + { + "cell_type": "code", + "source": [ + "retriever_2.invoke(\"Tell me more!\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hYVA5fQYIJ-X", + "outputId": "70a025d7-846d-4c90-a614-162e3d88e38c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[Document(page_content='However, simple perceptron neurons that linearly combine the current input element and the last unit state may easily lose the long-term dependencies. For example, we start a sentence with “Alice is working at …” and later after a whole paragraph, we want to start the next sentence with “She” or “He” correctly. If the model forgets the character’s name “Alice”, we can never know. To resolve the issue, researchers created a special neuron with a much more complicated internal structure for memorizing long-term context, named “Long-short term memory (LSTM)” cell. It is smart enough to learn for how long it should memorize the old information, when to forget, when to make use of the new data, and how to combine the old memory with new input. This introduction is so well written that I recommend everyone with interest in LSTM to read it. It has been officially promoted in the Tensorflow documentation ;-)', metadata={'source': 'https://lilianweng.github.io/posts/2017-06-21-overview/'}),\n", + " Document(page_content='An Overview of Deep Learning for Curious People\\n \\nDate: June 21, 2017 | Estimated Reading Time: 12 min | Author: Lilian Weng', metadata={'source': 'https://lilianweng.github.io/posts/2017-06-21-overview/'}),\n", + " Document(page_content='Fig. 1. A three-layer artificial neural network. (Image source: http://cs231n.github.io/convolutional-networks/#conv)\\nThe reason is surprisingly simple:\\n\\nWe have a lot more data.\\nWe have much powerful computers.', metadata={'source': 'https://lilianweng.github.io/posts/2017-06-21-overview/'}),\n", + " Document(page_content='Google Scholar: http://scholar.google.com\\narXiv cs section: https://arxiv.org/list/cs/recent\\nUnsupervised Feature Learning and Deep Learning Tutorial\\nTensorflow Tutorials\\nData Science Weekly\\nKDnuggets\\nTons of blog posts and online tutorials\\nRelated Cousera courses\\nawesome-deep-learning-papers\\n\\nBlog posts mentioned#\\n\\nExplained Visually: Image Kernels\\nUnderstanding LSTM Networks\\nThe Unreasonable Effectiveness of Recurrent Neural Networks\\nComputer, respond to this email.\\n\\nInteresting blogs worthy of checking#\\n\\nwww.wildml.com\\ncolah.github.io\\nkarpathy.github.io\\nblog.openai.com', metadata={'source': 'https://lilianweng.github.io/posts/2017-06-21-overview/'})]" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from langchain.prompts import ChatPromptTemplate" + ], + "metadata": { + "id": "N5aj5wgJJ71n" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from langchain_core.messages import AIMessage, HumanMessage\n", + "chat = ChatOpenAI(model=\"gpt-3.5-turbo-1106\")\n", + "\n", + "query_transform_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " MessagesPlaceholder(variable_name=\"messages\"),\n", + " (\n", + " \"user\",\n", + " \"Given the above conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else.\",\n", + " ),\n", + " ]\n", + ")\n", + "\n", + "query_transformation_chain = query_transform_prompt | chat\n", + "\n", + "query_transformation_chain.invoke(\n", + " {\n", + " \"messages\": [\n", + " HumanMessage(content=\"What is d block?\"),\n", + " AIMessage(\n", + " content=\"The d-block elements are those in groups 3 to 12 of the periodic table with electronic configuration (n – 1)d1-10 ns1-2. Transition elements have incompletely filled d orbitals in their ground state or oxidation states. The f-block elements consist of those with 4f and 5f orbitals progressively filled in the latter long periods.\"\n", + " ),\n", + " HumanMessage(content=\"Tell me more about that!\"),\n", + " ],\n", + " }\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LDwb2a41Jcm3", + "outputId": "516c774d-653c-4d18-e75c-ebc0d5dff714" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "AIMessage(content='\"d-block elements properties and characteristics\"', response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 137, 'total_tokens': 144}, 'model_name': 'gpt-3.5-turbo-1106', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d230e697-ff6f-4c1d-ba9c-7ccc30f21e4c-0')" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.runnables import RunnableBranch\n", + "\n", + "query_transforming_retriever_chain = RunnableBranch(\n", + " (\n", + " lambda x: len(x.get(\"messages\", [])) == 1,\n", + " # If only one message, then we just pass that message's content to retriever\n", + " (lambda x: x[\"messages\"][-1].content) | retriever,\n", + " ),\n", + " # If messages, then we pass inputs to LLM chain to transform the query, then pass to retriever\n", + " query_transform_prompt | chat | StrOutputParser() | retriever,\n", + ").with_config(run_name=\"chat_retriever_chain\")" + ], + "metadata": { + "id": "G5FTV03vKdqG" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "SYSTEM_TEMPLATE = \"\"\"\n", + "Answer the user's questions based on the below context.\n", + "If the context doesn't contain any relevant information to the question, don't make something up and just say \"I don't know\":\n", + "\n", + "\n", + "{context}\n", + "\n", + "\"\"\"\n", + "\n", + "question_answering_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " SYSTEM_TEMPLATE,\n", + " ),\n", + " MessagesPlaceholder(variable_name=\"messages\"),\n", + " ]\n", + ")\n", + "\n", + "document_chain = create_stuff_documents_chain(chat, question_answering_prompt)\n", + "\n", + "conversational_retrieval_chain = RunnablePassthrough.assign(\n", + " context=query_transforming_retriever_chain,\n", + ").assign(\n", + " answer=document_chain,\n", + ")" + ], + "metadata": { + "id": "2FlxnFH-LlXa" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "conversational_retrieval_chain.invoke(\n", + " {\n", + " \"messages\": [\n", + " HumanMessage(content=\"Can d block elements differ from f block?\"),\n", + " ]\n", + " }\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "w3IDGkpYLgHW", + "outputId": "c865697b-ec43-4af6-accd-2df37fee0199" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'messages': [HumanMessage(content='Can d block elements differ from f block?')],\n", + " 'context': [Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nCBSE Class-12 Chemistry Quick Revision Notes \\nChapter-08: The D and F-Block Elements \\n \\n \\n• The d -Block elements: \\na) The elements lying in the middle of periodic table belonging to groups 3 to 12 are \\nknown as d – block elements. \\nb) Their general electronic configuration is (n – 1)d1-10 ns1-2 where (n – 1) stands for \\npenultimate (last but one) shell. \\n• Transition element: \\na) A transition element is defined as the one which has incompletely filled d orbitals in \\nits ground state or in any one of its oxidation states. \\nb) Zinc, cadmium, mercury are not regarded as transition metals due to completely \\nfilled d – orbital. \\n•\\nThe f-Block elements: \\nThe elements constituting the f -block are those in which the 4 f and 5 f orbitals are \\nprogressively filled in the latter two long periods. \\n•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71)', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nCBSE Class-12 Chemistry Quick Revision Notes \\nChapter-08: The D and F-Block Elements \\n \\n \\n• The d -Block elements: \\na) The elements lying in the middle of periodic table belonging to groups 3 to 12 are \\nknown as d – block elements. \\nb) Their general electronic configuration is (n – 1)d1-10 ns1-2 where (n – 1) stands for \\npenultimate (last but one) shell. \\n• Transition element: \\na) A transition element is defined as the one which has incompletely filled d orbitals in \\nits ground state or in any one of its oxidation states. \\nb) Zinc, cadmium, mercury are not regarded as transition metals due to completely \\nfilled d – orbital. \\n•\\nThe f-Block elements: \\nThe elements constituting the f -block are those in which the 4 f and 5 f orbitals are \\nprogressively filled in the latter two long periods. \\n•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71)', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='transition elements are almost similar to those of the third row of transition \\nelements. \\nd) Ionisation enthalpy: \\nThere is slight and irregular variation in ionization energies of transition metals due \\nto irregular variation of atomic size. The I.E. of 5d transition series is higher than 3d \\nand 4d transition series because of Lanthanoid Contraction. \\ne) Oxidation state: \\nTransition metals show variable oxidation states due to tendency of (n-1)d as well as \\nns electrons to take part in bond formation. \\nf)\\n Magnetic properties: \\nMost of transition metals are paramagnetic in nature due to presence of unpaired \\nelectrons. It increase s from Sc to Cr and then decreases because number of unpaired \\nand then decrease because number of unpaired electrons increases from Sc to Cr and \\nthen decreases. \\ng) Catalytic properties: \\nMost of transition metals are used as catalyst because of (i) presence of incomplete', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 1, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='transition elements are almost similar to those of the third row of transition \\nelements. \\nd) Ionisation enthalpy: \\nThere is slight and irregular variation in ionization energies of transition metals due \\nto irregular variation of atomic size. The I.E. of 5d transition series is higher than 3d \\nand 4d transition series because of Lanthanoid Contraction. \\ne) Oxidation state: \\nTransition metals show variable oxidation states due to tendency of (n-1)d as well as \\nns electrons to take part in bond formation. \\nf)\\n Magnetic properties: \\nMost of transition metals are paramagnetic in nature due to presence of unpaired \\nelectrons. It increase s from Sc to Cr and then decreases because number of unpaired \\nand then decrease because number of unpaired electrons increases from Sc to Cr and \\nthen decreases. \\ng) Catalytic properties: \\nMost of transition metals are used as catalyst because of (i) presence of incomplete', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 1, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''})],\n", + " 'answer': 'Yes, d-block elements differ from f-block elements in terms of their electronic configurations and the orbitals they fill. The d-block elements have incompletely filled d orbitals in their ground state or in any one of their oxidation states, while the f-block elements have progressively filled 4f and 5f orbitals in the latter two long periods.'}" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ] + }, + { + "cell_type": "code", + "source": [ + "conversational_retrieval_chain.invoke(\n", + " {\n", + " \"messages\": [\n", + " HumanMessage(content=\"Can d block elements differ from f block?\"),\n", + " AIMessage(\n", + " content=\"Yes, d-block elements differ from f-block elements in terms of their electronic configurations and the orbitals they fill. The d-block elements have incompletely filled d orbitals in their ground state or in any one of their oxidation states, while the f-block elements have progressively filled 4f and 5f orbitals in the latter two long periods.\"\n", + " ),\n", + " HumanMessage(content=\"Tell me more about their difference!\"),\n", + " ],\n", + " }\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5SO1HImuMVYv", + "outputId": "4a858ed2-b039-46a6-e5f0-e988543cfeb5" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'messages': [HumanMessage(content='Can d block elements differ from f block?'),\n", + " AIMessage(content='Yes, d-block elements differ from f-block elements in terms of their electronic configurations and the orbitals they fill. The d-block elements have incompletely filled d orbitals in their ground state or in any one of their oxidation states, while the f-block elements have progressively filled 4f and 5f orbitals in the latter two long periods.'),\n", + " HumanMessage(content='Tell me more about their difference!')],\n", + " 'context': [Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nCBSE Class-12 Chemistry Quick Revision Notes \\nChapter-08: The D and F-Block Elements \\n \\n \\n• The d -Block elements: \\na) The elements lying in the middle of periodic table belonging to groups 3 to 12 are \\nknown as d – block elements. \\nb) Their general electronic configuration is (n – 1)d1-10 ns1-2 where (n – 1) stands for \\npenultimate (last but one) shell. \\n• Transition element: \\na) A transition element is defined as the one which has incompletely filled d orbitals in \\nits ground state or in any one of its oxidation states. \\nb) Zinc, cadmium, mercury are not regarded as transition metals due to completely \\nfilled d – orbital. \\n•\\nThe f-Block elements: \\nThe elements constituting the f -block are those in which the 4 f and 5 f orbitals are \\nprogressively filled in the latter two long periods. \\n•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71)', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nCBSE Class-12 Chemistry Quick Revision Notes \\nChapter-08: The D and F-Block Elements \\n \\n \\n• The d -Block elements: \\na) The elements lying in the middle of periodic table belonging to groups 3 to 12 are \\nknown as d – block elements. \\nb) Their general electronic configuration is (n – 1)d1-10 ns1-2 where (n – 1) stands for \\npenultimate (last but one) shell. \\n• Transition element: \\na) A transition element is defined as the one which has incompletely filled d orbitals in \\nits ground state or in any one of its oxidation states. \\nb) Zinc, cadmium, mercury are not regarded as transition metals due to completely \\nfilled d – orbital. \\n•\\nThe f-Block elements: \\nThe elements constituting the f -block are those in which the 4 f and 5 f orbitals are \\nprogressively filled in the latter two long periods. \\n•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71)', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='c) 5d – transition series. It consists of elements with atomic number 57(La), 72(Hf) to \\n80(Hg) having incomplete 5d orbitals. It is called third transition series. \\nd) 6d – transition series. It consists of elements with atomic number 89(Ac), 104(Rf) to \\n112(Uub) having incomplete 6d orbitals. It is called fourth transition series. \\n•\\nGeneral Characteristics of transition elements: \\na) Metallic character: \\nAll transition elements are metallic in nature, i.e. they have strong metallic bonds. \\nThis is because of presence of unpaired electrons. This gives rise to properties like \\nhigh density, high enthalpies of atomization, and high melting and boiling points. \\nb) Atomic radii:', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='c) 5d – transition series. It consists of elements with atomic number 57(La), 72(Hf) to \\n80(Hg) having incomplete 5d orbitals. It is called third transition series. \\nd) 6d – transition series. It consists of elements with atomic number 89(Ac), 104(Rf) to \\n112(Uub) having incomplete 6d orbitals. It is called fourth transition series. \\n•\\nGeneral Characteristics of transition elements: \\na) Metallic character: \\nAll transition elements are metallic in nature, i.e. they have strong metallic bonds. \\nThis is because of presence of unpaired electrons. This gives rise to properties like \\nhigh density, high enthalpies of atomization, and high melting and boiling points. \\nb) Atomic radii:', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''})],\n", + " 'answer': 'The d-block elements are known as transition elements and have general electronic configurations of (n – 1)d1-10 ns1-2, where (n – 1) stands for the penultimate (last but one) shell. These elements typically belong to groups 3 to 12 in the periodic table. On the other hand, the f-block elements consist of the lanthanoids and actinoids, where the 4f and 5f orbitals are progressively filled in the latter two long periods. Lanthanoids are the 14 elements immediately following lanthanum, from Cerium (58) to Lutetium (71), while actinoids are the 14 elements from actinium (89) to lawrencium (103). Additionally, the f-block elements also have 5d and 6d transition series with incomplete 5d and 6d orbitals, respectively.'}" + ] + }, + "metadata": {}, + "execution_count": 47 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Adding Memory" + ], + "metadata": { + "id": "_VVgNLo_T2QV" + } + }, + { + "cell_type": "code", + "source": [ + "from langchain.memory import ChatMessageHistory\n", + "from langchain.chains.combine_documents import create_stuff_documents_chain\n", + "from langchain.prompts import ChatPromptTemplate\n", + "from langchain.prompts.chat import MessagesPlaceholder" + ], + "metadata": { + "id": "Pg_Ak9xUT_Dd" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "chat = ChatOpenAI(model=\"gpt-3.5-turbo-1106\")\n", + "\n", + "question_answering_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"Answer the user's questions based on the below context:\\n\\n{context}\",\n", + " ),\n", + " MessagesPlaceholder(variable_name=\"messages\"),\n", + " ]\n", + ")\n", + "\n", + "document_chain = create_stuff_documents_chain(chat, question_answering_prompt)" + ], + "metadata": { + "id": "cM719oR-L1GK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from langchain.memory import ChatMessageHistory\n", + "\n", + "demo_ephemeral_chat_history = ChatMessageHistory()\n", + "\n", + "demo_ephemeral_chat_history.add_user_message(\"What is d block?\")\n", + "\n", + "document_chain.invoke(\n", + " {\n", + " \"messages\": demo_ephemeral_chat_history.messages,\n", + " \"context\": docs,\n", + " }\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + }, + "id": "-jNblq_PUD_T", + "outputId": "c75c406c-0506-4d07-e07a-e57860a9b866" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'The d-block elements refer to the elements in the middle of the periodic table, specifically belonging to groups 3 to 12. They have a general electronic configuration of (n – 1)d1-10 ns1-2, where (n – 1) stands for the penultimate (last but one) shell. These elements are also known as transition elements due to their incompletely filled d orbitals in their ground state or in any one of their oxidation states.'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 28 + } + ] + }, + { + "cell_type": "code", + "source": [ + "from typing import Dict\n", + "\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "\n", + "\n", + "def parse_retriever_input(params: Dict):\n", + " return params[\"messages\"][-1].content\n", + "\n", + "\n", + "retrieval_chain = RunnablePassthrough.assign(\n", + " context=parse_retriever_input | retriever,\n", + ").assign(\n", + " answer=document_chain,\n", + ")" + ], + "metadata": { + "id": "kTS0zAfFUS1M" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "response = retrieval_chain.invoke(\n", + " {\n", + " \"messages\": demo_ephemeral_chat_history.messages,\n", + " }\n", + ")\n", + "\n", + "response" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TanGNvh4Uadb", + "outputId": "4d31f382-6b48-4505-d4fc-b011f0a482be" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'messages': [HumanMessage(content='What is d block?')],\n", + " 'context': [Document(page_content='Portal for CBSE Notes, Test Papers, Sample Papers, Tips and Tricks \\nCBSE Class-12 Chemistry Quick Revision Notes \\nChapter-08: The D and F-Block Elements \\n \\n \\n• The d -Block elements: \\na) The elements lying in the middle of periodic table belonging to groups 3 to 12 are \\nknown as d – block elements. \\nb) Their general electronic configuration is (n – 1)d1-10 ns1-2 where (n – 1) stands for \\npenultimate (last but one) shell. \\n• Transition element: \\na) A transition element is defined as the one which has incompletely filled d orbitals in \\nits ground state or in any one of its oxidation states. \\nb) Zinc, cadmium, mercury are not regarded as transition metals due to completely \\nfilled d – orbital. \\n•\\nThe f-Block elements: \\nThe elements constituting the f -block are those in which the 4 f and 5 f orbitals are \\nprogressively filled in the latter two long periods. \\n•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71)', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='c) 5d – transition series. It consists of elements with atomic number 57(La), 72(Hf) to \\n80(Hg) having incomplete 5d orbitals. It is called third transition series. \\nd) 6d – transition series. It consists of elements with atomic number 89(Ac), 104(Rf) to \\n112(Uub) having incomplete 6d orbitals. It is called fourth transition series. \\n•\\nGeneral Characteristics of transition elements: \\na) Metallic character: \\nAll transition elements are metallic in nature, i.e. they have strong metallic bonds. \\nThis is because of presence of unpaired electrons. This gives rise to properties like \\nhigh density, high enthalpies of atomization, and high melting and boiling points. \\nb) Atomic radii:', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='•\\nLanthanoids: \\nThe 14 elements immediately following lanthanum, i.e., Cerium (58) to Lutetium (71) \\nare called lanthanoids. They belong to first inner transition series. Lanthanum (57) has \\nsimilar properties. Therefore, it is studied along with lanthanoids. \\n•\\nActinoids: \\nThe 14 elements immediately following actinium (89), with atomic numbers 90 \\n(Thorium) to 103 (Lawrencium) are called actinoids. They belong to second inner \\ntransition series. Actinium (89) has similar properties. Therefore, it is studied along with \\nactinoids. \\n•\\nFour transition series: \\na) 3d – transition series. The transition elements with atomic number 21(Sc) to 30(Zn) and \\nhaving incomplete 3d orbitals is called the first transition series. \\nb) 4d – transition series. It consists of elements with atomic number 39(Y) to 48 (Cd) and \\nhaving incomplete 4d orbitals. It is called second transition series. \\nc) 5d – transition series. It consists of elements with atomic number 57(La), 72(Hf) to', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 0, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''}),\n", + " Document(page_content='transition elements are almost similar to those of the third row of transition \\nelements. \\nd) Ionisation enthalpy: \\nThere is slight and irregular variation in ionization energies of transition metals due \\nto irregular variation of atomic size. The I.E. of 5d transition series is higher than 3d \\nand 4d transition series because of Lanthanoid Contraction. \\ne) Oxidation state: \\nTransition metals show variable oxidation states due to tendency of (n-1)d as well as \\nns electrons to take part in bond formation. \\nf)\\n Magnetic properties: \\nMost of transition metals are paramagnetic in nature due to presence of unpaired \\nelectrons. It increase s from Sc to Cr and then decreases because number of unpaired \\nand then decrease because number of unpaired electrons increases from Sc to Cr and \\nthen decreases. \\ng) Catalytic properties: \\nMost of transition metals are used as catalyst because of (i) presence of incomplete', metadata={'author': 'Elpis', 'creationDate': \"D:20141218110508+05'30'\", 'creator': 'PDFCreator Version 1.5.0(Foxit Advanced PDF Editor)', 'file_path': '/content/d and f block.pdf', 'format': 'PDF 1.4', 'keywords': '', 'modDate': 'D:20171202153403', 'page': 1, 'producer': 'GPL Ghostscript 9.05', 'source': '/content/d and f block.pdf', 'subject': '', 'title': '12_chemistry_notes_ch08_the_dblock_f-block_elements', 'total_pages': 4, 'trapped': ''})],\n", + " 'answer': 'The d-block elements are the elements lying in the middle of the periodic table, belonging to groups 3 to 12. They are also known as transition elements. Their general electronic configuration is (n – 1)d1-10 ns1-2, where (n – 1) stands for the penultimate (last but one) shell. Transition elements are defined as ones that have incompletely filled d orbitals in their ground state or in any one of their oxidation states. This characteristic gives rise to their unique properties and behavior.'}" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ] + }, + { + "cell_type": "code", + "source": [ + "demo_ephemeral_chat_history.add_ai_message(response[\"answer\"])\n", + "\n", + "demo_ephemeral_chat_history.add_user_message(\"tell me more about that!\")\n", + "\n", + "retrieval_chain.invoke(\n", + " {\n", + " \"messages\": demo_ephemeral_chat_history.messages,\n", + " },\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hnFAwBUPUdgL", + "outputId": "088f0411-0a99-4bc6-e2a9-1ae2e57d990c" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'messages': [HumanMessage(content='What is d block?'),\n", + " AIMessage(content='The d-block elements are the elements lying in the middle of the periodic table, belonging to groups 3 to 12. They are also known as transition elements. Their general electronic configuration is (n – 1)d1-10 ns1-2, where (n – 1) stands for the penultimate (last but one) shell. Transition elements are defined as ones that have incompletely filled d orbitals in their ground state or in any one of their oxidation states. This characteristic gives rise to their unique properties and behavior.'),\n", + " HumanMessage(content='tell me more about that!')],\n", + " 'context': [Document(page_content='However, simple perceptron neurons that linearly combine the current input element and the last unit state may easily lose the long-term dependencies. For example, we start a sentence with “Alice is working at …” and later after a whole paragraph, we want to start the next sentence with “She” or “He” correctly. If the model forgets the character’s name “Alice”, we can never know. To resolve the issue, researchers created a special neuron with a much more complicated internal structure for memorizing long-term context, named “Long-short term memory (LSTM)” cell. It is smart enough to learn for how long it should memorize the old information, when to forget, when to make use of the new data, and how to combine the old memory with new input. This introduction is so well written that I recommend everyone with interest in LSTM to read it. It has been officially promoted in the Tensorflow documentation ;-)', metadata={'source': 'https://lilianweng.github.io/posts/2017-06-21-overview/'}),\n", + " Document(page_content='Meanwhile, many companies are spending resources on pushing the edges of AI applications, that indeed have the potential to change or even revolutionize how we are gonna live. Familiar examples include self-driving cars, chatbots, home assistant devices and many others. One of the secret receipts behind the progress we have had in recent years is deep learning.\\nWhy Does Deep Learning Work Now?#\\nDeep learning models, in simple words, are large and deep artificial neural nets. A neural network (“NN”) can be well presented in a directed acyclic graph: the input layer takes in signal vectors; one or multiple hidden layers process the outputs of the previous layer. The initial concept of a neural network can be traced back to more than half a century ago. But why does it work now? Why do people start talking about them all of a sudden?', metadata={'source': 'https://lilianweng.github.io/posts/2017-06-21-overview/'}),\n", + " Document(page_content='(The post was originated from my talk for WiMLDS x Fintech meetup hosted by Affirm.)\\nI believe many of you have watched or heard of the games between AlphaGo and professional Go player Lee Sedol in 2016. Lee has the highest rank of nine dan and many world championships. No doubt, he is one of the best Go players in the world, but he lost by 1-4 in this series versus AlphaGo. Before this, Go was considered to be an intractable game for computers to master, as its simple rules lay out an exponential number of variations in the board positions, many more than what in Chess. This event surely highlighted 2016 as a big year for AI. Because of AlphaGo, much attention has been attracted to the progress of AI.', metadata={'source': 'https://lilianweng.github.io/posts/2017-06-21-overview/'}),\n", + " Document(page_content='Fig. 1. A three-layer artificial neural network. (Image source: http://cs231n.github.io/convolutional-networks/#conv)\\nThe reason is surprisingly simple:\\n\\nWe have a lot more data.\\nWe have much powerful computers.', metadata={'source': 'https://lilianweng.github.io/posts/2017-06-21-overview/'})],\n", + " 'answer': 'Certainly! Transition elements, or d-block elements, have some distinctive characteristics due to the presence of incompletely filled d orbitals. Some of these properties include variable oxidation states, the formation of colored compounds, complex formation, catalytic activity, and magnetic behavior. \\n\\nTheir variable oxidation states allow them to form a wide variety of compounds with different stoichiometries and properties. The formation of colored compounds is often due to the d-d electronic transitions within the d orbitals. \\n\\nTransition elements also have the ability to form coordination complexes due to their ability to accept and donate electrons, leading to the formation of complex ions.\\n\\nAdditionally, many transition metals exhibit catalytic activity due to their ability to undergo redox reactions. This property makes them important in industrial processes and biological systems.\\n\\nFurthermore, some transition elements are magnetic, which is attributed to the presence of unpaired electrons in their d orbitals.\\n\\nOverall, the d-block elements exhibit a wide range of properties and play crucial roles in various industrial, biological, and environmental processes.'}" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "uvJa63vxUjv0" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file