From a94dfad0cc4fe55f7599f02cd039a10aac3a8190 Mon Sep 17 00:00:00 2001 From: "Apoorv Verma [AP]" Date: Fri, 13 Dec 2019 14:40:46 -0800 Subject: [PATCH 1/2] Initial run through on MLMD-Cases notebook --- sdk/python/sample/MLMD-Cases.ipynb | 404 +++++++++++++++++++++++++++++ 1 file changed, 404 insertions(+) create mode 100644 sdk/python/sample/MLMD-Cases.ipynb diff --git a/sdk/python/sample/MLMD-Cases.ipynb b/sdk/python/sample/MLMD-Cases.ipynb new file mode 100644 index 000000000..fdb9856c1 --- /dev/null +++ b/sdk/python/sample/MLMD-Cases.ipynb @@ -0,0 +1,404 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "outputs": [], + "source": [ + "### Install the _Kubeflow-metadata_ library (Load prereqs)\n", + "_**Note:** Make sure to have run:_\n", + "\n", + "```bash\n", + "kubectl port-forward --namespace kubeflow $(kubectl get pod --namespace kubeflow --selector=\"component=grpc-server,kustomize.component=metadata\" --output jsonpath='{.items[0].metadata.name}') 8080:8080\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "Requirement already satisfied: kubeflow-metadata in d:\\ap-langs\\anaconda3\\lib\\site-packages (0.3.0)\nRequirement already satisfied: ml-metadata==0.15.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from kubeflow-metadata) (0.15.0)\nRequirement already satisfied: retrying in d:\\ap-langs\\anaconda3\\lib\\site-packages (from kubeflow-metadata) (1.3.3)\nRequirement already satisfied: absl-py<1,>=0.7 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from ml-metadata==0.15.0->kubeflow-metadata) (0.8.1)\nRequirement already satisfied: six<2,>=1.10 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from ml-metadata==0.15.0->kubeflow-metadata) (1.13.0)\nRequirement already satisfied: protobuf<4,>=3.7 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from ml-metadata==0.15.0->kubeflow-metadata) (3.11.1)\nRequirement already 
satisfied: tensorflow<3,>=1.15 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from ml-metadata==0.15.0->kubeflow-metadata) (2.0.0)\nRequirement already satisfied: setuptools in d:\\ap-langs\\anaconda3\\lib\\site-packages (from protobuf<4,>=3.7->ml-metadata==0.15.0->kubeflow-metadata) (42.0.1.post20191125)\nRequirement already satisfied: astor>=0.6.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.8.1)\nRequirement already satisfied: numpy<2.0,>=1.16.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.17.4)\nRequirement already satisfied: gast==0.2.2 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.2.2)\nRequirement already satisfied: keras-preprocessing>=1.0.5 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.1.0)\nRequirement already satisfied: keras-applications>=1.0.8 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.0.8)\nRequirement already satisfied: tensorflow-estimator<2.1.0,>=2.0.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.0.1)\nRequirement already satisfied: grpcio>=1.8.6 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.25.0)\nRequirement already satisfied: google-pasta>=0.1.6 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.1.8)\nRequirement already satisfied: wrapt>=1.11.1 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.11.2)\nRequirement already satisfied: wheel>=0.26 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from 
tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.33.6)\nRequirement already satisfied: opt-einsum>=2.3.2 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.1.0)\nRequirement already satisfied: tensorboard<2.1.0,>=2.0.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.0.2)\nRequirement already satisfied: termcolor>=1.1.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.1.0)\nRequirement already satisfied: h5py in d:\\ap-langs\\anaconda3\\lib\\site-packages (from keras-applications>=1.0.8->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.9.0)\nRequirement already satisfied: google-auth<2,>=1.6.3 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.8.1)\nRequirement already satisfied: markdown>=2.6.8 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.1.1)\nRequirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.4.1)\nRequirement already satisfied: werkzeug>=0.11.15 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.16.0)\nRequirement already satisfied: requests<3,>=2.21.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.22.0)\nRequirement already satisfied: pyasn1-modules>=0.2.1 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from 
google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.2.7)\nRequirement already satisfied: cachetools<3.2,>=2.0.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.1.1)\nRequirement already satisfied: rsa<4.1,>=3.1.4 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (4.0)\nRequirement already satisfied: requests-oauthlib>=0.7.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.3.0)\nRequirement already satisfied: idna<2.9,>=2.5 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.8)\nRequirement already satisfied: certifi>=2017.4.17 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2019.11.28)\nRequirement already satisfied: chardet<3.1.0,>=3.0.2 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.0.4)\nRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.24.2)\nRequirement already satisfied: pyasn1<0.5.0,>=0.4.6 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.4.8)\nRequirement already satisfied: 
oauthlib>=3.0.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.1.0)\nRequirement already satisfied: pandas in d:\\ap-langs\\anaconda3\\lib\\site-packages (0.25.3)\nRequirement already satisfied: python-dateutil>=2.6.1 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\nRequirement already satisfied: pytz>=2017.2 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from pandas) (2019.3)\nRequirement already satisfied: numpy>=1.13.3 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from pandas) (1.17.4)\nRequirement already satisfied: six>=1.5 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from python-dateutil>=2.6.1->pandas) (1.13.0)\n" + } + ], + "source": [ + "# To use the latest publish `kubeflow-metadata` library, you can run:\n", + "!pip install kubeflow-metadata --user\n", + "# Install other packages:\n", + "!pip install pandas --user\n", + "# Then restart the Notebook kernel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas\n", + "from kubeflow.metadata import metadata\n", + "from datetime import datetime\n", + "from uuid import uuid4\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "outputs": [], + "source": [ + "### Load all test cases\n", + "- All 5 columns\n", + "- 7 Columns worth of data\n", + "- Partials:\n", + " - Active Execution\n", + " - 3 columns\n", + " - 4 columns\n", + "- Multiples\n", + " - Multi-Input\n", + " - Multi-Output\n", + " - Multi-Execution" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "cases = [\n", + " \"All 5 columns\",\n", + " \"7 Columns worth of data\",\n", + " \"Active Execution\",\n", + " \"3 columns\",\n", + " \"4 columns\"\n", + "]\n", + "\n", + "ws = [\n", + " metadata.Workspace(\n", + " store=metadata.Store(grpc_host=\"localhost\", grpc_port=8080),\n", + " name=\"test_case_{}\".format(i),\n", + " description=x,\n", + " labels={\"n1\": \"v1\"}) for i, x in enumerate(cases)]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "runs = [\n", + " metadata.Run(\n", + " workspace=w,\n", + " name=\"run-\" + datetime.utcnow().isoformat(\"T\") ,\n", + " description=\"a run in ws_{}\".format(i),\n", + " ) for i, w in enumerate(ws)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "execs = [\n", + " metadata.Execution(\n", + " name = \"execution-\" + datetime.utcnow().isoformat(\"T\") ,\n", + " workspace=w,\n", + " run=runs[i],\n", + " description=cases[i],\n", + " ) for i, w in enumerate(ws)]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "Created executions: [14, 15, 16, 17, 18]\n" + } + ], +
"source": [ + "print('Created executions:', list(map(lambda x: x.id, execs)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "outputs": [], + "source": [ + "### Let's create fake data sources that can be shared by our executions" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "Data sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\", \"{id: 20, version: 'data_set_version_f679ee55-55ef-4f66-a1e6-e9d59ea270fe'}\"]\nData sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\", \"{id: 20, version: 'data_set_version_f679ee55-55ef-4f66-a1e6-e9d59ea270fe'}\"]\nData sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\", \"{id: 20, version: 'data_set_version_f679ee55-55ef-4f66-a1e6-e9d59ea270fe'}\"]\nData sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\", \"{id: 20, version: 'data_set_version_f679ee55-55ef-4f66-a1e6-e9d59ea270fe'}\"]\nData sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\"]\n" + } + ], + "source": [ + "get_date_set_version = lambda: \"data_set_version_\" + str(uuid4())\n", + "fileSources = [\n", + " metadata.DataSet(\n", + " description=\"Sample file set 1\",\n", + " name=\"table-dump\",\n", + " owner=\"ap@kubeflow.org\",\n", + " uri=\"file://datasets/dump1\",\n", + " version=get_date_set_version(),\n", + " query=\"SELECT * FROM mytable\"),\n", + " metadata.DataSet(\n", + " description=\"Sample file set 2\",\n", + " name=\"cloud-table\",\n", + " owner=\"ap@kubeflow.org\",\n", + " uri=\"gs://cloud/table.csv\",\n", + " version=get_date_set_version(),\n", + " query=\"SELECT * FROM mytable\"),\n", + "]\n", + "\n", + "how_many_sources = np.random.choice(len(fileSources), len(execs))\n", + "data_sets = []\n", + "\n", + "for i, src_count in enumerate(how_many_sources):\n", + " exec =
execs[i]\n", + " ds = fileSources[0:src_count+1]\n", + " ds = list(map(lambda x: exec.log_input(x), ds))\n", + " print(\"Data sets:\", [\"{{id: {0.id}, version: '{0.version}'}}\".format(d) for d in ds])\n", + " data_sets.append(ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "outputs": [], + "source": [ + "### Log a model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "Models: [\"{id: 21, version: 'model_version_0'}\"]\nModels: [\"{id: 23, version: 'model_version_1'}\"]\nModels: [\"{id: 24, version: 'model_version_2'}\"]\nModels: [\"{id: 25, version: 'model_version_3'}\", \"{id: 26, version: 'model_version_3'}\"]\nModels: [\"{id: 27, version: 'model_version_4'}\"]\n" + } + ], + "source": [ + "models = []\n", + "for i, exec in enumerate(execs):\n", + " model_version = \"model_version_{}\".format(i)\n", + " l = []\n", + " l.append(exec.log_output(\n", + " metadata.Model(\n", + " name=\"MNIST\",\n", + " description=\"model to recognize handwritten digits\",\n", + " owner=\"someone@kubeflow.org\",\n", + " uri=\"gcs://my-bucket/mnist\",\n", + " model_type=\"neural network\",\n", + " training_framework={\n", + " \"name\": \"tensorflow\",\n", + " \"version\": \"v1.0\"\n", + " },\n", + " hyperparameters={\n", + " \"learning_rate\": 0.5,\n", + " \"layers\": [10, 3, 1],\n", + " \"early_stop\": True\n", + " },\n", + " version=model_version,\n", + " labels={\"mylabel\": \"l1\"})))\n", + " if np.random.choice(2, 1, p=[.6, .4]) == 1:\n", + " l.append(exec.log_output(\n", + " metadata.Model(\n", + " name=\"SVHN\",\n", + " description=\"model to recognize house numbers on map images\",\n", + " owner=\"ap@kubeflow.org\",\n", + " uri=\"gcs://my-bucket/svhn\",\n", + " model_type=\"neural network\",\n", + " training_framework={\n", + " \"name\": \"pytorch\",\n", + " \"version\": \"v1.0\"\n", + " },\n", + " hyperparameters={\n", + " \"learning_rate\": 
0.0001,\n", + " \"layers\": [10, 3, 1],\n", + " \"early_stop\": True\n", + " },\n", + " version=model_version,\n", + " labels={\"mylabel\": \"l1\"})))\n", + " print(\"Models:\", [\"{{id: {0.id}, version: '{0.version}'}}\".format(d) for d in l])\n", + " models.append(l)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "outputs": [], + "source": [ + "### Log an evaluation of a model" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "Metrics created!\n" + } + ], + "source": [ + "for i, exec in enumerate(execs):\n", + " for model in models[i]:\n", + " for data_set in data_sets[i]:\n", + " metrics = exec.log_output(\n", + " metadata.Metrics(\n", + " name=\"{}-evaluation\".format(model.name),\n", + " description=\"validating the {0.name} model to {0.description}\".format(model),\n", + " owner=model.owner,\n", + " uri=\"gcs://my-bucket/{}-eval.csv\".format(model.name.lower()),\n", + " data_set_id=str(data_set.id),\n", + " model_id=str(model.id),\n", + " metrics_type=metadata.Metrics.VALIDATION,\n", + " values={\"accuracy\": np.random.uniform(low=.6)},\n", + " labels={\"mylabel\": \"l1\"}))\n", + "print('Metrics created!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "outputs": [], + "source": [ + "### Add Metadata for serving the model" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": "Found the mode with id 21 and version 'model_version_0'.\nFound the mode with id 23 and version 'model_version_1'.\nFound the mode with id 24 and version 'model_version_2'.\nFound the mode with id 25 and version 'model_version_3'.\nFound the mode with id 25 and version 'model_version_3'.\nFound the mode with id 27 and version 'model_version_4'.\n" + } + ], + "source": [ + "for i, w in enumerate(ws):\n", + " serving_application = metadata.Execution(\n", + 
" name=\"serving model\",\n", + " workspace=w,\n", + " description=\"an execution to represent model serving component\",\n", + " )\n", + " for model in models[i]:\n", + " # Noticed we use model name, version, uri to uniquely identify existing model.\n", + " served_model = metadata.Model(\n", + " name=\"MNIST\",\n", + " uri=\"gcs://my-bucket/mnist\",\n", + " version=model.version,\n", + " )\n", + " m=serving_application.log_input(served_model)\n", + " print(\"Found the mode with id {0.id} and version '{0.version}'.\".format(m))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "outputs": [], + "source": [ + "### List all models in the workspace" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idworkspacerunversionownerdescriptionnamemodel_typecreate_timeuritraining_frameworkhyperparameterslabelskwargs
023test_case_1run-2019-12-13T22:18:15.220690model_version_1someone@kubeflow.orgmodel to recognize handwritten digitsMNISTneural network2019-12-13T22:19:08.908693Zgcs://my-bucket/mnist{'name': 'tensorflow', 'version': 'v1.0'}{'learning_rate': 0.5, 'layers': [10, 3, 1], '...{'mylabel': 'l1'}{}
\n
", + "text/plain": " id workspace run version \\\n0 23 test_case_1 run-2019-12-13T22:18:15.220690 model_version_1 \n\n owner description name \\\n0 someone@kubeflow.org model to recognize handwritten digits MNIST \n\n model_type create_time uri \\\n0 neural network 2019-12-13T22:19:08.908693Z gcs://my-bucket/mnist \n\n training_framework \\\n0 {'name': 'tensorflow', 'version': 'v1.0'} \n\n hyperparameters labels kwargs \n0 {'learning_rate': 0.5, 'layers': [10, 3, 1], '... {'mylabel': 'l1'} {} " + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pandas.DataFrame.from_dict(ws[1].list(metadata.Model.ARTIFACT_TYPE_NAME))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "outputs": [], + "source": [ + "### Basic Lineage Tracking" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# print(\"model id is %s\\n\" % model.id)\n", + " \n", + "# model_events = ws1.store.get_events_by_artifact_ids([model.id])\n", + "\n", + "# execution_ids = set(e.execution_id for e in model_events)\n", + "# print(\"All executions related to the model are {}\".format(execution_ids))\n", + "# # assert execution_ids == set([serving_application.id, exec.id])\n", + "\n", + "# trainer_events = ws1.store.get_events_by_execution_ids([exec.id])\n", + "# artifact_ids = set(e.artifact_id for e in trainer_events)\n", + "# print(\"All artifacts related to the training event are {}\".format(artifact_ids))# assert artifact_ids == set([model.id, metrics.id, data_set.id])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file From 95eff7f8d8a5f958acb28ac6e28e595553e31c38 Mon Sep 17 00:00:00 2001 From: "Apoorv Verma [AP]" Date: Fri, 20 Dec 2019 16:22:14 -0500 Subject: [PATCH 2/2] Updated to create 7 columns for index 2 and 3. Seems to not work --- sdk/python/sample/MLMD-Cases.ipynb | 100 ++++++++++++++++++----------- 1 file changed, 61 insertions(+), 39 deletions(-) diff --git a/sdk/python/sample/MLMD-Cases.ipynb b/sdk/python/sample/MLMD-Cases.ipynb index fdb9856c1..69ee6856a 100644 --- a/sdk/python/sample/MLMD-Cases.ipynb +++ b/sdk/python/sample/MLMD-Cases.ipynb @@ -15,15 +15,8 @@ }, { "cell_type": "code", - "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": "Requirement already satisfied: kubeflow-metadata in d:\\ap-langs\\anaconda3\\lib\\site-packages (0.3.0)\nRequirement already satisfied: ml-metadata==0.15.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from kubeflow-metadata) (0.15.0)\nRequirement already satisfied: retrying in d:\\ap-langs\\anaconda3\\lib\\site-packages (from kubeflow-metadata) (1.3.3)\nRequirement already satisfied: absl-py<1,>=0.7 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from ml-metadata==0.15.0->kubeflow-metadata) (0.8.1)\nRequirement already satisfied: six<2,>=1.10 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from ml-metadata==0.15.0->kubeflow-metadata) (1.13.0)\nRequirement already satisfied: protobuf<4,>=3.7 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from ml-metadata==0.15.0->kubeflow-metadata) (3.11.1)\nRequirement already satisfied: tensorflow<3,>=1.15 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from ml-metadata==0.15.0->kubeflow-metadata) (2.0.0)\nRequirement already satisfied: setuptools in d:\\ap-langs\\anaconda3\\lib\\site-packages (from protobuf<4,>=3.7->ml-metadata==0.15.0->kubeflow-metadata) 
(42.0.1.post20191125)\nRequirement already satisfied: astor>=0.6.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.8.1)\nRequirement already satisfied: numpy<2.0,>=1.16.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.17.4)\nRequirement already satisfied: gast==0.2.2 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.2.2)\nRequirement already satisfied: keras-preprocessing>=1.0.5 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.1.0)\nRequirement already satisfied: keras-applications>=1.0.8 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.0.8)\nRequirement already satisfied: tensorflow-estimator<2.1.0,>=2.0.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.0.1)\nRequirement already satisfied: grpcio>=1.8.6 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.25.0)\nRequirement already satisfied: google-pasta>=0.1.6 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.1.8)\nRequirement already satisfied: wrapt>=1.11.1 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.11.2)\nRequirement already satisfied: wheel>=0.26 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.33.6)\nRequirement already satisfied: opt-einsum>=2.3.2 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.1.0)\nRequirement already satisfied: tensorboard<2.1.0,>=2.0.0 in 
d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.0.2)\nRequirement already satisfied: termcolor>=1.1.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.1.0)\nRequirement already satisfied: h5py in d:\\ap-langs\\anaconda3\\lib\\site-packages (from keras-applications>=1.0.8->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.9.0)\nRequirement already satisfied: google-auth<2,>=1.6.3 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.8.1)\nRequirement already satisfied: markdown>=2.6.8 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.1.1)\nRequirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.4.1)\nRequirement already satisfied: werkzeug>=0.11.15 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.16.0)\nRequirement already satisfied: requests<3,>=2.21.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.22.0)\nRequirement already satisfied: pyasn1-modules>=0.2.1 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.2.7)\nRequirement already satisfied: cachetools<3.2,>=2.0.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.1.1)\nRequirement already satisfied: rsa<4.1,>=3.1.4 in 
d:\\ap-langs\\anaconda3\\lib\\site-packages (from google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (4.0)\nRequirement already satisfied: requests-oauthlib>=0.7.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.3.0)\nRequirement already satisfied: idna<2.9,>=2.5 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2.8)\nRequirement already satisfied: certifi>=2017.4.17 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (2019.11.28)\nRequirement already satisfied: chardet<3.1.0,>=3.0.2 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.0.4)\nRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (1.24.2)\nRequirement already satisfied: pyasn1<0.5.0,>=0.4.6 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (0.4.8)\nRequirement already satisfied: oauthlib>=3.0.0 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard<2.1.0,>=2.0.0->tensorflow<3,>=1.15->ml-metadata==0.15.0->kubeflow-metadata) (3.1.0)\nRequirement already satisfied: pandas in d:\\ap-langs\\anaconda3\\lib\\site-packages (0.25.3)\nRequirement already satisfied: python-dateutil>=2.6.1 in 
d:\\ap-langs\\anaconda3\\lib\\site-packages (from pandas) (2.8.1)\nRequirement already satisfied: pytz>=2017.2 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from pandas) (2019.3)\nRequirement already satisfied: numpy>=1.13.3 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from pandas) (1.17.4)\nRequirement already satisfied: six>=1.5 in d:\\ap-langs\\anaconda3\\lib\\site-packages (from python-dateutil>=2.6.1->pandas) (1.13.0)\n" - } - ], + "outputs": [], "source": [ "# To use the latest publish `kubeflow-metadata` library, you can run:\n", "!pip install kubeflow-metadata --user\n", @@ -65,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -87,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -102,7 +95,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -117,13 +110,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", - "text": "Created executions: [14, 15, 16, 17, 18]\n" + "text": "Created executions: [61, 62, 63, 64, 65]\n" } ], "source": [ @@ -140,13 +133,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", - "text": "Data sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\", \"{id: 20, version: 'data_set_version_f679ee55-55ef-4f66-a1e6-e9d59ea270fe'}\"]\nData sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\", \"{id: 20, version: 'data_set_version_f679ee55-55ef-4f66-a1e6-e9d59ea270fe'}\"]\nData sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\", \"{id: 20, version: 'data_set_version_f679ee55-55ef-4f66-a1e6-e9d59ea270fe'}\"]\nData 
sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\", \"{id: 20, version: 'data_set_version_f679ee55-55ef-4f66-a1e6-e9d59ea270fe'}\"]\nData sets: [\"{id: 19, version: 'data_set_version_33726879-8232-4a52-84a5-fe3bb110a63c'}\"]\n" + "text": "Data sets: [\"{id: 95, version: 'data_set_version_980a618d-0b0f-431a-b58d-d31e6f792ba7'}\"]\nData sets: [\"{id: 95, version: 'data_set_version_980a618d-0b0f-431a-b58d-d31e6f792ba7'}\"]\nData sets: [\"{id: 95, version: 'data_set_version_980a618d-0b0f-431a-b58d-d31e6f792ba7'}\", \"{id: 96, version: 'data_set_version_7cbfcb57-101f-4a7b-9d37-72547e7314ca'}\"]\nData sets: [\"{id: 95, version: 'data_set_version_980a618d-0b0f-431a-b58d-d31e6f792ba7'}\"]\nData sets: [\"{id: 95, version: 'data_set_version_980a618d-0b0f-431a-b58d-d31e6f792ba7'}\", \"{id: 96, version: 'data_set_version_7cbfcb57-101f-4a7b-9d37-72547e7314ca'}\"]\n" } ], "source": [ @@ -189,13 +182,13 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", - "text": "Models: [\"{id: 21, version: 'model_version_0'}\"]\nModels: [\"{id: 23, version: 'model_version_1'}\"]\nModels: [\"{id: 24, version: 'model_version_2'}\"]\nModels: [\"{id: 25, version: 'model_version_3'}\", \"{id: 26, version: 'model_version_3'}\"]\nModels: [\"{id: 27, version: 'model_version_4'}\"]\n" + "text": "Models: [\"{id: 11, version: 'model_version_0'}\"]\nModels: [\"{id: 12, version: 'model_version_1'}\", \"{id: 30, version: 'model_version_1'}\"]\nModels: [\"{id: 13, version: 'model_version_2'}\"]\nModels: [\"{id: 15, version: 'model_version_3'}\", \"{id: 74, version: 'model_version_3'}\"]\nModels: [\"{id: 16, version: 'model_version_4'}\"]\n" } ], "source": [ @@ -254,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -291,22 +284,26 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 
13, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": "Found the mode with id 21 and version 'model_version_0'.\nFound the mode with id 23 and version 'model_version_1'.\nFound the mode with id 24 and version 'model_version_2'.\nFound the mode with id 25 and version 'model_version_3'.\nFound the mode with id 25 and version 'model_version_3'.\nFound the mode with id 27 and version 'model_version_4'.\n" - } - ], + "outputs": [], "source": [ "for i, w in enumerate(ws):\n", - " serving_application = metadata.Execution(\n", - " name=\"serving model\",\n", - " workspace=w,\n", - " description=\"an execution to represent model serving component\",\n", - " )\n", + " serving_application = None\n", + " if i in [2,3]:\n", + " print('Retrain for', i)\n", + " serving_application = metadata.Execution(\n", + " name=\"Retrain step\",\n", + " workspace=w,\n", + " run=runs[i],\n", + " description=\"retrain model to be more accurate on a scoped problem\",\n", + " )\n", + " else:\n", + " serving_application = metadata.Execution(\n", + " name=\"serving model\",\n", + " workspace=w,\n", + " description=\"an execution to represent model serving component\",\n", + " )\n", " for model in models[i]:\n", " # Noticed we use model name, version, uri to uniquely identify existing model.\n", " served_model = metadata.Model(\n", @@ -315,7 +312,34 @@ " version=model.version,\n", " )\n", " m=serving_application.log_input(served_model)\n", - " print(\"Found the mode with id {0.id} and version '{0.version}'.\".format(m))" + " if i in [2,3]:\n", + " print('Attaching new model', i)\n", + " o_model = metadata.Model(\n", + " name=\"Retrained MNIST\",\n", + " description=\"better recognition of slanted digits\",\n", + " owner=\"ap@kubeflow.org\",\n", + " uri=\"gcs://my-bucket/mnist-slanted\",\n", + " model_type=\"neural network\",\n", + " training_framework={\n", + " \"name\": \"pytorch\",\n", + " \"version\": \"v1.0\"\n", + " },\n", + " hyperparameters={\n", + " 
\"learning_rate\": 0.01,\n", + " \"layers\": [5, 3, 1],\n", + " \"early_stop\": True\n", + " },\n", + " version=model_version,\n", + " labels={\"mylabel\": \"l2\"}\n", + " )\n", + " serving_application.log_output(o_model)\n", + " exec = metadata.Execution(\n", + " name=\"serving model\",\n", + " workspace=w,\n", + " run=runs[i],\n", + " description=\"an execution to represent model serving component\",\n", + " )\n", + " exec.log_input(o_model)" ] }, { @@ -328,21 +352,21 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idworkspacerunversionownerdescriptionnamemodel_typecreate_timeuritraining_frameworkhyperparameterslabelskwargs
023test_case_1run-2019-12-13T22:18:15.220690model_version_1someone@kubeflow.orgmodel to recognize handwritten digitsMNISTneural network2019-12-13T22:19:08.908693Zgcs://my-bucket/mnist{'name': 'tensorflow', 'version': 'v1.0'}{'learning_rate': 0.5, 'layers': [10, 3, 1], '...{'mylabel': 'l1'}{}
\n
", - "text/plain": " id workspace run version \\\n0 23 test_case_1 run-2019-12-13T22:18:15.220690 model_version_1 \n\n owner description name \\\n0 someone@kubeflow.org model to recognize handwritten digits MNIST \n\n model_type create_time uri \\\n0 neural network 2019-12-13T22:19:08.908693Z gcs://my-bucket/mnist \n\n training_framework \\\n0 {'name': 'tensorflow', 'version': 'v1.0'} \n\n hyperparameters labels kwargs \n0 {'learning_rate': 0.5, 'layers': [10, 3, 1], '... {'mylabel': 'l1'} {} " + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idworkspacerunversionownerdescriptionnamemodel_typecreate_timeuritraining_frameworkhyperparameterslabelskwargs
013test_case_2run-2019-12-13T23:05:35.516946model_version_2someone@kubeflow.orgmodel to recognize handwritten digitsMNISTneural network2019-12-13T23:05:38.064051Zgcs://my-bucket/mnist{'name': 'tensorflow', 'version': 'v1.0'}{'learning_rate': 0.5, 'layers': [10, 3, 1], '...{'mylabel': 'l1'}{}
114test_case_2run-2019-12-13T23:05:35.516946model_version_2ap@kubeflow.orgmodel to recognize house numbers on map imagesSVHNneural network2019-12-13T23:05:38.641097Zgcs://my-bucket/svhn{'name': 'pytorch', 'version': 'v1.0'}{'learning_rate': 0.0001, 'layers': [10, 3, 1]...{'mylabel': 'l1'}{}
\n
", + "text/plain": " id workspace run version \\\n0 13 test_case_2 run-2019-12-13T23:05:35.516946 model_version_2 \n1 14 test_case_2 run-2019-12-13T23:05:35.516946 model_version_2 \n\n owner description \\\n0 someone@kubeflow.org model to recognize handwritten digits \n1 ap@kubeflow.org model to recognize house numbers on map images \n\n name model_type create_time uri \\\n0 MNIST neural network 2019-12-13T23:05:38.064051Z gcs://my-bucket/mnist \n1 SVHN neural network 2019-12-13T23:05:38.641097Z gcs://my-bucket/svhn \n\n training_framework \\\n0 {'name': 'tensorflow', 'version': 'v1.0'} \n1 {'name': 'pytorch', 'version': 'v1.0'} \n\n hyperparameters labels kwargs \n0 {'learning_rate': 0.5, 'layers': [10, 3, 1], '... {'mylabel': 'l1'} {} \n1 {'learning_rate': 0.0001, 'layers': [10, 3, 1]... {'mylabel': 'l1'} {} " }, - "execution_count": 29, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pandas.DataFrame.from_dict(ws[1].list(metadata.Model.ARTIFACT_TYPE_NAME))" + "pandas.DataFrame.from_dict(ws[2].list(metadata.Model.ARTIFACT_TYPE_NAME))" ] }, { @@ -355,7 +379,6 @@ }, { "cell_type": "code", - "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -374,7 +397,6 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": {}, "outputs": [], "source": []