diff --git a/README.md b/README.md index e07b5dc..aec74cc 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This repository contains code which demonstrates ML-Ops using a `FastAPI` applic ## Running Instructions - Create a fork of the repo using the `fork` button. -- Clone your fork using `git clone https://www.github.com//mlops-iris.git` +- Clone your fork using `git clone https://www.github.com/samplepython/mlops-iris.git` - Install dependencies using `pip3 install -r requirements.txt` - Run application using `python3 main.py` - Run tests using `pytest` @@ -14,7 +14,7 @@ This repository contains code which demonstrates ML-Ops using a `FastAPI` applic - `build` (test) and `upload_zip` for all pushes ## Assignment Tasks -1. Change this README to add your name here: . Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. If the build succeeds, merge the pull request with master and see the CI/CD `upload_zip` take place. +1. Change this README to add your name here: Raghu Kumar. Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. If the build succeeds, merge the pull request with master and see the CI/CD `upload_zip` take place. 2. Add 2 more unit tests of your choice to `test_app.py` and make sure they are passing. 3. Add one more classifier to startup and use only the one with better accuracy. 4. Add the attribute `timestamp` to the response and return the current time with it. 
diff --git a/ml_utils.py b/ml_utils.py index bdd4dc8..6623cab 100644 --- a/ml_utils.py +++ b/ml_utils.py @@ -1,15 +1,22 @@ from sklearn import datasets from sklearn.model_selection import train_test_split from sklearn.naive_bayes import GaussianNB +from sklearn.ensemble import RandomForestClassifier +from sklearn.tree import DecisionTreeClassifier from sklearn.metrics import accuracy_score # define a Gaussain NB classifier -clf = GaussianNB() +gaussian_object = GaussianNB() +random_forest_object = RandomForestClassifier(random_state=2) +decision_tree_object = DecisionTreeClassifier() + +best_model = None # define the class encodings and reverse encodings classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"} r_classes = {y: x for x, y in classes.items()} + # function to train and load the model during startup def load_model(): # load the dataset from the official sklearn datasets @@ -17,18 +24,40 @@ def load_model(): # do the test-train split and train the model X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - clf.fit(X_train, y_train) + + gaussian_object.fit(X_train, y_train) + random_forest_object.fit(X_train, y_train) + decision_tree_object.fit(X_train, y_train) # calculate the print the accuracy score - acc = accuracy_score(y_test, clf.predict(X_test)) - print(f"Model trained with accuracy: {round(acc, 3)}") + acc_gaussian = round(accuracy_score(y_test, gaussian_object.predict(X_test)),3) + print(f"GaussianNB Model trained with accuracy: {acc_gaussian}") + + acc_random_forest = round(accuracy_score(y_test, random_forest_object.predict(X_test)),3) + print(f"RandomForestClassifier Model trained with accuracy: {acc_random_forest}") + + acc_decision_tree = round(accuracy_score(y_test, decision_tree_object.predict(X_test)),3) + print(f"DecisionTreeClassifier Model trained with accuracy: {acc_decision_tree}") + + model_accuracy_dict = { gaussian_object: acc_gaussian, + random_forest_object: acc_random_forest, + 
decision_tree_object: acc_decision_tree, + } + global best_model + best_model = max(model_accuracy_dict, key=model_accuracy_dict.get) + print(f'Best model to predict is: {best_model}') + #return best_model + # function to predict the flower using the model def predict(query_data): x = list(query_data.dict().values()) - prediction = clf.predict([x])[0] + + print(f'Best model to evaluate is: {best_model}') + prediction = best_model.predict([x])[0] print(f"Model prediction: {classes[prediction]}") + return classes[prediction] # function to retrain the model as part of the feedback loop @@ -38,4 +67,4 @@ def retrain(data): y = [r_classes[d.flower_class] for d in data] # fit the classifier again based on the new data obtained - clf.fit(X, y) + gaussian_object.fit(X, y) diff --git a/test_app.py b/test_app.py index b65fc90..cee6b44 100644 --- a/test_app.py +++ b/test_app.py @@ -24,3 +24,32 @@ def test_pred_virginica(): # asserting the correct response is received assert response.status_code == 200 assert response.json() == {"flower_class": "Iris Virginica"} + + +def test_pred_setosa(): + # defining a sample payload for the testcase + payload = { + "sepal_length": 2, + "sepal_width": 1, + "petal_length": 1, + "petal_width": 0, + } + with TestClient(app) as client: + response = client.post("/predict_flower", json=payload) + # asserting the correct response is received + assert response.status_code == 200 + assert response.json() == {"flower_class": "Iris Setosa"} + +def test_pred_virginica2(): + # defining a sample payload for the testcase + payload = { + "sepal_length": 8, + "sepal_width": 1, + "petal_length": 6, + "petal_width": 8, + } + with TestClient(app) as client: + response = client.post("/predict_flower", json=payload) + # asserting the correct response is received + assert response.status_code == 200 + assert response.json() == {"flower_class": "Iris Virginica"}