PGCSEDS-IIITH · samplepython · Aug 12, 2021 · Aug 12, 2021 · Aug 12, 2021 · Aug 12, 2021
diff --git a/README.md b/README.md
@@ -4,7 +4,7 @@ This repository contains code which demonstrates ML-Ops using a `FastAPI` applic
 
 ## Running Instructions
 - Create a fork of the repo using the `fork` button.
-- Clone your fork using `git clone https://www.github.com/<your-username>/mlops-iris.git`
+- Clone your fork using `git clone https://www.github.com/samplepython/mlops-iris.git`
 - Install dependencies using `pip3 install -r requirements.txt`
 - Run application using `python3 main.py`
 - Run tests using `pytest`
@@ -14,7 +14,7 @@ This repository contains code which demonstrates ML-Ops using a `FastAPI` applic
 - `build` (test) and `upload_zip` for all pushes
 
 ## Assignment Tasks
-1. Change this README to add your name here: <your-name>. Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. If the build succeeds, merge the pull request with master and see the CI/CD `upload_zip` take place.
+1. Change this README to add your name here: Raghu Kumar. Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. If the build succeeds, merge the pull request with master and see the CI/CD `upload_zip` take place.
 2. Add 2 more unit tests of your choice to `test_app.py` and make sure they are passing.
 3. Add one more classifier to startup and use only the one with better accuracy.
 4. Add the attribute `timestamp` to the response and return the current time with it. 
diff --git a/ml_utils.py b/ml_utils.py
@@ -1,34 +1,63 @@
 from sklearn import datasets
 from sklearn.model_selection import train_test_split
 from sklearn.naive_bayes import GaussianNB
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.tree import DecisionTreeClassifier
 from sklearn.metrics import accuracy_score
 
 # define a Gaussain NB classifier
-clf = GaussianNB()
+gaussian_object = GaussianNB()
+random_forest_object = RandomForestClassifier(random_state=2)
+decision_tree_object = DecisionTreeClassifier()
+# model that predict() uses; stays None until load_model() picks the best candidate
+best_model = None
 
 # define the class encodings and reverse encodings
 classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"}
 r_classes = {y: x for x, y in classes.items()}
 
+
 # function to train and load the model during startup
 def load_model():
+    """Fit all three classifiers on a fresh split and keep the most accurate as best_model."""
     # load the dataset from the official sklearn datasets
     X, y = datasets.load_iris(return_X_y=True)
 
     # do the test-train split and train the model
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
-    clf.fit(X_train, y_train)
+    gaussian_object.fit(X_train, y_train)
+    random_forest_object.fit(X_train, y_train)
+    decision_tree_object.fit(X_train, y_train)
 
     # calculate the print the accuracy score
-    acc = accuracy_score(y_test, clf.predict(X_test))
-    print(f"Model trained with accuracy: {round(acc, 3)}")
+    # every score is rounded to 3 places so the comparison below is like-for-like
+    acc_gaussian = round(accuracy_score(y_test, gaussian_object.predict(X_test)), 3)
+    print(f"GaussianNB Model trained with accuracy: {acc_gaussian}")
+    acc_random_forest = round(accuracy_score(y_test, random_forest_object.predict(X_test)), 3)
+    print(f"RandomForestClassifier Model trained with accuracy: {acc_random_forest}")
+    acc_decision_tree = round(accuracy_score(y_test, decision_tree_object.predict(X_test)), 3)
+    print(f"DecisionTreeClassifier Model trained with accuracy: {acc_decision_tree}")
+
+    model_accuracy_dict = {
+        gaussian_object: acc_gaussian,
+        random_forest_object: acc_random_forest,
+        decision_tree_object: acc_decision_tree,
+    }
+    # max() returns the first entry with the top score, so ties go to the earlier model
+    global best_model
+    best_model = max(model_accuracy_dict, key=model_accuracy_dict.get)
+    print(f'Best model to predict is: {best_model}')
 
 
+
 # function to predict the flower using the model
 def predict(query_data):
     x = list(query_data.dict().values())
-    prediction = clf.predict([x])[0]
+    # NOTE: best_model is None until load_model() has run, so that must happen first
+    print(f'Best model to evaluate is: {best_model}')
+    prediction = best_model.predict([x])[0]
     print(f"Model prediction: {classes[prediction]}")
+    # return the class name looked up in `classes`, not the numeric label
     return classes[prediction]
 
 # function to retrain the model as part of the feedback loop
@@ -38,4 +67,4 @@ def retrain(data):
     y = [r_classes[d.flower_class] for d in data]
 
     # fit the classifier again based on the new data obtained
-    clf.fit(X, y)
+    best_model.fit(X, y)  # refit the model predict() serves, which is not always GaussianNB
diff --git a/test_app.py b/test_app.py
@@ -24,3 +24,32 @@ def test_pred_virginica():
         # asserting the correct response is received
         assert response.status_code == 200
         assert response.json() == {"flower_class": "Iris Virginica"}
+
+
+def test_pred_setosa():
+    """Check that a small-petal sample is classified as Iris Setosa."""
+    expected = {"flower_class": "Iris Setosa"}
+    sample = dict(
+        sepal_length=2,
+        sepal_width=1,
+        petal_length=1,
+        petal_width=0,
+    )
+    with TestClient(app) as http:
+        reply = http.post("/predict_flower", json=sample)
+        assert reply.status_code == 200
+        assert reply.json() == expected
+
+def test_pred_virginica2():
+    # large petal measurements, chosen to sit well inside the Iris Virginica range
+    payload = {
+        "sepal_length": 8,
+        "sepal_width": 3,
+        "petal_length": 6,
+        "petal_width": 2,
+    }
+    with TestClient(app) as client:
+        response = client.post("/predict_flower", json=payload)
+        # asserting the correct response is received
+        assert response.status_code == 200
+        assert response.json() == {"flower_class": "Iris Virginica"}