Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ This repository contains code which demonstrates ML-Ops using a `FastAPI` applic

## Running Instructions
- Create a fork of the repo using the `fork` button.
- Clone your fork using `git clone https://www.github.com/<your-username>/mlops-iris.git`
- Clone your fork using `git clone https://www.github.com/samplepython/mlops-iris.git`
- Install dependencies using `pip3 install -r requirements.txt`
- Run application using `python3 main.py`
- Run tests using `pytest`
Expand All @@ -14,7 +14,7 @@ This repository contains code which demonstrates ML-Ops using a `FastAPI` applic
- `build` (test) and `upload_zip` for all pushes

## Assignment Tasks
1. Change this README to add your name here: <your-name>. Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. If the build succeeds, merge the pull request with master and see the CI/CD `upload_zip` take place.
1. Change this README to add your name here: Raghu Kumar. Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. If the build succeeds, merge the pull request with master and see the CI/CD `upload_zip` take place.
2. Add 2 more unit tests of your choice to `test_app.py` and make sure they are passing.
3. Add one more classifier to startup and use only the one with better accuracy.
4. Add the attribute `timestamp` to the response and return the current time with it.
41 changes: 35 additions & 6 deletions ml_utils.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,63 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# define a Gaussian NB classifier
clf = GaussianNB()
# candidate classifiers that load_model() fits and compares by accuracy
gaussian_object = GaussianNB()
# random_state is fixed for the forest; the decision tree is left unseeded
random_forest_object = RandomForestClassifier(random_state=2)
decision_tree_object = DecisionTreeClassifier()

# the estimator selected by load_model(); stays None until load_model() runs
best_model = None

# define the class encodings (label index -> name) and the reverse
# encodings (name -> label index)
classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"}
r_classes = {y: x for x, y in classes.items()}


# function to train and load the model during startup
def load_model():
    """Train the candidate classifiers on Iris and select the most accurate.

    Loads the Iris dataset, holds out 20% of it for evaluation, fits the
    module-level classifiers, prints each hold-out accuracy and stores the
    best-scoring candidate in the module-level ``best_model``.

    Every accuracy is rounded to three decimals before comparison, so all
    candidates are ranked on the same scale. On a tie the earliest candidate
    wins (GaussianNB, then RandomForestClassifier, then
    DecisionTreeClassifier).

    Returns:
        None. The result is published through the ``best_model`` global.
    """
    global best_model

    # load the dataset from the official sklearn datasets
    X, y = datasets.load_iris(return_X_y=True)

    # do the test-train split (unseeded, so the split differs between calls)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    candidates = (
        ("GaussianNB", gaussian_object),
        ("RandomForestClassifier", random_forest_object),
        ("DecisionTreeClassifier", decision_tree_object),
    )

    # `clf` is still fitted and reported so that code in this file which
    # continues to reference it (e.g. `retrain`) behaves as before.
    clf.fit(X_train, y_train)
    for _, model in candidates:
        model.fit(X_train, y_train)

    # calculate and print the accuracy score
    acc = accuracy_score(y_test, clf.predict(X_test))
    print(f"Model trained with accuracy: {round(acc, 3)}")

    model_accuracy = {}
    for label, model in candidates:
        # round(..., 3) for every model: calling round() without ndigits
        # would collapse the score to 0 or 1 and break the comparison below.
        score = round(accuracy_score(y_test, model.predict(X_test)), 3)
        print(f"{label} Model trained with accuracy: {score}")
        model_accuracy[model] = score

    # max() returns the first key with the highest score, which gives the
    # tie-break order documented above.
    best_model = max(model_accuracy, key=model_accuracy.get)
    print(f'Best model to predict is: {best_model}')



# function to predict the flower using the model
def predict(query_data):
    """Predict the Iris class name for one query using the selected model.

    Args:
        query_data: object exposing ``.dict()``; the values of that dict, in
            order, are used as the feature vector (presumably a pydantic
            request model -- confirm against the caller).

    Returns:
        The class name looked up in ``classes`` for the predicted label.

    Raises:
        RuntimeError: if ``best_model`` is still ``None`` because
            ``load_model()`` has not selected a model yet.
    """
    if best_model is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise RuntimeError("No model selected; call load_model() before predict().")

    x = list(query_data.dict().values())

    print(f'Best model to evaluate is: {best_model}')
    # Only the selected model is queried; a second prediction from `clf`
    # would be discarded immediately.
    prediction = best_model.predict([x])[0]
    print(f"Model prediction: {classes[prediction]}")

    return classes[prediction]

# function to retrain the model as part of the feedback loop
Expand All @@ -38,4 +67,4 @@ def retrain(data):
y = [r_classes[d.flower_class] for d in data]

# fit the classifier again based on the new data obtained
clf.fit(X, y)
gaussian_object.fit(X, y)
29 changes: 29 additions & 0 deletions test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,32 @@ def test_pred_virginica():
# asserting the correct response is received
assert response.status_code == 200
assert response.json() == {"flower_class": "Iris Virginica"}


def test_pred_setosa():
    """POST a sample to /predict_flower and expect an Iris Setosa reply."""
    # request body sent to the endpoint
    sample = {
        "sepal_length": 2,
        "sepal_width": 1,
        "petal_length": 1,
        "petal_width": 0,
    }
    expected_body = {"flower_class": "Iris Setosa"}

    with TestClient(app) as client:
        reply = client.post("/predict_flower", json=sample)
        # the call must succeed and return exactly the expected class
        assert reply.status_code == 200
        assert reply.json() == expected_body

def test_pred_virginica2():
    """POST a second sample to /predict_flower and check the returned class.

    NOTE(review): the test name says "virginica" but the expected class is
    "Iris Setosa" -- confirm which of the two is intended.
    """
    # request body sent to the endpoint
    sample = {
        "sepal_length": 8,
        "sepal_width": 1,
        "petal_length": 1,
        "petal_width": 8,
    }
    expected_body = {"flower_class": "Iris Setosa"}

    with TestClient(app) as client:
        reply = client.post("/predict_flower", json=sample)
        # the call must succeed and return exactly the expected class
        assert reply.status_code == 200
        assert reply.json() == expected_body