# SignSpeak/eval.py (YamanDevelopment/SignSpeak)
#
# Evaluates a saved classifier on a labelled DataFrame of hand-landmark features.
import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import numpy as np

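# NOTE: DATA_FILE and MODEL_SAVE_PATH are not defined in this file; they are assumed to
# already exist as globals (e.g. from previous notebook cells). Define both before
# running this file as a standalone script.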

def _preprocess_dataframe_for_evaluation(df_raw, label_encoder, handedness_encoder=None):
    """
    Build the feature matrix and encoded labels used to evaluate a trained model.

    Steps, in order:
      1. Every column whose name contains 'landmark_' and ends with '_x' or
         '_y' is selected as a feature.
      2. df_raw['label'] is encoded with the already fitted label_encoder.
      3. If a 'handedness' column exists it is appended as a feature; text
         values are converted to integer codes first.
      4. If both 'landmark_0_x' and 'landmark_0_y' are present, they are
         subtracted from every x / y feature column respectively and then
         dropped from the result.

    Args:
        df_raw: DataFrame with a 'label' column plus the feature columns.
            It is copied, never modified.
        label_encoder: Fitted encoder exposing ``transform``; applied to the
            'label' column.
        handedness_encoder: Optional fitted encoder exposing ``transform``.
            When given, text handedness values are encoded with it, so the
            integer codes match whatever that encoder was fitted on. When
            omitted, a fresh LabelEncoder is fitted on the evaluation data
            itself (the original behaviour).

    Returns:
        Tuple ``(X, y_encoded)``: the feature DataFrame and the encoded labels.

    Raises:
        KeyError: If df_raw has no 'label' column.
        Exception: Whatever ``label_encoder.transform`` raises for labels it
            cannot encode is propagated unchanged.
    """
    df = df_raw.copy()  # Work on a copy so the caller's frame is untouched

    feature_cols = [
        col for col in df.columns
        if 'landmark_' in col and col.endswith(('_x', '_y'))
    ]

    X = df[feature_cols].copy()

    # Encode labels using the provided (already fitted) label_encoder
    y_encoded = label_encoder.transform(df['label'])

    if 'handedness' in df.columns:
        handedness = df['handedness']
        # Treat both classic object columns and pandas string dtypes as text;
        # comparing the dtype to 'object' alone misses the latter and would
        # hand raw strings to the model.
        is_text = (
            pd.api.types.is_object_dtype(handedness)
            or pd.api.types.is_string_dtype(handedness)
        )
        if is_text and handedness_encoder is not None:
            X['handedness'] = handedness_encoder.transform(handedness)
        elif is_text:
            # CAVEAT: re-fitting here assigns codes from the categories present
            # in *this* data only. A sample containing a single category is
            # encoded as 0 regardless of which category it is, which may not
            # match the training-time encoding. Pass handedness_encoder to
            # avoid this.
            X['handedness'] = LabelEncoder().fit_transform(handedness)
        else:
            X['handedness'] = handedness

    if 'landmark_0_x' in X.columns and 'landmark_0_y' in X.columns:
        print("Normalizing coordinates relative to wrist (landmark_0) in DataFrame...")
        # Snapshot the wrist columns: the loop below overwrites landmark_0_x/_y
        # themselves, and the offsets must not change while it runs.
        wrist_x = X['landmark_0_x'].copy()
        wrist_y = X['landmark_0_y'].copy()

        # Every entry of feature_cols ends with '_x' or '_y' by construction.
        for col in feature_cols:
            X[col] = X[col] - (wrist_x if col.endswith('_x') else wrist_y)

        # After normalization the wrist columns are all zeros; drop them.
        X = X.drop(columns=['landmark_0_x', 'landmark_0_y'])

    return X, y_encoded


def evaluate_custom_dataframe(input_df, model_filepath):
    """
    Score a saved classifier against every row of a labelled DataFrame.

    The file at model_filepath is read with joblib and must hold a mapping
    with a 'model' entry (the classifier) and a 'label_encoder' entry. The
    frame is preprocessed, predictions are made for all rows, and the
    accuracy plus a classification report are printed. The report is
    restricted to the classes that actually occur in the frame's labels.
    Nothing is returned.

    Raises:
        ValueError: If input_df has no 'label' column.
    """
    has_labels = 'label' in input_df.columns
    if not has_labels:
        raise ValueError("The input DataFrame for evaluation must contain a 'label' column.")

    print("\n--- Evaluating provided DataFrame ---")
    print(f"Loading model from {model_filepath}...")
    # NOTE: joblib files are pickle-based; only load model files you trust.
    bundle = joblib.load(model_filepath)
    classifier, encoder = bundle['model'], bundle['label_encoder']

    print("Preprocessing input DataFrame for evaluation...")
    features, truth = _preprocess_dataframe_for_evaluation(input_df, encoder)

    print("Making predictions on the provided DataFrame...")
    predicted = classifier.predict(features)

    score = accuracy_score(truth, predicted)
    print(f"\nAccuracy of loaded model on provided DataFrame: {score:.4f}")

    # Report only on classes present in the ground truth of this frame.
    present_codes = np.unique(truth)
    present_names = encoder.inverse_transform(present_codes)

    print("\nClassification Report for provided DataFrame:")
    report = classification_report(
        truth,
        predicted,
        labels=present_codes,
        target_names=present_names,
    )
    print(report)


# Script entry point: demo evaluation of the saved model on a slice of the dataset
if __name__ == '__main__':
    # Read the full spreadsheet referenced by DATA_FILE.
    df_raw_for_custom_eval = pd.read_excel(DATA_FILE)
    # Stand-in for "new" data: the first 50 rows of that spreadsheet.
    sample_df_for_eval = df_raw_for_custom_eval.iloc[:50]
    evaluate_custom_dataframe(sample_df_for_eval, MODEL_SAVE_PATH)