# blocks-transformer/processing.py

import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
import json


def processing(
    input_data,
    *,
    model_path="./xgboost_model.joblib",
    category_orders_path="./category_orders_train.json",
) -> pd.DataFrame:
    """Score ``input_data`` with the stored XGBoost model.

    The input is converted to a DataFrame, the categorical columns listed in
    the category-orders file are encoded as ordered categoricals, and the
    model's predictions are appended as a ``prediction`` column.

    Args:
        input_data: Anything accepted by the ``pd.DataFrame`` constructor.
        model_path: Location of the joblib-serialised model. The loaded
            object must expose ``feature_names`` and ``predict(DMatrix)``.
        category_orders_path: Location of a JSON object mapping each
            categorical column name to its list of categories, in order.

    Returns:
        The renamed and categorically-encoded DataFrame with an added
        ``prediction`` column.

    Raises:
        ValueError: If the input yields an empty DataFrame.
        KeyError: If a column named in the category-orders file, or a
            feature expected by the model, is absent from the input.
    """
    df = pd.DataFrame(input_data)

    # Fail fast: reject empty input before doing any disk I/O.
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    # Non-inplace rename yields a new frame, so a DataFrame passed in by the
    # caller is not altered by the column changes made below.
    df = df.rename(columns={'riskrating': 'RiskRating', 'trueipgeo': 'TrueIpGeo'})

    # NOTE: joblib files are pickle-based; only point model_path at trusted files.
    model = joblib.load(model_path)

    with open(category_orders_path, 'r', encoding='utf-8') as f:
        category_orders = json.load(f)

    # Values that stand for "missing" and must become NaN before encoding.
    missing_markers = [None, "", "null", np.nan, "nan", " "]

    for col, categories in category_orders.items():
        # Assign the result back instead of using inplace=True on the
        # selected column: in-place mutation of a selection does not
        # propagate to df under pandas Copy-on-Write.
        cleaned = df[col].replace(missing_markers, np.nan)
        df[col] = pd.Categorical(cleaned, categories=categories, ordered=True)

    # Feature names stored on the model define the columns (and their order)
    # handed to the booster.
    expected_features = model.feature_names

    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=np.nan)

    df['prediction'] = model.predict(dmatrix)

    return df