47 lines
1.5 KiB
Python
47 lines
1.5 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
import xgboost as xgb
|
|
import joblib
|
|
import json
|
|
|
|
|
|
def processing(input_data):
    """Score input records with the saved XGBoost model.

    The input is converted to a DataFrame, the lower-case columns
    ``riskrating`` / ``trueipgeo`` are renamed to ``RiskRating`` /
    ``TrueIpGeo``, every column listed in ``category_orders_train.json`` is
    cast to an ordered categorical using the category list stored there, and
    the model's predictions are appended as a ``prediction`` column.

    Both ``xgboost_model.joblib`` and ``category_orders_train.json`` are
    resolved relative to the current working directory and are re-read on
    every call.

    Args:
        input_data: Anything ``pandas.DataFrame`` accepts (for example a list
            of dicts or a dict of columns).

    Returns:
        The prepared DataFrame with an added ``prediction`` column.

    Raises:
        ValueError: If the input yields an empty DataFrame.
        KeyError: If a categorical column or a model feature is missing from
            the input.
    """
    # rename() without inplace returns a new frame, so a DataFrame passed in
    # by the caller is not modified by the column assignments below.
    df = pd.DataFrame(input_data).rename(
        columns={'riskrating': 'RiskRating', 'trueipgeo': 'TrueIpGeo'}
    )

    # Fail fast: reject empty input before doing any file I/O.
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    # Load model
    model_path = "./xgboost_model.joblib"
    model = joblib.load(model_path)

    # Load category orders (column name -> ordered list of categories)
    category_orders_path = "./category_orders_train.json"
    with open(category_orders_path, 'r', encoding='utf-8') as f:
        category_orders = json.load(f)

    # Ensure all expected features exist
    expected_features = model.feature_names
    required_columns = [*category_orders, *(expected_features or ())]
    missing_columns = [
        name for name in dict.fromkeys(required_columns)
        if name not in df.columns
    ]
    if missing_columns:
        raise KeyError(f"Input is missing required columns: {missing_columns}")

    # Placeholder values that must be treated as missing.
    missing_markers = [None, "", "null", np.nan, "nan", " "]
    for col, categories in category_orders.items():
        # Assign the result back instead of calling replace(inplace=True) on
        # df[col]: the in-place call acts on a temporary Series and does not
        # update df when pandas Copy-on-Write is enabled.
        cleaned = df[col].replace(missing_markers, np.nan)
        df[col] = pd.Categorical(cleaned, categories=categories, ordered=True)

    # Create XGBoost DMatrix
    dmatrix = xgb.DMatrix(
        df[expected_features], enable_categorical=True, missing=np.nan
    )

    # Make predictions
    df['prediction'] = model.predict(dmatrix)

    return df
|