blocks-transformer/processing.py
admin user ead9a776da
All checks were successful
Build and Push Docker Image / test (push) Successful in 55s
Build and Push Docker Image / build_and_push (push) Successful in 3m21s
Advanced M series V1 model block
2025-03-12 16:12:18 +00:00

47 lines
1.5 KiB
Python

import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
import json
def processing(input_data):
    """Score input records with the saved XGBoost model.

    Builds a DataFrame from ``input_data``, renames the ``riskrating`` and
    ``trueipgeo`` columns to ``RiskRating`` and ``TrueIpGeo``, casts every
    column listed in ``./category_orders_train.json`` to an ordered
    categorical, and stores the model output in a ``prediction`` column.

    NOTE: the model file and the category-order file are read from the
    current working directory on every call.

    Args:
        input_data: Any value accepted by the ``pandas.DataFrame``
            constructor (presumably a list of record dicts; confirm
            against the caller).

    Returns:
        pandas.DataFrame: The frame built from ``input_data`` (with the
        renamed columns and categorical casts applied) plus a
        ``prediction`` column.

    Raises:
        ValueError: If ``input_data`` yields an empty DataFrame.
        KeyError: If a categorical column or a model feature is missing
            from the input.
    """
    df = pd.DataFrame(input_data)

    # Fail fast: there is no point loading the model for empty input.
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    df = df.rename(columns={'riskrating': 'RiskRating', 'trueipgeo': 'TrueIpGeo'})

    # Load Model
    # NOTE(review): joblib.load unpickles the file, so it must only ever be
    # pointed at a trusted artifact.
    model_path = "./xgboost_model.joblib"
    model = joblib.load(model_path)

    # Load Category Orders
    category_orders_path = "./category_orders_train.json"
    with open(category_orders_path, 'r', encoding="utf-8") as f:
        category_orders = json.load(f)

    # Presumably `model` is an xgboost.Booster (it exposes `feature_names`
    # and predicts from a DMatrix); confirm against the training code.
    expected_features = model.feature_names

    # Report every missing column at once instead of failing on the first
    # lookup deep inside pandas. dict.fromkeys de-duplicates, keeps order.
    required_columns = dict.fromkeys(
        [*category_orders, *(expected_features or [])]
    )
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise KeyError(f"Input is missing required columns: {missing_columns}")

    # Tokens that all mean "no value" in the incoming data.
    missing_markers = [None, "", "null", np.nan, "nan", " "]
    for col, categories in category_orders.items():
        # Assign the result back: an in-place replace on `df[col]` acts on a
        # temporary under pandas Copy-on-Write and would leave `df` unchanged.
        df[col] = df[col].replace(missing_markers, np.nan)
        df[col] = pd.Categorical(df[col], categories=categories, ordered=True)

    # Create XGBoost DMatrix
    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=np.nan)

    # Make predictions
    df['prediction'] = model.predict(dmatrix)
    return df