import json

import joblib
import numpy as np
import pandas as pd
import xgboost as xgb

# Default artifact locations; relative paths are resolved against the
# current working directory of the calling process.
DEFAULT_MODEL_PATH = "./xgboost_model.joblib"
DEFAULT_CATEGORY_ORDERS_PATH = "./category_orders_train.json"

# Raw values that are treated as "missing" in categorical columns.
_MISSING_TOKENS = [None, "", "null", np.nan, "nan", " "]


def processing(
    input_data,
    *,
    model_path=DEFAULT_MODEL_PATH,
    category_orders_path=DEFAULT_CATEGORY_ORDERS_PATH,
):
    """Score ``input_data`` with the persisted XGBoost model.

    The input is converted to a DataFrame, the lower-case columns
    ``riskrating`` / ``trueipgeo`` are renamed to ``RiskRating`` /
    ``TrueIpGeo``, every column listed in the category-orders file is cast
    to an ordered categorical, and the model's predictions are stored in a
    new ``prediction`` column.

    Args:
        input_data: Anything accepted by ``pd.DataFrame(...)``.
        model_path: Path of the joblib-serialised model. The loaded object
            must expose ``feature_names`` and ``predict(DMatrix)``
            (presumably an ``xgboost.Booster`` -- confirm against the
            training code).
        category_orders_path: Path of a JSON object mapping column name to
            the ordered list of categories for that column.

    Returns:
        The DataFrame built from ``input_data`` with the categorical casts
        applied and a ``prediction`` column added.

    Raises:
        ValueError: If the input produces an empty DataFrame.
        KeyError: If a column required by the category-orders file or by
            the model's feature list is absent from the input.
    """
    df = pd.DataFrame(input_data)
    # Validate before touching the disk so bad input fails fast.
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    df = df.rename(columns={'riskrating': 'RiskRating', 'trueipgeo': 'TrueIpGeo'})

    # NOTE: joblib.load unpickles the file; only load trusted artifacts.
    model = joblib.load(model_path)
    with open(category_orders_path, 'r', encoding='utf-8') as f:
        category_orders = json.load(f)

    expected_features = model.feature_names

    # Report every absent column at once instead of failing on the first
    # lookup; absent columns are deliberately not NaN-filled.
    required = [*category_orders, *(expected_features or [])]
    missing = [col for col in dict.fromkeys(required) if col not in df.columns]
    if missing:
        raise KeyError(f"Input is missing required columns: {missing}")

    for col, categories in category_orders.items():
        # Assign the result back: an in-place replace on ``df[col]`` acts on
        # a temporary under pandas Copy-on-Write and would be lost.
        cleaned = df[col].replace(_MISSING_TOKENS, np.nan)
        df[col] = pd.Categorical(cleaned, categories=categories, ordered=True)

    dmatrix = xgb.DMatrix(
        df[expected_features],
        enable_categorical=True,
        missing=np.nan,
    )

    df['prediction'] = model.predict(dmatrix)
    return df