42 lines
1.6 KiB
Python
42 lines
1.6 KiB
Python
import pandas as pd
|
|
import xgboost as xgb
|
|
import math
|
|
import joblib
|
|
import json
|
|
|
|
def _normalize_category(value):
    """Lower-case a raw categorical value; map null-like entries to "none"."""
    # Anything that is not a string (None, NaN, pd.NA, numbers) is treated as
    # missing, which is what ``Series.str.lower()`` followed by the null
    # replacement produced on object columns.
    if not isinstance(value, str):
        return "none"
    lowered = value.lower()
    return "none" if lowered in ("", "null") else lowered


def processing(input_data):
    """Prepare ``input_data`` and score it with the persisted XGBoost model.

    Steps:
      1. Build a DataFrame from ``input_data`` and reject empty input.
      2. Load the model from ``./xgboost_model.joblib`` and the category
         orders from ``./category_orders_train.json``.
      3. Lower-case the known categorical columns, map null-like values to
         ``"none"`` and encode them as ``pd.Categorical`` using the stored
         category orders. Absent categorical columns are created and filled
         with ``"none"``.
      4. Coerce every other column to numeric (unparseable values -> NaN).
      5. Add any feature the model expects but the input lacks as an all-NaN
         column, then predict on the model's feature order.

    Args:
        input_data: Anything ``pd.DataFrame`` accepts as its data argument.
            NOTE(review): presumably a list of record dicts - confirm
            against the caller.

    Returns:
        The value returned by ``model.predict`` for the prepared rows.

    Raises:
        ValueError: If the resulting DataFrame is empty.
        KeyError: If a categorical column is present in the input but has no
            entry in the category-orders file (assuming that file holds a
            JSON object).
    """
    df = pd.DataFrame(input_data)

    # Validate before touching the disk so bad input fails fast and cheaply.
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    # NOTE(review): joblib.load unpickles the file - only load trusted
    # artifacts. The model is also re-read on every call; consider caching
    # it at the call site if this runs in a hot path.
    model = joblib.load("./xgboost_model.joblib")
    with open('./category_orders_train.json', 'r', encoding="utf-8") as f:
        category_orders = json.load(f)

    categorical_columns = [
        "application_source_name",
        "ownhome",
        "employmentstatus",
        "payfrequency",
        "fraud_risk",
        "ip_net_speed_cell",
        "riskrating",
    ]
    for col in categorical_columns:
        if col in df.columns:
            # Build the cleaned values explicitly instead of calling
            # ``df[col].replace(..., inplace=True)``: the chained in-place
            # form acts on a temporary under pandas Copy-on-Write and would
            # leave the null tokens in place.
            cleaned = [_normalize_category(value) for value in df[col]]
            df[col] = pd.Categorical(cleaned, categories=category_orders[col])
        else:
            # One "none" per row; a single-element Categorical only fits a
            # one-row frame and raises a length mismatch otherwise.
            df[col] = pd.Categorical(
                ["none"] * len(df),
                categories=category_orders.get(col, ["none"]),
            )

    # Every remaining column is treated as numeric. The columns come from
    # ``df.columns`` itself, so no "column missing" branch is needed here.
    non_categorical_columns = [
        col for col in df.columns if col not in categorical_columns
    ]
    for col in non_categorical_columns:
        df[col] = df[col].astype(str).str.lower().replace(["null", "nan", "", None], pd.NA)
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Assumes the loaded object is an xgboost Booster exposing
    # ``feature_names`` and accepting a DMatrix in ``predict`` - TODO confirm.
    expected_features = model.feature_names
    for feature in expected_features:
        if feature not in df.columns:
            # NaN (float) rather than None: None yields an object-dtype
            # column, which DMatrix rejects; NaN is read as "missing".
            df[feature] = math.nan

    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True)
    predictions = model.predict(dmatrix)

    return predictions
|