blocks-transformer/processing.py
admin user 8f303a4993
All checks were successful
Build and Push Docker Image / test (push) Successful in 2m8s
Build and Push Docker Image / build_and_push (push) Successful in 2m13s
Early Term Default/Fraud indicator v1 block
2025-01-21 21:17:40 +00:00

42 lines
1.6 KiB
Python

import pandas as pd
import xgboost as xgb
import math
import joblib
import json
# Columns that are fed to the model as pandas categoricals.
_CATEGORICAL_COLUMNS = (
    "application_source_name",
    "ownhome",
    "employmentstatus",
    "payfrequency",
    "fraud_risk",
    "ip_net_speed_cell",
    "riskrating",
)

# String tokens that, like real nulls, are folded into the "none" category.
_MISSING_CATEGORY_TOKENS = ("", "null")


def _clean_categorical(series):
    """Return *series* as lower-cased text with missing markers set to "none"."""
    # Capture real nulls (None / NaN / pd.NA) before the string conversion
    # turns them into literal text.
    was_null = series.isna()
    # astype(str) keeps .str usable even when the column arrives with a
    # non-string dtype (e.g. an all-NaN float column).
    lowered = series.astype(str).str.lower()
    is_missing = was_null | lowered.isin(_MISSING_CATEGORY_TOKENS)
    return lowered.mask(is_missing, "none")


def processing(input_data):
    """Prepare ``input_data`` and score it with the stored XGBoost model.

    The input is converted to a DataFrame, the known categorical columns are
    normalised and encoded with the category lists read from
    ``./category_orders_train.json``, every other column is coerced to
    numeric, and the frame is aligned to the model's feature names before
    prediction.

    Args:
        input_data: Anything ``pd.DataFrame`` accepts (e.g. a list of row
            dicts).

    Returns:
        Whatever ``model.predict`` returns for the prepared ``xgb.DMatrix``.

    Raises:
        ValueError: If the resulting DataFrame is empty.
        KeyError: If a categorical column present in the input has no entry
            in the category-order JSON.
    """
    df = pd.DataFrame(input_data)
    # Validate before touching the disk so bad input fails fast.
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    # NOTE(review): joblib.load unpickles the file; only load trusted model
    # artifacts. Assumes the stored object is an xgboost Booster exposing
    # ``feature_names`` and ``predict(DMatrix)`` -- TODO confirm.
    model = joblib.load("./xgboost_model.joblib")
    with open("./category_orders_train.json", "r", encoding="utf-8") as f:
        category_orders = json.load(f)

    # Decide which columns are numeric before categorical columns get added.
    numeric_columns = [
        col for col in df.columns if col not in _CATEGORICAL_COLUMNS
    ]

    for col in _CATEGORICAL_COLUMNS:
        if col in df.columns:
            # Values outside the training categories become NaN here.
            df[col] = pd.Categorical(
                _clean_categorical(df[col]),
                categories=category_orders[col],
            )
        else:
            # One "none" per row; a length-1 value fails for multi-row input.
            df[col] = pd.Categorical(
                ["none"] * len(df),
                categories=category_orders.get(col, ["none"]),
            )

    for col in numeric_columns:
        # errors="coerce" maps "null", "nan", "" and any other non-numeric
        # text to NaN, so no separate replace step is needed.
        df[col] = pd.to_numeric(
            df[col].astype(str).str.lower(), errors="coerce"
        )

    expected_features = model.feature_names
    for feature in expected_features:
        if feature not in df.columns:
            # Float NaN (not None) keeps the column numeric for DMatrix.
            df[feature] = float("nan")

    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True)
    return model.predict(dmatrix)