Compare commits
No commits in common. "g-1-v-1" and "main" have entirely different histories.
61
block.py
61
block.py
@ -1,44 +1,21 @@
|
|||||||
import logging
|
@flowx_block
|
||||||
from typing import List, Dict
|
def example_function(request: dict) -> dict:
|
||||||
from graph_pre_processing import pre_processing_g1, pre_processing_g2
|
|
||||||
from graph_processing import processing_g1, processing_g2
|
|
||||||
from graph_post_processing import post_processing_g1, post_processing_g2
|
|
||||||
|
|
||||||
# Configure logging
|
# Processing logic here...
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
|
|
||||||
)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
return {
|
||||||
def __main__(results: List[Dict]) -> Dict:
|
"meta_info": [
|
||||||
logger.info("data receiving in g1v1 block: %s", results)
|
{
|
||||||
g1_input = pre_processing_g1(results)
|
"name": "created_date",
|
||||||
g2_input = pre_processing_g2(results)
|
"type": "string",
|
||||||
logger.info("pre_processed_data_g1: %s", g1_input)
|
"value": "2024-11-05"
|
||||||
logger.info("pre_processed_data_g2: %s", g2_input)
|
}
|
||||||
|
],
|
||||||
cluster_size = g1_input.get("cluster_size", 2)
|
"fields": [
|
||||||
if cluster_size is None:
|
{
|
||||||
cluster_size = 2
|
"name": "",
|
||||||
|
"type": "",
|
||||||
if cluster_size < 2:
|
"value": ""
|
||||||
g1_processed = {**g1_input, "prediction": 0}
|
}
|
||||||
g2_processed = {**g2_input, "prediction_g2": 0}
|
]
|
||||||
else:
|
}
|
||||||
g1_processed = processing_g1(g1_input)
|
|
||||||
g2_processed = processing_g2(g2_input)
|
|
||||||
|
|
||||||
logger.info("prediction_g1: %.8f", float(g1_processed.get("prediction", 0)))
|
|
||||||
logger.info("prediction_g2: %.8f", float(g2_processed.get("prediction_g2", 0)))
|
|
||||||
|
|
||||||
final_g1 = post_processing_g1(g1_processed)
|
|
||||||
final_g2 = post_processing_g2(g2_processed)
|
|
||||||
final = {**final_g1, **final_g2}
|
|
||||||
logger.info(final)
|
|
||||||
|
|
||||||
return final
|
|
||||||
|
|
||||||
# testing :
|
|
||||||
# __main__
|
|
||||||
|
|||||||
@ -1,55 +0,0 @@
|
|||||||
import logging
|
|
||||||
import math
|
|
||||||
from decimal import Decimal, ROUND_HALF_UP
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
|
|
||||||
)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def post_processing_g1(data):
|
|
||||||
try:
|
|
||||||
prediction = data.get("prediction", 0)
|
|
||||||
score_g1 = round(
|
|
||||||
min(prediction * 100 + 0.00001, 1) * 89 +
|
|
||||||
max(math.log2(prediction * 100 + 0.000001) * 193, 0),
|
|
||||||
0
|
|
||||||
)
|
|
||||||
data["hd_score_g1"] = score_g1
|
|
||||||
logger.info("score_g1 calculated: %s", score_g1)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Error processing score_g1 calculations: %s", e)
|
|
||||||
|
|
||||||
return {
|
|
||||||
key: data.get(key, None)
|
|
||||||
for key in [
|
|
||||||
"hd_score_m1", "hd_score_g1", "cluster_size_users_v2",
|
|
||||||
"target_connected_30_sum", "email_cnt", "rejected_app_count",
|
|
||||||
"app_dt_day_cnt", "hd_score_iso_m2"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def post_processing_g2(data):
|
|
||||||
prediction = data.get("prediction_g2", data.get("prediction"))
|
|
||||||
hd_score_g2 = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
if prediction is not None:
|
|
||||||
prediction_val = float(prediction)
|
|
||||||
raw_score = (prediction_val * 100) * 20 + math.log((prediction_val + 0.000001) * 100, 2) * 41.6
|
|
||||||
# SQL-like rounding (half up)
|
|
||||||
hd_score_g2 = int(Decimal(str(raw_score)).quantize(Decimal("1"), rounding=ROUND_HALF_UP))
|
|
||||||
hd_score_g2 = max(hd_score_g2, 0.0)
|
|
||||||
logger.info("score_g2 calculated: %s", hd_score_g2)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Error processing score_g2 calculations: %s", e)
|
|
||||||
|
|
||||||
return {"hd_score_g2": hd_score_g2}
|
|
||||||
|
|
||||||
|
|
||||||
# Backward compatibility alias
|
|
||||||
post_processing = post_processing_g1
|
|
||||||
@ -1,91 +0,0 @@
|
|||||||
import logging
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
|
|
||||||
)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
G2_PREDICTORS = [
|
|
||||||
"hd_score_m2",
|
|
||||||
"rejected_app_count",
|
|
||||||
"hd_score_m2_connected_max",
|
|
||||||
"hd_score_m2_connected_avg",
|
|
||||||
"applicant_age_connected_max",
|
|
||||||
"applicant_age_connected_avg",
|
|
||||||
"account_tel_first_seen_min_conn",
|
|
||||||
"account_tel_first_seen_max_conn",
|
|
||||||
"account_tel_first_seen_avg_conn",
|
|
||||||
"ssn_hash_first_seen_min_conn",
|
|
||||||
"ssn_hash_first_seen_avg_conn",
|
|
||||||
"account_login_first_seen_min_conn",
|
|
||||||
"digital_id_first_seen_max_conn",
|
|
||||||
"true_ip_first_seen_min_conn",
|
|
||||||
"true_ip_first_seen_max_conn",
|
|
||||||
"dist_em_ip_ref_km_min_conn",
|
|
||||||
"pct_acc_email_attr_challenged_1_conn",
|
|
||||||
"account_login_first_seen_range_conn",
|
|
||||||
"account_login_first_seen_stddev_conn",
|
|
||||||
"cpu_clock_range_conn",
|
|
||||||
"summary_risk_score_max_conn",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def _coerce_float(value):
|
|
||||||
if value is None:
|
|
||||||
return np.nan
|
|
||||||
try:
|
|
||||||
return float(value)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
return np.nan
|
|
||||||
|
|
||||||
|
|
||||||
def pre_processing_g1(results):
|
|
||||||
result = results[0]
|
|
||||||
dtypes = {
|
|
||||||
"hd_score_m1": float,
|
|
||||||
"cluster_size_users_v2": float,
|
|
||||||
"target_connected_30_sum": float,
|
|
||||||
"email_cnt": float,
|
|
||||||
"rejected_app_count": float,
|
|
||||||
"app_dt_day_cnt": float,
|
|
||||||
"hd_score_iso_m2": float
|
|
||||||
}
|
|
||||||
data = {
|
|
||||||
"hd_score_m1": result["hd_score_m1"],
|
|
||||||
"cluster_size_users_v2": result["cluster_size_users_v2"],
|
|
||||||
"target_connected_30_sum": result["target_connected_30_sum"],
|
|
||||||
"email_cnt": result["email_cnt"],
|
|
||||||
"rejected_app_count": result["rejected_app_count"],
|
|
||||||
"app_dt_day_cnt": result["app_dt_day_cnt"],
|
|
||||||
"cluster_size": result["cluster_size"],
|
|
||||||
"hd_score_iso_m2": result["hd_score_iso_m2"],
|
|
||||||
}
|
|
||||||
|
|
||||||
for col, dtype in dtypes.items():
|
|
||||||
if col in data:
|
|
||||||
value = str(data[col]).strip()
|
|
||||||
data[col] = dtype(value) if value.replace(".", "", 1).isdigit() else None
|
|
||||||
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
def pre_processing_g2(results):
|
|
||||||
result = results[0]
|
|
||||||
working = dict(result)
|
|
||||||
if "rejected_app_count_g2" in working:
|
|
||||||
# Always prefer the G2-specific count for G2 preprocessing
|
|
||||||
working["rejected_app_count"] = working.get("rejected_app_count_g2")
|
|
||||||
|
|
||||||
data = {}
|
|
||||||
for feature in G2_PREDICTORS:
|
|
||||||
data[feature] = _coerce_float(working.get(feature))
|
|
||||||
|
|
||||||
data["cluster_size"] = working.get("cluster_size")
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
# Backward compatibility alias
|
|
||||||
pre_processing = pre_processing_g1
|
|
||||||
@ -1,61 +0,0 @@
|
|||||||
import xgboost as xgb
|
|
||||||
import pandas as pd
|
|
||||||
import joblib
|
|
||||||
import logging
|
|
||||||
|
|
||||||
# Configure logging
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
|
|
||||||
)
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def processing_g1(data):
|
|
||||||
df = pd.DataFrame([data])
|
|
||||||
if df.empty:
|
|
||||||
logger.error("Input DataFrame is empty.")
|
|
||||||
|
|
||||||
# Load Model
|
|
||||||
# model_path = "C:/Users/abinisha/habemco_flowx/g1_v1/xgboost_model_G1.joblib"
|
|
||||||
model_path = "./xgboost_model_G1.joblib"
|
|
||||||
model = joblib.load(model_path)
|
|
||||||
|
|
||||||
expected_features = model.feature_names
|
|
||||||
|
|
||||||
df = df.applymap(lambda x: float("nan") if x is None else x)
|
|
||||||
|
|
||||||
dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=float("nan"))
|
|
||||||
|
|
||||||
prediction = model.predict(dmatrix)
|
|
||||||
|
|
||||||
df["prediction"] = prediction
|
|
||||||
|
|
||||||
return df.iloc[0].to_dict()
|
|
||||||
|
|
||||||
|
|
||||||
def processing_g2(data):
|
|
||||||
df = pd.DataFrame([data])
|
|
||||||
if df.empty:
|
|
||||||
logger.error("Input DataFrame is empty.")
|
|
||||||
|
|
||||||
# model_path = "C:/Users/abinisha/habemco_flowx/g1_v1/xgboost_model_G2.joblib"
|
|
||||||
model_path = "./xgboost_model_G2.joblib"
|
|
||||||
model = joblib.load(model_path)
|
|
||||||
|
|
||||||
expected_features = model.feature_names
|
|
||||||
|
|
||||||
df = df.reindex(columns=expected_features)
|
|
||||||
df = df.applymap(lambda x: float("nan") if x is None else x)
|
|
||||||
|
|
||||||
dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=float("nan"))
|
|
||||||
|
|
||||||
prediction = model.predict(dmatrix)
|
|
||||||
|
|
||||||
df["prediction_g2"] = prediction
|
|
||||||
|
|
||||||
return df.iloc[0].to_dict()
|
|
||||||
|
|
||||||
|
|
||||||
# Backward compatibility alias
|
|
||||||
processing = processing_g1
|
|
||||||
@ -1,11 +1 @@
|
|||||||
{
|
{}
|
||||||
"$schema": "http://json-schema.org/draft-07/schema",
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"results": {
|
|
||||||
"type": ["array", "null"],
|
|
||||||
"items": {"type": "object"}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": []
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,5 +1 @@
|
|||||||
joblib==1.4.2
|
{}
|
||||||
pandas==2.2.3
|
|
||||||
xgboost==2.1.3
|
|
||||||
typing==3.6.1
|
|
||||||
|
|
||||||
|
|||||||
@ -1,42 +1 @@
|
|||||||
{
|
{}
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"hd_score_m1": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"description": "HD fraud Score M1"
|
|
||||||
},
|
|
||||||
"hd_score_g1": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"description": "HD fraud Score G1"
|
|
||||||
},
|
|
||||||
"hd_score_g2": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"description": "HD fraud Score G2"
|
|
||||||
},
|
|
||||||
"cluster_size_users_v2": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"description": "Size of the user cluster in version 2."
|
|
||||||
},
|
|
||||||
"target_connected_30_sum": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"description": "Sum of target connections within 30 days."
|
|
||||||
},
|
|
||||||
"email_cnt": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"description": "Count of emails associated with the application."
|
|
||||||
},
|
|
||||||
"rejected_app_count": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"description": "Count of rejected applications for the applicant."
|
|
||||||
},
|
|
||||||
"app_dt_day_cnt": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"description": "Number of application days counted."
|
|
||||||
},
|
|
||||||
"hd_score_iso_m2": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"description": "HD fraud Score M2"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@ -1,47 +0,0 @@
|
|||||||
import unittest
|
|
||||||
from block import __main__
|
|
||||||
|
|
||||||
|
|
||||||
data = [{
|
|
||||||
"application_key": "A3CDD39F-10F8-40B0-A4C9-0E1558B75131",
|
|
||||||
"hd_score_m1": 1101.0,
|
|
||||||
"hd_score_iso_m2": 1113,
|
|
||||||
"cluster_size": 10,
|
|
||||||
"cluster_size_users_v2": 3,
|
|
||||||
"target_connected_30_sum": 0.0,
|
|
||||||
"email_cnt": 3,
|
|
||||||
"rejected_app_count": 6.0,
|
|
||||||
"app_dt_day_cnt": 7,
|
|
||||||
"hd_score_m2": 1188,
|
|
||||||
"hd_score_m2_connected_max": 1197.0,
|
|
||||||
"hd_score_m2_connected_avg": 1184.888889,
|
|
||||||
"applicant_age_connected_max": 60.0,
|
|
||||||
"applicant_age_connected_avg": 52.44444444,
|
|
||||||
"account_tel_first_seen_min_conn": 879.0,
|
|
||||||
"account_tel_first_seen_max_conn": 989.0,
|
|
||||||
"account_tel_first_seen_avg_conn": 949.6666667,
|
|
||||||
"ssn_hash_first_seen_min_conn": 5.0,
|
|
||||||
"ssn_hash_first_seen_avg_conn": 58.0,
|
|
||||||
"account_login_first_seen_min_conn": 0.0,
|
|
||||||
"digital_id_first_seen_max_conn": 2652.0,
|
|
||||||
"true_ip_first_seen_min_conn": 1857.0,
|
|
||||||
"true_ip_first_seen_max_conn": 1967.0,
|
|
||||||
"dist_em_ip_ref_km_min_conn": 17.43689023,
|
|
||||||
"pct_acc_email_attr_challenged_1_conn": 0.0,
|
|
||||||
"account_login_first_seen_range_conn": 2313.0,
|
|
||||||
"account_login_first_seen_stddev_conn": 1042.4994,
|
|
||||||
"cpu_clock_range_conn": 9054.0,
|
|
||||||
"summary_risk_score_max_conn": 14.0
|
|
||||||
}]
|
|
||||||
|
|
||||||
|
|
||||||
class TestBlock(unittest.TestCase):
|
|
||||||
def test_main_returns_scores(self):
|
|
||||||
block_result = __main__(data)
|
|
||||||
self.assertIsInstance(block_result, dict)
|
|
||||||
self.assertIn("hd_score_g1", block_result)
|
|
||||||
self.assertIn("hd_score_g2", block_result)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user