12 changed files with 23 additions and 356 deletions
--- a/README.md
+++ b/README.md
@ -1,2 +1 @@
-# G Series Block
-Loads and score G 1 v1 model
+**Hello world!!!**
--- a/block.py
+++ b/block.py
@ -1,44 +1,21 @@
-import logging
-from typing import List, Dict
-from graph_pre_processing import pre_processing_g1, pre_processing_g2
-from graph_processing import processing_g1, processing_g2
-from graph_post_processing import post_processing_g1, post_processing_g2
+@flowx_block
+def example_function(request: dict) -> dict:

-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
-)
-logger = logging.getLogger(__name__)
+  # Processing logic here...

-
-def __main__(results: List[Dict]) -> Dict:
-    logger.info("data receiving in g1v1 block: %s", results)
-    g1_input = pre_processing_g1(results)
-    g2_input = pre_processing_g2(results)
-    logger.info("pre_processed_data_g1: %s", g1_input)
-    logger.info("pre_processed_data_g2: %s", g2_input)
-
-    cluster_size = g1_input.get("cluster_size", 2)
-    if cluster_size is None:
-        cluster_size = 2
-
-    if cluster_size < 2:
-        g1_processed = {**g1_input, "prediction": 0}
-        g2_processed = {**g2_input, "prediction_g2": 0}
-    else:
-        g1_processed = processing_g1(g1_input)
-        g2_processed = processing_g2(g2_input)
-
-    logger.info("prediction_g1: %.8f", float(g1_processed.get("prediction", 0)))
-    logger.info("prediction_g2: %.8f", float(g2_processed.get("prediction_g2", 0)))
-
-    final_g1 = post_processing_g1(g1_processed)
-    final_g2 = post_processing_g2(g2_processed)
-    final = {**final_g1, **final_g2}
-    logger.info(final)
-
-    return final
-
-# testing :
-# __main__
+  return {
+    "meta_info": [
+      {
+        "name": "created_date",
+        "type": "string",
+        "value": "2024-11-05"
+      }
+    ],
+    "fields": [
+      {
+        "name": "",
+        "type": "",
+        "value": ""
+      }
+    ]
+  }
--- a/graph_post_processing.py
+++ b/graph_post_processing.py
@ -1,55 +0,0 @@
-import logging
-import math
-from decimal import Decimal, ROUND_HALF_UP
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
-)
-logger = logging.getLogger(__name__)
-
-
-def post_processing_g1(data):
-    try:
-        prediction = data.get("prediction", 0)
-        score_g1 = round(
-            min(prediction * 100 + 0.00001, 1) * 89 +
-            max(math.log2(prediction * 100 + 0.000001) * 193, 0),
-            0
-        )
-        data["hd_score_g1"] = score_g1
-        logger.info("score_g1 calculated: %s", score_g1)
-    except Exception as e:
-        logger.error("Error processing score_g1 calculations: %s", e)
-
-    return {
-        key: data.get(key, None)
-        for key in [
-            "hd_score_m1", "hd_score_g1", "cluster_size_users_v2",
-            "target_connected_30_sum", "email_cnt", "rejected_app_count",
-            "app_dt_day_cnt", "hd_score_iso_m2"
-        ]
-    }
-
-
-def post_processing_g2(data):
-    prediction = data.get("prediction_g2", data.get("prediction"))
-    hd_score_g2 = None
-
-    try:
-        if prediction is not None:
-            prediction_val = float(prediction)
-            raw_score = (prediction_val * 100) * 20 + math.log((prediction_val + 0.000001) * 100, 2) * 41.6
-            # SQL-like rounding (half up)
-            hd_score_g2 = int(Decimal(str(raw_score)).quantize(Decimal("1"), rounding=ROUND_HALF_UP))
-            hd_score_g2 = max(hd_score_g2, 0.0)
-            logger.info("score_g2 calculated: %s", hd_score_g2)
-    except Exception as e:
-        logger.error("Error processing score_g2 calculations: %s", e)
-
-    return {"hd_score_g2": hd_score_g2}
-
-
-# Backward compatibility alias
-post_processing = post_processing_g1
--- a/graph_pre_processing.py
+++ b/graph_pre_processing.py
@ -1,91 +0,0 @@
-import logging
-import numpy as np
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
-)
-logger = logging.getLogger(__name__)
-
-G2_PREDICTORS = [
-    "hd_score_m2",
-    "rejected_app_count",
-    "hd_score_m2_connected_max",
-    "hd_score_m2_connected_avg",
-    "applicant_age_connected_max",
-    "applicant_age_connected_avg",
-    "account_tel_first_seen_min_conn",
-    "account_tel_first_seen_max_conn",
-    "account_tel_first_seen_avg_conn",
-    "ssn_hash_first_seen_min_conn",
-    "ssn_hash_first_seen_avg_conn",
-    "account_login_first_seen_min_conn",
-    "digital_id_first_seen_max_conn",
-    "true_ip_first_seen_min_conn",
-    "true_ip_first_seen_max_conn",
-    "dist_em_ip_ref_km_min_conn",
-    "pct_acc_email_attr_challenged_1_conn",
-    "account_login_first_seen_range_conn",
-    "account_login_first_seen_stddev_conn",
-    "cpu_clock_range_conn",
-    "summary_risk_score_max_conn",
-]
-
-
-def _coerce_float(value):
-    if value is None:
-        return np.nan
-    try:
-        return float(value)
-    except (TypeError, ValueError):
-        return np.nan
-
-
-def pre_processing_g1(results):
-    result = results[0]
-    dtypes = {
-        "hd_score_m1": float,
-        "cluster_size_users_v2": float,
-        "target_connected_30_sum": float,
-        "email_cnt": float,
-        "rejected_app_count": float,
-        "app_dt_day_cnt": float,
-        "hd_score_iso_m2": float
-    }
-    data = {
-        "hd_score_m1": result["hd_score_m1"],
-        "cluster_size_users_v2": result["cluster_size_users_v2"],
-        "target_connected_30_sum": result["target_connected_30_sum"],
-        "email_cnt": result["email_cnt"],
-        "rejected_app_count": result["rejected_app_count"],
-        "app_dt_day_cnt": result["app_dt_day_cnt"],
-        "cluster_size": result["cluster_size"],
-        "hd_score_iso_m2": result["hd_score_iso_m2"],
-    }
-
-    for col, dtype in dtypes.items():
-        if col in data:
-            value = str(data[col]).strip()
-            data[col] = dtype(value) if value.replace(".", "", 1).isdigit() else None
-
-    return data
-
-
-def pre_processing_g2(results):
-    result = results[0]
-    working = dict(result)
-    if "rejected_app_count_g2" in working:
-        # Always prefer the G2-specific count for G2 preprocessing
-        working["rejected_app_count"] = working.get("rejected_app_count_g2")
-
-    data = {}
-    for feature in G2_PREDICTORS:
-        data[feature] = _coerce_float(working.get(feature))
-
-    data["cluster_size"] = working.get("cluster_size")
-    return data
-
-
-# Backward compatibility alias
-pre_processing = pre_processing_g1
--- a/graph_processing.py
+++ b/graph_processing.py
@ -1,61 +0,0 @@
-import xgboost as xgb
-import pandas as pd
-import joblib
-import logging
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
-)
-logger = logging.getLogger(__name__)
-
-
-def processing_g1(data):
-    df = pd.DataFrame([data])
-    if df.empty:
-        logger.error("Input DataFrame is empty.")
-
-    # Load Model
-    # model_path = "C:/Users/abinisha/habemco_flowx/g1_v1/xgboost_model_G1.joblib"
-    model_path = "./xgboost_model_G1.joblib"
-    model = joblib.load(model_path)
-
-    expected_features = model.feature_names
-
-    df = df.applymap(lambda x: float("nan") if x is None else x)
-
-    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=float("nan"))
-
-    prediction = model.predict(dmatrix)
-
-    df["prediction"] = prediction
-
-    return df.iloc[0].to_dict()
-
-
-def processing_g2(data):
-    df = pd.DataFrame([data])
-    if df.empty:
-        logger.error("Input DataFrame is empty.")
-
-    # model_path = "C:/Users/abinisha/habemco_flowx/g1_v1/xgboost_model_G2.joblib"
-    model_path = "./xgboost_model_G2.joblib"
-    model = joblib.load(model_path)
-
-    expected_features = model.feature_names
-
-    df = df.reindex(columns=expected_features)
-    df = df.applymap(lambda x: float("nan") if x is None else x)
-
-    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=float("nan"))
-
-    prediction = model.predict(dmatrix)
-
-    df["prediction_g2"] = prediction
-
-    return df.iloc[0].to_dict()
-
-
-# Backward compatibility alias
-processing = processing_g1
--- a/request_schema.json
+++ b/request_schema.json
@ -1,11 +1 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema",
-  "type": "object",
-  "properties": {
-    "results": {
-      "type": ["array", "null"],
-      "items": {"type": "object"}
-    }
-  },
-  "required": []
-}
+{}
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1 @@
-joblib==1.4.2
-pandas==2.2.3
-xgboost==2.1.3
-typing==3.6.1
-
+{}
--- a/response_schema.json
+++ b/response_schema.json
@ -1,42 +1 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "type": "object",
-  "properties": {
-    "hd_score_m1": {
-      "type": ["number", "null"],
-      "description": "HD fraud Score M1"
-    },
-    "hd_score_g1": {
-      "type": ["number", "null"],
-      "description": "HD fraud Score G1"
-    },
-    "hd_score_g2": {
-      "type": ["number", "null"],
-      "description": "HD fraud Score G2"
-    },
-    "cluster_size_users_v2": {
-      "type": ["number", "null"],
-      "description": "Size of the user cluster in version 2."
-    },
-    "target_connected_30_sum": {
-      "type": ["number", "null"],
-      "description": "Sum of target connections within 30 days."
-    },
-    "email_cnt": {
-      "type": ["number", "null"],
-      "description": "Count of emails associated with the application."
-    },
-    "rejected_app_count": {
-      "type": ["number", "null"],
-      "description": "Count of rejected applications for the applicant."
-    },
-    "app_dt_day_cnt": {
-      "type": ["number", "null"],
-      "description": "Number of application days counted."
-    },
-    "hd_score_iso_m2": {
-      "type": ["number", "null"],
-      "description": "HD fraud Score M2"
-    }
-  }
-}
+{}
--- a/test_block.py
+++ b/test_block.py
@ -1,47 +0,0 @@
-import unittest
-from block import __main__
-
-
-data = [{
-            "application_key": "A3CDD39F-10F8-40B0-A4C9-0E1558B75131",
-            "hd_score_m1": 1101.0,
-            "hd_score_iso_m2": 1113,
-            "cluster_size": 10,
-            "cluster_size_users_v2": 3,
-            "target_connected_30_sum": 0.0,
-            "email_cnt": 3,
-            "rejected_app_count": 6.0,
-            "app_dt_day_cnt": 7,
-            "hd_score_m2": 1188,
-            "hd_score_m2_connected_max": 1197.0,
-            "hd_score_m2_connected_avg": 1184.888889,
-            "applicant_age_connected_max": 60.0,
-            "applicant_age_connected_avg": 52.44444444,
-            "account_tel_first_seen_min_conn": 879.0,
-            "account_tel_first_seen_max_conn": 989.0,
-            "account_tel_first_seen_avg_conn": 949.6666667,
-            "ssn_hash_first_seen_min_conn": 5.0,
-            "ssn_hash_first_seen_avg_conn": 58.0,
-            "account_login_first_seen_min_conn": 0.0,
-            "digital_id_first_seen_max_conn": 2652.0,
-            "true_ip_first_seen_min_conn": 1857.0,
-            "true_ip_first_seen_max_conn": 1967.0,
-            "dist_em_ip_ref_km_min_conn": 17.43689023,
-            "pct_acc_email_attr_challenged_1_conn": 0.0,
-            "account_login_first_seen_range_conn": 2313.0,
-            "account_login_first_seen_stddev_conn": 1042.4994,
-            "cpu_clock_range_conn": 9054.0,
-            "summary_risk_score_max_conn": 14.0
-        }]
-
-
-class TestBlock(unittest.TestCase):
-    def test_main_returns_scores(self):
-        block_result = __main__(data)
-        self.assertIsInstance(block_result, dict)
-        self.assertIn("hd_score_g1", block_result)
-        self.assertIn("hd_score_g2", block_result)
-
-
-if __name__ == "__main__":
-    unittest.main()
--- a/xgboost_model.joblib
+++ b/xgboost_model.joblib
--- a/xgboost_model_G1.joblib
+++ b/xgboost_model_G1.joblib
--- a/xgboost_model_G2.joblib
+++ b/xgboost_model_G2.joblib