Advanced M series V1 model block
parent 4ca7b2486f
commit ead9a776da
@@ -1 +1,3 @@

Removed:

```markdown
**Hello world!!!**
```

Added:

```markdown
## Advanced M series V1 model block

M Series model trained on historical data to identify fraudulent patterns.
```
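For orientation, here is a minimal invocation sketch of the new block defined in `block.py` below. Every value is made up, the model artefacts added in this commit are assumed to be on the import path, and in practice the flowx runtime presumably supplies these arguments rather than a script:

```python
# Hypothetical smoke test; all input values are invented for illustration.
from block import __main__ as score_application

result = score_application(
    application_key="app-123",
    application_timestamp="2024-11-05T10:30:00Z",
    application_ssn="000-00-0000",
    application_email_address="User@Example.com",
    application_bank_account_number="00000000",
    application_is_rejected="false",
    application_date_of_birth="1990-01-01",
    educationlevel="bachelor's degree",
    employmentstatus="full time employed",
    lengthatbank="5",
    lengthatjob="12",
    ownhome="true",
    payfrequency="biweekly",
    monthsatresidence="24",
    EventType="login",
    DigitalIdConfidence="5000",
    RiskRating="low",
    TmxSummaryReasonCode="",
    TrueIpGeo="us",
    Blob="",  # an empty blob exercises the no-blob branch in block.py
    DeviceId="dev-1",
    FuzzyDeviceId="fdev-1",
)
print(result["hd_score_m1"])
```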
block.py (189 lines changed)
@@ -1,21 +1,174 @@

Removed (the placeholder block stub):

```python
@flowx_block
def example_function(request: dict) -> dict:
    # Processing logic here...
    return {
        "meta_info": [
            {
                "name": "created_date",
                "type": "string",
                "value": "2024-11-05"
            }
        ],
        "fields": [
            {
                "name": "",
                "type": "",
                "value": ""
            }
        ]
    }
```

Added:

```python
import pandas as pd
import logging
import json
import jmespath
import regex as re

from pre_processing import pre_processing
from processing import processing
from post_processing import post_processing

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
)
logger = logging.getLogger(__name__)


# Function to extract a value using JMESPath
def extract_value(blob, expression):
    try:
        return jmespath.search(expression, blob)
    except Exception:
        return None


# Coalesce function to return the first non-None value
def coalesce(*args):
    for value in args:
        if value is not None:
            return value
    return None


# Sanitize the raw vendor blob into parseable JSON
def sanitize_blob(blob):
    try:
        blob = re.sub(r'"(\w+)":"(\{[^}]+\})"', r'"\1":\2', blob)
        blob = re.sub(r'"tps_vendor_raw_response"\s*:\s*"\?\{', '"tps_vendor_raw_response":{', blob)
        blob = blob.replace('\\"', '"')
        blob = blob.replace('\\n', '')
        blob = blob.replace('\\t', '')
        blob = blob.replace('\\\\', '')
        blob = re.sub(r'(\}\})"', r'\1', blob)
        blob = re.sub(r',\s*([\}\]])', r'\1', blob)
        return json.loads(blob)
    except json.JSONDecodeError as e:
        logger.error(f"JSON Decode Error: {e}")
        error_pos = e.pos
        snippet = blob[max(0, error_pos - 50): error_pos + 50]
        logger.error(f"Error near:\n{snippet}")
        return None
# ---------------- Sanitise ends here


# Expressions to extract values (first matching path wins via coalesce)
expressions = {
    "first_seen_days": [
        "tps_vendor_raw_response.query.results[0].first_seen_days",
        "emailage.emailriskscore.first_seen_days"
    ],
    "ea_score": [
        "tps_vendor_raw_response.query.results[0].EAScore",
        "emailage.emailriskscore.eascore"
    ],
    "email_creation_days": [
        "tps_vendor_raw_response.query.results[0].email_creation_days"
    ],
    "summary_risk_score": ["summary_risk_score"],
    "digital_id_trust_score_rating": ["digital_id_trust_score_rating"],
    "os_version": ["os_version"],
    "account_email_worst_score": ["account_email_worst_score"],
    "true_ip_score": ["true_ip_score"],
    "ip_net_speed_cell": [
        "tps_vendor_raw_response.query.results[0].ip_netSpeedCell",
        # "true_ip_connection_type"
    ],
    "account_email_score": ["account_email_score"],
    "true_ip_worst_score": ["true_ip_worst_score"],
    "proxy_ip_worst_score": ["proxy_ip_worst_score"],
    "proxy_ip_score": ["proxy_ip_score"],
    "fuzzy_device_score": ["fuzzy_device_score"],
    "ip_region_confidence": ["tps_vendor_raw_response.query.results[0].ip_regionconf"],
    "true_ip_state_confidence": ["true_ip_state_confidence"],
    "fuzzy_device_worst_score": ["fuzzy_device_worst_score"],
    "digital_id_confidence_rating": ["digital_id_confidence_rating"]
}


def __main__(
    # Application ->
    application_key: str,
    application_timestamp: str,
    application_ssn: str,
    application_email_address: str,
    application_bank_account_number: str,
    application_is_rejected: str,
    application_date_of_birth: str,
    # uprovaloanapplication ->
    educationlevel: str,
    employmentstatus: str,
    lengthatbank: str,
    lengthatjob: str,
    ownhome: str,
    payfrequency: str,
    monthsatresidence: str,
    # thxresponse ->
    EventType: str,
    DigitalIdConfidence: str,
    RiskRating: str,
    TmxSummaryReasonCode: str,
    TrueIpGeo: str,
    Blob: str,
    DeviceId: str,
    FuzzyDeviceId: str
) -> dict:

    # Convert input parameters into a flat dictionary
    data = {
        "application_key": application_key,
        "application_timestamp": application_timestamp,
        "application_ssn": application_ssn,
        "application_email_address": application_email_address,
        "application_bank_account_number": application_bank_account_number,
        "application_is_rejected": application_is_rejected,
        "application_date_of_birth": application_date_of_birth,
        "educationlevel": educationlevel,
        "employmentstatus": employmentstatus,
        "lengthatbank": lengthatbank,
        "lengthatjob": lengthatjob,
        "ownhome": ownhome,
        "payfrequency": payfrequency,
        "monthsatresidence": monthsatresidence,
        "EventType": EventType,
        "DigitalIdConfidence": DigitalIdConfidence,
        "RiskRating": RiskRating,
        "TmxSummaryReasonCode": TmxSummaryReasonCode,
        "TrueIpGeo": TrueIpGeo,
        "Blob": Blob,
        "DeviceId": DeviceId,
        "FuzzyDeviceId": FuzzyDeviceId
    }

    # Convert dictionary to a single-row DataFrame
    combined_df = pd.DataFrame([data])
    combined_df.columns = combined_df.columns.str.lower()
    combined_df["application_email_address"] = combined_df["application_email_address"].str.lower()

    if Blob:
        combined_df["blob"] = combined_df["blob"].apply(sanitize_blob)

        # Step 2: Extract values using the expressions dictionary
        for column, expressions_list in expressions.items():
            combined_df[column] = combined_df["blob"].apply(
                lambda x: coalesce(*[extract_value(x, expr) for expr in expressions_list])
            )

        logger.info("pre_flowx data")
        logger.info(combined_df.iloc[0].drop('blob').to_dict())
    else:
        for column, expressions_list in expressions.items():
            combined_df[column] = None
        logger.info("pre_flowx data")
        logger.info(combined_df.iloc[0].to_dict())

    pre_processed_data = pre_processing(combined_df)
    # logger.info(f"pre_processed_data: {pre_processed_data}")
    logger.info("pre_processed data")
    logger.info(pre_processed_data.iloc[0].to_dict())

    df = processing(pre_processed_data)
    logger.info("processed_data")
    logger.info(df.iloc[0].to_dict())
    df["application_timestamp"] = df["application_timestamp"].astype(str)
    # logger.info("prediction: %.8f", float(df['prediction'].iloc[0]))

    result = post_processing(df)
    # logger.info("Score: %.0f", float(result["hd_score_m1"]))
    logger.info(result)

    return result

# testing:
# __main__
```
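A self-contained illustration of the sanitise-then-extract idea in `block.py`, using a made-up escaped blob (the real vendor payload is more elaborate; only the backslash-unescaping step of `sanitize_blob` is reproduced here):

```python
import json
import jmespath

# Made-up escaped blob, shaped loosely like the payloads the regexes above target.
raw = '{\\"summary_risk_score\\": -10, \\"os_version\\": \\"14.2\\"}'

# The unescape step from sanitize_blob: turn \" back into plain quotes, then parse.
clean = json.loads(raw.replace('\\"', '"'))

# Coalesce over candidate JMESPath expressions, as the expressions dict does.
paths = [
    "tps_vendor_raw_response.query.results[0].summary_risk_score",  # misses here
    "summary_risk_score",                                           # hits
]
value = next((v for v in (jmespath.search(p, clean) for p in paths) if v is not None), None)
print(value)  # -10
```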
category_orders_train.json (new file, 88 lines)
@@ -0,0 +1,88 @@

```json
{
    "employmentstatus": [
        "disability",
        "fixed income",
        "full time employed",
        "other",
        "part time employment",
        "retired benefits",
        "self employed",
        "student",
        "unemployed",
        "welfare"
    ],
    "TrueIpGeo": [
        "other",
        "us"
    ],
    "digital_id_trust_score_rating": [
        "high",
        "low",
        "neutral",
        "very_high",
        "very_low"
    ],
    "educationlevel": [
        "associate's degree",
        "bachelor's degree",
        "doctorate",
        "high school",
        "master's degree",
        "other"
    ],
    "os_version": [
        "10",
        "11",
        "12",
        "13",
        "14",
        "15",
        "16",
        "17",
        "18",
        "8",
        "9",
        "unknown"
    ],
    "ip_net_speed_cell": [
        "broadband",
        "cable",
        "dialup",
        "dsl",
        "fixed wireless",
        "mobile",
        "mobile wireless",
        "ocx",
        "satellite",
        "t1",
        "tx",
        "wireless",
        "xdsl"
    ],
    "day_night": [
        "Day",
        "Night"
    ],
    "digital_id_confidence_rating": [
        "high",
        "medium",
        "very_high",
        "very_low"
    ],
    "RiskRating": [
        "high",
        "low",
        "medium",
        "neutral",
        "trusted"
    ],
    "payfrequency": [
        "biweekly",
        "semimonthly"
    ],
    "ownhome": [
        "false",
        "true"
    ]
}
```
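This file pins the category levels seen at training time; downstream, `processing.py` applies them as ordered categoricals so the model sees stable integer codes regardless of what a single scoring row happens to contain. A quick sketch of that contract, reading the file added above:

```python
import json
import pandas as pd

with open("category_orders_train.json") as f:
    category_orders = json.load(f)

# Values outside the pinned levels (or missing) encode as -1.
s = pd.Categorical(["trusted", "high", None],
                   categories=category_orders["RiskRating"], ordered=True)
print(list(s.codes))  # [4, 0, -1]
```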
post_processing.py (new file, 25 lines)
@@ -0,0 +1,25 @@

```python
import logging

import numpy as np

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
)
logger = logging.getLogger(__name__)


def post_processing(df):
    try:
        # Map the raw model probability onto the score scale: a capped linear
        # ramp (at most 85 points) plus a log2 term that only contributes once
        # prediction * 100 exceeds 1.
        df['hd_score_m1'] = np.round(
            np.minimum(df['prediction'] * 100 + 0.00001, 1) * 85 +
            np.maximum(np.log2(df['prediction'] * 100 + 0.000001) * 185, 0),
            0
        )
        logger.info(f"hd_score_m1 calculated: {df['hd_score_m1'].iloc[0]}")
    except Exception as e:
        logger.error(f"Error processing hd_score_m1 calculations: {e}")

    return df[['application_key', 'application_timestamp', 'deviceid', 'fuzzydeviceid',
               'application_email_address', 'hd_score_m1']].iloc[0].to_dict()
```
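To make the score mapping above concrete, a worked check over a few hypothetical prediction values (small probabilities land on the linear 85-point ramp; once `prediction * 100` passes 1, the log term takes over):

```python
import numpy as np

for p in (0.0001, 0.005, 0.05):
    score = np.round(
        np.minimum(p * 100 + 0.00001, 1) * 85 +
        np.maximum(np.log2(p * 100 + 0.000001) * 185, 0),
        0,
    )
    print(p, score)  # ~1, ~43, ~515
```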
pre_processing.py (new file, 254 lines)
@@ -0,0 +1,254 @@

```python
import pandas as pd
import numpy as np
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
)
logger = logging.getLogger(__name__)


def pre_processing(data_df):

    # combined_df = pd.DataFrame([input_data])
    combined_df = data_df

    # Applicant age in whole years at application time
    combined_df["applicant_age"] = combined_df.apply(
        lambda row: pd.to_datetime(row["application_timestamp"]).year
        - pd.to_datetime(row["application_date_of_birth"]).year
        if pd.notnull(row["application_timestamp"]) and pd.notnull(row["application_date_of_birth"])
        else None,
        axis=1,
    )

    # Extracting temporal features
    combined_df['application_timestamp'] = pd.to_datetime(combined_df["application_timestamp"])
    combined_df.loc[:, 'application_time'] = combined_df['application_timestamp'].dt.time

    combined_df['day'] = combined_df['application_timestamp'].dt.day
    combined_df['day_of_week'] = combined_df['application_timestamp'].dt.weekday  # 0=Monday, 6=Sunday

    # Cyclical encodings so day 31 / Sunday wrap around to day 1 / Monday
    combined_df['day_sin'] = np.sin(2 * np.pi * combined_df['day'] / 31)
    combined_df['day_cos'] = np.cos(2 * np.pi * combined_df['day'] / 31)
    combined_df['day_of_week_sin'] = np.sin(2 * np.pi * combined_df['day_of_week'] / 7)
    combined_df['day_of_week_cos'] = np.cos(2 * np.pi * combined_df['day_of_week'] / 7)

    # combined_df['is_weekend'] = combined_df['day_of_week'].apply(lambda x: 1 if x >= 5 else 0)

    # Create a day/night variable
    def classify_day_night(hour):
        if 6 <= hour < 18:
            return 'Day'
        else:
            return 'Night'

    # Extract hour from application_time
    combined_df['hour'] = combined_df['application_time'].apply(lambda x: x.hour if pd.notnull(x) else np.nan)
    combined_df['day_night'] = combined_df['hour'].apply(
        lambda hour: classify_day_night(hour) if pd.notnull(hour) else 'Unknown'
    )

    # Keep only the major OS version ("14.2" -> "14", "10_5" -> "10")
    # combined_df['os_version'] = combined_df['os_version'].str.replace(r'[^a-zA-Z0-9]', '_', regex=True)
    combined_df['os_version'] = combined_df['os_version'].apply(
        lambda x: x.split('.')[0] if isinstance(x, str) and '.' in x
        else x.split('_')[0] if isinstance(x, str) and '_' in x
        else x
    )

    # Binary flags derived from the TMX summary reason codes
    # combined_df['Level_1_Link_Accept'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Level_1_Link_Accept', na=False, regex=True).astype(int)
    combined_df['Identity_Negative_History'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Identity_Negative_History', na=False, regex=True).astype(int)
    combined_df['Device_Negative_History'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Device_Negative_History', na=False, regex=True).astype(int)
    combined_df['Level_1_Link_Reject'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Level_1_Link_Reject', na=False, regex=True).astype(int)
    combined_df['IP_Negative_History'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('IP_Negative_History', na=False, regex=True).astype(int)
    combined_df['Identity_Spoofing'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Identity_Spoofing', na=False, regex=True).astype(int)
    # combined_df['Bot'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Bot', na=False, regex=True).astype(int)

    combined_df['digitalidconfidence'] = pd.to_numeric(combined_df['digitalidconfidence'], errors='coerce').astype('Int64')

    # Rename columns if required
    combined_df.rename(columns={
        'DigitalIdConfidence': 'digitalidconfidence',
        # 'inputipaddress_consistency': 'inputip_consistency',
        # 'requestid_consistency': 'request_consistency',
        # Add others as required if present in your DataFrame and needing renaming.
    }, inplace=True)

    # Testing: remove below
    # combined_df.to_csv('op-pre-processing_intermediate.csv', index=False)

    dtype_dict = {
        "applicant_age": int,
        "digitalidconfidence": float,
        "first_seen_days": float,
        "employmentstatus": str,
        "ea_score": float,
        "trueipgeo": str,
        "hour": int,
        "email_creation_days": float,
        "lengthatjob": float,
        "day_cos": float,
        "summary_risk_score": float,
        "digital_id_trust_score_rating": str,
        "day": 'int32',
        "lengthatbank": float,
        "day_of_week_cos": float,
        "Level_1_Link_Reject": int,
        "Identity_Negative_History": int,
        "educationlevel": str,
        "os_version": str,
        "account_email_worst_score": float,
        "true_ip_score": float,
        "ip_net_speed_cell": str,
        "account_email_score": float,
        "day_of_week": 'int32',
        "true_ip_worst_score": float,
        "proxy_ip_worst_score": float,
        "day_night": str,
        "proxy_ip_score": float,
        "monthsatresidence": float,
        "Device_Negative_History": int,
        "fuzzy_device_score": float,
        "day_sin": float,
        "ip_region_confidence": float,
        "true_ip_state_confidence": float,
        "IP_Negative_History": int,
        "fuzzy_device_worst_score": float,
        "digital_id_confidence_rating": str,
        "day_of_week_sin": float,
        "riskrating": str,
        "payfrequency": str,
        "ownhome": str,
        "Identity_Spoofing": int
    }

    next_block_cols = ['application_key', 'application_timestamp', 'deviceid', 'fuzzydeviceid', 'application_email_address']
    cols_to_keep = [col for col in dtype_dict.keys() if col in combined_df.columns]

    final_cols = list(set(next_block_cols).union(set(cols_to_keep)))

    # Type casting
    for col, dtype in dtype_dict.items():
        if col in combined_df.columns:
            if dtype == int:
                combined_df[col] = pd.to_numeric(combined_df[col], errors='coerce', downcast='integer')
            elif dtype == float:
                combined_df[col] = pd.to_numeric(combined_df[col], errors='coerce', downcast='float')
            elif dtype == str:
                combined_df[col] = combined_df[col].astype(str)

    # Cap numeric features to the ranges seen at training time
    capping_dict = {
        "applicant_age": (18, 93),
        "digitalidconfidence": (0, 9017),
        "first_seen_days": (0, 10486),
        "ea_score": (1, 930),
        "hour": (0, 23),
        "email_creation_days": (2438, 9661),
        "lengthatjob": (1, 24),
        "day_cos": (-0.9948693234, 1),
        "summary_risk_score": (-100, 30),
        "day": (1, 31),
        "lengthatbank": (0, 25),
        "day_of_week_cos": (-0.9009688679, 1),
        "Level_1_Link_Reject": (0, 1),
        "Identity_Negative_History": (0, 1),
        "account_email_worst_score": (-52, 0),
        "true_ip_score": (-38, 49),
        "account_email_score": (-18, 9),
        "day_of_week": (0, 6),
        "true_ip_worst_score": (-100, 0),
        "proxy_ip_worst_score": (-100, 0),
        "proxy_ip_score": (-29, 60),
        "monthsatresidence": (0, 25),
        "Device_Negative_History": (0, 1),
        "fuzzy_device_score": (-29, 14),
        "day_sin": (-0.9987165072, 0.9987165072),
        "ip_region_confidence": (75, 99),
        # "true_ip_state_confidence": (5, 98),
        "IP_Negative_History": (0, 1),
        "fuzzy_device_worst_score": (-100, 0),
        "day_of_week_sin": (-0.9749279122, 0.9749279122),
        "Identity_Spoofing": (0, 1),
    }

    # Apply capping
    for column, (cap_min, cap_max) in capping_dict.items():
        if column in combined_df.columns:
            combined_df[column] = combined_df[column].clip(lower=cap_min, upper=cap_max)

    def handle_unknowns(X, column, known_values, default_treatment=None):
        if column not in X.columns:
            return X  # Return X to avoid NoneType error
        known_values = {str(val).lower() for val in known_values}
        invalid_values = {None, "none", "nan", pd.NA}
        X[column] = X[column].apply(
            lambda x: str(x).lower() if pd.notna(x) and str(x).lower() in known_values
            else (default_treatment if pd.notna(x) and str(x).lower() not in invalid_values else np.nan)
        )
        return X  # Always return the DataFrame

    # Map unseen category values to a per-column default
    unknown_treatments = {
        "employmentstatus": {
            "valid_values": [
                "disability", "fixed income", "full time employed", "part time employment",
                "retired benefits", "self employed", "student", "unemployed", "welfare"
            ],
            "default_treatment": "other"
        },
        "trueipgeo": {
            "valid_values": ["US"],
            "default_treatment": "other"
        },
        "digital_id_trust_score_rating": {
            "valid_values": ["very_high", "high", "neutral", "low"],
            "default_treatment": "very_low"
        },
        "educationlevel": {
            "valid_values": ["associate's degree", "bachelor's degree", "doctorate", "high school", "master's degree"],
            "default_treatment": "other"
        },
        "os_version": {
            "valid_values": [
                '18', '17', '16', '15', '14', '13', '12', '11', '10', '9', '8'
            ],
            "default_treatment": 'unknown'
        },
        "ip_net_speed_cell": {
            "valid_values": [
                "broadband", "cable", "dialup", "dsl", "fixed wireless", "mobile", "mobile wireless", "ocx", "satellite",
                "t1", "tx", "wireless", "xdsl"
            ],
            "default_treatment": "mobile"
        },
        "digital_id_confidence_rating": {
            "valid_values": ["high", "medium", "very_high"],
            "default_treatment": "very_low"
        },
        "riskrating": {
            "valid_values": ["low", "medium", "neutral", "trusted"],
            "default_treatment": "high"
        },
        "ownhome": {
            "valid_values": ["true", "false"],
            "default_treatment": np.nan
        },
    }

    for column, treatment in unknown_treatments.items():
        combined_df = handle_unknowns(combined_df, column, treatment["valid_values"], treatment["default_treatment"])

    # Normalise pay-frequency spellings to the two training categories
    payfrequency_map = {
        "biweekly": ["biweekly", "bi-weekly", "bi weekly", "bw"],
        "semimonthly": ["semi-monthly", "semimonthly"]
    }

    combined_df['payfrequency'] = combined_df['payfrequency'].apply(
        lambda x: next((key for key, values in payfrequency_map.items() if str(x).lower() in values), np.nan)
    )

    return combined_df[final_cols]
```
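A quick sanity check of the cyclical day-of-week encoding used above: mapping onto sin/cos keeps Sunday (6) and Monday (0) near each other on the unit circle, which a raw 0-6 integer would not.

```python
import numpy as np

for dow in (0, 3, 6):  # Monday, Thursday, Sunday
    angle = 2 * np.pi * dow / 7
    print(dow, round(np.sin(angle), 3), round(np.cos(angle), 3))
# 0 -> (0.0, 1.0); 6 -> (-0.782, 0.623): adjacent days stay close,
# while Thursday (3) sits on the far side of the circle.
```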
processing.py (new file, 46 lines)
@@ -0,0 +1,46 @@

```python
import pandas as pd
import numpy as np
import xgboost as xgb
import joblib
import json


def processing(input_data):
    df = pd.DataFrame(input_data)

    # Load model
    model_path = "./xgboost_model.joblib"
    # model_path = "C:/Users/abinisha/habemco_flowx/m1_v1/xgboost_model.joblib"
    model = joblib.load(model_path)
    df.rename(columns={'riskrating': 'RiskRating', 'trueipgeo': 'TrueIpGeo'}, inplace=True)

    # Load category orders
    category_orders_path = "./category_orders_train.json"
    # category_orders_path = "C:/Users/abinisha/habemco_flowx/m1_v1/category_orders_train.json"
    with open(category_orders_path, 'r') as f:
        category_orders = json.load(f)

    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    # Ensure all expected features exist
    expected_features = model.feature_names

    for col, categories in category_orders.items():
        # Normalise empty/placeholder values to NaN, then cast to the ordered
        # categorical dtype the model was trained with.
        df[col] = df[col].replace([None, "", "null", np.nan, "nan", " "], np.nan)
        df[col] = pd.Categorical(df[col], categories=categories, ordered=True)

    # missing_features = [feature for feature in expected_features if feature not in df.columns]
    # for feature in missing_features:
    #     df[feature] = np.nan  # Use NaN to avoid dtype issues

    # Create XGBoost DMatrix
    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=np.nan)

    # Make predictions
    predictions = model.predict(dmatrix)
    df['prediction'] = predictions

    return df
```
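A minimal, self-contained sketch of the categorical `DMatrix` construction above, with toy data (the real feature list and booster come from the joblib artefact added in this commit):

```python
import numpy as np
import pandas as pd
import xgboost as xgb

df = pd.DataFrame({
    "RiskRating": pd.Categorical(
        ["low", None],
        categories=["high", "low", "medium", "neutral", "trusted"],
        ordered=True,
    ),
    "ea_score": [120.0, np.nan],
})

# enable_categorical lets XGBoost consume the pandas categorical codes directly;
# NaN rides through as the missing-value marker.
dmatrix = xgb.DMatrix(df, enable_categorical=True, missing=np.nan)
print(dmatrix.num_row(), dmatrix.num_col())  # 2 2
```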
@@ -1 +1,95 @@

Removed:

```json
{}
```

Added:

```json
{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {
        "application_key": {
            "type": ["string", "null"],
            "description": "Unique identifier for the application."
        },
        "application_timestamp": {
            "type": ["string", "null"],
            "description": "Timestamp when the application was submitted in UTC."
        },
        "application_ssn": {
            "type": ["string", "null"],
            "description": "Social Security Number of the applicant."
        },
        "application_email_address": {
            "type": ["string", "null"],
            "description": "Email address of the applicant."
        },
        "application_bank_account_number": {
            "type": ["string", "null"],
            "description": "Bank account number of the applicant."
        },
        "application_is_rejected": {
            "type": ["boolean", "null"],
            "description": "Indicates whether the application was rejected."
        },
        "application_date_of_birth": {
            "type": ["string", "null"],
            "description": "Date of birth of the applicant."
        },
        "EventType": {
            "type": ["string", "null"],
            "description": "Type of event associated with the application."
        },
        "RiskRating": {
            "type": ["string", "null"],
            "description": "Risk rating assigned to the application."
        },
        "TmxSummaryReasonCode": {
            "type": ["string", "null"],
            "description": "Reason code summary from third-party risk assessment."
        },
        "DigitalIdConfidence": {
            "type": ["string", "null"],
            "description": "Confidence score for the digital identity of the applicant."
        },
        "TrueIpGeo": {
            "type": ["string", "null"],
            "description": "Geolocation information of the true IP address used in the application."
        },
        "Blob": {
            "type": ["string", "null"],
            "description": "Raw data blob containing additional information related to the application."
        },
        "DeviceId": {
            "type": ["string", "null"],
            "description": "Unique identifier for the device used to submit the application."
        },
        "FuzzyDeviceId": {
            "type": ["string", "null"],
            "description": "Hashed or partially anonymized identifier for the device."
        },
        "ownhome": {
            "type": ["boolean", "null"],
            "description": "Indicates whether the applicant owns a home."
        },
        "employmentstatus": {
            "type": ["string", "null"],
            "description": "Employment status of the applicant."
        },
        "lengthatjob": {
            "type": ["number", "null"],
            "description": "Length of time (in months) the applicant has been at their current job."
        },
        "payfrequency": {
            "type": ["string", "null"],
            "description": "Frequency of pay for the applicant (e.g., weekly, biweekly, monthly)."
        },
        "lengthatbank": {
            "type": ["string", "null"],
            "description": "Length of time the applicant has been with their bank."
        },
        "educationlevel": {
            "type": ["string", "null"],
            "description": "Highest level of education attained by the applicant."
        },
        "monthsatresidence": {
            "type": ["number", "null"],
            "description": "Number of months the applicant has lived at their current residence."
        }
    },
    "required": []
}
```
@@ -1 +1,6 @@

Removed:

```
{}
```

Added:

```
pandas == 2.2.3
numpy == 2.2.3
xgboost == 2.1.4
joblib == 1.4.2
jmespath == 1.0.1
regex == 2023.12.25
```
@@ -1 +1,34 @@

Removed:

```json
{}
```

Added:

```json
{
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": {
        "application_key": {
            "type": ["string", "null"],
            "description": "Application Key"
        },
        "application_timestamp": {
            "type": ["string", "null"],
            "description": "Application Timestamp"
        },
        "deviceid": {
            "type": ["string", "null"],
            "description": "Device ID"
        },
        "fuzzydeviceid": {
            "type": ["string", "null"],
            "description": "Fuzzy Device ID"
        },
        "application_email_address": {
            "type": ["string", "null"],
            "description": "Application Email Address"
        },
        "hd_score_m1": {
            "type": ["number", "null"],
            "description": "HD Fraud Score M1"
        },
        "action": {
            "type": ["string", "null"],
            "description": "Recommended Action."
        }
    }
}
```
test_block.py (new file, 45 lines)
File diff suppressed because one or more lines are too long.
xgboost_model.joblib (new binary file)
Binary file not shown.