Sync m-1-v-1 block with local updates
This commit is contained in:
parent 1bf55226e1
commit d0f4d225ee
block.py | 244
@@ -1,12 +1,12 @@
import pandas as pd
import logging
import json

import jmespath
import regex as re
from pre_processing import pre_processing
from processing import processing
from post_processing import post_processing
import json_repair
import pandas as pd
import regex as re
from pre_processing import pre_processing_all
from processing import processing_all
from post_processing import post_processing_all


# Configure logging
@@ -16,7 +16,7 @@ logging.basicConfig(
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_JSON_LIKE = re.compile(r'^\s*\?*[\{\[].*[\}\]]\s*$', re.DOTALL)
|
||||
_JSON_LIKE = re.compile(r"^\s*\?*[\{\[].*[\}\]]\s*$", re.DOTALL)
|
||||
|
||||
|
||||
def extract_value(blob, expression):
@@ -25,14 +25,13 @@ def extract_value(blob, expression):
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def coalesce(*args):
|
||||
for value in args:
|
||||
if value is not None:
|
||||
return value
|
||||
return None
|
||||
|
||||
# New sanitize blob function
|
||||
|
||||
|
||||
def deep_repair(obj):
|
||||
# 1) If it's a string that *looks* like JSON (with or without one leading '?'),
@@ -40,18 +39,17 @@ def deep_repair(obj):
if isinstance(obj, str):
|
||||
s = obj.strip()
|
||||
if _JSON_LIKE.match(s):
|
||||
# strip one leading '?' if present
|
||||
if s.startswith('?'):
|
||||
if s.startswith("?"):
|
||||
s = s[1:]
|
||||
parsed = json_repair.loads(s)
|
||||
return deep_repair(parsed)
|
||||
return obj
|
||||
|
||||
# 2) Dict → recurse on each value
|
||||
# 2) Dict – recurse on each value
|
||||
if isinstance(obj, dict):
|
||||
return {k: deep_repair(v) for k, v in obj.items()}
|
||||
|
||||
# 3) List → recurse on each element
|
||||
# 3) List – recurse on each element
|
||||
if isinstance(obj, list):
|
||||
return [deep_repair(v) for v in obj]
|
||||
@@ -66,34 +64,21 @@ def sanitize_blob(blob):
logger.error("Failed to sanitize blob: %s", e)
|
||||
return None
|
||||
|
||||
# Expressions to extract values
|
||||
# Expressions to extract values (M1 + added M2 fields)
|
||||
expressions = {
|
||||
# M1 (existing)
|
||||
"first_seen_days": [
|
||||
# 1) any vendor under integration_hub_results → first_seen_days
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].first_seen_days)[0]",
|
||||
|
||||
# 2) the flat “dotted” key
|
||||
"Blob.\"emailage.emailriskscore.first_seen_days\"",
|
||||
|
||||
# 3) fallback to the top level tps_vendor_raw_response path
|
||||
'Blob."emailage.emailriskscore.first_seen_days"',
|
||||
"Blob.tps_vendor_raw_response.query.results[0].first_seen_days",
|
||||
],
|
||||
"ea_score": [
|
||||
# 1) any vendor under integration_hub_results
|
||||
'Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].EAScore',
|
||||
|
||||
# 2) the flat “dotted” key
|
||||
"Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].EAScore",
|
||||
'Blob."emailage.emailriskscore.eascore"',
|
||||
|
||||
# 3) fallback to the top level tps_vendor_raw_response
|
||||
'Blob.tps_vendor_raw_response.query.results[0].EAScore',
|
||||
"Blob.tps_vendor_raw_response.query.results[0].EAScore",
|
||||
],
|
||||
"email_creation_days": [
|
||||
# 1) any vendor under integration_hub_results → results[0].email_creation_days
|
||||
"(Blob.integration_hub_results.*"
|
||||
".tps_vendor_raw_response.query.results[0].email_creation_days)[0]",
|
||||
|
||||
# 2) fallback to the top level tps_vendor_raw_response path
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].email_creation_days)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].email_creation_days",
|
||||
],
|
||||
"summary_risk_score": ["Blob.summary_risk_score"],
@@ -102,11 +87,7 @@ expressions = {
"account_email_worst_score": ["Blob.account_email_worst_score"],
|
||||
"true_ip_score": ["Blob.true_ip_score"],
|
||||
"ip_net_speed_cell": [
|
||||
# 1) any vendor under integration_hub_results → results[0].ip_netSpeedCell
|
||||
"(Blob.integration_hub_results.*"
|
||||
".tps_vendor_raw_response.query.results[0].ip_netSpeedCell)[0]",
|
||||
|
||||
# 2) fallback to the top level tps_vendor_raw_response path
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_netSpeedCell)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].ip_netSpeedCell",
|
||||
],
|
||||
"account_email_score": ["Blob.account_email_score"],
@@ -115,17 +96,105 @@ expressions = {
"proxy_ip_score": ["Blob.proxy_ip_score"],
|
||||
"fuzzy_device_score": ["Blob.fuzzy_device_score"],
|
||||
"ip_region_confidence": [
|
||||
# 1) any vendor under integration_hub_results → results[0].ip_regionconf
|
||||
"(Blob.integration_hub_results.*"
|
||||
".tps_vendor_raw_response.query.results[0].ip_regionconf)[0]",
|
||||
|
||||
# 2) fallback to the top level tps_vendor_raw_response path
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_regionconf)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].ip_regionconf",
|
||||
],
|
||||
"true_ip_state_confidence": ["Blob.true_ip_state_confidence"],
|
||||
"fuzzy_device_worst_score": ["Blob.fuzzy_device_worst_score"],
|
||||
"digital_id_confidence_rating": ["Blob.digital_id_confidence_rating"],
|
||||
"trueipgeo": ["TrueIpGeo","Blob.true_ip_geo"],
|
||||
"trueipgeo": ["TrueIpGeo", "Blob.true_ip_geo"],
|
||||
# M2 additions
|
||||
"policy_score": ["Blob.policy_score"],
|
||||
"digital_id_trust_score": ["Blob.digital_id_trust_score"],
|
||||
"proxy_score": ["Blob.proxy_score"],
|
||||
"browser_spoof_score": ["Blob.browser_spoof_score"],
|
||||
"input_ip_connection_type": ["Blob.input_ip_connection_type"],
|
||||
"fuzzy_device_id_confidence": ["Blob.fuzzy_device_id_confidence"],
|
||||
"fraudrisk": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].fraudRisk)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].fraudRisk",
|
||||
'Blob."emailage.emailriskscore.fraudRisk"',
|
||||
],
|
||||
"overalldigitalidentityscore": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].overallDigitalIdentityScore)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].overallDigitalIdentityScore",
|
||||
'Blob."emailage.emailriskscore.overallDigitalIdentityScore"',
|
||||
],
|
||||
"totalhits": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].totalhits)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].totalhits",
|
||||
'Blob."emailage.emailriskscore.totalhits"',
|
||||
],
|
||||
"uniquehits": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].uniquehits)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].uniquehits",
|
||||
'Blob."emailage.emailriskscore.uniquehits"',
|
||||
],
|
||||
"emailtofullnameconfidence": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].emailToFullNameConfidence)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].emailToFullNameConfidence",
|
||||
'Blob."emailage.emailriskscore.emailToFullNameConfidence"',
|
||||
],
|
||||
"emailtolastnameconfidence": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].emailToLastNameConfidence)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].emailToLastNameConfidence",
|
||||
'Blob."emailage.emailriskscore.emailToLastNameConfidence"',
|
||||
],
|
||||
"domain_creation_days": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].domain_creation_days)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].domain_creation_days",
|
||||
'Blob."emailage.emailriskscore.domain_creation_days"',
|
||||
],
|
||||
"iptophoneconfidence": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ipToPhoneConfidence)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].ipToPhoneConfidence",
|
||||
'Blob."emailage.emailriskscore.ipToPhoneConfidence"',
|
||||
],
|
||||
"di_autofill_count_login": [
|
||||
"Blob.tmx_variables.di_autofill_count_login",
|
||||
"Blob.policy_details_api.policy_detail_api.customer.rules.vars.variable.di_autofill_count_login",
|
||||
],
|
||||
"accphone_gbl_velocity_hour": [
|
||||
"Blob.tmx_variables.accphone_gbl_velocity_hour",
|
||||
"Blob.tmx_variables._accphone_gbl_velocity_hour",
|
||||
],
|
||||
# Lat/long fields for distance engineering
|
||||
"ip_latitude": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_latitude)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].ip_latitude",
|
||||
],
|
||||
"ip_longitude": [
|
||||
"(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_longitude)[0]",
|
||||
"Blob.tps_vendor_raw_response.query.results[0].ip_longitude",
|
||||
],
|
||||
"tps_ip_latitude": ["Blob.tps_vendor_raw_response.query.results[0].ip_latitude"],
|
||||
"tps_ip_longitude": ["Blob.tps_vendor_raw_response.query.results[0].ip_longitude"],
|
||||
"true_ip_latitude": ["Blob.true_ip_latitude"],
|
||||
"true_ip_longitude": ["Blob.true_ip_longitude"],
|
||||
"proxy_ip_latitude": ["Blob.proxy_ip_latitude"],
|
||||
"proxy_ip_longitude": ["Blob.proxy_ip_longitude"],
|
||||
"dns_ip_latitude": ["Blob.dns_ip_latitude"],
|
||||
"dns_ip_longitude": ["Blob.dns_ip_longitude"],
|
||||
"input_ip_latitude": ["Blob.input_ip_latitude"],
|
||||
"input_ip_longitude": ["Blob.input_ip_longitude"],
|
||||
# First-seen timestamps for age deltas
|
||||
"digital_id_first_seen": ["Blob.digital_id_first_seen"],
|
||||
"account_email_first_seen": ["Blob.account_email_first_seen"],
|
||||
"account_login_first_seen": ["Blob.account_login_first_seen"],
|
||||
"account_telephone_first_seen": ["Blob.account_telephone_first_seen"],
|
||||
"true_ip_first_seen": ["Blob.true_ip_first_seen"],
|
||||
"ssn_hash_first_seen": ["Blob.ssn_hash_first_seen"],
|
||||
"fuzzy_device_first_seen": ["Blob.fuzzy_device_first_seen"],
|
||||
"national_id_first_seen": ["Blob.national_id_first_seen"],
|
||||
"proxy_ip_first_seen": ["Blob.proxy_ip_first_seen"],
|
||||
# Attribute arrays (used for one-hot style parsing)
|
||||
"account_name_activities": ["Blob.account_name_activities"],
|
||||
"account_email_attributes": ["Blob.account_email_attributes"],
|
||||
"true_ip_attributes": ["Blob.true_ip_attributes"],
|
||||
"true_ip_activities": ["Blob.true_ip_activities"],
|
||||
"digital_id_attributes": ["Blob.digital_id_attributes"],
|
||||
"account_telephone_attributes": ["Blob.account_telephone_attributes"],
|
||||
"cpu_clock": ["Blob.cpu_clock"]
|
||||
}
|
||||
|
||||
@@ -156,9 +225,9 @@ def __main__(
TrueIpGeo: str,
|
||||
Blob: str,
|
||||
DeviceId: str,
|
||||
FuzzyDeviceId: str
|
||||
FuzzyDeviceId: str,
|
||||
ReasonCode: str,
|
||||
) -> dict:
|
||||
|
||||
# Convert input parameters into a flat dictionary
|
||||
data = {
|
||||
"application_key": application_key,
@@ -184,49 +253,82 @@ def __main__(
"TrueIpGeo": TrueIpGeo,
|
||||
"Blob": Blob,
|
||||
"DeviceId": DeviceId,
|
||||
"FuzzyDeviceId": FuzzyDeviceId
|
||||
"FuzzyDeviceId": FuzzyDeviceId,
|
||||
"ReasonCode": ReasonCode,
|
||||
}
|
||||
|
||||
# Convert dictionary to a single-row DataFrame
|
||||
combined_df = pd.DataFrame([data])
|
||||
combined_df.columns = combined_df.columns.str.lower()
|
||||
|
||||
# Uncomment Below For Testing using Uprova Batch Data
|
||||
# combined_df["educationlevel"] = None
|
||||
# combined_df["monthsatresidence"] = None
|
||||
# combined_df["ownhome"] = False
|
||||
# combined_df['lengthatbank'] = 0
|
||||
|
||||
combined_df["application_email_address"] = combined_df["application_email_address"].str.lower()
|
||||
if Blob:
|
||||
combined_df["blob"] = combined_df["blob"].apply(sanitize_blob)
|
||||
|
||||
# Step 2: Extract values using the expressions dictionary
|
||||
for column, expressions_list in expressions.items():
|
||||
combined_df[column] = combined_df["blob"].apply(lambda x: coalesce(
|
||||
*[extract_value(x, expr) for expr in expressions_list]))
|
||||
def _extract_with_fallback(blob_obj):
|
||||
values = []
|
||||
for expr in expressions_list:
|
||||
val = extract_value(blob_obj, expr)
|
||||
if val is None and isinstance(expr, str) and expr.startswith("Blob."):
|
||||
val = extract_value(blob_obj, expr[len("Blob.") :])
|
||||
values.append(val)
|
||||
return coalesce(*values)
|
||||
|
||||
logger.info("pre_flowx data")
|
||||
logger.info(combined_df.iloc[0].drop('blob').to_dict())
|
||||
extracted = combined_df["blob"].apply(_extract_with_fallback)
|
||||
if column in combined_df.columns:
|
||||
combined_df[column] = extracted.where(extracted.notnull(), combined_df[column])
|
||||
else:
|
||||
for column, expressions_list in expressions.items():
|
||||
combined_df[column] = extracted
|
||||
|
||||
# logger.info("pre_flowx data")
|
||||
# logger.info(combined_df.iloc[0].drop("blob").to_dict())
|
||||
else:
|
||||
for column in expressions:
|
||||
combined_df[column] = None
|
||||
logger.info("pre_flowx data")
|
||||
logger.info(combined_df.iloc[0].to_dict())
|
||||
pre_processed_data = pre_processing(combined_df)
|
||||
# logger.info(f"pre_processed_data: {pre_processed_data}")
|
||||
logger.info("pre_processed data")
|
||||
logger.info(pre_processed_data.iloc[0].to_dict())
|
||||
df = processing(pre_processed_data)
|
||||
logger.info("processed_data")
|
||||
logger.info(df.iloc[0].to_dict())
|
||||
df["application_timestamp"] = df["application_timestamp"].astype(str)
|
||||
# logger.info("prediction: %.8f", float(df['prediction'].iloc[0]))
|
||||
result = post_processing(df)
|
||||
logger.info("post_processed_data")
|
||||
logger.info(result)
|
||||
# logger.info("pre_flowx data")
|
||||
# logger.info(combined_df.iloc[0].to_dict())
|
||||
df_m1, df_m2, df_thx = pre_processing_all(combined_df)
|
||||
# logger.info("pre_processed data m1")
|
||||
# logger.info(df_m1.iloc[0].to_dict())
|
||||
# logger.info("pre_processed data m2")
|
||||
# logger.info(df_m2.iloc[0].to_dict())
|
||||
|
||||
processed_m1, processed_m2, df_thx = processing_all(df_m1, df_m2, df_thx)
|
||||
# logger.info("processed_data m1")
|
||||
# logger.info(processed_m1.iloc[0].to_dict())
|
||||
# logger.info("processed_data m2")
|
||||
# logger.info(processed_m2.iloc[0].to_dict())
|
||||
|
||||
result = post_processing_all(processed_m1, processed_m2, df_thx)
|
||||
# State Check
|
||||
state_value = combined_df["state"].iloc[0]
|
||||
zip_value = combined_df["zip"].iloc[0]
|
||||
if (pd.notnull(state_value) and state_value == "ZZ") or (pd.notnull(zip_value) and zip_value == "86445"):
|
||||
if (pd.notnull(state_value) and state_value == "ZZ") or (
|
||||
pd.notnull(zip_value) and zip_value == "86445"
|
||||
):
|
||||
result["hd_score_m1"] = 1250
|
||||
logger.info("post_processed_data after state check")
|
||||
logger.info(result)
|
||||
result["hd_score_m2"] = 1250
|
||||
result["hd_score_iso_m2"] = 1250
|
||||
# logger.info("post_processed_data after state check")
|
||||
# logger.info(result)
|
||||
|
||||
# Normalize numeric scores to built-in float so JSON encoding (e.g. Temporal)
|
||||
# does not fail on NumPy scalar types like np.float32/np.float64.
|
||||
for key in ("hd_score_m1", "hd_score_m2", "hd_score_iso_m2"):
|
||||
if key in result and result[key] is not None:
|
||||
try:
|
||||
result[key] = float(result[key])
|
||||
except (TypeError, ValueError):
|
||||
logger.warning("Failed to cast %s=%r to float", key, result[key])
|
||||
|
||||
print(result)
|
||||
|
||||
return result
|
||||
|
||||
# testing :
|
||||
# __main__
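Illustrative aside (not part of the commit): the expressions table above feeds a coalesce-over-jmespath lookup with fallback paths; a minimal, standalone sketch of that extraction pattern on a toy blob:

import jmespath

def extract_value(blob, expression):
    # jmespath returns None when the path does not exist
    try:
        return jmespath.search(expression, blob)
    except Exception:
        return None

def coalesce(*args):
    # first non-None value wins
    return next((value for value in args if value is not None), None)

# toy blob and fallback paths; the 410 value is made up for illustration
blob = {"Blob": {"tps_vendor_raw_response": {"query": {"results": [{"EAScore": 410}]}}}}
paths = [
    "Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].EAScore",
    "Blob.tps_vendor_raw_response.query.results[0].EAScore",
]
print(coalesce(*[extract_value(blob, expr) for expr in paths]))  # -> 410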
category_orders_train_M1.json | 88 (Normal file)
@@ -0,0 +1,88 @@
{
|
||||
"employmentstatus": [
|
||||
"disability",
|
||||
"fixed income",
|
||||
"full time employed",
|
||||
"other",
|
||||
"part time employment",
|
||||
"retired benefits",
|
||||
"self employed",
|
||||
"student",
|
||||
"unemployed",
|
||||
"welfare"
|
||||
],
|
||||
"TrueIpGeo": [
|
||||
"other",
|
||||
"us"
|
||||
],
|
||||
"digital_id_trust_score_rating": [
|
||||
"high",
|
||||
"low",
|
||||
"neutral",
|
||||
"very_high",
|
||||
"very_low"
|
||||
],
|
||||
"educationlevel": [
|
||||
"associate's degree",
|
||||
"bachelor's degree",
|
||||
"doctorate",
|
||||
"high school",
|
||||
"master's degree",
|
||||
"other"
|
||||
],
|
||||
"os_version": [
|
||||
"10",
|
||||
"11",
|
||||
"12",
|
||||
"13",
|
||||
"14",
|
||||
"15",
|
||||
"16",
|
||||
"17",
|
||||
"18",
|
||||
"8",
|
||||
"9",
|
||||
"unknown"
|
||||
],
|
||||
"ip_net_speed_cell": [
|
||||
"broadband",
|
||||
"cable",
|
||||
"dialup",
|
||||
"dsl",
|
||||
"fixed wireless",
|
||||
"mobile",
|
||||
"mobile wireless",
|
||||
"ocx",
|
||||
"satellite",
|
||||
"t1",
|
||||
"tx",
|
||||
"wireless",
|
||||
"xdsl"
|
||||
],
|
||||
"day_night": [
|
||||
"Day",
|
||||
"Night"
|
||||
],
|
||||
"digital_id_confidence_rating": [
|
||||
"high",
|
||||
"medium",
|
||||
"very_high",
|
||||
"very_low"
|
||||
],
|
||||
"RiskRating": [
|
||||
"high",
|
||||
"low",
|
||||
"medium",
|
||||
"neutral",
|
||||
"trusted"
|
||||
],
|
||||
"payfrequency": [
|
||||
"biweekly",
|
||||
"semimonthly"
|
||||
],
|
||||
"ownhome": [
|
||||
"false",
|
||||
"true"
|
||||
]
|
||||
|
||||
}
category_orders_train_M2.json | 303 (Normal file)
@@ -0,0 +1,303 @@
{
|
||||
"riskrating": [
|
||||
"high",
|
||||
"low",
|
||||
"medium",
|
||||
"neutral",
|
||||
"trusted"
|
||||
],
|
||||
"input_ip_connection_type": [
|
||||
"cable",
|
||||
"consumer satellite",
|
||||
"dialup",
|
||||
"dsl",
|
||||
"fixed wireless",
|
||||
"framerelay",
|
||||
"isdn",
|
||||
"mobile wireless",
|
||||
"ocx",
|
||||
"tx"
|
||||
],
|
||||
"fraudrisk": [
|
||||
"001 very low",
|
||||
"003 very low",
|
||||
"005 very low",
|
||||
"006 very low",
|
||||
"008 very low",
|
||||
"009 very low",
|
||||
"010 very low",
|
||||
"011 very low",
|
||||
"012 very low",
|
||||
"014 very low",
|
||||
"015 very low",
|
||||
"016 very low",
|
||||
"017 very low",
|
||||
"018 very low",
|
||||
"020 very low",
|
||||
"021 very low",
|
||||
"022 very low",
|
||||
"023 very low",
|
||||
"024 very low",
|
||||
"025 very low",
|
||||
"026 very low",
|
||||
"027 very low",
|
||||
"028 very low",
|
||||
"029 very low",
|
||||
"030 very low",
|
||||
"031 very low",
|
||||
"032 very low",
|
||||
"033 very low",
|
||||
"034 very low",
|
||||
"035 very low",
|
||||
"036 very low",
|
||||
"037 very low",
|
||||
"038 very low",
|
||||
"039 very low",
|
||||
"040 very low",
|
||||
"041 very low",
|
||||
"042 very low",
|
||||
"043 very low",
|
||||
"044 very low",
|
||||
"045 very low",
|
||||
"046 very low",
|
||||
"047 very low",
|
||||
"048 very low",
|
||||
"049 very low",
|
||||
"050 very low",
|
||||
"051 very low",
|
||||
"052 very low",
|
||||
"053 very low",
|
||||
"054 very low",
|
||||
"055 very low",
|
||||
"056 very low",
|
||||
"057 very low",
|
||||
"058 very low",
|
||||
"059 very low",
|
||||
"060 very low",
|
||||
"061 very low",
|
||||
"062 very low",
|
||||
"063 very low",
|
||||
"064 very low",
|
||||
"065 very low",
|
||||
"066 very low",
|
||||
"067 very low",
|
||||
"068 very low",
|
||||
"069 very low",
|
||||
"070 very low",
|
||||
"071 very low",
|
||||
"072 very low",
|
||||
"073 very low",
|
||||
"074 very low",
|
||||
"075 very low",
|
||||
"076 very low",
|
||||
"077 very low",
|
||||
"078 very low",
|
||||
"079 very low",
|
||||
"080 very low",
|
||||
"081 very low",
|
||||
"082 very low",
|
||||
"083 very low",
|
||||
"084 very low",
|
||||
"085 very low",
|
||||
"086 very low",
|
||||
"087 very low",
|
||||
"088 very low",
|
||||
"089 very low",
|
||||
"090 very low",
|
||||
"091 very low",
|
||||
"092 very low",
|
||||
"093 very low",
|
||||
"094 very low",
|
||||
"095 very low",
|
||||
"096 very low",
|
||||
"097 very low",
|
||||
"098 very low",
|
||||
"099 very low",
|
||||
"100 very low",
|
||||
"101 low",
|
||||
"102 low",
|
||||
"103 low",
|
||||
"104 low",
|
||||
"105 low",
|
||||
"106 low",
|
||||
"107 low",
|
||||
"108 low",
|
||||
"109 low",
|
||||
"110 low",
|
||||
"111 low",
|
||||
"112 low",
|
||||
"113 low",
|
||||
"114 low",
|
||||
"115 low",
|
||||
"116 low",
|
||||
"117 low",
|
||||
"118 low",
|
||||
"119 low",
|
||||
"120 low",
|
||||
"121 low",
|
||||
"122 low",
|
||||
"123 low",
|
||||
"124 low",
|
||||
"125 low",
|
||||
"126 low",
|
||||
"127 low",
|
||||
"128 low",
|
||||
"129 low",
|
||||
"130 low",
|
||||
"131 low",
|
||||
"132 low",
|
||||
"133 low",
|
||||
"134 low",
|
||||
"135 low",
|
||||
"136 low",
|
||||
"137 low",
|
||||
"138 low",
|
||||
"139 low",
|
||||
"140 low",
|
||||
"141 low",
|
||||
"142 low",
|
||||
"143 low",
|
||||
"144 low",
|
||||
"145 low",
|
||||
"146 low",
|
||||
"147 low",
|
||||
"148 low",
|
||||
"149 low",
|
||||
"153 low",
|
||||
"154 low",
|
||||
"156 low",
|
||||
"157 low",
|
||||
"158 low",
|
||||
"159 low",
|
||||
"160 low",
|
||||
"161 low",
|
||||
"162 low",
|
||||
"163 low",
|
||||
"164 low",
|
||||
"165 low",
|
||||
"166 low",
|
||||
"167 low",
|
||||
"168 low",
|
||||
"169 low",
|
||||
"170 low",
|
||||
"171 low",
|
||||
"172 low",
|
||||
"173 low",
|
||||
"174 low",
|
||||
"175 low",
|
||||
"177 low",
|
||||
"178 low",
|
||||
"179 low",
|
||||
"180 low",
|
||||
"181 low",
|
||||
"182 low",
|
||||
"183 low",
|
||||
"184 low",
|
||||
"185 low",
|
||||
"186 low",
|
||||
"187 low",
|
||||
"188 low",
|
||||
"189 low",
|
||||
"190 low",
|
||||
"191 low",
|
||||
"192 low",
|
||||
"193 low",
|
||||
"194 low",
|
||||
"195 low",
|
||||
"196 low",
|
||||
"197 low",
|
||||
"198 low",
|
||||
"199 low",
|
||||
"200 low",
|
||||
"201 low",
|
||||
"202 low",
|
||||
"203 low",
|
||||
"204 low",
|
||||
"205 low",
|
||||
"206 low",
|
||||
"207 low",
|
||||
"208 low",
|
||||
"209 low",
|
||||
"210 low",
|
||||
"211 low",
|
||||
"212 low",
|
||||
"213 low",
|
||||
"214 low",
|
||||
"215 low",
|
||||
"216 low",
|
||||
"217 low",
|
||||
"218 low",
|
||||
"219 low",
|
||||
"220 low",
|
||||
"221 low",
|
||||
"222 low",
|
||||
"224 low",
|
||||
"225 low",
|
||||
"226 low",
|
||||
"227 low",
|
||||
"228 low",
|
||||
"229 low",
|
||||
"230 low",
|
||||
"231 low",
|
||||
"232 low",
|
||||
"233 low",
|
||||
"234 low",
|
||||
"235 low",
|
||||
"236 low",
|
||||
"237 low",
|
||||
"238 low",
|
||||
"239 low",
|
||||
"240 low",
|
||||
"241 low",
|
||||
"242 low",
|
||||
"243 low",
|
||||
"244 low",
|
||||
"245 low",
|
||||
"246 low",
|
||||
"247 low",
|
||||
"248 low",
|
||||
"250 low",
|
||||
"252 low",
|
||||
"254 low",
|
||||
"259 low",
|
||||
"267 low",
|
||||
"268 low",
|
||||
"271 low",
|
||||
"272 low",
|
||||
"274 low",
|
||||
"275 low",
|
||||
"278 low",
|
||||
"282 low",
|
||||
"287 low",
|
||||
"288 low",
|
||||
"289 low",
|
||||
"290 low",
|
||||
"291 low",
|
||||
"293 low",
|
||||
"296 low",
|
||||
"297 low",
|
||||
"464 moderate",
|
||||
"467 moderate",
|
||||
"485 moderate",
|
||||
"491 moderate",
|
||||
"492 moderate",
|
||||
"496 moderate",
|
||||
"702 review",
|
||||
"703 review",
|
||||
"705 review",
|
||||
"706 review",
|
||||
"707 review",
|
||||
"708 review",
|
||||
"710 review",
|
||||
"730 review",
|
||||
"790 review",
|
||||
"801 high",
|
||||
"890 high",
|
||||
"902 very high",
|
||||
"906 very high"
|
||||
],
|
||||
"day_night": [
|
||||
"day",
|
||||
"night"
|
||||
]
|
||||
}
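Illustrative aside (assumption; the consuming code is not part of this diff): category-order files like the one above presumably pin the category levels seen at training time, e.g. for building ordered pandas categoricals:

import json

import pandas as pd

with open("category_orders_train_M2.json") as fh:
    category_orders = json.load(fh)

df = pd.DataFrame({"riskrating": ["low", "trusted", "unexpected"]})
# values outside the training-time levels become NaN instead of adding new levels
df["riskrating"] = pd.Categorical(df["riskrating"], categories=category_orders["riskrating"], ordered=True)
print(df["riskrating"])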
isotonic_model_M2.joblib | BIN (Normal file)
Binary file not shown.
latitute_longitute_reference.csv | 28079 (Normal file)
File diff suppressed because it is too large.
@@ -1,5 +1,10 @@
import logging
|
||||
from typing import Dict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from pre_processing import THX_FIELDS
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
@@ -9,17 +14,85 @@ logging.basicConfig(
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def post_processing(df):
|
||||
|
||||
def post_processing_m1(df: pd.DataFrame) -> pd.DataFrame:
|
||||
df = df.copy()
|
||||
try:
|
||||
df['hd_score_m1'] = np.round(
|
||||
np.minimum(df['prediction'] * 100 + 0.00001, 1) * 85 +
|
||||
np.maximum(np.log2(df['prediction'] * 100 + 0.000001) * 185, 0),
|
||||
0
|
||||
df["hd_score_m1"] = np.round(
|
||||
np.minimum(df["prediction"] * 100 + 0.00001, 1) * 85
|
||||
+ np.maximum(np.log2(df["prediction"] * 100 + 0.000001) * 185, 0),
|
||||
0,
|
||||
)
|
||||
logging.info(f"hd_score_m1 calculated: {df['hd_score_m1'].iloc[0]}")
|
||||
logging.info("hd_score_m1 calculated: %s", df["hd_score_m1"].iloc[0])
|
||||
except Exception as e:
|
||||
logging.error(f"Error processing hd_score_m1 calculations: {e}")
|
||||
logging.error("Error processing hd_score_m1 calculations: %s", e)
|
||||
return df
|
||||
|
||||
|
||||
return df[['application_key', 'application_timestamp', 'deviceid', 'fuzzydeviceid', 'application_email_address', 'hd_score_m1']].iloc[0].to_dict()
|
||||
def post_processing_m2(df: pd.DataFrame) -> pd.DataFrame:
|
||||
df = df.copy()
|
||||
try:
|
||||
df["hd_score_m2"] = np.round(
|
||||
np.minimum(df["pd_m2"] * 100.0 + 0.00001, 1.0) * 75.0
|
||||
+ np.maximum(np.log2(df["pd_m2"] * 100.0 + 0.000001) * 180.0, 0.0),
|
||||
0,
|
||||
)
|
||||
df["hd_score_iso_m2"] = np.round(
|
||||
np.minimum(df["pd_m2_iso"] * 100.0 + 0.00001, 1.0) * 97.0
|
||||
+ np.maximum(np.log2(df["pd_m2_iso"] * 100.0 + 0.000001) * 246.0, 0.0),
|
||||
0,
|
||||
)
|
||||
logging.info("hd_score_m2 calculated: %s", df["hd_score_m2"].iloc[0])
|
||||
logging.info("hd_score_iso_m2 calculated: %s", df["hd_score_iso_m2"].iloc[0])
|
||||
except Exception as e:
|
||||
logging.error("Error processing hd_score_m2 calculations: %s", e)
|
||||
return df
|
||||
|
||||
|
||||
def _safe_get(df: pd.DataFrame, column: str):
|
||||
"""Return scalar from single-row DataFrame, normalizing NaN/None to None."""
|
||||
if column not in df.columns:
|
||||
return None
|
||||
val = df[column].iloc[0]
|
||||
if isinstance(val, (list, dict)):
|
||||
return val
|
||||
try:
|
||||
if pd.isna(val):
|
||||
return None
|
||||
except TypeError:
|
||||
pass
|
||||
return val
|
||||
|
||||
|
||||
def post_processing_all(df_m1: pd.DataFrame, df_m2: pd.DataFrame, df_thx: pd.DataFrame) -> Dict[str, object]:
|
||||
df_m1_scored = post_processing_m1(df_m1)
|
||||
df_m2_scored = post_processing_m2(df_m2)
|
||||
row_m1 = df_m1_scored.iloc[0]
|
||||
row_m2 = df_m2_scored.iloc[0]
|
||||
result = {
|
||||
"application_key": row_m1.get("application_key"),
|
||||
"application_timestamp": str(row_m1.get("application_timestamp")) if row_m1.get("application_timestamp") is not None else None,
|
||||
"deviceid": row_m1.get("deviceid"),
|
||||
"fuzzydeviceid": row_m1.get("fuzzydeviceid"),
|
||||
"application_email_address": row_m1.get("application_email_address"),
|
||||
"hd_score_m1": row_m1.get("hd_score_m1"),
|
||||
"hd_score_m2": row_m2.get("hd_score_m2"),
|
||||
"hd_score_iso_m2": row_m2.get("hd_score_iso_m2"),
|
||||
"action": None,
|
||||
}
|
||||
flattened_thx = {field: _safe_get(df_thx, field) for field in THX_FIELDS if field not in result}
|
||||
result.update(flattened_thx)
|
||||
return result
|
||||
|
||||
|
||||
# Legacy entry point for backward compatibility
|
||||
def post_processing(df: pd.DataFrame) -> Dict[str, object]:
|
||||
df_scored = post_processing_m1(df)
|
||||
row = df_scored.iloc[0]
|
||||
return {
|
||||
"application_key": row.get("application_key"),
|
||||
"application_timestamp": str(row.get("application_timestamp")) if row.get("application_timestamp") is not None else None,
|
||||
"deviceid": row.get("deviceid"),
|
||||
"fuzzydeviceid": row.get("fuzzydeviceid"),
|
||||
"application_email_address": row.get("application_email_address"),
|
||||
"hd_score_m1": row.get("hd_score_m1"),
|
||||
}
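Illustrative aside (not part of the commit): a quick numeric check of the hd_score_m1 mapping used above, for one example probability:

import numpy as np

prediction = 0.042  # example model probability, made up for illustration
hd_score_m1 = np.round(
    np.minimum(prediction * 100 + 0.00001, 1) * 85
    + np.maximum(np.log2(prediction * 100 + 0.000001) * 185, 0),
    0,
)
print(hd_score_m1)  # ~468.0 for this example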
@@ -1,6 +1,11 @@
import pandas as pd
|
||||
import numpy as np
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
@@ -9,127 +14,288 @@ logging.basicConfig(
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent
|
||||
M2_LATLONG_REF_PATH = BASE_DIR / "latitute_longitute_reference.csv"
|
||||
THX_FIELDS = [
|
||||
"application_key",
|
||||
"application_timestamp",
|
||||
"digital_id_first_seen",
|
||||
"summary_risk_score",
|
||||
"cpu_clock",
|
||||
"account_login_first_seen",
|
||||
"account_telephone_first_seen",
|
||||
"true_ip_first_seen",
|
||||
"ssn_hash_first_seen",
|
||||
"account_email_attributes",
|
||||
"tps_ip_latitude",
|
||||
"tps_ip_longitude",
|
||||
]
|
||||
|
||||
def pre_processing(data_df):
|
||||
# Hardcoded M2 data dictionary (replaces file lookup)
|
||||
M2_DATA_DICTIONARY: Dict[str, Dict[str, Union[float, str, None]]] = {
|
||||
"account_email_attributes_challenge_passed": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"account_email_attributes_challenged": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"account_email_first_seen_age": {"data_type": "int", "valid_min": -999.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"account_login_first_seen_age": {"data_type": "int", "valid_min": -999.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"account_name_activities_trusted_prob": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"account_telephone_attributes_challenge_failed": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"account_telephone_attributes_loan_app": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"account_telephone_attributes_trusted": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"account_telephone_first_seen_age": {"data_type": "int", "valid_min": -999.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"accphone_gbl_velocity_hour": {"data_type": "float", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"applicant_age": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"browser_spoof_score": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"day": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"day_cos": {"data_type": "float", "valid_min": -999.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"day_sin": {"data_type": "float", "valid_min": -999.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"di_autofill_count_login": {"data_type": "float", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"digital_id_attributes_challenged": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"digital_id_attributes_trusted": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"digital_id_day_diff": {"data_type": "int", "valid_min": -999999.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"digital_id_first_seen_age": {"data_type": "int", "valid_min": -999.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"digital_id_month_diff": {"data_type": "int", "valid_min": -999.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"digital_id_trust_score": {"data_type": "float", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"digitalidconfidence": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"dist_dnsip_ref_km": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"dist_em_ip_ref_km": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"dist_inputip_ref_km": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"dist_proxyip_ref_km": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"dist_trueip_dnsip_km": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"dist_trueip_em_ip_km": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"dist_trueip_ref_km": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"domain_creation_days": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"emailtofullnameconfidence": {"data_type": "float", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"emailtolastnameconfidence": {"data_type": "float", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"first_seen_days": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"fraudrisk": {"data_type": "string", "valid_min": None, "valid_max": None, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"fuzzy_device_first_seen_age": {"data_type": "int", "valid_min": -999.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"fuzzy_device_id_confidence": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"hour": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"input_ip_connection_type": {"data_type": "string", "valid_min": None, "valid_max": None, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"iptophoneconfidence": {"data_type": "float", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"national_id_first_seen_age": {"data_type": "int", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"overalldigitalidentityscore": {"data_type": "float", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"policy_score": {"data_type": "float", "valid_min": -999.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"proxy_ip_first_seen_age": {"data_type": "int", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"proxy_ip_worst_score": {"data_type": "float", "valid_min": -999.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"proxy_score": {"data_type": "float", "valid_min": 0.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"reasoncode_new_smartid_lt_1wk_global": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"riskrating": {"data_type": "string", "valid_min": None, "valid_max": None, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"ssn_hash_first_seen_age": {"data_type": "int", "valid_min": -999.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"totalhits": {"data_type": "float", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"true_ip_activities_trusted_prob": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"true_ip_attributes_trusted": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"true_ip_attributes_trusted_conf": {"data_type": "int", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"true_ip_first_seen_age": {"data_type": "int", "valid_min": -999.0, "valid_max": 999999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"true_ip_worst_score": {"data_type": "float", "valid_min": -999.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
"uniquehits": {"data_type": "float", "valid_min": 0.0, "valid_max": 999.0, "observed_cap_min": None, "observed_cap_max": None},
|
||||
}
|
||||
|
||||
# combined_df = pd.DataFrame([input_data])
|
||||
# data = pd.DataFrame(data)
|
||||
combined_df = data_df
|
||||
combined_df["applicant_age"] = combined_df.apply(lambda row: pd.to_datetime(row["application_timestamp"]).year - pd.to_datetime(row["application_date_of_birth"]).year if pd.notnull(row["application_timestamp"]) and pd.notnull(row["application_date_of_birth"]) else None,axis=1
|
||||
# Hardcoded one-hot config (parsed_feature, model_var, contains)
|
||||
M2_ONEHOT_CONFIG: List[Tuple[str, str, str]] = [
|
||||
("reasoncode", "reasoncode_new_smartid_lt_1wk_global", "new_smartid_lt_1wk_global"),
|
||||
("account_name_activities", "account_name_activities_trusted_prob", "trusted_prob"),
|
||||
("account_email_attributes", "account_email_attributes_challenged", "challenged"),
|
||||
("account_email_attributes", "account_email_attributes_challenge_passed", "challenge_passed"),
|
||||
("true_ip_attributes", "true_ip_attributes_trusted", "trusted"),
|
||||
("true_ip_attributes", "true_ip_attributes_trusted_conf", "trusted_conf"),
|
||||
("digital_id_attributes", "digital_id_attributes_challenged", "challenged"),
|
||||
("digital_id_attributes", "digital_id_attributes_trusted", "trusted"),
|
||||
("account_telephone_attributes", "account_telephone_attributes_challenge_failed", "challenge_failed"),
|
||||
("account_telephone_attributes", "account_telephone_attributes_loan_app", "loan_app"),
|
||||
("account_telephone_attributes", "account_telephone_attributes_trusted", "trusted"),
|
||||
("true_ip_activities", "true_ip_activities_trusted_prob", "trusted_prob"),
|
||||
]
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# Helpers
|
||||
# ----------------------------
|
||||
def _handle_unknowns(X: pd.DataFrame, column: str, known_values: Iterable[str], default_treatment=None):
|
||||
if column not in X.columns:
|
||||
return X
|
||||
known_values = {str(val).lower() for val in known_values}
|
||||
invalid_values = {None, "none", "nan", pd.NA}
|
||||
X[column] = X[column].apply(
|
||||
lambda x: str(x).lower()
|
||||
if pd.notna(x) and str(x).lower() in known_values
|
||||
else (default_treatment if pd.notna(x) and str(x).lower() not in invalid_values else np.nan)
|
||||
)
|
||||
return X
|
||||
|
||||
|
||||
def _haversine_km(lat1, lon1, lat2, lon2):
|
||||
if None in (lat1, lon1, lat2, lon2):
|
||||
return None
|
||||
try:
|
||||
rlat1 = float(lat1) * math.pi / 180.0
|
||||
rlat2 = float(lat2) * math.pi / 180.0
|
||||
dlat = (float(lat2) - float(lat1)) * math.pi / 180.0
|
||||
dlon = (float(lon2) - float(lon1)) * math.pi / 180.0
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
a = (
|
||||
math.sin(dlat / 2.0) ** 2
|
||||
+ math.cos(rlat1) * math.cos(rlat2) * math.sin(dlon / 2.0) ** 2
|
||||
)
|
||||
a = min(1.0, max(0.0, a))
|
||||
return 2 * 6371.0088 * math.asin(math.sqrt(a))
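# Illustrative aside (not part of the commit): a quick sanity check of
# _haversine_km with two known points, e.g. roughly Reno (39.53, -119.81)
# to Las Vegas (36.17, -115.14), should land around 550-560 km:
# _haversine_km(39.53, -119.81, 36.17, -115.14)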
|
||||
|
||||
|
||||
def _prep_latlong_ref():
|
||||
if not M2_LATLONG_REF_PATH.exists():
|
||||
logger.warning("latitute_longitute_reference.csv missing at %s", M2_LATLONG_REF_PATH)
|
||||
return pd.DataFrame()
|
||||
try:
|
||||
ref = pd.read_csv(M2_LATLONG_REF_PATH, usecols=["postal_code_ref", "latitute_ref", "longitude_ref"])
|
||||
except Exception:
|
||||
ref = pd.read_csv(M2_LATLONG_REF_PATH)
|
||||
# keep lower string version for matching
|
||||
if "postal_code_ref" in ref.columns:
|
||||
ref["postal_code_ref"] = ref["postal_code_ref"].astype(str).str.lower()
|
||||
return ref
|
||||
|
||||
|
||||
def _normalize_zip_for_ref(zip_val):
|
||||
"""
|
||||
Normalize zip/postal code values so they match reference CSV keys.
|
||||
|
||||
- Floats like 89503.0 -> "89503"
|
||||
- Int-like strings "89503.0" -> "89503"
|
||||
Note: we intentionally avoid zero-filling to preserve behaviour seen in UAT references
|
||||
where leading-zero ZIPs are not matched to the reference table.
|
||||
"""
|
||||
if pd.isna(zip_val):
|
||||
return None
|
||||
if isinstance(zip_val, (int, float)) and not isinstance(zip_val, bool):
|
||||
return str(int(zip_val)).lower()
|
||||
zip_str = str(zip_val).strip()
|
||||
if zip_str.replace(".", "", 1).isdigit():
|
||||
try:
|
||||
return str(int(float(zip_str))).lower()
|
||||
except Exception:
|
||||
pass
|
||||
return zip_str.lower() if zip_str else None
|
||||
|
||||
|
||||
# ----------------------------
|
||||
# M1 Pre-processing (existing behaviour)
|
||||
# ----------------------------
|
||||
def pre_processing_m1(data_df: pd.DataFrame) -> pd.DataFrame:
|
||||
combined_df = data_df.copy()
|
||||
combined_df["applicant_age"] = combined_df.apply(
|
||||
lambda row: pd.to_datetime(row["application_timestamp"]).year - pd.to_datetime(row["application_date_of_birth"]).year
|
||||
if pd.notnull(row["application_timestamp"]) and pd.notnull(row["application_date_of_birth"])
|
||||
else None,
|
||||
axis=1,
|
||||
)
|
||||
|
||||
# Extracting Temporal features
|
||||
combined_df['application_timestamp'] = pd.to_datetime(combined_df["application_timestamp"])
|
||||
combined_df.loc[:, 'application_time'] = pd.to_datetime(combined_df['application_timestamp']).dt.time
|
||||
combined_df["application_timestamp"] = pd.to_datetime(combined_df["application_timestamp"])
|
||||
combined_df.loc[:, "application_time"] = pd.to_datetime(combined_df["application_timestamp"]).dt.time
|
||||
|
||||
combined_df['day'] = combined_df['application_timestamp'].dt.day
|
||||
combined_df['day_of_week'] = combined_df['application_timestamp'].dt.weekday # 0=Monday, 6=Sunday
|
||||
combined_df["day"] = combined_df["application_timestamp"].dt.day
|
||||
combined_df["day_of_week"] = combined_df["application_timestamp"].dt.weekday
|
||||
|
||||
combined_df['day_sin'] = np.sin(2 * np.pi * combined_df['day'] / 31)
|
||||
combined_df['day_cos'] = np.cos(2 * np.pi * combined_df['day'] / 31)
|
||||
combined_df['day_of_week_sin'] = np.sin(2 * np.pi * combined_df['day_of_week'] / 7)
|
||||
combined_df['day_of_week_cos'] = np.cos(2 * np.pi * combined_df['day_of_week'] / 7)
|
||||
combined_df["day_sin"] = np.sin(2 * np.pi * combined_df["day"] / 31)
|
||||
combined_df["day_cos"] = np.cos(2 * np.pi * combined_df["day"] / 31)
|
||||
combined_df["day_of_week_sin"] = np.sin(2 * np.pi * combined_df["day_of_week"] / 7)
|
||||
combined_df["day_of_week_cos"] = np.cos(2 * np.pi * combined_df["day_of_week"] / 7)
|
||||
|
||||
# combined_df['is_weekend'] = combined_df['day_of_week'].apply(lambda x: 1 if x >= 5 else 0)
|
||||
|
||||
# Create a day/night variable
|
||||
def classify_day_night(hour):
|
||||
if 6 <= hour < 18:
|
||||
return 'Day'
|
||||
else:
|
||||
return 'Night'
|
||||
return "Day"
|
||||
return "Night"
|
||||
|
||||
# Extract hour from application_time
|
||||
combined_df['hour'] = combined_df['application_time'].apply(lambda x: x.hour if pd.notnull(x) else np.nan)
|
||||
combined_df['day_night'] = combined_df['hour'].apply(lambda hour: classify_day_night(hour) if pd.notnull(hour) else 'Unknown')
|
||||
combined_df["hour"] = combined_df["application_time"].apply(lambda x: x.hour if pd.notnull(x) else np.nan)
|
||||
combined_df["day_night"] = combined_df["hour"].apply(lambda hour: classify_day_night(hour) if pd.notnull(hour) else "Unknown")
|
||||
|
||||
# combined_df['os_version'] = combined_df['os_version'].str.replace(r'[^a-zA-Z0-9]', '_', regex=True)
|
||||
combined_df['os_version'] = combined_df['os_version'].apply(lambda x: x.split('.')[0] if isinstance(x, str) and '.' in x
|
||||
else x.split('_')[0] if isinstance(x, str) and '_' in x
|
||||
else x)
|
||||
combined_df["os_version"] = combined_df["os_version"].apply(
|
||||
lambda x: x.split(".")[0] if isinstance(x, str) and "." in x else x.split("_")[0] if isinstance(x, str) and "_" in x else x
|
||||
)
|
||||
|
||||
combined_df["Identity_Negative_History"] = combined_df["tmxsummaryreasoncode"].astype(str).str.contains(
|
||||
"Identity_Negative_History", na=False, regex=True
|
||||
).astype(int)
|
||||
combined_df["Device_Negative_History"] = combined_df["tmxsummaryreasoncode"].astype(str).str.contains(
|
||||
"Device_Negative_History", na=False, regex=True
|
||||
).astype(int)
|
||||
combined_df["Level_1_Link_Reject"] = combined_df["tmxsummaryreasoncode"].astype(str).str.contains(
|
||||
"Level_1_Link_Reject", na=False, regex=True
|
||||
).astype(int)
|
||||
combined_df["IP_Negative_History"] = combined_df["tmxsummaryreasoncode"].astype(str).str.contains(
|
||||
"IP_Negative_History", na=False, regex=True
|
||||
).astype(int)
|
||||
combined_df["Identity_Spoofing"] = combined_df["tmxsummaryreasoncode"].astype(str).str.contains(
|
||||
"Identity_Spoofing", na=False, regex=True
|
||||
).astype(int)
|
||||
|
||||
# Datatype conversions
|
||||
# combined_df['Level_1_Link_Accept'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Level_1_Link_Accept', na=False, regex=True).astype(int)
|
||||
combined_df['Identity_Negative_History'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Identity_Negative_History', na=False, regex=True).astype(int)
|
||||
combined_df['Device_Negative_History'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Device_Negative_History', na=False, regex=True).astype(int)
|
||||
combined_df['Level_1_Link_Reject'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Level_1_Link_Reject', na=False, regex=True).astype(int)
|
||||
combined_df['IP_Negative_History'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('IP_Negative_History', na=False, regex=True).astype(int)
|
||||
combined_df['Identity_Spoofing'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Identity_Spoofing', na=False, regex=True).astype(int)
|
||||
# combined_df['Bot'] = combined_df['tmxsummaryreasoncode'].astype(str).str.contains('Bot', na=False, regex=True).astype(int)
|
||||
combined_df["digitalidconfidence"] = pd.to_numeric(combined_df["digitalidconfidence"], errors="coerce").astype("Int64")
|
||||
|
||||
combined_df['digitalidconfidence'] = pd.to_numeric(combined_df['digitalidconfidence'], errors='coerce').astype('Int64')
|
||||
|
||||
# Rename Columns if Required
|
||||
combined_df.rename(columns={
|
||||
'DigitalIdConfidence': 'digitalidconfidence',
|
||||
# 'inputipaddress_consistency': 'inputip_consistency',
|
||||
# 'requestid_consistency': 'request_consistency',
|
||||
# Add others as required if present in your DataFrame and needing renaming.
|
||||
}, inplace=True)
|
||||
|
||||
# #Testing : remove below
|
||||
# combined_df.to_csv('op-pre-processing_intermediate.csv', index=False)
|
||||
combined_df.rename(
|
||||
columns={
|
||||
"DigitalIdConfidence": "digitalidconfidence",
|
||||
},
|
||||
inplace=True,
|
||||
)
|
||||
|
||||
dtype_dict = {
|
||||
"applicant_age" : int,
|
||||
"digitalidconfidence" : float,
|
||||
"first_seen_days" : float,
|
||||
"employmentstatus" : str,
|
||||
"ea_score" : float,
|
||||
"trueipgeo" : str,
|
||||
"hour" : int,
|
||||
"email_creation_days" : float,
|
||||
"lengthatjob" : float,
|
||||
"day_cos" : float,
|
||||
"summary_risk_score" : float,
|
||||
"digital_id_trust_score_rating" : str,
|
||||
"day" : 'int32',
|
||||
"lengthatbank" : float,
|
||||
"day_of_week_cos" : float,
|
||||
"Level_1_Link_Reject" : int,
|
||||
"Identity_Negative_History" : int,
|
||||
"educationlevel" : str,
|
||||
"os_version" : str,
|
||||
"account_email_worst_score" : float,
|
||||
"true_ip_score" : float,
|
||||
"ip_net_speed_cell" : str,
|
||||
"account_email_score" : float,
|
||||
"day_of_week" : 'int32',
|
||||
"true_ip_worst_score" : float,
|
||||
"proxy_ip_worst_score" : float,
|
||||
"day_night" : str,
|
||||
"proxy_ip_score" : float,
|
||||
"monthsatresidence" : float,
|
||||
"Device_Negative_History" : int,
|
||||
"fuzzy_device_score" : float,
|
||||
"day_sin" : float,
|
||||
"ip_region_confidence" : float,
|
||||
"true_ip_state_confidence" : float,
|
||||
"IP_Negative_History" : int,
|
||||
"fuzzy_device_worst_score" : float,
|
||||
"digital_id_confidence_rating" : str,
|
||||
"day_of_week_sin" : float,
|
||||
"riskrating" : str,
|
||||
"payfrequency" : str,
|
||||
"ownhome" : str,
|
||||
"Identity_Spoofing" : int
|
||||
"applicant_age": int,
|
||||
"digitalidconfidence": float,
|
||||
"first_seen_days": float,
|
||||
"employmentstatus": str,
|
||||
"ea_score": float,
|
||||
"trueipgeo": str,
|
||||
"hour": int,
|
||||
"email_creation_days": float,
|
||||
"lengthatjob": float,
|
||||
"day_cos": float,
|
||||
"summary_risk_score": float,
|
||||
"digital_id_trust_score_rating": str,
|
||||
"day": "int32",
|
||||
"lengthatbank": float,
|
||||
"day_of_week_cos": float,
|
||||
"Level_1_Link_Reject": int,
|
||||
"Identity_Negative_History": int,
|
||||
"educationlevel": str,
|
||||
"os_version": str,
|
||||
"account_email_worst_score": float,
|
||||
"true_ip_score": float,
|
||||
"ip_net_speed_cell": str,
|
||||
"account_email_score": float,
|
||||
"day_of_week": "int32",
|
||||
"true_ip_worst_score": float,
|
||||
"proxy_ip_worst_score": float,
|
||||
"day_night": str,
|
||||
"proxy_ip_score": float,
|
||||
"monthsatresidence": float,
|
||||
"Device_Negative_History": int,
|
||||
"fuzzy_device_score": float,
|
||||
"day_sin": float,
|
||||
"ip_region_confidence": float,
|
||||
"true_ip_state_confidence": float,
|
||||
"IP_Negative_History": int,
|
||||
"fuzzy_device_worst_score": float,
|
||||
"day_of_week_sin": float,
|
||||
"riskrating": str,
|
||||
"payfrequency": str,
|
||||
"ownhome": str,
|
||||
"Identity_Spoofing": int,
|
||||
}
|
||||
|
||||
next_block_cols = ['application_key', 'application_timestamp', 'deviceid', 'fuzzydeviceid', 'application_email_address']
|
||||
next_block_cols = ["application_key", "application_timestamp", "deviceid", "fuzzydeviceid", "application_email_address"]
|
||||
cols_to_keep = [col for col in dtype_dict.keys() if col in combined_df.columns]
|
||||
|
||||
final_cols = list(set(next_block_cols).union(set(cols_to_keep)))
|
||||
# Type casting
|
||||
for col, dtype in dtype_dict.items():
|
||||
if col in combined_df.columns:
|
||||
if dtype == int:
|
||||
combined_df[col] = pd.to_numeric(combined_df[col], errors='coerce', downcast='integer')
|
||||
combined_df[col] = pd.to_numeric(combined_df[col], errors="coerce", downcast="integer")
|
||||
elif dtype == float:
|
||||
combined_df[col] = pd.to_numeric(combined_df[col], errors='coerce', downcast='float')
|
||||
combined_df[col] = pd.to_numeric(combined_df[col], errors="coerce", downcast="float")
|
||||
elif dtype == str:
|
||||
combined_df[col] = combined_df[col].astype(str)
|
||||
# cross check data type
|
||||
|
||||
capping_dict = {
|
||||
"applicant_age": (18, 93),
|
||||
"digitalidconfidence": (0, 9017),
@@ -157,98 +323,254 @@ def pre_processing(data_df):
"fuzzy_device_score": (-29, 14),
|
||||
"day_sin": (-0.9987165072, 0.9987165072),
|
||||
"ip_region_confidence": (75, 99),
|
||||
# "true_ip_state_confidence": (5, 98),
|
||||
"IP_Negative_History": (0, 1),
|
||||
"fuzzy_device_worst_score": (-100, 0),
|
||||
"day_of_week_sin": (-0.9749279122, 0.9749279122),
|
||||
"Identity_Spoofing": (0, 1),
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Apply capping
|
||||
for column, (cap_min, cap_max) in capping_dict.items():
|
||||
if column in combined_df.columns:
|
||||
combined_df[column] = combined_df[column].clip(lower=cap_min, upper=cap_max)
|
||||
|
||||
|
||||
def handle_unknowns(X, column, known_values, default_treatment=None):
|
||||
if column not in X.columns:
|
||||
return X # Return X to avoid NoneType error
|
||||
known_values = {str(val).lower() for val in known_values}
|
||||
invalid_values = {None, "none", "nan", pd.NA}
|
||||
X[column] = X[column].apply(
|
||||
lambda x: str(x).lower() if pd.notna(x) and str(x).lower() in known_values
|
||||
else (default_treatment if pd.notna(x) and str(x).lower() not in invalid_values else np.nan)
|
||||
)
|
||||
return X # Always return the DataFrame
|
||||
|
||||
|
||||
|
||||
|
||||
unknown_treatments = {
|
||||
"employmentstatus": {
|
||||
"valid_values": [
|
||||
"disability", "fixed income", "full time employed", "part time employment",
|
||||
"retired benefits", "self employed", "student", "unemployed", "welfare"
|
||||
"disability",
|
||||
"fixed income",
|
||||
"full time employed",
|
||||
"part time employment",
|
||||
"retired benefits",
|
||||
"self employed",
|
||||
"student",
|
||||
"unemployed",
|
||||
"welfare",
|
||||
],
|
||||
"default_treatment": "other"
|
||||
},
|
||||
"trueipgeo": {
|
||||
"valid_values": ["US"],
|
||||
"default_treatment": "other"
|
||||
},
|
||||
"digital_id_trust_score_rating": {
|
||||
"valid_values": ["very_high", "high", "neutral", "low"],
|
||||
"default_treatment": "very_low"
|
||||
"default_treatment": "other",
|
||||
},
|
||||
"trueipgeo": {"valid_values": ["US"], "default_treatment": "other"},
|
||||
"digital_id_trust_score_rating": {"valid_values": ["very_high", "high", "neutral", "low"], "default_treatment": "very_low"},
|
||||
"educationlevel": {
|
||||
"valid_values": ["associate's degree", "bachelor's degree", "doctorate", "high school", "master's degree"],
|
||||
"default_treatment": "other"
|
||||
"default_treatment": "other",
|
||||
},
|
||||
"os_version": {
|
||||
"valid_values": [
|
||||
'18', '17', '16', '15', '14', '13', '12', '11', '10', '9', '8'
|
||||
],
|
||||
"default_treatment": 'unknown'
|
||||
"valid_values": ["18", "17", "16", "15", "14", "13", "12", "11", "10", "9", "8"],
|
||||
"default_treatment": "unknown",
|
||||
},
|
||||
"ip_net_speed_cell": {
|
||||
"valid_values": [
|
||||
"broadband", "cable", "dialup", "dsl", "fixed wireless", "mobile", "mobile wireless", "ocx", "satellite",
|
||||
"t1", "tx", "wireless", "xdsl"
|
||||
"broadband",
|
||||
"cable",
|
||||
"dialup",
|
||||
"dsl",
|
||||
"fixed wireless",
|
||||
"mobile",
|
||||
"mobile wireless",
|
||||
"ocx",
|
||||
"satellite",
|
||||
"t1",
|
||||
"tx",
|
||||
"wireless",
|
||||
"xdsl",
|
||||
],
|
||||
"default_treatment": "mobile"
|
||||
},
|
||||
"digital_id_confidence_rating": {
|
||||
"valid_values": ["high", "medium", "very_high"],
|
||||
"default_treatment": "very_low"
|
||||
},
|
||||
"riskrating": {
|
||||
"valid_values": ["low", "medium", "neutral", "trusted"],
|
||||
"default_treatment": "high"
|
||||
},
|
||||
"ownhome": {
|
||||
"valid_values": ["true", "false"],
|
||||
"default_treatment": np.nan
|
||||
"default_treatment": "mobile",
|
||||
},
|
||||
"digital_id_confidence_rating": {"valid_values": ["high", "medium", "very_high"], "default_treatment": "very_low"},
|
||||
"riskrating": {"valid_values": ["low", "medium", "neutral", "trusted"], "default_treatment": "high"},
|
||||
"ownhome": {"valid_values": ["true", "false"], "default_treatment": np.nan},
|
||||
}
|
||||
|
||||
for column, treatment in unknown_treatments.items():
|
||||
combined_df = handle_unknowns(combined_df, column, treatment["valid_values"], treatment["default_treatment"])
|
||||
combined_df = _handle_unknowns(combined_df, column, treatment["valid_values"], treatment["default_treatment"])
|
||||
|

    payfrequency_map = {"biweekly": ["biweekly", "bi-weekly", "bi weekly", "bw"], "semimonthly": ["semi-monthly", "semimonthly"]}

    combined_df["payfrequency"] = combined_df["payfrequency"].apply(
        lambda x: next((key for key, values in payfrequency_map.items() if str(x).lower() in values), np.nan)
    )

    return combined_df[final_cols]


# ----------------------------
# M2 Pre-processing
# ----------------------------
def _apply_onehot_features(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    for parsed_feature, model_var, contains_val in M2_ONEHOT_CONFIG:
        value = df.get(parsed_feature, pd.Series([None])).iloc[0]
        flag = 0
        if isinstance(value, list):
            flag = int(any(contains_val in str(v).lower() or re.sub(r"[^a-z0-9]+", " ", contains_val) in re.sub(r"[^a-z0-9]+", " ", str(v).lower()) for v in value))
        elif isinstance(value, str):
            val_norm = re.sub(r"[^a-z0-9]+", " ", value.lower())
            contains_norm = re.sub(r"[^a-z0-9]+", " ", contains_val)
            flag = int(contains_val in value.lower() or contains_norm in val_norm)
        df[model_var] = flag
    return df
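
# M2_ONEHOT_CONFIG (defined elsewhere in this module) is assumed to hold
# (parsed_feature, model_var, contains_val) triples; a hypothetical entry such as
# ("account_email_attributes", "email_attr_disposable", "disposable") would set
# df["email_attr_disposable"] = 1 whenever any attribute value contains "disposable".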


def _extract_first_seen_days(ts_value, app_ts):
    ts = pd.to_datetime(ts_value, errors="coerce", utc=True)
    app = pd.to_datetime(app_ts, errors="coerce", utc=True)
    # align to naive for subtraction
    if isinstance(ts, pd.Timestamp) and ts.tzinfo is not None:
        ts = ts.tz_localize(None)
    if isinstance(app, pd.Timestamp) and app.tzinfo is not None:
        app = app.tz_localize(None)
    if pd.isna(ts) or pd.isna(app):
        return None
    return (app.normalize() - ts.normalize()).days
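    # Example: ts_value="2024-01-01T08:30:00Z" and app_ts="2024-01-31T02:00:00Z"
    # both normalize to midnight, so the function returns 30 (whole days between them).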


def _to_naive_ts(val):
    ts = pd.to_datetime(val, errors="coerce", utc=True)
    if isinstance(ts, pd.Timestamp) and ts.tzinfo is not None:
        ts = ts.tz_localize(None)
    return ts


def _month_diff(earlier, later):
    """Month difference (earlier - later) using year/month buckets."""
    ts_earlier = _to_naive_ts(earlier)
    ts_later = _to_naive_ts(later)
    if pd.isna(ts_earlier) or pd.isna(ts_later):
        return None
    return (ts_earlier.year - ts_later.year) * 12 + (ts_earlier.month - ts_later.month)
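    # Example: _month_diff("2023-11-15", "2024-02-03") = (2023 - 2024) * 12 + (11 - 2) = -3,
    # i.e. the first timestamp falls three calendar months before the second.


# _haversine_km is used below but defined elsewhere in this module; a minimal sketch of
# the assumed helper (great-circle distance in km, None when a coordinate is missing):
#
#     def _haversine_km(lat1, lon1, lat2, lon2):
#         import math
#         try:
#             lat1, lon1, lat2, lon2 = (float(v) for v in (lat1, lon1, lat2, lon2))
#         except (TypeError, ValueError):
#             return None
#         phi1, phi2 = math.radians(lat1), math.radians(lat2)
#         dphi = math.radians(lat2 - lat1)
#         dlmb = math.radians(lon2 - lon1)
#         a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlmb / 2) ** 2
#         return 2 * 6371.0 * math.asin(math.sqrt(a))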


def pre_processing_m2(data_df: pd.DataFrame) -> pd.DataFrame:
    df = data_df.copy()
    df.columns = df.columns.str.lower()

    # Timestamp-derived features (align with M1 behaviour to keep probabilities consistent)
    df["application_timestamp"] = pd.to_datetime(df["application_timestamp"], errors="coerce", utc=True)
    df["day"] = df["application_timestamp"].dt.day
    df["hour"] = df["application_timestamp"].dt.hour
    df["day_sin"] = np.sin(2 * np.pi * df["day"] / 31)
    df["day_cos"] = np.cos(2 * np.pi * df["day"] / 31)
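    # The sin/cos pair encodes day-of-month cyclically, so e.g. day 31 and day 1 land
    # close together on the unit circle instead of 30 "units" apart.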

    def _classify_day_night(hour_val):
        if pd.isna(hour_val):
            return np.nan
        return "day" if 6 <= hour_val < 18 else "night"

    df["day_night"] = df["hour"].apply(_classify_day_night)

    # Apply onehot flags from attributes
    df = _apply_onehot_features(df)

    # Distances
    lat_ref = _prep_latlong_ref()
    if not lat_ref.empty and "zip" in df.columns:
        zip_value = df["zip"].iloc[0]
        zip_lookup = _normalize_zip_for_ref(zip_value)
        ref_row = lat_ref[lat_ref["postal_code_ref"] == zip_lookup] if zip_lookup else pd.DataFrame()
        lat_ref_val = ref_row["latitute_ref"].iloc[0] if not ref_row.empty else None
        lon_ref_val = ref_row["longitude_ref"].iloc[0] if not ref_row.empty else None
    else:
        lat_ref_val = None
        lon_ref_val = None

    df["dist_inputip_ref_km"] = df.apply(
        lambda r: _haversine_km(lat_ref_val, lon_ref_val, r.get("input_ip_latitude"), r.get("input_ip_longitude")), axis=1
    )
    df["dist_em_ip_ref_km"] = df.apply(
        lambda r: _haversine_km(lat_ref_val, lon_ref_val, r.get("ip_latitude") or r.get("tps_ip_latitude"), r.get("ip_longitude") or r.get("tps_ip_longitude")),
        axis=1,
    )
    df["dist_proxyip_ref_km"] = df.apply(
        lambda r: _haversine_km(lat_ref_val, lon_ref_val, r.get("proxy_ip_latitude"), r.get("proxy_ip_longitude")), axis=1
    )
    df["dist_dnsip_ref_km"] = df.apply(
        lambda r: _haversine_km(lat_ref_val, lon_ref_val, r.get("dns_ip_latitude"), r.get("dns_ip_longitude")), axis=1
    )
    df["dist_trueip_ref_km"] = df.apply(
        lambda r: _haversine_km(lat_ref_val, lon_ref_val, r.get("true_ip_latitude"), r.get("true_ip_longitude")), axis=1
    )
    df["dist_trueip_em_ip_km"] = df.apply(
        lambda r: _haversine_km(r.get("true_ip_latitude"), r.get("true_ip_longitude"), r.get("ip_latitude") or r.get("tps_ip_latitude"), r.get("ip_longitude") or r.get("tps_ip_longitude")),
        axis=1,
    )
    df["dist_trueip_dnsip_km"] = df.apply(
        lambda r: _haversine_km(r.get("true_ip_latitude"), r.get("true_ip_longitude"), r.get("dns_ip_latitude"), r.get("dns_ip_longitude")),
        axis=1,
    )

    # Ages
    app_ts_val = df.get("application_timestamp", pd.Series([None])).iloc[0]

    def _safe_day_diff(row):
        if not row.get("digital_id_first_seen"):
            return None
        val = _extract_first_seen_days(row.get("digital_id_first_seen"), app_ts_val)
        return -val if val is not None else None

    df["digital_id_day_diff"] = df.apply(_safe_day_diff, axis=1)
    df["digital_id_month_diff"] = df.apply(lambda r: _month_diff(r.get("digital_id_first_seen"), app_ts_val), axis=1)

    for col_name in [
        "digital_id_first_seen",
        "account_email_first_seen",
        "account_login_first_seen",
        "account_telephone_first_seen",
        "true_ip_first_seen",
        "ssn_hash_first_seen",
        "fuzzy_device_first_seen",
        "national_id_first_seen",
        "proxy_ip_first_seen",
    ]:
        out_col = f"{col_name}_age"
        df[out_col] = df.apply(lambda r: _extract_first_seen_days(r.get(col_name), app_ts_val), axis=1)

    # applicant_age for consistency if not present
    if "applicant_age" not in df.columns:
        df["applicant_age"] = df.apply(
            lambda row: pd.to_datetime(row["application_timestamp"]).year - pd.to_datetime(row["application_date_of_birth"]).year
            if pd.notnull(row.get("application_timestamp")) and pd.notnull(row.get("application_date_of_birth"))
            else None,
            axis=1,
        )

    # Safe casting and capping using data dictionary
    for var_name, rules in M2_DATA_DICTIONARY.items():
        if var_name not in df.columns:
            continue
        col = pd.to_numeric(df[var_name], errors="coerce") if rules.get("data_type") in ["float", "int"] else df[var_name]
        if rules.get("data_type") == "int":
            col = col.astype("float")
        valid_min = rules.get("valid_min")
        valid_max = rules.get("valid_max")
        observed_min = rules.get("observed_cap_min")
        observed_max = rules.get("observed_cap_max")
        if observed_min is not None or observed_max is not None:
            col = col.clip(lower=observed_min, upper=observed_max)
        # if valid_min is not None:
        #     col = col.where(col >= valid_min, np.nan)
        # if valid_max is not None:
        #     col = col.where(col <= valid_max, np.nan)
        df[var_name] = col

    return df
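
# M2_DATA_DICTIONARY (defined elsewhere in this module) is assumed to map each variable
# to casting/capping rules keyed by "data_type", "valid_min", "valid_max",
# "observed_cap_min" and "observed_cap_max"; a hypothetical entry such as
#   "digital_id_first_seen_age": {"data_type": "int", "observed_cap_min": 0, "observed_cap_max": 3650}
# would cast the column to float and clip it to [0, 3650]. Only the observed caps are
# applied above; the valid_min/valid_max filter is left commented out.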


def pre_processing_all(data_df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    # Ensure requested THX fields exist so downstream packaging always has keys
    df_base = data_df.copy()

    for field in THX_FIELDS:
        if field in df_base.columns:
            df_base[field] = df_base[field].astype(str)
        else:
            df_base[field] = None
    df_thx = df_base[THX_FIELDS].copy()

    df_m1 = pre_processing_m1(df_base.copy())
    df_m2 = pre_processing_m2(df_base.copy())
    return df_m1, df_m2, df_thx


# Backwards compatible entry point (used by legacy code/tests if any)
def pre_processing(data_df: pd.DataFrame) -> pd.DataFrame:
    df_m1, _, _ = pre_processing_all(data_df)
    return df_m1
129
processing.py
129
processing.py
@ -1,46 +1,107 @@

import json
from functools import lru_cache
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import xgboost as xgb


# BASE_DIR = Path(__file__).resolve().parent
# M1_MODEL_PATH = BASE_DIR / "xgboost_model_M1.joblib"
# M1_CATEGORY_ORDERS_PATH = BASE_DIR / "category_orders_train_M1.json"
# M2_MODEL_PATH = BASE_DIR / "xgboost_model_M2.joblib"
# M2_CATEGORY_ORDERS_PATH = BASE_DIR / "category_orders_train_M2.json"
# M2_ISO_PATH = BASE_DIR / "isotonic_model_M2.joblib"

M1_MODEL_PATH = "./xgboost_model_M1.joblib"
M1_CATEGORY_ORDERS_PATH = "./category_orders_train_M1.json"
M2_MODEL_PATH = "./xgboost_model_M2.joblib"
M2_CATEGORY_ORDERS_PATH = "./category_orders_train_M2.json"
M2_ISO_PATH = "./isotonic_model_M2.joblib"


def _load_category_orders(path: Path) -> dict:
    with open(path, "r") as f:
        return json.load(f)


@lru_cache(maxsize=1)
def _load_m1_model():
    return joblib.load(M1_MODEL_PATH)


@lru_cache(maxsize=1)
def _load_m2_model():
    return joblib.load(M2_MODEL_PATH)


@lru_cache(maxsize=1)
def _load_m2_iso_model():
    return joblib.load(M2_ISO_PATH)


@lru_cache(maxsize=None)
def _load_category_orders_cached(path: Path):
    # Cache category orders per path to avoid disk I/O on each scoring
    return _load_category_orders(path)


def _prepare(df: pd.DataFrame, category_orders: dict) -> pd.DataFrame:
    df = df.copy()
    for col, categories in category_orders.items():
        if col not in df.columns:
            df[col] = np.nan

        df[col] = df[col].astype(str).str.lower()
        df[col].replace([None, "", "null", np.nan, "nan", " "], np.nan, inplace=True)
        df[col] = pd.Categorical(df[col], categories=categories, ordered=True)
    return df
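
# category_orders_train_M*.json is assumed to map each categorical feature to its ordered
# training categories, e.g. {"RiskRating": ["low", "medium", "neutral", "trusted", "high"], ...};
# _prepare lower-cases the column and coerces it onto exactly those categories, so unseen
# values end up as NaN in the resulting Categorical.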


def processing_m1(input_data: pd.DataFrame) -> pd.DataFrame:
    df = pd.DataFrame(input_data)
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    model = _load_m1_model()
    df.rename(columns={"riskrating": "RiskRating", "trueipgeo": "TrueIpGeo"}, inplace=True)
    category_orders = _load_category_orders_cached(M1_CATEGORY_ORDERS_PATH)
    df = _prepare(df, category_orders)

    # Ensure all expected features exist
    expected_features = model.feature_names

    # missing_features = [feature for feature in expected_features if feature not in df.columns]
    # for feature in missing_features:
    #     df[feature] = np.nan  # Use NaN to avoid dtype issues

    # Create XGBoost DMatrix
    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=np.nan)

    # Make predictions
    predictions = model.predict(dmatrix)
    df["prediction"] = predictions
    return df


def processing_m2(input_data: pd.DataFrame) -> pd.DataFrame:
    df = pd.DataFrame(input_data)
    if df.empty:
        raise ValueError("Input DataFrame is empty.")

    model = _load_m2_model()
    category_orders = _load_category_orders_cached(M2_CATEGORY_ORDERS_PATH)
    df = _prepare(df, category_orders)

    expected_features = model.feature_names
    for feature in expected_features:
        if feature not in df.columns:
            df[feature] = np.nan
    dmatrix = xgb.DMatrix(df[expected_features], enable_categorical=True, missing=np.nan)
    pd_arr = model.predict(dmatrix)
    df["pd_m2"] = pd_arr

    iso_model = _load_m2_iso_model()
    df["pd_m2_iso"] = iso_model.predict(pd_arr)
    return df
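
# isotonic_model_M2.joblib is assumed to be a fitted scikit-learn isotonic regressor
# (hence the new scikit-learn pin in requirements); its predict() rescales the raw M2
# probability into the calibrated pd_m2_iso score.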


def processing_all(df_m1: pd.DataFrame, df_m2: pd.DataFrame, df_thx: pd.DataFrame):
    return processing_m1(df_m1), processing_m2(df_m2), df_thx


# Legacy single-model entry point
def processing(input_data: pd.DataFrame) -> pd.DataFrame:
    return processing_m1(input_data)
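

# Minimal local-run sketch (assumes the joblib/JSON artifacts above sit next to this file
# and that stdin carries one raw application as a JSON object; pre_processing_all comes
# from pre_processing.py):
if __name__ == "__main__":
    import sys

    from pre_processing import pre_processing_all

    raw_df = pd.DataFrame([json.load(sys.stdin)])
    df_m1, df_m2, df_thx = pre_processing_all(raw_df)
    out_m1, out_m2, _ = processing_all(df_m1, df_m2, df_thx)
    print({"m1": float(out_m1["prediction"].iloc[0]), "m2_iso": float(out_m2["pd_m2_iso"].iloc[0])})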
@ -97,6 +97,10 @@
    "zip": {
      "type": ["string", "null"],
      "description": "Zip of the current residence."
    },
    "ReasonCode": {
      "type": ["string", "null"],
      "description": "Reason code from ThreatMetrix."
    }
  },
  "required": []
@ -5,3 +5,4 @@ joblib == 1.4.2
jmespath == 1.0.1
regex == 2023.12.25
json_repair == 0.47.6
scikit-learn == 1.5.2
@ -26,6 +26,54 @@
      "type": ["number", "null"],
      "description": "HD Fraud Score M1"
    },
    "hd_score_m2": {
      "type": ["number", "null"],
      "description": "HD Fraud Score M2"
    },
    "hd_score_iso_m2": {
      "type": ["number", "null"],
      "description": "HD Fraud Score M2 Scaled"
    },
    "digital_id_first_seen": {
      "type": ["string", "null"],
      "description": "Digital ID first seen timestamp"
    },
    "summary_risk_score": {
      "type": ["string", "null"],
      "description": "Summary risk score"
    },
    "cpu_clock": {
      "type": ["string", "null"],
      "description": "CPU clock value from device profiling"
    },
    "account_login_first_seen": {
      "type": ["string", "null"],
      "description": "Account login first seen timestamp"
    },
    "account_telephone_first_seen": {
      "type": ["string", "null"],
      "description": "Account telephone first seen timestamp"
    },
    "true_ip_first_seen": {
      "type": ["string", "null"],
      "description": "True IP first seen timestamp"
    },
    "ssn_hash_first_seen": {
      "type": ["string", "null"],
      "description": "SSN hash first seen timestamp"
    },
    "account_email_attributes": {
      "type": ["string", "null"],
      "description": "Account email attributes"
    },
    "tps_ip_latitude": {
      "type": ["string", "null"],
      "description": "TPS IP latitude"
    },
    "tps_ip_longitude": {
      "type": ["string", "null"],
      "description": "TPS IP longitude"
    },
    "action": {
      "type": ["string", "null"],
      "description": "Recommended Action."
File diff suppressed because one or more lines are too long
BIN
xgboost_model_M1.joblib
Normal file
BIN
xgboost_model_M1.joblib
Normal file
Binary file not shown.
BIN
xgboost_model_M2.joblib
Normal file
BIN
xgboost_model_M2.joblib
Normal file
Binary file not shown.