# blocks-transformer/block.py

import logging

import jmespath
import json_repair
import pandas as pd
import regex as re
from pre_processing import pre_processing_all
from processing import processing_all
from post_processing import post_processing_all


# Logging setup: INFO threshold, records rendered as
# "<time> [<level>] <logger name> - <message>".
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s - %(message)s"
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)

# Text that, after optional whitespace and any run of '?', opens with '{' or
# '[' and closes with '}' or ']' (DOTALL so the payload may span lines).
_JSON_LIKE = re.compile(
    r"^\s*\?*[\{\[].*[\}\]]\s*$",
    re.DOTALL,
)


def extract_value(blob, expression):
    """Evaluate the JMESPath ``expression`` against ``blob``.

    Returns whatever ``jmespath.search`` yields. Any exception raised while
    searching is swallowed and reported as ``None``.
    """
    try:
        found = jmespath.search(expression, blob)
    except Exception:
        found = None
    return found


def coalesce(*args):
    """Return the first argument that is not ``None``.

    Falsy values such as ``0``, ``""`` or ``[]`` count as present. Returns
    ``None`` when called with no arguments or when every argument is ``None``.
    """
    return next((candidate for candidate in args if candidate is not None), None)


def deep_repair(obj):
    """Recursively decode JSON that is embedded as text inside ``obj``.

    * dict  -> new dict with every value repaired (keys are left as they are)
    * list  -> new list with every element repaired
    * str   -> if the stripped text matches ``_JSON_LIKE`` it is parsed with
               ``json_repair.loads`` and the parsed result is repaired again;
               otherwise the original string is returned unchanged
    * other -> returned unchanged
    """
    # Containers: rebuild with each member repaired.
    if isinstance(obj, dict):
        return {key: deep_repair(item) for key, item in obj.items()}
    if isinstance(obj, list):
        return [deep_repair(item) for item in obj]

    # Anything that is not text at this point is passed through as-is.
    if not isinstance(obj, str):
        return obj

    text = obj.strip()
    if not _JSON_LIKE.match(text):
        # Plain string: hand back the caller's value, not the stripped copy.
        return obj

    # Only a single leading '?' is removed before parsing, even though the
    # pattern itself accepts a run of them.
    payload = text[1:] if text.startswith("?") else text
    return deep_repair(json_repair.loads(payload))


def sanitize_blob(blob):
    """Run ``deep_repair`` on ``blob``.

    Returns the repaired value. If ``deep_repair`` raises, the error is
    logged and ``None`` is returned instead.
    """
    try:
        repaired = deep_repair(blob)
    except Exception as exc:
        logger.error("Failed to sanitize blob: %s", exc)
        return None
    return repaired

# Expressions to extract values (M1 + added M2 fields).
#
# Maps each output column to a list of JMESPath expressions. __main__ tries
# them in order and keeps the first result that is not None.
#
# `integration_hub_results.*` is a wildcard projection and therefore yields a
# list; those expressions are wrapped as `( ... )[0]` so the column receives
# the first projected value, and an empty projection becomes null and lets the
# later expressions in the list take over.
expressions = {
    # M1 (existing)
    "first_seen_days": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].first_seen_days)[0]",
        'Blob."emailage.emailriskscore.first_seen_days"',
        "Blob.tps_vendor_raw_response.query.results[0].first_seen_days",
    ],
    "ea_score": [
        # Wrapped in ( ... )[0] like every other integration_hub projection:
        # unwrapped, the projection returned a list (possibly empty), which is
        # not None and so blocked the two fallbacks below.
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].EAScore)[0]",
        'Blob."emailage.emailriskscore.eascore"',
        "Blob.tps_vendor_raw_response.query.results[0].EAScore",
    ],
    "email_creation_days": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].email_creation_days)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].email_creation_days",
    ],
    "summary_risk_score": ["Blob.summary_risk_score"],
    "digital_id_trust_score_rating": ["Blob.digital_id_trust_score_rating"],
    "os_version": ["Blob.os_version"],
    "account_email_worst_score": ["Blob.account_email_worst_score"],
    "true_ip_score": ["Blob.true_ip_score"],
    "ip_net_speed_cell": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_netSpeedCell)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ip_netSpeedCell",
    ],
    "account_email_score": ["Blob.account_email_score"],
    "true_ip_worst_score": ["Blob.true_ip_worst_score"],
    "proxy_ip_worst_score": ["Blob.proxy_ip_worst_score"],
    "proxy_ip_score": ["Blob.proxy_ip_score"],
    "fuzzy_device_score": ["Blob.fuzzy_device_score"],
    "ip_region_confidence": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_regionconf)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ip_regionconf",
    ],
    "true_ip_state_confidence": ["Blob.true_ip_state_confidence"],
    "fuzzy_device_worst_score": ["Blob.fuzzy_device_worst_score"],
    "digital_id_confidence_rating": ["Blob.digital_id_confidence_rating"],
    "trueipgeo": ["TrueIpGeo", "Blob.true_ip_geo"],
    # M2 additions
    "policy_score": ["Blob.policy_score"],
    "digital_id_trust_score": ["Blob.digital_id_trust_score"],
    "proxy_score": ["Blob.proxy_score"],
    "browser_spoof_score": ["Blob.browser_spoof_score"],
    "input_ip_connection_type": ["Blob.input_ip_connection_type"],
    "fuzzy_device_id_confidence": ["Blob.fuzzy_device_id_confidence"],
    "fraudrisk": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].fraudRisk)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].fraudRisk",
        'Blob."emailage.emailriskscore.fraudRisk"',
    ],
    "overalldigitalidentityscore": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].overallDigitalIdentityScore)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].overallDigitalIdentityScore",
        'Blob."emailage.emailriskscore.overallDigitalIdentityScore"',
    ],
    "totalhits": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].totalhits)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].totalhits",
        'Blob."emailage.emailriskscore.totalhits"',
    ],
    "uniquehits": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].uniquehits)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].uniquehits",
        'Blob."emailage.emailriskscore.uniquehits"',
    ],
    "emailtofullnameconfidence": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].emailToFullNameConfidence)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].emailToFullNameConfidence",
        'Blob."emailage.emailriskscore.emailToFullNameConfidence"',
    ],
    "emailtolastnameconfidence": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].emailToLastNameConfidence)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].emailToLastNameConfidence",
        'Blob."emailage.emailriskscore.emailToLastNameConfidence"',
    ],
    "domain_creation_days": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].domain_creation_days)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].domain_creation_days",
        'Blob."emailage.emailriskscore.domain_creation_days"',
    ],
    "iptophoneconfidence": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ipToPhoneConfidence)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ipToPhoneConfidence",
        'Blob."emailage.emailriskscore.ipToPhoneConfidence"',
    ],
    "di_autofill_count_login": [
        "Blob.tmx_variables.di_autofill_count_login",
        "Blob.policy_details_api.policy_detail_api.customer.rules.vars.variable.di_autofill_count_login",
    ],
    "accphone_gbl_velocity_hour": [
        "Blob.tmx_variables.accphone_gbl_velocity_hour",
        "Blob.tmx_variables._accphone_gbl_velocity_hour",
    ],
    # Lat/long fields for distance engineering
    "ip_latitude": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_latitude)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ip_latitude",
    ],
    "ip_longitude": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_longitude)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ip_longitude",
    ],
    "tps_ip_latitude": ["Blob.tps_vendor_raw_response.query.results[0].ip_latitude"],
    "tps_ip_longitude": ["Blob.tps_vendor_raw_response.query.results[0].ip_longitude"],
    "true_ip_latitude": ["Blob.true_ip_latitude"],
    "true_ip_longitude": ["Blob.true_ip_longitude"],
    "proxy_ip_latitude": ["Blob.proxy_ip_latitude"],
    "proxy_ip_longitude": ["Blob.proxy_ip_longitude"],
    "dns_ip_latitude": ["Blob.dns_ip_latitude"],
    "dns_ip_longitude": ["Blob.dns_ip_longitude"],
    "input_ip_latitude": ["Blob.input_ip_latitude"],
    "input_ip_longitude": ["Blob.input_ip_longitude"],
    # First-seen timestamps for age deltas
    "digital_id_first_seen": ["Blob.digital_id_first_seen"],
    "account_email_first_seen": ["Blob.account_email_first_seen"],
    "account_login_first_seen": ["Blob.account_login_first_seen"],
    "account_telephone_first_seen": ["Blob.account_telephone_first_seen"],
    "true_ip_first_seen": ["Blob.true_ip_first_seen"],
    "ssn_hash_first_seen": ["Blob.ssn_hash_first_seen"],
    "fuzzy_device_first_seen": ["Blob.fuzzy_device_first_seen"],
    "national_id_first_seen": ["Blob.national_id_first_seen"],
    "proxy_ip_first_seen": ["Blob.proxy_ip_first_seen"],
    # Attribute arrays (used for one-hot style parsing)
    "account_name_activities": ["Blob.account_name_activities"],
    "account_email_attributes": ["Blob.account_email_attributes"],
    "true_ip_attributes": ["Blob.true_ip_attributes"],
    "true_ip_activities": ["Blob.true_ip_activities"],
    "digital_id_attributes": ["Blob.digital_id_attributes"],
    "account_telephone_attributes": ["Blob.account_telephone_attributes"],
    "cpu_clock": ["Blob.cpu_clock"],
}


def _without_blob_root(expression):
    """Rewrite ``expression`` so it addresses the blob object directly.

    Handles the plain ``Blob.<path>`` form and the parenthesised
    ``(Blob.<path>)[0]`` form. Returns ``None`` when the expression does not
    start at a ``Blob.`` root, i.e. there is nothing to rewrite.
    """
    if expression.startswith("Blob."):
        return expression[len("Blob."):]
    if expression.startswith("(Blob."):
        return "(" + expression[len("(Blob."):]
    return None


def _extract_first_match(blob_obj, expression_list):
    """Return the first non-None value ``expression_list`` yields for ``blob_obj``.

    Each expression is evaluated as written; when that gives ``None`` and the
    expression is rooted at ``Blob.``, it is retried with that root removed,
    because ``blob_obj`` is the blob itself rather than a record wrapping it.
    """
    candidates = []
    for expr in expression_list:
        value = extract_value(blob_obj, expr)
        if value is None and isinstance(expr, str):
            bare_expr = _without_blob_root(expr)
            if bare_expr is not None:
                value = extract_value(blob_obj, bare_expr)
        candidates.append(value)
    return coalesce(*candidates)


def __main__(
    # Application->
    application_key: str,
    application_timestamp: str,
    application_ssn: str,
    application_email_address: str,
    application_bank_account_number: str,
    application_is_rejected: str,
    application_date_of_birth: str,
    # uprovaloanapplication->
    educationlevel: str,
    employmentstatus: str,
    lengthatbank: str,
    lengthatjob: str,
    ownhome: str,
    payfrequency: str,
    monthsatresidence: str,
    state: str,
    zip: str,
    # thxresponse->
    EventType: str,
    DigitalIdConfidence: str,
    RiskRating: str,
    TmxSummaryReasonCode: str,
    TrueIpGeo: str,
    Blob: str,
    DeviceId: str,
    FuzzyDeviceId: str,
    ReasonCode: str,
) -> dict:
    """Run one application record through the pre/processing/post pipeline.

    Steps:
      1. Put all inputs into a single-row DataFrame with lower-cased column
         names, and lower-case the email address.
      2. If ``Blob`` is truthy, repair it with ``sanitize_blob`` and add one
         column per entry of ``expressions``. A value found in the blob
         replaces an input column of the same name; otherwise the input value
         is kept. If ``Blob`` is falsy, the expression columns that do not
         already exist are added as ``None``.
      3. Call ``pre_processing_all``, ``processing_all`` and
         ``post_processing_all`` in sequence.
      4. When ``state == "ZZ"`` or ``zip == "86445"``, force ``hd_score_m1``,
         ``hd_score_m2`` and ``hd_score_iso_m2`` to 1250.
      5. Cast those three scores to built-in ``float`` where present.

    Returns:
        The result of ``post_processing_all`` after steps 4 and 5. It is also
        printed to stdout.
    """
    # Convert input parameters into a flat dictionary
    data = {
        "application_key": application_key,
        "application_timestamp": application_timestamp,
        # NOTE(review): this key ends with a space, so the column is named
        # "application_ssn " - looks like a typo; confirm what the downstream
        # pre_processing code expects before renaming.
        "application_ssn ": application_ssn,
        "application_email_address": application_email_address,
        "application_bank_account_number": application_bank_account_number,
        "application_is_rejected": application_is_rejected,
        "application_date_of_birth": application_date_of_birth,
        "educationlevel": educationlevel,
        "employmentstatus": employmentstatus,
        "lengthatbank": lengthatbank,
        "lengthatjob": lengthatjob,
        "ownhome": ownhome,
        "payfrequency": payfrequency,
        "monthsatresidence": monthsatresidence,
        "state": state,
        "zip": zip,
        "EventType": EventType,
        "DigitalIdConfidence": DigitalIdConfidence,
        "RiskRating": RiskRating,
        "TmxSummaryReasonCode": TmxSummaryReasonCode,
        "TrueIpGeo": TrueIpGeo,
        "Blob": Blob,
        "DeviceId": DeviceId,
        "FuzzyDeviceId": FuzzyDeviceId,
        "ReasonCode": ReasonCode,
    }

    # Convert dictionary to a single-row DataFrame
    combined_df = pd.DataFrame([data])
    combined_df.columns = combined_df.columns.str.lower()

    # Uncomment Below For Testing using Uprova Batch Data
    # combined_df["educationlevel"] = None
    # combined_df["monthsatresidence"] = None
    # combined_df["ownhome"] = False
    # combined_df['lengthatbank'] = 0

    combined_df["application_email_address"] = combined_df["application_email_address"].str.lower()
    if Blob:
        combined_df["blob"] = combined_df["blob"].apply(sanitize_blob)

        # Extract one column per entry of the expressions dictionary.
        for column, expressions_list in expressions.items():
            extracted = combined_df["blob"].apply(
                _extract_first_match, args=(expressions_list,)
            )
            if column in combined_df.columns:
                # Column also came in as an input (e.g. trueipgeo): the blob
                # value wins only where one was actually found.
                combined_df[column] = extracted.where(extracted.notnull(), combined_df[column])
            else:
                combined_df[column] = extracted
    else:
        for column in expressions:
            # Do not wipe a column that was supplied as an input (trueipgeo);
            # with no blob there is simply nothing to override it with.
            if column not in combined_df.columns:
                combined_df[column] = None

    df_m1, df_m2, df_thx = pre_processing_all(combined_df)
    processed_m1, processed_m2, df_thx = processing_all(df_m1, df_m2, df_thx)
    result = post_processing_all(processed_m1, processed_m2, df_thx)

    # State Check
    state_value = combined_df["state"].iloc[0]
    zip_value = combined_df["zip"].iloc[0]
    if (pd.notnull(state_value) and state_value == "ZZ") or (
        pd.notnull(zip_value) and zip_value == "86445"
    ):
        result["hd_score_m1"] = 1250
        result["hd_score_m2"] = 1250
        result["hd_score_iso_m2"] = 1250

    # Normalize numeric scores to built-in float so JSON encoding (e.g. Temporal)
    # does not fail on NumPy scalar types like np.float32/np.float64.
    for key in ("hd_score_m1", "hd_score_m2", "hd_score_iso_m2"):
        if key in result and result[key] is not None:
            try:
                result[key] = float(result[key])
            except (TypeError, ValueError):
                logger.warning("Failed to cast %s=%r to float", key, result[key])

    print(result)

    return result
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								import logging
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								import jmespath
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								import json_repair
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								import pandas as pd
 								import regex as re
 								from pre_processing import pre_processing_all
 								from processing import processing_all
 								from post_processing import post_processing_all
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
 								# Configure logging
 								logging.basicConfig(
 								    level=logging.INFO,
 								    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
 								)
 								logger = logging.getLogger(__name__)
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								_JSON_LIKE = re.compile(r"^\s*\?*[\{\[].*[\}\]]\s*$", re.DOTALL)
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
 								def extract_value(blob, expression):
 								    try:
 								        return jmespath.search(expression, blob)
 								    except Exception:
 								        return None
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								def coalesce(*args):
 								    for value in args:
 								        if value is not None:
 								            return value
 								    return None
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
 								def deep_repair(obj):
 								    # 1) If it's a string that *looks* like JSON (with or without one leading '?'),
 								    #    strip exactly one leading '?', reparses, and recurse.
 								    if isinstance(obj, str):
 								        s = obj.strip()
 								        if _JSON_LIKE.match(s):
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								            if s.startswith("?"):
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								                s = s[1:]
 								            parsed = json_repair.loads(s)
 								            return deep_repair(parsed)
 								        return obj
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								    # 2) Dict – recurse on each value
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								    if isinstance(obj, dict):
 								        return {k: deep_repair(v) for k, v in obj.items()}
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								    # 3) List – recurse on each element
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								    if isinstance(obj, list):
 								        return [deep_repair(v) for v in obj]
 								    # 4) Otherwise, leave it alone
 								    return obj
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								def sanitize_blob(blob):
 								    try:
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								        return deep_repair(blob)
 								    except Exception as e:
 								        logger.error("Failed to sanitize blob: %s", e)
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								        return None
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
 								# Expressions to extract values (M1 + added M2 fields)
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								expressions = {
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								    # M1 (existing)
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    "first_seen_days": [
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].first_seen_days)[0]",
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        'Blob."emailage.emailriskscore.first_seen_days"',
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								        "Blob.tps_vendor_raw_response.query.results[0].first_seen_days",
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    ],
 								    "ea_score": [
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        "Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].EAScore",
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								        'Blob."emailage.emailriskscore.eascore"',
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        "Blob.tps_vendor_raw_response.query.results[0].EAScore",
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    ],
 								    "email_creation_days": [
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].email_creation_days)[0]",
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								        "Blob.tps_vendor_raw_response.query.results[0].email_creation_days",
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    ],
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								    "summary_risk_score": ["Blob.summary_risk_score"],
 								    "digital_id_trust_score_rating": ["Blob.digital_id_trust_score_rating"],
 								    "os_version": ["Blob.os_version"],
 								    "account_email_worst_score": ["Blob.account_email_worst_score"],
 								    "true_ip_score": ["Blob.true_ip_score"],
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    "ip_net_speed_cell": [
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_netSpeedCell)[0]",
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								        "Blob.tps_vendor_raw_response.query.results[0].ip_netSpeedCell",
 								    ],
 								    "account_email_score": ["Blob.account_email_score"],
 								    "true_ip_worst_score": ["Blob.true_ip_worst_score"],
 								    "proxy_ip_worst_score": ["Blob.proxy_ip_worst_score"],
 								    "proxy_ip_score": ["Blob.proxy_ip_score"],
 								    "fuzzy_device_score": ["Blob.fuzzy_device_score"],
 								    "ip_region_confidence": [
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_regionconf)[0]",
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								        "Blob.tps_vendor_raw_response.query.results[0].ip_regionconf",
-												Add initial files

											
										
										
											2025-01-17 16:20:44 +00:00
+								    ],
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								    "true_ip_state_confidence": ["Blob.true_ip_state_confidence"],
 								    "fuzzy_device_worst_score": ["Blob.fuzzy_device_worst_score"],
 								    "digital_id_confidence_rating": ["Blob.digital_id_confidence_rating"],
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								    "trueipgeo": ["TrueIpGeo", "Blob.true_ip_geo"],
 								    # M2 additions
 								    "policy_score": ["Blob.policy_score"],
 								    "digital_id_trust_score": ["Blob.digital_id_trust_score"],
 								    "proxy_score": ["Blob.proxy_score"],
 								    "browser_spoof_score": ["Blob.browser_spoof_score"],
 								    "input_ip_connection_type": ["Blob.input_ip_connection_type"],
 								    "fuzzy_device_id_confidence": ["Blob.fuzzy_device_id_confidence"],
 								    "fraudrisk": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].fraudRisk)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].fraudRisk",
 								        'Blob."emailage.emailriskscore.fraudRisk"',
 								    ],
 								    "overalldigitalidentityscore": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].overallDigitalIdentityScore)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].overallDigitalIdentityScore",
 								        'Blob."emailage.emailriskscore.overallDigitalIdentityScore"',
 								    ],
 								    "totalhits": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].totalhits)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].totalhits",
 								        'Blob."emailage.emailriskscore.totalhits"',
 								    ],
 								    "uniquehits": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].uniquehits)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].uniquehits",
 								        'Blob."emailage.emailriskscore.uniquehits"',
 								    ],
 								    "emailtofullnameconfidence": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].emailToFullNameConfidence)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].emailToFullNameConfidence",
 								        'Blob."emailage.emailriskscore.emailToFullNameConfidence"',
 								    ],
 								    "emailtolastnameconfidence": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].emailToLastNameConfidence)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].emailToLastNameConfidence",
 								        'Blob."emailage.emailriskscore.emailToLastNameConfidence"',
 								    ],
 								    "domain_creation_days": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].domain_creation_days)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].domain_creation_days",
 								        'Blob."emailage.emailriskscore.domain_creation_days"',
 								    ],
 								    "iptophoneconfidence": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ipToPhoneConfidence)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].ipToPhoneConfidence",
 								        'Blob."emailage.emailriskscore.ipToPhoneConfidence"',
 								    ],
 								    "di_autofill_count_login": [
 								        "Blob.tmx_variables.di_autofill_count_login",
 								        "Blob.policy_details_api.policy_detail_api.customer.rules.vars.variable.di_autofill_count_login",
 								    ],
 								    "accphone_gbl_velocity_hour": [
 								        "Blob.tmx_variables.accphone_gbl_velocity_hour",
 								        "Blob.tmx_variables._accphone_gbl_velocity_hour",
 								    ],
 								    # Lat/long fields for distance engineering
 								    "ip_latitude": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_latitude)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].ip_latitude",
 								    ],
 								    "ip_longitude": [
 								        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_longitude)[0]",
 								        "Blob.tps_vendor_raw_response.query.results[0].ip_longitude",
 								    ],
 								    "tps_ip_latitude": ["Blob.tps_vendor_raw_response.query.results[0].ip_latitude"],
 								    "tps_ip_longitude": ["Blob.tps_vendor_raw_response.query.results[0].ip_longitude"],
 								    "true_ip_latitude": ["Blob.true_ip_latitude"],
 								    "true_ip_longitude": ["Blob.true_ip_longitude"],
 								    "proxy_ip_latitude": ["Blob.proxy_ip_latitude"],
 								    "proxy_ip_longitude": ["Blob.proxy_ip_longitude"],
 								    "dns_ip_latitude": ["Blob.dns_ip_latitude"],
 								    "dns_ip_longitude": ["Blob.dns_ip_longitude"],
 								    "input_ip_latitude": ["Blob.input_ip_latitude"],
 								    "input_ip_longitude": ["Blob.input_ip_longitude"],
 								    # First-seen timestamps for age deltas
 								    "digital_id_first_seen": ["Blob.digital_id_first_seen"],
 								    "account_email_first_seen": ["Blob.account_email_first_seen"],
 								    "account_login_first_seen": ["Blob.account_login_first_seen"],
 								    "account_telephone_first_seen": ["Blob.account_telephone_first_seen"],
 								    "true_ip_first_seen": ["Blob.true_ip_first_seen"],
 								    "ssn_hash_first_seen": ["Blob.ssn_hash_first_seen"],
 								    "fuzzy_device_first_seen": ["Blob.fuzzy_device_first_seen"],
 								    "national_id_first_seen": ["Blob.national_id_first_seen"],
 								    "proxy_ip_first_seen": ["Blob.proxy_ip_first_seen"],
 								    # Attribute arrays (used for one-hot style parsing)
 								    "account_name_activities": ["Blob.account_name_activities"],
 								    "account_email_attributes": ["Blob.account_email_attributes"],
 								    "true_ip_attributes": ["Blob.true_ip_attributes"],
 								    "true_ip_activities": ["Blob.true_ip_activities"],
 								    "digital_id_attributes": ["Blob.digital_id_attributes"],
 								    "account_telephone_attributes": ["Blob.account_telephone_attributes"],
 								    "cpu_clock": ["Blob.cpu_clock"]
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								}
 								def __main__(
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								    # Application->
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    application_key: str,
 								    application_timestamp: str,
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								    application_ssn: str,
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    application_email_address: str,
 								    application_bank_account_number: str,
 								    application_is_rejected: str,
 								    application_date_of_birth: str,
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								    # uprovaloanapplication->
 								    educationlevel: str,
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    employmentstatus: str,
 								    lengthatbank: str,
 								    lengthatjob: str,
 								    ownhome: str,
 								    payfrequency: str,
 								    monthsatresidence: str,
-												Upload files to "/"

											
										
										
											2025-03-31 12:36:37 +00:00
+								    state: str,
-												Upload files to "/"

											
										
										
											2025-03-31 13:53:20 +00:00
+								    zip: str,
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								    # thxresponse->
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    EventType: str,
 								    DigitalIdConfidence: str,
 								    RiskRating: str,
 								    TmxSummaryReasonCode: str,
 								    TrueIpGeo: str,
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								    Blob: str,
 								    DeviceId: str,
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								    FuzzyDeviceId: str,
 								    ReasonCode: str,
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								) -> dict:
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    # Convert input parameters into a flat dictionary
 								    data = {
-												Upload files to "/"

											
										
										
											2025-07-11 14:42:06 +00:00
+								        "application_key": application_key,
 								        "application_timestamp": application_timestamp,
 								        "application_ssn ": application_ssn,
 								        "application_email_address": application_email_address,
 								        "application_bank_account_number": application_bank_account_number,
 								        "application_is_rejected": application_is_rejected,
 								        "application_date_of_birth": application_date_of_birth,
 								        "educationlevel": educationlevel,
 								        "employmentstatus": employmentstatus,
 								        "lengthatbank": lengthatbank,
 								        "lengthatjob": lengthatjob,
 								        "ownhome": ownhome,
 								        "payfrequency": payfrequency,
 								        "monthsatresidence": monthsatresidence,
 								        "state": state,
 								        "zip": zip,
 								        "EventType": EventType,
 								        "DigitalIdConfidence": DigitalIdConfidence,
 								        "RiskRating": RiskRating,
 								        "TmxSummaryReasonCode": TmxSummaryReasonCode,
 								        "TrueIpGeo": TrueIpGeo,
 								        "Blob": Blob,
 								        "DeviceId": DeviceId,
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        "FuzzyDeviceId": FuzzyDeviceId,
 								        "ReasonCode": ReasonCode,
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    }
 								    # Convert dictionary to a single-row DataFrame
 								    combined_df = pd.DataFrame([data])
 								    combined_df.columns = combined_df.columns.str.lower()
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
 								    # Uncomment Below For Testing using Uprova Batch Data
 								    # combined_df["educationlevel"] = None
 								    # combined_df["monthsatresidence"] = None
 								    # combined_df["ownhome"] = False
 								    # combined_df['lengthatbank'] = 0
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    combined_df["application_email_address"] = combined_df["application_email_address"].str.lower()
 								    if Blob:
 								        combined_df["blob"] = combined_df["blob"].apply(sanitize_blob)
 								        # Step 2: Extract values using the expressions dictionary
 								        for column, expressions_list in expressions.items():
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								            def _extract_with_fallback(blob_obj):
 								                values = []
 								                for expr in expressions_list:
 								                    val = extract_value(blob_obj, expr)
 								                    if val is None and isinstance(expr, str) and expr.startswith("Blob."):
 								                        val = extract_value(blob_obj, expr[len("Blob.") :])
 								                    values.append(val)
 								                return coalesce(*values)
 								            extracted = combined_df["blob"].apply(_extract_with_fallback)
 								            if column in combined_df.columns:
 								                combined_df[column] = extracted.where(extracted.notnull(), combined_df[column])
 								            else:
 								                combined_df[column] = extracted
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        # logger.info("pre_flowx data")
 								        # logger.info(combined_df.iloc[0].drop("blob").to_dict())
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								    else:
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        for column in expressions:
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
+								            combined_df[column] = None
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        # logger.info("pre_flowx data")
 								        # logger.info(combined_df.iloc[0].to_dict())
 								    df_m1, df_m2, df_thx = pre_processing_all(combined_df)
 								    # logger.info("pre_processed data m1")
 								    # logger.info(df_m1.iloc[0].to_dict())
 								    # logger.info("pre_processed data m2")
 								    # logger.info(df_m2.iloc[0].to_dict())
 								    processed_m1, processed_m2, df_thx = processing_all(df_m1, df_m2, df_thx)
 								    # logger.info("processed_data m1")
 								    # logger.info(processed_m1.iloc[0].to_dict())
 								    # logger.info("processed_data m2")
 								    # logger.info(processed_m2.iloc[0].to_dict())
 								    result = post_processing_all(processed_m1, processed_m2, df_thx)
-												Upload files to "/"

											
										
										
											2025-03-31 12:36:37 +00:00
+								    # State Check
 								    state_value = combined_df["state"].iloc[0]
-												Upload files to "/"

											
										
										
											2025-03-31 13:53:20 +00:00
+								    zip_value = combined_df["zip"].iloc[0]
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								    if (pd.notnull(state_value) and state_value == "ZZ") or (
 								        pd.notnull(zip_value) and zip_value == "86445"
 								    ):
-												Upload files to "/"

											
										
										
											2025-03-31 12:36:37 +00:00
+								        result["hd_score_m1"] = 1250
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								        result["hd_score_m2"] = 1250
 								        result["hd_score_iso_m2"] = 1250
 								        # logger.info("post_processed_data after state check")
 								        # logger.info(result)
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								    # Normalize numeric scores to built-in float so JSON encoding (e.g. Temporal)
 								    # does not fail on NumPy scalar types like np.float32/np.float64.
 								    for key in ("hd_score_m1", "hd_score_m2", "hd_score_iso_m2"):
 								        if key in result and result[key] is not None:
 								            try:
 								                result[key] = float(result[key])
 								            except (TypeError, ValueError):
 								                logger.warning("Failed to cast %s=%r to float", key, result[key])
-												Advanced M series V1 model block

											
										
										
											2025-03-12 16:12:18 +00:00
-												Sync m-1-v-1 block with local updates

											
										
										
											2025-11-23 23:22:32 -05:00
+								    print(result)
 								    return result