import logging

import jmespath
import json_repair
import pandas as pd
import regex as re

from pre_processing import pre_processing_all
from processing import processing_all
from post_processing import post_processing_all

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
)
logger = logging.getLogger(__name__)

_JSON_LIKE = re.compile(r"^\s*\?*[\{\[].*[\}\]]\s*$", re.DOTALL)
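# Illustrative matches for _JSON_LIKE (inferred from the pattern above, not
# from a documented upstream contract):
#     '{"a": 1}'    -> match
#     '?{"a": 1}'   -> match (leading '?' noise is tolerated)
#     '[1, 2, 3]'   -> match
#     'plain text'  -> no match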


def extract_value(blob, expression):
    try:
        return jmespath.search(expression, blob)
    except Exception:
        return None
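

# Example (illustrative, hypothetical payload): for
#     blob = {"Blob": {"summary_risk_score": 42}}
# extract_value(blob, "Blob.summary_risk_score") returns 42, while a missing
# path or malformed JMESPath expression returns None rather than raising.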


def coalesce(*args):
    for value in args:
        if value is not None:
            return value
    return None
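

# Example (illustrative): coalesce(None, "", 0) returns "" because only None
# values are skipped; empty strings and zeros are kept.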


def deep_repair(obj):
    # 1) If it's a string that *looks* like JSON (with or without a leading '?'),
    #    strip exactly one leading '?', reparse, and recurse.
    if isinstance(obj, str):
        s = obj.strip()
        if _JSON_LIKE.match(s):
            if s.startswith("?"):
                s = s[1:]
            parsed = json_repair.loads(s)
            # Guard against non-progress: if repair hands back the same
            # JSON-like string, stop here instead of recursing forever.
            if isinstance(parsed, str) and parsed.strip() == s:
                return parsed
            return deep_repair(parsed)
        return obj

    # 2) Dict – recurse on each value
    if isinstance(obj, dict):
        return {k: deep_repair(v) for k, v in obj.items()}

    # 3) List – recurse on each element
    if isinstance(obj, list):
        return [deep_repair(v) for v in obj]

    # 4) Otherwise, leave it alone
    return obj
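

# Example (illustrative, made-up payload): a doubly encoded blob such as
#     '?{"scores": "{\"ea\": 500}"}'
# repairs to {"scores": {"ea": 500}} because nested JSON-like strings are
# reparsed recursively.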


def sanitize_blob(blob):
    try:
        return deep_repair(blob)
    except Exception as e:
        logger.error("Failed to sanitize blob: %s", e)
        return None
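

# sanitize_blob is the error-isolating entry point: any exception raised while
# repairing a record is logged and mapped to None, so one bad blob cannot fail
# the whole scoring call.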


# Expressions to extract values (M1 + added M2 fields)
expressions = {
    # M1 (existing)
    "first_seen_days": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].first_seen_days)[0]",
        'Blob."emailage.emailriskscore.first_seen_days"',
        "Blob.tps_vendor_raw_response.query.results[0].first_seen_days",
    ],
    "ea_score": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].EAScore)[0]",
        'Blob."emailage.emailriskscore.eascore"',
        "Blob.tps_vendor_raw_response.query.results[0].EAScore",
    ],
    "email_creation_days": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].email_creation_days)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].email_creation_days",
    ],
    "summary_risk_score": ["Blob.summary_risk_score"],
    "digital_id_trust_score_rating": ["Blob.digital_id_trust_score_rating"],
    "os_version": ["Blob.os_version"],
    "account_email_worst_score": ["Blob.account_email_worst_score"],
    "true_ip_score": ["Blob.true_ip_score"],
    "ip_net_speed_cell": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_netSpeedCell)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ip_netSpeedCell",
    ],
    "account_email_score": ["Blob.account_email_score"],
    "true_ip_worst_score": ["Blob.true_ip_worst_score"],
    "proxy_ip_worst_score": ["Blob.proxy_ip_worst_score"],
    "proxy_ip_score": ["Blob.proxy_ip_score"],
    "fuzzy_device_score": ["Blob.fuzzy_device_score"],
    "ip_region_confidence": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_regionconf)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ip_regionconf",
    ],
    "true_ip_state_confidence": ["Blob.true_ip_state_confidence"],
    "fuzzy_device_worst_score": ["Blob.fuzzy_device_worst_score"],
    "digital_id_confidence_rating": ["Blob.digital_id_confidence_rating"],
    "trueipgeo": ["TrueIpGeo", "Blob.true_ip_geo"],
    # M2 additions
    "policy_score": ["Blob.policy_score"],
    "digital_id_trust_score": ["Blob.digital_id_trust_score"],
    "proxy_score": ["Blob.proxy_score"],
    "browser_spoof_score": ["Blob.browser_spoof_score"],
    "input_ip_connection_type": ["Blob.input_ip_connection_type"],
    "fuzzy_device_id_confidence": ["Blob.fuzzy_device_id_confidence"],
    "fraudrisk": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].fraudRisk)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].fraudRisk",
        'Blob."emailage.emailriskscore.fraudRisk"',
    ],
    "overalldigitalidentityscore": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].overallDigitalIdentityScore)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].overallDigitalIdentityScore",
        'Blob."emailage.emailriskscore.overallDigitalIdentityScore"',
    ],
    "totalhits": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].totalhits)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].totalhits",
        'Blob."emailage.emailriskscore.totalhits"',
    ],
    "uniquehits": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].uniquehits)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].uniquehits",
        'Blob."emailage.emailriskscore.uniquehits"',
    ],
    "emailtofullnameconfidence": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].emailToFullNameConfidence)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].emailToFullNameConfidence",
        'Blob."emailage.emailriskscore.emailToFullNameConfidence"',
    ],
    "emailtolastnameconfidence": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].emailToLastNameConfidence)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].emailToLastNameConfidence",
        'Blob."emailage.emailriskscore.emailToLastNameConfidence"',
    ],
    "domain_creation_days": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].domain_creation_days)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].domain_creation_days",
        'Blob."emailage.emailriskscore.domain_creation_days"',
    ],
    "iptophoneconfidence": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ipToPhoneConfidence)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ipToPhoneConfidence",
        'Blob."emailage.emailriskscore.ipToPhoneConfidence"',
    ],
    "di_autofill_count_login": [
        "Blob.tmx_variables.di_autofill_count_login",
        "Blob.policy_details_api.policy_detail_api.customer.rules.vars.variable.di_autofill_count_login",
    ],
    "accphone_gbl_velocity_hour": [
        "Blob.tmx_variables.accphone_gbl_velocity_hour",
        "Blob.tmx_variables._accphone_gbl_velocity_hour",
    ],
    # Lat/long fields for distance engineering
    "ip_latitude": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_latitude)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ip_latitude",
    ],
    "ip_longitude": [
        "(Blob.integration_hub_results.*.tps_vendor_raw_response.query.results[0].ip_longitude)[0]",
        "Blob.tps_vendor_raw_response.query.results[0].ip_longitude",
    ],
    "tps_ip_latitude": ["Blob.tps_vendor_raw_response.query.results[0].ip_latitude"],
    "tps_ip_longitude": ["Blob.tps_vendor_raw_response.query.results[0].ip_longitude"],
    "true_ip_latitude": ["Blob.true_ip_latitude"],
    "true_ip_longitude": ["Blob.true_ip_longitude"],
    "proxy_ip_latitude": ["Blob.proxy_ip_latitude"],
    "proxy_ip_longitude": ["Blob.proxy_ip_longitude"],
    "dns_ip_latitude": ["Blob.dns_ip_latitude"],
    "dns_ip_longitude": ["Blob.dns_ip_longitude"],
    "input_ip_latitude": ["Blob.input_ip_latitude"],
    "input_ip_longitude": ["Blob.input_ip_longitude"],
    # First-seen timestamps for age deltas
    "digital_id_first_seen": ["Blob.digital_id_first_seen"],
    "account_email_first_seen": ["Blob.account_email_first_seen"],
    "account_login_first_seen": ["Blob.account_login_first_seen"],
    "account_telephone_first_seen": ["Blob.account_telephone_first_seen"],
    "true_ip_first_seen": ["Blob.true_ip_first_seen"],
    "ssn_hash_first_seen": ["Blob.ssn_hash_first_seen"],
    "fuzzy_device_first_seen": ["Blob.fuzzy_device_first_seen"],
    "national_id_first_seen": ["Blob.national_id_first_seen"],
    "proxy_ip_first_seen": ["Blob.proxy_ip_first_seen"],
    # Attribute arrays (used for one-hot style parsing)
    "account_name_activities": ["Blob.account_name_activities"],
    "account_email_attributes": ["Blob.account_email_attributes"],
    "true_ip_attributes": ["Blob.true_ip_attributes"],
    "true_ip_activities": ["Blob.true_ip_activities"],
    "digital_id_attributes": ["Blob.digital_id_attributes"],
    "account_telephone_attributes": ["Blob.account_telephone_attributes"],
    "cpu_clock": ["Blob.cpu_clock"],
}
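
# Fallback semantics (illustrative): for each output column, the listed
# JMESPath expressions are tried in order and the first non-None result wins.
# With a hypothetical blob {"Blob": {"summary_risk_score": 42}}, the
# "summary_risk_score" column resolves to 42; a blob missing every listed
# path leaves the column as None.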


def __main__(
    # Application->
    application_key: str,
    application_timestamp: str,
    application_ssn: str,
    application_email_address: str,
    application_bank_account_number: str,
    application_is_rejected: str,
    application_date_of_birth: str,
    # uprovaloanapplication->
    educationlevel: str,
    employmentstatus: str,
    lengthatbank: str,
    lengthatjob: str,
    ownhome: str,
    payfrequency: str,
    monthsatresidence: str,
    state: str,
    zip: str,
    # thxresponse->
    EventType: str,
    DigitalIdConfidence: str,
    RiskRating: str,
    TmxSummaryReasonCode: str,
    TrueIpGeo: str,
    Blob: str,
    DeviceId: str,
    FuzzyDeviceId: str,
    ReasonCode: str,
) -> dict:
    # Convert input parameters into a flat dictionary
    data = {
        "application_key": application_key,
        "application_timestamp": application_timestamp,
        "application_ssn": application_ssn,
        "application_email_address": application_email_address,
        "application_bank_account_number": application_bank_account_number,
        "application_is_rejected": application_is_rejected,
        "application_date_of_birth": application_date_of_birth,
        "educationlevel": educationlevel,
        "employmentstatus": employmentstatus,
        "lengthatbank": lengthatbank,
        "lengthatjob": lengthatjob,
        "ownhome": ownhome,
        "payfrequency": payfrequency,
        "monthsatresidence": monthsatresidence,
        "state": state,
        "zip": zip,
        "EventType": EventType,
        "DigitalIdConfidence": DigitalIdConfidence,
        "RiskRating": RiskRating,
        "TmxSummaryReasonCode": TmxSummaryReasonCode,
        "TrueIpGeo": TrueIpGeo,
        "Blob": Blob,
        "DeviceId": DeviceId,
        "FuzzyDeviceId": FuzzyDeviceId,
        "ReasonCode": ReasonCode,
    }

    # Convert dictionary to a single-row DataFrame
    combined_df = pd.DataFrame([data])
    combined_df.columns = combined_df.columns.str.lower()

    # Uncomment below for testing with Uprova batch data:
    # combined_df["educationlevel"] = None
    # combined_df["monthsatresidence"] = None
    # combined_df["ownhome"] = False
    # combined_df["lengthatbank"] = 0

    combined_df["application_email_address"] = combined_df["application_email_address"].str.lower()

    if Blob:
        combined_df["blob"] = combined_df["blob"].apply(sanitize_blob)

        # Step 2: Extract values using the expressions dictionary
        for column, expressions_list in expressions.items():

            def _extract_with_fallback(blob_obj):
                values = []
                for expr in expressions_list:
                    val = extract_value(blob_obj, expr)
                    # Retry without the "Blob." prefix for payloads stored
                    # without the wrapping key.
                    if val is None and isinstance(expr, str) and expr.startswith("Blob."):
                        val = extract_value(blob_obj, expr[len("Blob.") :])
                    values.append(val)
                return coalesce(*values)

            extracted = combined_df["blob"].apply(_extract_with_fallback)
            # Keep any existing column value where extraction came back empty.
            if column in combined_df.columns:
                combined_df[column] = extracted.where(extracted.notnull(), combined_df[column])
            else:
                combined_df[column] = extracted

        # logger.info("pre_flowx data")
        # logger.info(combined_df.iloc[0].drop("blob").to_dict())
    else:
        for column in expressions:
            combined_df[column] = None
        # logger.info("pre_flowx data")
        # logger.info(combined_df.iloc[0].to_dict())

    df_m1, df_m2, df_thx = pre_processing_all(combined_df)
    # logger.info("pre_processed data m1")
    # logger.info(df_m1.iloc[0].to_dict())
    # logger.info("pre_processed data m2")
    # logger.info(df_m2.iloc[0].to_dict())

    processed_m1, processed_m2, df_thx = processing_all(df_m1, df_m2, df_thx)
    # logger.info("processed_data m1")
    # logger.info(processed_m1.iloc[0].to_dict())
    # logger.info("processed_data m2")
    # logger.info(processed_m2.iloc[0].to_dict())

    result = post_processing_all(processed_m1, processed_m2, df_thx)

    # State check: state "ZZ" or zip "86445" forces the override scores below.
    state_value = combined_df["state"].iloc[0]
    zip_value = combined_df["zip"].iloc[0]
    if (pd.notnull(state_value) and state_value == "ZZ") or (
        pd.notnull(zip_value) and zip_value == "86445"
    ):
        result["hd_score_m1"] = 1250
        result["hd_score_m2"] = 1250
        result["hd_score_iso_m2"] = 1250
    # logger.info("post_processed_data after state check")
    # logger.info(result)

    # Normalize numeric scores to built-in float so JSON encoding (e.g. Temporal)
    # does not fail on NumPy scalar types like np.float32/np.float64.
    for key in ("hd_score_m1", "hd_score_m2", "hd_score_iso_m2"):
        if key in result and result[key] is not None:
            try:
                result[key] = float(result[key])
            except (TypeError, ValueError):
                logger.warning("Failed to cast %s=%r to float", key, result[key])
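
    # Illustrative failure mode (not exercised here): json.dumps(np.float32(1250.0))
    # raises TypeError because np.float32 is not a float subclass, while
    # json.dumps(float(np.float32(1250.0))) serializes cleanly.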

    logger.info("result: %s", result)
    return result
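

# A minimal, self-contained smoke test for the sanitization helpers. The
# payload is made up for illustration and is not a real application record;
# the `if __name__ == "__main__"` guard checks the module name, so it is
# unaffected by the function named __main__ above.
if __name__ == "__main__":
    demo = {"Blob": '?{"summary_risk_score": "42", "nested": "[1, 2]"}'}
    repaired = sanitize_blob(demo)
    logger.info("demo repair: %s", repaired)
    # Expected shape: {'Blob': {'summary_risk_score': '42', 'nested': [1, 2]}}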