From 26d3ab3a9e600d40d16643f20f3201c80addcdfa Mon Sep 17 00:00:00 2001 From: Ankur Malik Date: Wed, 20 May 2026 13:31:40 -0400 Subject: [PATCH] Add db lookup IP velocity context --- Reviewer notes (free-form text placed after the three-dash separator and before the diffstat; it is not part of the commit message). main.sql: the abc CTE now also selects app.application_customer_type and a trimmed, lower-cased up.zip with '' and 'none' mapped to NULL; current_app now carries application_ssn, application_customer_type, zip and the $input_ip_address, $input_ip_connection_type and $input_ip_isp parameters. The new CTE input_ip_velocity_population gathers SSN and zip from applications whose thx.input_ip_address equals the current input IP and whose timestamp lies in the 24 hours up to and including the current application's timestamp, then appends the current application's own SSN and zip via UNION ALL; both branches skip a NULL input IP and the placeholder values '', 'nan', 'null', 'none', 'n/a', '0.0.0.0' and 'unknown'. The new CTE input_ip_velocity counts distinct SSNs and distinct zips per current_app_key, ignoring NULLs and the placeholders '', 'nan', 'null', 'none' and 'n/a'. The final SELECT echoes the customer type and the three IP parameters and returns both counts, falling back to 0 when input_ip_velocity has no rows. request_schema.json: adds the nullable string properties input_ip_address, input_ip_connection_type and input_ip_isp; the existing connected_application_keys block and the trailing required/closing lines are removed and re-added, and apart from the comma added after the connected_application_keys block no textual difference is visible here (NOTE(review): presumably a whitespace or line-ending change; confirm). main.sql | 61 ++++++++++++++++++++++++++++++++++++++++++++- request_schema.json | 32 ++++++++++++++++-------- 2 files changed, 82 insertions(+), 11 deletions(-) diff --git a/main.sql b/main.sql index 7fdebe9..3bf0f20 100644 --- a/main.sql +++ b/main.sql @@ -5,6 +5,7 @@ WITH abc AS ( CAST(app.application_date_of_birth AS DATE) AS application_date_of_birth, LOWER(app.application_email_address) AS application_email_address, app.application_is_rejected, + app.application_customer_type, NULLIF(app.application_ssn, '') AS application_ssn, NULLIF(app.application_bank_account_number, '') AS application_bank_account_number, loans.originated_loan_is_fraud, @@ -19,6 +20,7 @@ WITH abc AS ( CAST(NULLIF(NULLIF(LOWER(TRIM(thx.tps_ip_longitude::text)), ''), 'none') AS DOUBLE PRECISION) AS tps_ip_longitude, CAST(NULLIF(NULLIF(thx.account_telephone_first_seen::text, ''),'None') AS DATE) AS account_telephone_first_seen, CAST(NULLIF(NULLIF(thx.account_login_first_seen::text, ''),'None') AS DATE) AS account_login_first_seen, + NULLIF(NULLIF(LOWER(TRIM(up.zip::text)), ''), 'none') AS zip, CAST(NULLIF(NULLIF(LOWER(TRIM(ref.latitute_ref::text)), ''), 'none') AS DOUBLE PRECISION) AS latitute_ref, CAST(NULLIF(NULLIF(LOWER(TRIM(ref.longitude_ref::text)), ''), 'none') AS DOUBLE PRECISION) AS longitude_ref, CAST(NULLIF(NULLIF(LOWER(TRIM(scores.hd_score_m2::text)), ''), 'none') AS DOUBLE PRECISION) AS hd_score_m2 @@ -49,6 +51,12 @@ current_app AS ( SELECT application_key AS current_app_key, application_timestamp AS current_app_created_at, + application_ssn, + application_customer_type, + zip, + $input_ip_address AS input_ip_address, + $input_ip_connection_type AS input_ip_connection_type, + $input_ip_isp AS input_ip_isp, $hd_score_m1 AS hd_score_m1, $hd_score_iso_m2 AS hd_score_iso_m2, hd_score_m2 @@ -242,9 +250,58 
@@ clusters_g2 AS ( FROM current_app cur LEFT JOIN full_apps_past_g2 agg ON cur.current_app_key = agg.current_app_key +), + +input_ip_velocity_population AS ( + SELECT + cur.current_app_key, + NULLIF(app.application_ssn, '') AS application_ssn, + NULLIF(NULLIF(LOWER(TRIM(up.zip::text)), ''), 'none') AS zip + FROM current_app cur + JOIN public.thx AS thx + ON thx.input_ip_address = cur.input_ip_address + JOIN public.applications AS app + ON app.application_key = thx.application_key + LEFT JOIN public.uprova AS up + ON up.application_key = app.application_key + WHERE cur.input_ip_address IS NOT NULL + AND LOWER(TRIM(cur.input_ip_address::text)) NOT IN ('', 'nan', 'null', 'none', 'n/a', '0.0.0.0', 'unknown') + AND CAST(app.application_timestamp AS TIMESTAMP) >= cur.current_app_created_at - INTERVAL '24 hours' + AND CAST(app.application_timestamp AS TIMESTAMP) <= cur.current_app_created_at + + UNION ALL + + SELECT + current_app_key, + application_ssn, + zip + FROM current_app + WHERE input_ip_address IS NOT NULL + AND LOWER(TRIM(input_ip_address::text)) NOT IN ('', 'nan', 'null', 'none', 'n/a', '0.0.0.0', 'unknown') +), + +input_ip_velocity AS ( + SELECT + current_app_key, + COUNT(DISTINCT CASE + WHEN LOWER(TRIM(application_ssn::text)) NOT IN ('', 'nan', 'null', 'none', 'n/a') + THEN application_ssn + ELSE NULL + END) AS input_ip_distinct_ssn_24h, + COUNT(DISTINCT CASE + WHEN LOWER(TRIM(zip::text)) NOT IN ('', 'nan', 'null', 'none', 'n/a') + THEN zip + ELSE NULL + END) AS input_ip_distinct_zip_24h + FROM input_ip_velocity_population + GROUP BY current_app_key ) SELECT + (SELECT MAX(application_customer_type) FROM current_app) AS application_customer_type, + $input_ip_address AS input_ip_address, + $input_ip_connection_type AS input_ip_connection_type, + $input_ip_isp AS input_ip_isp, COALESCE(MAX(c.hd_score_m1), $hd_score_m1) AS hd_score_m1, COALESCE(MAX(c.hd_score_iso_m2), $hd_score_iso_m2) AS hd_score_iso_m2, COALESCE(MAX(c.cluster_size_users_v2), 0) AS 
cluster_size_users_v2, @@ -273,7 +330,9 @@ SELECT MAX(g2.true_ip_first_seen_max_conn) AS true_ip_first_seen_max_conn, MAX(g2.hd_score_m2_connected_max) AS hd_score_m2_connected_max, MAX(g2.pct_acc_email_attr_challenged_1_conn) AS pct_acc_email_attr_challenged_1_conn, - MAX(g2.dist_em_ip_ref_km_min_conn) AS dist_em_ip_ref_km_min_conn + MAX(g2.dist_em_ip_ref_km_min_conn) AS dist_em_ip_ref_km_min_conn, + COALESCE((SELECT MAX(input_ip_distinct_ssn_24h) FROM input_ip_velocity), 0) AS input_ip_distinct_ssn_24h, + COALESCE((SELECT MAX(input_ip_distinct_zip_24h) FROM input_ip_velocity), 0) AS input_ip_distinct_zip_24h FROM clusters c LEFT JOIN clusters_g2 g2 ON c.application_key = g2.application_key; diff --git a/request_schema.json b/request_schema.json index 6d5982a..10972a1 100644 --- a/request_schema.json +++ b/request_schema.json @@ -14,13 +14,25 @@ "type": ["number", "null"], "description": "HD fraud score M2." }, - "connected_application_keys": { - "type": "array", - "items": { - "type": ["string", "null"] - }, - "description": "List of connected application keys associated with this application." - } - }, - "required": [] -} + "connected_application_keys": { + "type": "array", + "items": { + "type": ["string", "null"] + }, + "description": "List of connected application keys associated with this application." + }, + "input_ip_address": { + "type": ["string", "null"], + "description": "Current application input IP address from THX." + }, + "input_ip_connection_type": { + "type": ["string", "null"], + "description": "Current application THX input IP connection type." + }, + "input_ip_isp": { + "type": ["string", "null"], + "description": "Current application THX input IP ISP." + } + }, + "required": [] +}