2025-03-12 16:18:58 +00:00
|
|
|
WITH abc AS (
|
2025-11-26 12:51:28 -05:00
|
|
|
SELECT DISTINCT ON (app.application_key)
|
2025-03-12 16:18:58 +00:00
|
|
|
app.application_key,
|
|
|
|
|
CAST(app.application_timestamp AS TIMESTAMP) AS application_timestamp,
|
2025-11-26 12:51:28 -05:00
|
|
|
CAST(app.application_date_of_birth AS DATE) AS application_date_of_birth,
|
2025-03-12 16:18:58 +00:00
|
|
|
LOWER(app.application_email_address) AS application_email_address,
|
|
|
|
|
app.application_is_rejected,
|
2025-11-26 12:51:28 -05:00
|
|
|
NULLIF(app.application_ssn, '') AS application_ssn,
|
2025-03-12 16:18:58 +00:00
|
|
|
NULLIF(app.application_bank_account_number, '') AS application_bank_account_number,
|
|
|
|
|
loans.originated_loan_is_fraud,
|
2025-11-26 12:51:28 -05:00
|
|
|
loans.originated_loan_is_never_paid,
|
|
|
|
|
CAST(CASE WHEN LOWER(TRIM(thx.digital_id_first_seen::text)) IN ('', 'none', 'null') THEN NULL ELSE thx.digital_id_first_seen::date END AS DATE) AS digital_id_first_seen,
|
|
|
|
|
CAST(NULLIF(NULLIF(LOWER(TRIM(thx.summary_risk_score::text)), ''), 'none') AS DOUBLE PRECISION) AS summary_risk_score,
|
|
|
|
|
CAST(NULLIF(NULLIF(LOWER(TRIM(thx.cpu_clock::text)), ''), 'none') AS DOUBLE PRECISION) AS cpu_clock,
|
|
|
|
|
CAST(NULLIF(NULLIF(thx.true_ip_first_seen::text, ''), 'None') AS DATE) AS true_ip_first_seen,
|
|
|
|
|
CAST(NULLIF(NULLIF(thx.ssn_hash_first_seen::text, ''),'None') AS DATE) AS ssn_hash_first_seen,
|
|
|
|
|
thx.account_email_attributes,
|
|
|
|
|
CAST(NULLIF(NULLIF(LOWER(TRIM(thx.tps_ip_latitude::text)), ''), 'none') AS DOUBLE PRECISION) AS tps_ip_latitude,
|
|
|
|
|
CAST(NULLIF(NULLIF(LOWER(TRIM(thx.tps_ip_longitude::text)), ''), 'none') AS DOUBLE PRECISION) AS tps_ip_longitude,
|
|
|
|
|
CAST(NULLIF(NULLIF(thx.account_telephone_first_seen::text, ''),'None') AS DATE) AS account_telephone_first_seen,
|
|
|
|
|
CAST(NULLIF(NULLIF(thx.account_login_first_seen::text, ''),'None') AS DATE) AS account_login_first_seen,
|
|
|
|
|
CAST(NULLIF(NULLIF(LOWER(TRIM(ref.latitute_ref::text)), ''), 'none') AS DOUBLE PRECISION) AS latitute_ref,
|
|
|
|
|
CAST(NULLIF(NULLIF(LOWER(TRIM(ref.longitude_ref::text)), ''), 'none') AS DOUBLE PRECISION) AS longitude_ref,
|
|
|
|
|
CAST(NULLIF(NULLIF(LOWER(TRIM(scores.hd_score_m2::text)), ''), 'none') AS DOUBLE PRECISION) AS hd_score_m2
|
|
|
|
|
-- CAST(NULLIF(NULLIF(LOWER(TRIM(scores.hd_score_iso_m2::text)), ''), 'none') AS DOUBLE PRECISION) AS hd_score_iso_m2
|
2025-03-12 16:18:58 +00:00
|
|
|
FROM public.applications AS app
|
|
|
|
|
LEFT JOIN public.bridge_funding_lifecycle AS master
|
|
|
|
|
ON app.application_key = master.application_key
|
|
|
|
|
LEFT JOIN public.originated_loan AS loans
|
|
|
|
|
ON master.originated_loan_key = loans.originated_loan_key
|
2025-11-26 12:51:28 -05:00
|
|
|
LEFT JOIN public.thx AS thx
|
|
|
|
|
ON app.application_key = thx.application_key
|
|
|
|
|
LEFT JOIN public.uprova AS up
|
|
|
|
|
ON app.application_key = up.application_key
|
|
|
|
|
LEFT JOIN public.latitute_longitute_reference AS ref
|
|
|
|
|
ON
|
|
|
|
|
/* safe ZIP join: strip non-digits, take first 5, cast to int on both sides */
|
|
|
|
|
CAST(SUBSTRING(REGEXP_REPLACE(up.zip::text, '[^0-9]', '', 'g') FOR 5) AS INTEGER)
|
|
|
|
|
=
|
|
|
|
|
CAST(SUBSTRING(REGEXP_REPLACE(ref.postal_code_ref::text, '[^0-9]', '', 'g') FOR 5) AS INTEGER)
|
|
|
|
|
LEFT JOIN public.scores AS scores
|
|
|
|
|
ON app.application_key = scores.application_key
|
2025-05-05 10:39:44 +00:00
|
|
|
WHERE app.application_key = $application_key
|
2025-11-26 12:51:28 -05:00
|
|
|
OR app.application_key = ANY(ARRAY$connected_application_keys::text[])
|
2025-05-05 10:39:44 +00:00
|
|
|
ORDER BY app.application_key, app.application_timestamp DESC
|
2025-03-12 16:18:58 +00:00
|
|
|
),
|
|
|
|
|
|
|
|
|
|
current_app AS (
|
|
|
|
|
SELECT
|
|
|
|
|
application_key AS current_app_key,
|
|
|
|
|
application_timestamp AS current_app_created_at,
|
2025-11-23 23:47:00 -05:00
|
|
|
$hd_score_m1 AS hd_score_m1,
|
2025-11-26 12:51:28 -05:00
|
|
|
$hd_score_iso_m2 AS hd_score_iso_m2,
|
|
|
|
|
hd_score_m2
|
2025-03-12 16:18:58 +00:00
|
|
|
FROM abc
|
2025-11-26 12:51:28 -05:00
|
|
|
WHERE application_key = $application_key
|
2025-03-12 16:18:58 +00:00
|
|
|
),
|
|
|
|
|
|
|
|
|
|
connected_apps AS (
|
|
|
|
|
SELECT
|
|
|
|
|
a.*,
|
|
|
|
|
a.application_key AS connected_app_key
|
|
|
|
|
FROM abc a
|
|
|
|
|
WHERE application_key = ANY(ARRAY$connected_application_keys::text[])
|
|
|
|
|
),
|
|
|
|
|
|
|
|
|
|
clusters_alt AS (
|
|
|
|
|
SELECT
|
|
|
|
|
a.*,
|
|
|
|
|
b.current_app_key,
|
|
|
|
|
b.current_app_created_at,
|
|
|
|
|
b.hd_score_m1,
|
2025-11-23 23:47:00 -05:00
|
|
|
b.hd_score_iso_m2,
|
2025-03-12 16:18:58 +00:00
|
|
|
CASE
|
|
|
|
|
WHEN LOWER(CAST(a.originated_loan_is_fraud AS TEXT)) != 'true'
|
|
|
|
|
AND a.originated_loan_is_never_paid IS NULL THEN NULL
|
|
|
|
|
WHEN ABS(EXTRACT(DAY FROM AGE(b.current_app_created_at, a.application_timestamp))) > 29
|
|
|
|
|
AND (LOWER(CAST(a.originated_loan_is_fraud AS TEXT)) = 'true'
|
|
|
|
|
OR LOWER(CAST(a.originated_loan_is_never_paid AS TEXT)) = 'true')
|
|
|
|
|
THEN 1
|
|
|
|
|
ELSE 0
|
|
|
|
|
END AS target_connected_30,
|
2025-11-26 12:51:28 -05:00
|
|
|
CONCAT(a.application_ssn, '_', a.application_bank_account_number) AS user_connected_v2,
|
2025-03-12 16:18:58 +00:00
|
|
|
a.application_timestamp AS connected_date,
|
|
|
|
|
a.application_email_address AS connected_email,
|
|
|
|
|
a.application_is_rejected AS application_is_rejected_connected
|
|
|
|
|
FROM connected_apps a
|
|
|
|
|
CROSS JOIN current_app b
|
|
|
|
|
),
|
|
|
|
|
|
|
|
|
|
full_apps_past AS (
|
|
|
|
|
SELECT
|
|
|
|
|
current_app_key,
|
|
|
|
|
SUM(target_connected_30) AS target_connected_30_sum,
|
|
|
|
|
COUNT(DISTINCT CASE
|
|
|
|
|
WHEN LOWER(CAST(application_is_rejected_connected AS TEXT)) = 'true'
|
|
|
|
|
THEN connected_app_key
|
|
|
|
|
ELSE NULL
|
|
|
|
|
END) AS rejected_app_count
|
|
|
|
|
FROM clusters_alt
|
|
|
|
|
WHERE current_app_key != connected_app_key
|
|
|
|
|
GROUP BY current_app_key
|
|
|
|
|
),
|
|
|
|
|
|
|
|
|
|
clusters AS (
|
|
|
|
|
SELECT
|
|
|
|
|
a.current_app_key AS application_key,
|
|
|
|
|
CAST(MAX(hd_score_m1) AS FLOAT) AS hd_score_m1,
|
2025-11-26 12:51:28 -05:00
|
|
|
CAST(MAX(hd_score_iso_m2) AS FLOAT) as hd_score_iso_m2,
|
2025-03-12 16:18:58 +00:00
|
|
|
COUNT(DISTINCT a.user_connected_v2) AS cluster_size_users_v2,
|
|
|
|
|
MAX(b.target_connected_30_sum) AS target_connected_30_sum,
|
|
|
|
|
COUNT(DISTINCT a.connected_email) AS email_cnt,
|
|
|
|
|
MAX(b.rejected_app_count) AS rejected_app_count,
|
|
|
|
|
COUNT(DISTINCT EXTRACT(DAY FROM DATE(a.connected_date))) AS app_dt_day_cnt,
|
|
|
|
|
COUNT(*) AS cluster_size
|
|
|
|
|
FROM clusters_alt a
|
|
|
|
|
LEFT JOIN full_apps_past b
|
|
|
|
|
ON a.current_app_key = b.current_app_key
|
|
|
|
|
GROUP BY 1
|
2025-11-26 12:51:28 -05:00
|
|
|
),
|
|
|
|
|
|
|
|
|
|
clusters_alt_g2 AS (
|
|
|
|
|
SELECT
|
|
|
|
|
b.current_app_key,
|
|
|
|
|
a.connected_app_key,
|
|
|
|
|
a.application_ssn,
|
|
|
|
|
a.application_bank_account_number,
|
|
|
|
|
a.application_is_rejected,
|
|
|
|
|
a.hd_score_m2 AS hd_score_m2_connected,
|
|
|
|
|
CASE
|
|
|
|
|
WHEN a.application_timestamp IS NULL OR a.application_date_of_birth IS NULL THEN NULL
|
|
|
|
|
ELSE DATE_PART('year', AGE(a.application_timestamp, a.application_date_of_birth))
|
|
|
|
|
END AS applicant_age_connected,
|
|
|
|
|
CASE
|
|
|
|
|
WHEN a.application_timestamp IS NULL OR a.account_telephone_first_seen IS NULL THEN NULL
|
|
|
|
|
ELSE (a.application_timestamp::date - a.account_telephone_first_seen::date)
|
|
|
|
|
END AS account_telephone_first_seen_age_connected,
|
|
|
|
|
CASE
|
|
|
|
|
WHEN a.application_timestamp IS NULL OR a.ssn_hash_first_seen IS NULL THEN NULL
|
|
|
|
|
ELSE (a.application_timestamp::date - a.ssn_hash_first_seen::date)
|
|
|
|
|
END AS ssn_hash_first_seen_age_connected,
|
|
|
|
|
CASE
|
|
|
|
|
WHEN a.application_timestamp IS NULL OR a.account_login_first_seen IS NULL THEN NULL
|
|
|
|
|
ELSE (a.application_timestamp::date - a.account_login_first_seen::date)
|
|
|
|
|
END AS account_login_first_seen_age_connected,
|
|
|
|
|
CASE
|
|
|
|
|
WHEN a.application_timestamp IS NULL OR a.digital_id_first_seen IS NULL THEN NULL
|
|
|
|
|
ELSE (a.application_timestamp::date - a.digital_id_first_seen::date)
|
|
|
|
|
END AS digital_id_first_seen_age_connected,
|
|
|
|
|
CASE
|
|
|
|
|
WHEN a.application_timestamp IS NULL OR a.true_ip_first_seen IS NULL THEN NULL
|
|
|
|
|
ELSE (a.application_timestamp::date - a.true_ip_first_seen::date)
|
|
|
|
|
END AS true_ip_first_seen_age_connected,
|
|
|
|
|
CASE
|
|
|
|
|
WHEN a.latitute_ref IS NULL OR a.longitude_ref IS NULL
|
|
|
|
|
OR a.tps_ip_latitude IS NULL OR a.tps_ip_longitude IS NULL
|
|
|
|
|
THEN NULL
|
|
|
|
|
ELSE 2 * 6371.0088 * ASIN(
|
|
|
|
|
LEAST(
|
|
|
|
|
1.0,
|
|
|
|
|
SQRT(
|
|
|
|
|
POWER(SIN(((a.tps_ip_latitude - a.latitute_ref) * 0.017453292519943295) / 2), 2)
|
|
|
|
|
+ COS(a.latitute_ref * 0.017453292519943295) * COS(a.tps_ip_latitude * 0.017453292519943295)
|
|
|
|
|
* POWER(SIN(((a.tps_ip_longitude - a.longitude_ref) * 0.017453292519943295) / 2), 2)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
END AS dist_em_ip_ref_km_connected,
|
|
|
|
|
CASE
|
|
|
|
|
WHEN LOWER(a.account_email_attributes) ILIKE '%challenged%' THEN 1
|
|
|
|
|
ELSE 0
|
|
|
|
|
END AS account_email_attributes_challenged_connected,
|
|
|
|
|
a.summary_risk_score AS summary_risk_score_connected,
|
|
|
|
|
a.cpu_clock AS cpu_clock_connected,
|
|
|
|
|
CONCAT(a.application_ssn, '_', a.application_bank_account_number) AS connected_user_id
|
|
|
|
|
FROM connected_apps a
|
|
|
|
|
CROSS JOIN current_app b
|
|
|
|
|
),
|
|
|
|
|
|
|
|
|
|
full_apps_past_g2 AS (
|
|
|
|
|
SELECT
|
|
|
|
|
current_app_key,
|
|
|
|
|
COUNT(DISTINCT connected_user_id) AS connected_user_count,
|
|
|
|
|
COUNT(DISTINCT CASE
|
|
|
|
|
WHEN LOWER(CAST(application_is_rejected AS TEXT)) = 'true'
|
|
|
|
|
THEN connected_user_id
|
|
|
|
|
END) AS rejected_app_count_g2,
|
|
|
|
|
MAX(hd_score_m2_connected) AS hd_score_m2_connected_max,
|
|
|
|
|
AVG(hd_score_m2_connected) AS hd_score_m2_connected_avg,
|
|
|
|
|
MAX(applicant_age_connected) AS applicant_age_connected_max,
|
|
|
|
|
AVG(applicant_age_connected) AS applicant_age_connected_avg,
|
|
|
|
|
MIN(account_telephone_first_seen_age_connected) AS account_tel_first_seen_min_conn,
|
|
|
|
|
MAX(account_telephone_first_seen_age_connected) AS account_tel_first_seen_max_conn,
|
|
|
|
|
AVG(account_telephone_first_seen_age_connected) AS account_tel_first_seen_avg_conn,
|
|
|
|
|
MIN(cpu_clock_connected) AS cpu_clock_min_conn,
|
|
|
|
|
MAX(cpu_clock_connected) AS cpu_clock_max_conn,
|
|
|
|
|
MIN(ssn_hash_first_seen_age_connected) AS ssn_hash_first_seen_min_conn,
|
|
|
|
|
AVG(ssn_hash_first_seen_age_connected) AS ssn_hash_first_seen_avg_conn,
|
|
|
|
|
MIN(account_login_first_seen_age_connected) AS account_login_first_seen_min_conn,
|
|
|
|
|
MAX(account_login_first_seen_age_connected) AS account_login_first_seen_max_conn,
|
|
|
|
|
STDDEV_SAMP(account_login_first_seen_age_connected) AS account_login_first_seen_stddev_conn,
|
|
|
|
|
MAX(digital_id_first_seen_age_connected) AS digital_id_first_seen_max_conn,
|
|
|
|
|
MIN(true_ip_first_seen_age_connected) AS true_ip_first_seen_min_conn,
|
|
|
|
|
MAX(true_ip_first_seen_age_connected) AS true_ip_first_seen_max_conn,
|
|
|
|
|
MIN(dist_em_ip_ref_km_connected) AS dist_em_ip_ref_km_min_conn,
|
|
|
|
|
MAX(summary_risk_score_connected) AS summary_risk_score_max_conn,
|
|
|
|
|
COUNT(DISTINCT CASE
|
|
|
|
|
WHEN account_email_attributes_challenged_connected = 1 THEN connected_user_id
|
|
|
|
|
END) AS acc_email_attr_challenged_1_conn
|
|
|
|
|
FROM clusters_alt_g2
|
|
|
|
|
WHERE current_app_key != connected_app_key
|
|
|
|
|
GROUP BY current_app_key
|
|
|
|
|
),
|
|
|
|
|
|
|
|
|
|
clusters_g2 AS (
|
|
|
|
|
SELECT
|
|
|
|
|
cur.current_app_key AS application_key,
|
|
|
|
|
cur.hd_score_m2 AS hd_score_m2,
|
|
|
|
|
agg.hd_score_m2_connected_avg,
|
|
|
|
|
agg.hd_score_m2_connected_max,
|
|
|
|
|
agg.applicant_age_connected_max,
|
|
|
|
|
agg.applicant_age_connected_avg,
|
|
|
|
|
agg.account_tel_first_seen_min_conn,
|
|
|
|
|
agg.account_tel_first_seen_max_conn,
|
|
|
|
|
agg.account_tel_first_seen_avg_conn,
|
|
|
|
|
agg.ssn_hash_first_seen_min_conn,
|
|
|
|
|
agg.ssn_hash_first_seen_avg_conn,
|
|
|
|
|
agg.account_login_first_seen_min_conn,
|
|
|
|
|
agg.account_login_first_seen_stddev_conn,
|
|
|
|
|
(agg.account_login_first_seen_max_conn - agg.account_login_first_seen_min_conn) AS account_login_first_seen_range_conn,
|
|
|
|
|
agg.digital_id_first_seen_max_conn,
|
|
|
|
|
agg.true_ip_first_seen_min_conn,
|
|
|
|
|
agg.true_ip_first_seen_max_conn,
|
|
|
|
|
(agg.cpu_clock_max_conn - agg.cpu_clock_min_conn) AS cpu_clock_range_conn,
|
|
|
|
|
agg.summary_risk_score_max_conn,
|
|
|
|
|
CASE
|
|
|
|
|
WHEN agg.connected_user_count IS NULL OR agg.connected_user_count = 0 THEN NULL
|
|
|
|
|
ELSE (agg.acc_email_attr_challenged_1_conn::DOUBLE PRECISION / agg.connected_user_count::DOUBLE PRECISION) * 100
|
|
|
|
|
END AS pct_acc_email_attr_challenged_1_conn,
|
|
|
|
|
agg.rejected_app_count_g2,
|
|
|
|
|
agg.dist_em_ip_ref_km_min_conn
|
|
|
|
|
FROM current_app cur
|
|
|
|
|
LEFT JOIN full_apps_past_g2 agg
|
|
|
|
|
ON cur.current_app_key = agg.current_app_key
|
2025-03-12 16:18:58 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
SELECT
|
2025-11-26 12:51:28 -05:00
|
|
|
COALESCE(MAX(c.hd_score_m1), $hd_score_m1) AS hd_score_m1,
|
|
|
|
|
COALESCE(MAX(c.hd_score_iso_m2), $hd_score_iso_m2) AS hd_score_iso_m2,
|
|
|
|
|
COALESCE(MAX(c.cluster_size_users_v2), 0) AS cluster_size_users_v2,
|
|
|
|
|
COALESCE(MAX(c.target_connected_30_sum), 0) AS target_connected_30_sum,
|
|
|
|
|
COALESCE(MAX(c.email_cnt), 0) AS email_cnt,
|
|
|
|
|
COALESCE(MAX(c.rejected_app_count), 0) AS rejected_app_count,
|
|
|
|
|
COALESCE(MAX(c.app_dt_day_cnt), 0) AS app_dt_day_cnt,
|
|
|
|
|
COALESCE(MAX(c.cluster_size), 0) AS cluster_size,
|
|
|
|
|
MAX(g2.hd_score_m2) AS hd_score_m2,
|
|
|
|
|
MAX(g2.hd_score_m2_connected_avg) AS hd_score_m2_connected_avg,
|
|
|
|
|
MAX(g2.rejected_app_count_g2) AS rejected_app_count_g2,
|
|
|
|
|
MAX(g2.summary_risk_score_max_conn) AS summary_risk_score_max_conn,
|
|
|
|
|
MAX(g2.cpu_clock_range_conn) AS cpu_clock_range_conn,
|
|
|
|
|
MAX(g2.ssn_hash_first_seen_avg_conn) AS ssn_hash_first_seen_avg_conn,
|
|
|
|
|
MAX(g2.account_tel_first_seen_avg_conn) AS account_tel_first_seen_avg_conn,
|
|
|
|
|
MAX(g2.account_tel_first_seen_min_conn) AS account_tel_first_seen_min_conn,
|
|
|
|
|
MAX(g2.account_login_first_seen_stddev_conn) AS account_login_first_seen_stddev_conn,
|
|
|
|
|
MAX(g2.account_login_first_seen_min_conn) AS account_login_first_seen_min_conn,
|
|
|
|
|
MAX(g2.ssn_hash_first_seen_min_conn) AS ssn_hash_first_seen_min_conn,
|
|
|
|
|
MAX(g2.account_tel_first_seen_max_conn) AS account_tel_first_seen_max_conn,
|
|
|
|
|
MAX(g2.applicant_age_connected_max) AS applicant_age_connected_max,
|
|
|
|
|
MAX(g2.applicant_age_connected_avg) AS applicant_age_connected_avg,
|
|
|
|
|
MAX(g2.digital_id_first_seen_max_conn) AS digital_id_first_seen_max_conn,
|
|
|
|
|
MAX(g2.account_login_first_seen_range_conn) AS account_login_first_seen_range_conn,
|
|
|
|
|
MAX(g2.true_ip_first_seen_min_conn) AS true_ip_first_seen_min_conn,
|
|
|
|
|
MAX(g2.true_ip_first_seen_max_conn) AS true_ip_first_seen_max_conn,
|
|
|
|
|
MAX(g2.hd_score_m2_connected_max) AS hd_score_m2_connected_max,
|
|
|
|
|
MAX(g2.pct_acc_email_attr_challenged_1_conn) AS pct_acc_email_attr_challenged_1_conn,
|
|
|
|
|
MAX(g2.dist_em_ip_ref_km_min_conn) AS dist_em_ip_ref_km_min_conn
|
|
|
|
|
FROM clusters c
|
|
|
|
|
LEFT JOIN clusters_g2 g2
|
|
|
|
|
ON c.application_key = g2.application_key;
|