From 88b296a25a46e5ce541b106c4eef2dfd4caebafe Mon Sep 17 00:00:00 2001 From: admin user Date: Fri, 11 Jul 2025 15:10:24 +0000 Subject: [PATCH] Upload files to "/" --- block.py | 18 +++++------ post_processing.py | 75 ++++++++++++++++++++++------------------------ 2 files changed, 45 insertions(+), 48 deletions(-) diff --git a/block.py b/block.py index e92617b..151fe81 100644 --- a/block.py +++ b/block.py @@ -2,8 +2,7 @@ import logging import json import os from pre_processing import process_record -from processing import processing -from post_processing import post_processing, post_processing_duplicate +from post_processing import post_processing # Configure logging logging.basicConfig( @@ -44,6 +43,7 @@ def __main__( cluster_name = get_cluster_name(namespace) url_post = f"{base_url}/{cluster_name}/records" url_get = f"{base_url}/{cluster_name}/record" + url_get_for_graph = f"{base_url}/{cluster_name}/graph" data = { "application_key": application_key, @@ -65,15 +65,15 @@ def __main__( # post_process_output = post_processing(process_output, url_get) # is_duplicate = pre_process_output.get("record_id") in pre_process_output.get("connected_records", []) is_duplicate = str(pre_process_output.get("is_duplicate", "false")).strip().lower() == "true" - print(is_duplicate) + # print(is_duplicate) # Run the appropriate processing function - if is_duplicate: - process_output = processing(url_get, pre_process_output) - post_process_output = post_processing_duplicate(process_output, url_get) - else: - post_process_output = post_processing(pre_process_output, url_get) - + # if is_duplicate: + # process_output = processing(url_get, pre_process_output) + # post_process_output = post_processing_duplicate(process_output, url_get) + # else: + # post_process_output = post_processing(pre_process_output, url_get) + post_process_output = post_processing(pre_process_output, url_get, url_get_for_graph) # Conditionally override the keys if they are missing or None (or the string "null") current_app_key = post_process_output.get("application_key") if current_app_key is None: diff --git a/post_processing.py b/post_processing.py index 59fc1df..6f354a0 100644 --- a/post_processing.py +++ b/post_processing.py @@ -21,54 +21,51 @@ def fetch_data(url, record_id): except requests.RequestException as e: logger.error(f"Error fetching {record_id}: {e}") return None + +def fetch_all_nodes_data(url, cluster_id): + try: + response = requests.get(f"{url}/{cluster_id}", headers={"Content-Type": "application/json"}) + if response.status_code == 200: + output= response.json() + return output + else: + logger.error(f"Failed to fetch {cluster_id}: {response.status_code}") + return None + except requests.RequestException as e: + logger.error(f"Error fetching {cluster_id}: {e}") + return None -def post_processing_duplicate(data, url): - """Main function to fetch and extract required fields.""" - record_id = data.get('record_id') - connected_records = data.get('connected_records', []) - - # Fetch main record data - record_data = fetch_data(url, record_id) - application_key = None - hd_score_m1 = None - if record_data: - application_key = record_data['data'].get('application_key') - hd_score_m1 = record_data['data'].get('hd_score_m1') - - # Fetch application_key for connected records - connected_keys = [ - fetch_data(url, rec_id)['data'].get('application_key') - for rec_id in connected_records - if fetch_data(url, rec_id) and fetch_data(url, rec_id)['data'].get('application_key') - ] - - return { - "application_key": application_key, - "hd_score_m1": hd_score_m1, - "connected_application_keys": connected_keys - } - - - -def post_processing(data, url): +def post_processing(data, url, url_get_for_graph): """Main function to fetch and extract required fields.""" record_id = data.get('hd_key') - connected_records = data.get('matches', []) + [record_id] # Matches + hd_key + # connected_records = data.get('matches', []) + [record_id] # Matches + hd_key + cluster_id = data.get('cluster_id') # Fetch main record data - record_data = fetch_data(url, record_id) application_key = None hd_score_m1 = None - if record_data: - application_key = record_data['data'].get('application_key') - hd_score_m1 = record_data['data'].get('hd_score_m1') + if record_id: + record_data = fetch_data(url, record_id) + if record_data and 'data' in record_data: + application_key = record_data['data'].get('application_key') + hd_score_m1 = record_data['data'].get('hd_score_m1') # Fetch application_key for connected records - connected_keys = [ - fetch_data(url, rec_id)['data'].get('application_key') - for rec_id in connected_records - if fetch_data(url, rec_id) and fetch_data(url, rec_id)['data'].get('application_key') - ] + connected_keys = [] + if cluster_id: + response_obj = fetch_all_nodes_data(url_get_for_graph, cluster_id) + if response_obj and "nodes" in response_obj: + # Extract and filter application_keys + all_keys = [ + node.get('data', {}).get('application_key') + for node in response_obj['nodes'] + if node.get('data') and node['data'].get('application_key') is not None + ] + # Exclude the current application_key and limit to 500 entries + connected_keys = [ + key for key in all_keys if key != application_key + ][:500] + return { "application_key": application_key,