From 88b296a25a46e5ce541b106c4eef2dfd4caebafe Mon Sep 17 00:00:00 2001
From: admin user <habemco_admin_user@centurion.com>
Date: Fri, 11 Jul 2025 15:10:24 +0000
Subject: [PATCH] Upload files to "/"

---
 block.py           | 18 +++++------
 post_processing.py | 75 ++++++++++++++++++++++------------------------
 2 files changed, 45 insertions(+), 48 deletions(-)

diff --git a/block.py b/block.py
index e92617b..151fe81 100644
--- a/block.py
+++ b/block.py
@@ -2,8 +2,7 @@ import logging
 import json
 import os
 from pre_processing import process_record
-from processing import processing
-from post_processing import post_processing, post_processing_duplicate
+from post_processing import post_processing
 
 # Configure logging
 logging.basicConfig(
@@ -44,6 +43,7 @@ def __main__(
     cluster_name = get_cluster_name(namespace)
     url_post = f"{base_url}/{cluster_name}/records"
     url_get  = f"{base_url}/{cluster_name}/record"
+    url_get_for_graph = f"{base_url}/{cluster_name}/graph"
 
     data = {
         "application_key": application_key,
@@ -65,15 +65,15 @@ def __main__(
     # post_process_output = post_processing(process_output, url_get)
     # is_duplicate = pre_process_output.get("record_id") in pre_process_output.get("connected_records", [])
     is_duplicate = str(pre_process_output.get("is_duplicate", "false")).strip().lower() == "true"
-    print(is_duplicate)
+    # print(is_duplicate)
 
     # Run the appropriate processing function
-    if is_duplicate:
-        process_output = processing(url_get, pre_process_output)
-        post_process_output = post_processing_duplicate(process_output, url_get)
-    else:
-        post_process_output = post_processing(pre_process_output, url_get)
-
+    # if is_duplicate:
+    #     process_output = processing(url_get, pre_process_output)
+    #     post_process_output = post_processing_duplicate(process_output, url_get)
+    # else:
+    #     post_process_output = post_processing(pre_process_output, url_get)
+    post_process_output = post_processing(pre_process_output, url_get, url_get_for_graph)
     # Conditionally override the keys if they are missing or None (or the string "null")
     current_app_key = post_process_output.get("application_key")
     if current_app_key is None:
diff --git a/post_processing.py b/post_processing.py
index 59fc1df..6f354a0 100644
--- a/post_processing.py
+++ b/post_processing.py
@@ -21,54 +21,51 @@ def fetch_data(url, record_id):
     except requests.RequestException as e:
         logger.error(f"Error fetching {record_id}: {e}")
         return None
+    
+def fetch_all_nodes_data(url, cluster_id):
+    try:
+        response = requests.get(f"{url}/{cluster_id}", headers={"Content-Type": "application/json"})
+        if response.status_code == 200:
+            output= response.json()  
+            return output  
+        else:
+            logger.error(f"Failed to fetch {cluster_id}: {response.status_code}")
+            return None
+    except requests.RequestException as e:
+        logger.error(f"Error fetching {cluster_id}: {e}")
+        return None
 
-def post_processing_duplicate(data, url):
-    """Main function to fetch and extract required fields."""
-    record_id = data.get('record_id')
-    connected_records = data.get('connected_records', [])
-
-    # Fetch main record data
-    record_data = fetch_data(url, record_id)
-    application_key = None
-    hd_score_m1 = None
-    if record_data:
-        application_key = record_data['data'].get('application_key')
-        hd_score_m1 = record_data['data'].get('hd_score_m1')
-
-    # Fetch application_key for connected records
-    connected_keys = [
-        fetch_data(url, rec_id)['data'].get('application_key')
-        for rec_id in connected_records
-        if fetch_data(url, rec_id) and fetch_data(url, rec_id)['data'].get('application_key')
-    ]
-
-    return {
-        "application_key": application_key,
-        "hd_score_m1": hd_score_m1,
-        "connected_application_keys": connected_keys
-    }
-
-
-
-def post_processing(data, url):
+def post_processing(data, url, url_get_for_graph):
     """Main function to fetch and extract required fields."""
     record_id = data.get('hd_key')
-    connected_records = data.get('matches', []) + [record_id]  # Matches + hd_key
+    # connected_records = data.get('matches', []) + [record_id]  # Matches + hd_key
+    cluster_id = data.get('cluster_id')
 
     # Fetch main record data
-    record_data = fetch_data(url, record_id)
     application_key = None
     hd_score_m1 = None
-    if record_data:
-        application_key = record_data['data'].get('application_key')
-        hd_score_m1 = record_data['data'].get('hd_score_m1')
+    if record_id:
+        record_data = fetch_data(url, record_id)
+        if record_data and 'data' in record_data:
+            application_key = record_data['data'].get('application_key')
+            hd_score_m1 = record_data['data'].get('hd_score_m1')
 
     # Fetch application_key for connected records
-    connected_keys = [
-        fetch_data(url, rec_id)['data'].get('application_key')
-        for rec_id in connected_records
-        if fetch_data(url, rec_id) and fetch_data(url, rec_id)['data'].get('application_key')
-    ]
+    connected_keys = []
+    if cluster_id:
+        response_obj = fetch_all_nodes_data(url_get_for_graph, cluster_id)
+        if response_obj and "nodes" in response_obj:
+            # Extract and filter application_keys
+            all_keys = [
+                node.get('data', {}).get('application_key')
+                for node in response_obj['nodes']
+                if node.get('data') and node['data'].get('application_key') is not None
+            ]
+            # Exclude the current application_key and limit to 500 entries
+            connected_keys = [
+                key for key in all_keys if key != application_key
+            ][:500]
+    
 
     return {
         "application_key": application_key,