blocks-transformer/post_processing.py

77 lines
2.8 KiB
Python
Raw Permalink Normal View History

2025-03-12 16:14:28 +00:00
import requests
import logging
# Set up root logging when this module is imported: INFO and above, each line
# prefixed with timestamp, level, and logger name. basicConfig() does nothing
# if the root logger already has handlers configured.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
    level=logging.INFO,
)

# Logger for this module, named after the module itself.
logger = logging.getLogger(__name__)
def fetch_data(url, record_id, timeout=30):
    """Fetch the JSON document for ``record_id`` from the API.

    Issues ``GET {url}/{record_id}`` and returns the decoded JSON body when
    the server answers with HTTP 200. Any other status code, a transport
    error, a timeout, or a body that is not valid JSON is logged at ERROR
    level and reported to the caller as ``None``.

    Args:
        url: Base endpoint URL; the record id is appended as a path segment.
        record_id: Identifier of the record to fetch.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Pass ``None`` to wait indefinitely (the
            behaviour of ``requests`` when no timeout is given).

    Returns:
        The parsed JSON payload, or ``None`` on any failure.
    """
    try:
        response = requests.get(
            f"{url}/{record_id}",
            headers={"Content-Type": "application/json"},
            # Without a timeout a stalled connection blocks the caller forever.
            timeout=timeout,
        )
        if response.status_code != 200:
            logger.error("Failed to fetch %s: %s", record_id, response.status_code)
            return None
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests releases whose
        # decode error is not a RequestException subclass.
        logger.error("Error fetching %s: %s", record_id, e)
        return None
2025-07-11 15:10:24 +00:00
def fetch_all_nodes_data(url, cluster_id, timeout=30):
    """Fetch the JSON document describing the cluster ``cluster_id``.

    Issues ``GET {url}/{cluster_id}`` and returns the decoded JSON body when
    the server answers with HTTP 200. Any other status code, a transport
    error, a timeout, or a body that is not valid JSON is logged at ERROR
    level and reported to the caller as ``None``.

    Args:
        url: Base endpoint URL; the cluster id is appended as a path segment.
        cluster_id: Identifier of the cluster to fetch.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Pass ``None`` to wait indefinitely (the
            behaviour of ``requests`` when no timeout is given).

    Returns:
        The parsed JSON payload, or ``None`` on any failure.
    """
    try:
        response = requests.get(
            f"{url}/{cluster_id}",
            headers={"Content-Type": "application/json"},
            # Without a timeout a stalled connection blocks the caller forever.
            timeout=timeout,
        )
        if response.status_code != 200:
            logger.error("Failed to fetch %s: %s", cluster_id, response.status_code)
            return None
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decode failures on requests releases whose
        # decode error is not a RequestException subclass.
        logger.error("Error fetching %s: %s", cluster_id, e)
        return None
2025-03-12 16:14:28 +00:00
2025-07-11 15:10:24 +00:00
def _connected_application_keys(nodes, own_key, limit):
    """Return up to ``limit`` application keys from ``nodes``, skipping ``own_key``.

    Nodes that are not dicts, that lack a dict ``data`` entry, or whose
    ``application_key`` is ``None`` are ignored. A ``nodes`` value that is not
    a list/tuple yields an empty list.
    """
    if not isinstance(nodes, (list, tuple)):
        return []

    keys = []
    for node in nodes:
        node_data = node.get('data') if isinstance(node, dict) else None
        if not isinstance(node_data, dict):
            continue
        key = node_data.get('application_key')
        if key is not None and key != own_key:
            keys.append(key)
    return keys[:limit]


def post_processing(data, url, url_get_for_graph, max_connected_keys=500):
    """Look up the main record and the application keys of its cluster.

    Reads ``hd_key`` and ``cluster_id`` from ``data``. When ``hd_key`` is
    truthy, the record is fetched with ``fetch_data`` and ``application_key``
    and ``hd_score_m1`` are taken from the ``data`` object of the response.
    When ``cluster_id`` is truthy, the cluster is fetched with
    ``fetch_all_nodes_data`` and the ``application_key`` of every entry in its
    ``nodes`` list is collected.

    Failed lookups and malformed responses (``data`` or ``nodes`` missing,
    null, or of an unexpected type) leave the corresponding fields at their
    defaults instead of raising.

    Args:
        data: Mapping providing the ``hd_key`` and ``cluster_id`` entries.
        url: Base URL used to fetch the main record.
        url_get_for_graph: Base URL used to fetch the cluster's nodes.
        max_connected_keys: Maximum number of *other* application keys to
            keep (``None`` keeps all of them). The record's own key is added
            on top of this limit.

    Returns:
        A dict with ``application_key``, ``hd_score_m1`` and
        ``connected_application_keys`` (a list).
    """
    record_id = data.get('hd_key')
    cluster_id = data.get('cluster_id')

    # Fetch main record data
    application_key = None
    hd_score_m1 = None
    if record_id:
        record_data = fetch_data(url, record_id)
        payload = record_data.get('data') if isinstance(record_data, dict) else None
        # A null or non-object "data" field is treated like a failed lookup.
        if isinstance(payload, dict):
            application_key = payload.get('application_key')
            hd_score_m1 = payload.get('hd_score_m1')

    # Fetch application_key for connected records
    connected_keys = []
    if cluster_id:
        response_obj = fetch_all_nodes_data(url_get_for_graph, cluster_id)
        nodes = response_obj.get('nodes') if isinstance(response_obj, dict) else None
        connected_keys = _connected_application_keys(
            nodes, application_key, max_connected_keys
        )

    # The record's own key goes last. It can never already be in the list
    # because the helper filters it out, so no membership check is needed.
    # NOTE(review): the own key is appended even when the cluster lookup is
    # skipped or fails - confirm this matches the intended nesting.
    if application_key is not None:
        connected_keys.append(application_key)

    return {
        "application_key": application_key,
        "hd_score_m1": hd_score_m1,
        "connected_application_keys": connected_keys,
    }