diff --git a/block.py b/block.py
index d56597f..e92617b 100644
--- a/block.py
+++ b/block.py
@@ -1,4 +1,6 @@
 import logging
+import json
+import os
 from pre_processing import process_record
 from processing import processing
 from post_processing import post_processing, post_processing_duplicate
@@ -10,12 +12,33 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
-# API URLs
-# base_url = "http://localhost:8080/api/v1/clusters"
-base_url = "http://centurion-mlg.default.svc.cluster.local:8080/api/v1/clusters"
-cluster_name = "cluster_deviceid_email_fuzzydevice_direct_new"
-url_post = f"{base_url}/{cluster_name}/records"
-url_get = f"{base_url}/{cluster_name}/record"
+def load_schema(path: str):
+    """Read and return the parsed JSON document stored at ``path``."""
+    # Explicit encoding: JSON is UTF-8 regardless of the container's locale.
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+def get_cluster_name(namespace: str, config_path: str = "/app/config.json") -> str:
+    """
+    Return the clusterName configured for ``namespace``.
+
+    ``config_path`` must point to a JSON list of
+    { "namespace": "...", "clusterName": "..." } objects.
+
+    Raises ValueError when the namespace is absent or has no clusterName.
+    """
+    for item in load_schema(config_path):
+        if item.get("namespace") != namespace:
+            continue
+        cluster_name = item.get("clusterName")
+        if not cluster_name:
+            # Fail fast: a missing name would otherwise yield ".../None/records".
+            logger.error("Namespace '%s' has no clusterName in config.json.", namespace)
+            raise ValueError(f"Namespace '{namespace}' has no clusterName")
+        logger.info("Got the clusterName for namespace '%s'", namespace)
+        return cluster_name
+    logger.error("Provided Namespace '%s' not found in config.json.", namespace)
+    raise ValueError(f"Namespace '{namespace}' not found")
 
 def __main__(
     application_key: str,
@@ -26,6 +49,12 @@ def __main__(
     hd_score_m1: float
 ) -> dict:
 
+    namespace = os.getenv("NAMESPACE", "staging")
+    base_url = "http://centurion-mlg.default.svc.cluster.local:8080/api/v1/clusters"
+    cluster_name = get_cluster_name(namespace)
+    url_post = f"{base_url}/{cluster_name}/records"
+    url_get = f"{base_url}/{cluster_name}/record"
+
     data = {
         "application_key": application_key,
         "application_timestamp": application_timestamp,
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..2f4e5c2
--- /dev/null
+++ b/config.json
@@ -0,0 +1,10 @@
+[
+    {
+        "namespace": "staging",
+        "clusterName": "cluster_deviceid_email_fuzzydevice_direct_new"
+    },
+    {
+        "namespace": "production",
+        "clusterName": "cluster_deviceid_email_fuzzydevice_direct_new_prod"
+    }
+]
\ No newline at end of file
diff --git a/pre_processing.py b/pre_processing.py
index 925a025..2cf6541 100644
--- a/pre_processing.py
+++ b/pre_processing.py
@@ -20,6 +20,21 @@ def process_record(url, record):
         output = response.json()
         logger.info(f"Pre Processed record: {record['application_key']} - Response: {output}")
         return output
+    elif response.status_code == 404:
+        # Special-case handling for "CLUSTER_NOT_FOUND" (or any 404).
+        # Return a benign structure that includes 'hd_key' and 'matches'
+        # so post_processing does not fail when it does:
+        #   record_id = data.get('hd_key')
+        #   connected_records = data.get('matches', []) + [record_id]
+        logger.warning(
+            f"Ignoring 404 for record {record['application_key']}. "
+            f"Status: 404, Response: {response.text}"
+        )
+        return {
+            "application_key": record.get("application_key"),
+            "hd_key": None,
+            "matches": []
+        }
     else:
         logger.error(f"Failed to process record {record['application_key']}. Status: {response.status_code}, Response: {response.text}")
         return {"error": response.text}