From 98620e96c5969d969ebdcc27dfd22e376f3c6191 Mon Sep 17 00:00:00 2001 From: Muneesha Yadla Date: Tue, 18 Nov 2025 09:57:40 -0500 Subject: [PATCH 1/7] Add synthetic data generation to telemetry_chargeback role in FVT repo Generate synthetic data and write results to a json file Push json file to log directory so that during debug file is available for review Controller/ci-framework-data/tests/feature-verification-tests Do not overwrite synth data json if it already exists Using Gemini and Cursor Closes https://issues.redhat.com/browse/OSPRH-23746 --- ci/run_chargeback_tests.yml | 3 +- roles/telemetry_chargeback/defaults/main.yml | 10 ++ .../files/gen_synth_loki_data.py | 153 ++++++++++++++++++ .../files/loki_data_templ.j2 | 16 ++ .../tasks/gen_synth_loki_data.yml | 43 +++++ roles/telemetry_chargeback/tasks/main.yml | 3 + 6 files changed, 227 insertions(+), 1 deletion(-) create mode 100755 roles/telemetry_chargeback/files/gen_synth_loki_data.py create mode 100644 roles/telemetry_chargeback/files/loki_data_templ.j2 create mode 100644 roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml diff --git a/ci/run_chargeback_tests.yml b/ci/run_chargeback_tests.yml index 8bfd9bd09..bfa3a4bbb 100644 --- a/ci/run_chargeback_tests.yml +++ b/ci/run_chargeback_tests.yml @@ -1,12 +1,13 @@ --- - name: "Verify all the applicable projects, endpoints, pods & services for cloudkitty" hosts: "{{ cifmw_target_hook_host | default('localhost') }}" - gather_facts: no + gather_facts: true ignore_errors: true environment: KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" PATH: "{{ cifmw_path }}" vars_files: + - vars/common.yml - vars/osp18_env.yml vars: common_pod_status_str: "Running" diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 64f07b7a1..4ac2ee634 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -1,2 +1,12 @@ --- openstack_cmd: "openstack" + +output_file_local: "{{ 
role_path }}/files/loki_synth_data.json" +ck_py_script_path: "{{ role_path }}/files/gen_synth_loki_data.py" +ck_data_template_path: "{{ role_path }}/files/loki_data_templ.j2" +ck_days: 30 +ck_step: 300 + +# Output directory for test artifacts +tests_dir: "{{ ansible_env.HOME }}/ci-framework-data/tests" +logs_dir: "/home/zuul/ci-framework-data/tests/feature-verification-tests" diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py new file mode 100755 index 000000000..da07aa69d --- /dev/null +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -0,0 +1,153 @@ +import logging +import argparse +from datetime import datetime, timezone, timedelta +from pathlib import Path +from typing import Union +from jinja2 import Template + +# --- Configure logging with a default level that can be changed --- +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S+00:00' +) +logger = logging.getLogger() + +def _format_timestamp(epoch_seconds: float) -> str: + """ + Converts an epoch timestamp into a human-readable UTC string. + + Args: + epoch_seconds (float): The timestamp in seconds since the epoch. + + Returns: + str: The formatted datetime string (e.g., "2023-10-26T14:30:00+00:00"). + """ + try: + dt_object = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + return dt_object.isoformat() + except (ValueError, TypeError): + logger.warning(f"Invalid epoch value provided: {epoch_seconds}") + return "INVALID_TIMESTAMP" + +def generate_loki_data( + template_path: Path, + output_path: Path, + start_time: datetime, + end_time: datetime, + time_step_seconds: int +): + """ + Generates synthetic Loki log data by first preparing a data list + and then rendering it with a single template. + + Args: + template_path (Path): Path to the main log template file. + output_path (Path): Path for the generated output JSON file. 
+ start_time (datetime): The start time for data generation. + end_time (datetime): The end time for data generation. + time_step_seconds (int): The duration of each log entry in seconds. + """ + + # --- Step 1: Generate the data structure first --- + logger.info( + f"Generating data from {start_time.strftime('%Y-%m-%d')} to " + f"{end_time.strftime('%Y-%m-%d')} with a {time_step_seconds}s step." + ) + start_epoch = int(start_time.timestamp()) + end_epoch = int(end_time.timestamp()) + logger.debug(f"Time range in epoch seconds: {start_epoch} to {end_epoch}") + + log_data_list = [] # This list will hold all our data points + + # Loop through the time range and generate data points + for current_epoch in range(start_epoch, end_epoch, time_step_seconds): + end_of_step_epoch = current_epoch + time_step_seconds - 1 + + # Prepare replacement values + nanoseconds = int(current_epoch * 1_000_000_000) + start_str = _format_timestamp(current_epoch) + end_str = _format_timestamp(end_of_step_epoch) + + logger.debug(f"Processing epoch: {current_epoch} -> nanoseconds: {nanoseconds}") + + # Create a dictionary for this time step and add it to the list + log_data_list.append({ + "nanoseconds": nanoseconds, + "start_time": start_str, + "end_time": end_str + }) + + logger.info(f"Generated {len(log_data_list)} data points to be rendered.") + + # --- Step 2: Load template and render --- + try: + logger.info(f"Loading main template from: {template_path}") + template_content = template_path.read_text() + template = Template(template_content, trim_blocks=True, lstrip_blocks=True) + + except FileNotFoundError as e: + logger.error(f"Error loading template file: {e}. 
Aborting.") + raise # Re-raise the exception to be caught in main() + + # --- Render the template in one pass with all the data --- + logger.info("Rendering final output...") + # The template expects a variable named 'log_data' + final_output = template.render(log_data=log_data_list) + + # --- Step 3: Write the final string to the file --- + try: + with output_path.open('w') as f_out: + f_out.write(final_output) + logger.info(f"Successfully generated synthetic data to '{output_path}'") + except IOError as e: + logger.error(f"Failed to write to output file '{output_path}': {e}") + except Exception as e: + logger.error(f"An unexpected error occurred during file write: {e}") + +def main(): + """Main entry point for the script.""" + parser = argparse.ArgumentParser( + description="Generate synthetic Loki log data from a single main template.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + # --- Required File Path Arguments --- + parser.add_argument("-o", "--output", required=True, help="Path to the output file.") + # --- Only one template argument is needed now --- + parser.add_argument("--template", required=True, help="Path to the main log template file (e.g., loki_main.tmpl).") + + # --- Optional Generation Arguments --- + parser.add_argument("--days", type=int, default=30, help="How many days of data to generate, ending today.") + parser.add_argument("--step", type=int, default=300, help="Time step in seconds for each log entry.") + + # --- Optional Utility Arguments --- + parser.add_argument("--debug", action="store_true", help="Enable debug level logging for verbose output.") + + args = parser.parse_args() + + if args.debug: + logger.setLevel(logging.DEBUG) + logger.debug("Debug mode enabled.") + + # Define the time range for data generation + end_time_utc = datetime.now(timezone.utc) + start_time_utc = end_time_utc - timedelta(days=args.days) + logger.debug(f"Time range calculated: {start_time_utc} to {end_time_utc}") + + # Run the generator + 
try: + generate_loki_data( + template_path=Path(args.template), + output_path=Path(args.output), + start_time=start_time_utc, + end_time=end_time_utc, + time_step_seconds=args.step + ) + except FileNotFoundError: + logger.error("Process aborted because the template file was not found.") + except Exception as e: + logger.critical(f"A critical, unhandled error stopped the script: {e}") + + +if __name__ == "__main__": + main() diff --git a/roles/telemetry_chargeback/files/loki_data_templ.j2 b/roles/telemetry_chargeback/files/loki_data_templ.j2 new file mode 100644 index 000000000..7a676282e --- /dev/null +++ b/roles/telemetry_chargeback/files/loki_data_templ.j2 @@ -0,0 +1,16 @@ +{"streams": [{ "stream": { "service": "cloudkitty" }, "values": [ +{%- for item in log_data %} +[ +"{{ item.nanoseconds }}", +"{\"start\": \"{{ item.start_time }}\", \"end\": \"{{ item.end_time }}\", \"type\": \"ceilometer_image_size\", \"unit\": \"MiB\", \"description\": null, \"qty\": 20.6875, \"price\": 0.0206875, \"groupby\": {\"id\": \"cd65d30f-8b94-4fa3-95dc-e3b429f479b2\", \"project_id\": \"0030775de80e4d84a4fd0d73e0a1b3a7\", \"user_id\": null, \"week_of_the_year\": \"37\", \"day_of_the_year\": \"258\", \"month\": \"9\", \"year\": \"2025\"}, \"metadata\": {\"container_format\": \"bare\", \"disk_format\": \"qcow2\"}}" +], +[ +"{{ item.nanoseconds }}", +"{\"start\": \"{{ item.start_time }}\", \"end\": \"{{ item.end_time }}\", \"type\": \"instance\", \"unit\": \"instance\", \"description\": null, \"qty\": 1.0, \"price\": 0.3, \"groupby\": {\"id\": \"de168c31-ed44-4a1a-a079-51bd238a91d6\", \"project_id\": \"9cf5bcfc61a24682acc448af2d062ad2\", \"user_id\": \"c29ab6e886354bbd88ee9899e62d1d40\", \"week_of_the_year\": \"37\", \"day_of_the_year\": \"258\", \"month\": \"9\", \"year\": \"2025\"}, \"metadata\": {\"flavor_name\": \"m1.tiny\", \"flavor_id\": \"1\", \"vcpus\": \"\"}}" +] +{#- This logic adds a comma after every pair, *except* for the very last one. 
#} +{%- if not loop.last -%} +, +{%- endif -%} +{%- endfor %} +]}]} diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml new file mode 100644 index 000000000..64e91169b --- /dev/null +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -0,0 +1,43 @@ +- name: Define Synthetic Data Variables + ansible.builtin.set_fact: + output_file_remote: "{{ logs_dir }}/gen_loki_synth_data.log" + +- name: Check for preexisting output file + ansible.builtin.stat: + path: "{{ output_file_local }}" + register: file_preexists + +- name: TEST Generate Synthetic Data + ansible.builtin.command: + cmd: > + python3 "{{ ck_py_script_path }}" + --template "{{ ck_data_template_path }}" + -o "{{ output_file_local }}" + --days "{{ ck_days }}" + --step "{{ ck_step }}" + register: script_output + when: not file_preexists.stat.exists | bool + changed_when: script_output.rc == 0 + +- name: Read the content of the file + ansible.builtin.slurp: + src: "{{ output_file_local }}" + register: slurped_file + +- name: TEST Validate JSON format of syntheticc data file + ansible.builtin.assert: + that: + # This filter will trigger a task failure if the string isn't valid JSON + - slurped_file.content | b64decode | from_json is defined + fail_msg: "The file does not contain valid JSON format." + success_msg: "JSON format validated successfully." 
+ +- name: Print output_file_remote path + ansible.builtin.debug: + msg: "Sythetic data file: {{ output_file_remote }}" + +- name: Copy output file to remote host + ansible.builtin.copy: + src: "{{ output_file_local }}" + dest: "{{ output_file_remote }}" + mode: '0644' diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 969188b71..98a94b233 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -1,3 +1,6 @@ --- - name: "Validate Chargeback Feature" ansible.builtin.include_tasks: "chargeback_tests.yml" + +- name: "Generate Synthetic Data" + ansible.builtin.include_tasks: "gen_synth_loki_data.yml" From 31e85bf78494f826ac5a2209dec5cb4c93af729b Mon Sep 17 00:00:00 2001 From: ayefimov Date: Thu, 15 Jan 2026 15:44:50 -0500 Subject: [PATCH 2/7] Enhance telemetry_chargeback role with improved synthetic data handling Update the synthetic data generation process to ensure that existing JSON files are not overwritten. This change allows for better debugging by preserving previous data outputs. 
Closes https://issues.redhat.com/browse/OSPRH-23746 --- roles/telemetry_chargeback/defaults/main.yml | 10 +- .../files/gen_synth_loki_data.py | 302 +++++++++++++++--- .../files/loki_data_templ.j2 | 34 +- .../files/test_static.yml | 61 ++++ roles/telemetry_chargeback/meta/main.yml | 2 +- .../tasks/chargeback_tests.yml | 2 +- .../tasks/gen_synth_loki_data.yml | 19 +- 7 files changed, 364 insertions(+), 66 deletions(-) create mode 100644 roles/telemetry_chargeback/files/test_static.yml diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 4ac2ee634..3895bd967 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -1,12 +1,10 @@ --- openstack_cmd: "openstack" -output_file_local: "{{ role_path }}/files/loki_synth_data.json" -ck_py_script_path: "{{ role_path }}/files/gen_synth_loki_data.py" -ck_data_template_path: "{{ role_path }}/files/loki_data_templ.j2" -ck_days: 30 -ck_step: 300 +ck_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" +ck_data_template: "{{ role_path }}/files/loki_data_templ.j2" +ck_data_config: "{{ role_path }}/files/test_static.yml" +ck_output_file_local: "{{ role_path }}/files/loki_synth_data.json" # Output directory for test artifacts -tests_dir: "{{ ansible_env.HOME }}/ci-framework-data/tests" logs_dir: "/home/zuul/ci-framework-data/tests/feature-verification-tests" diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py index da07aa69d..1d8bcaf93 100755 --- a/roles/telemetry_chargeback/files/gen_synth_loki_data.py +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -1,9 +1,13 @@ +"""Generate synthetic Loki log data from a Jinja2 template.""" import logging import argparse +import json +import yaml from datetime import datetime, timezone, timedelta from pathlib import Path -from typing import Union -from jinja2 import Template +from typing 
import Dict, Any +from jinja2 import Environment + # --- Configure logging with a default level that can be changed --- logging.basicConfig( @@ -13,12 +17,14 @@ ) logger = logging.getLogger() -def _format_timestamp(epoch_seconds: float) -> str: + +def _format_timestamp(epoch_seconds: float, invalid_timestamp: str) -> str: """ - Converts an epoch timestamp into a human-readable UTC string. + Convert an epoch timestamp into a human-readable UTC string. Args: epoch_seconds (float): The timestamp in seconds since the epoch. + invalid_timestamp (str): String to return for invalid timestamps. Returns: str: The formatted datetime string (e.g., "2023-10-26T14:30:00+00:00"). @@ -28,18 +34,49 @@ def _format_timestamp(epoch_seconds: float) -> str: return dt_object.isoformat() except (ValueError, TypeError): logger.warning(f"Invalid epoch value provided: {epoch_seconds}") - return "INVALID_TIMESTAMP" + return invalid_timestamp + + +def load_config(config_path: Path) -> Dict[str, Any]: + """ + Load configuration from YAML file. + + Args: + config_path: Path to the config file. + + Returns: + Dict containing configuration values. + + Raises: + FileNotFoundError: If config file does not exist. + ValueError: If config file cannot be parsed. 
+ """ + if not config_path.exists(): + logger.error(f"Config file not found at {config_path}") + raise FileNotFoundError(f"Config file not found: {config_path}") + + try: + with config_path.open('r') as f: + config = yaml.safe_load(f) + logger.debug(f"Loaded config from {config_path}") + if not config: + raise ValueError(f"Config file {config_path} is empty") + return config + except yaml.YAMLError as e: + logger.error(f"Error parsing config file {config_path}: {e}") + raise ValueError(f"Error parsing config file: {e}") + def generate_loki_data( template_path: Path, output_path: Path, start_time: datetime, end_time: datetime, - time_step_seconds: int + time_step_seconds: int, + config: Dict[str, Any] ): """ - Generates synthetic Loki log data by first preparing a data list - and then rendering it with a single template. + Generate synthetic Loki log data by preparing a data list and rendering. Args: template_path (Path): Path to the main log template file. @@ -47,8 +84,12 @@ def generate_loki_data( start_time (datetime): The start time for data generation. end_time (datetime): The end time for data generation. time_step_seconds (int): The duration of each log entry in seconds. + config (Dict[str, Any]): Configuration dictionary loaded from file. 
""" - + # Get constants from config + constants = config.get("constants", {}) + invalid_timestamp = constants.get("invalid_timestamp", "INVALID_TIMESTAMP") + # --- Step 1: Generate the data structure first --- logger.info( f"Generating data from {start_time.strftime('%Y-%m-%d')} to " @@ -58,18 +99,25 @@ def generate_loki_data( end_epoch = int(end_time.timestamp()) logger.debug(f"Time range in epoch seconds: {start_epoch} to {end_epoch}") - log_data_list = [] # This list will hold all our data points + log_data_list = [] # This list will hold all our data points # Loop through the time range and generate data points - for current_epoch in range(start_epoch, end_epoch, time_step_seconds): - end_of_step_epoch = current_epoch + time_step_seconds - 1 + for current_epoch in range( + start_epoch, + end_epoch - time_step_seconds, + time_step_seconds + ): + end_of_step_epoch = min( + current_epoch + time_step_seconds - 1, end_epoch - 1) # Prepare replacement values nanoseconds = int(current_epoch * 1_000_000_000) - start_str = _format_timestamp(current_epoch) - end_str = _format_timestamp(end_of_step_epoch) + start_str = _format_timestamp(current_epoch, invalid_timestamp) + end_str = _format_timestamp(end_of_step_epoch, invalid_timestamp) - logger.debug(f"Processing epoch: {current_epoch} -> nanoseconds: {nanoseconds}") + logger.debug( + f"Processing epoch: {current_epoch} -> nanoseconds: {nanoseconds}" + ) # Create a dictionary for this time step and add it to the list log_data_list.append({ @@ -78,73 +126,249 @@ def generate_loki_data( "end_time": end_str }) + # Add final entry that ends at end_epoch (current time) + if log_data_list and end_epoch > start_epoch: + # Calculate start of final entry based on end of last generated entry + last_entry_end = log_data_list[-1]["end_time"] + # Parse the last entry's end time to get the epoch + last_end_dt = datetime.fromisoformat(last_entry_end) + final_start_epoch = int(last_end_dt.timestamp()) + 1 + final_nanoseconds = 
int(final_start_epoch * 1_000_000_000) + + # Only add if the final entry would have a valid duration + if final_start_epoch < end_epoch: + log_data_list.append({ + "nanoseconds": final_nanoseconds, + "start_time": _format_timestamp( + final_start_epoch, invalid_timestamp + ), + "end_time": _format_timestamp(end_epoch - 1, invalid_timestamp) + }) + logger.info(f"Generated {len(log_data_list)} data points to be rendered.") - # --- Step 2: Load template and render --- + # --- Step 2: Load log type configurations from config --- + log_types_config = config.get("log_types", []) + if not log_types_config: + logger.error("No log_types configuration found in config.") + raise ValueError("log_types section is required in config") + + if not isinstance(log_types_config, list): + logger.error("log_types must be a list in config") + raise ValueError("log_types must be a list") + + # Get required fields from config + required_fields = config.get("required_fields", []) + if not required_fields: + logger.error("No required_fields configuration found in config") + raise ValueError("required_fields section is required in config") + + # Get date field names from config + date_field_names = config.get("date_fields", []) + if not date_field_names: + logger.error("No date_fields configuration found in config") + raise ValueError("date_fields section is required in config") + + # Build log_types dictionary from config + log_types = {} + for log_type_config in log_types_config: + if not isinstance(log_type_config, dict): + logger.error(f"Invalid log type configuration: {log_type_config}") + raise ValueError("Each log type in log_types must be a dictionary") + + log_type_name = log_type_config.get("name") + if not log_type_name: + logger.error("Each log type must have a 'name' field") + raise ValueError("Each log type must have a 'name' field") + + # Validate required fields + missing = [f for f in required_fields if f not in log_type_config] + if missing: + logger.error( + f"Missing 
required fields in {log_type_name} config: {missing}" + ) + raise ValueError( + f"Missing required fields in {log_type_name}: {missing}" + ) + + # Build groupby from config + groupby = log_type_config.get("groupby", {}) + if not isinstance(groupby, dict): + logger.error( + f"groupby must be a dictionary for {log_type_name}" + ) + raise ValueError( + f"groupby must be a dictionary for {log_type_name}" + ) + + log_types[log_type_name] = { + "type": log_type_config["type"], + "unit": log_type_config["unit"], + "description": log_type_config.get("description"), + "qty": log_type_config["qty"], + "price": log_type_config["price"], + "groupby": groupby.copy(), + "metadata": log_type_config.get("metadata", {}) + } + + # --- Step 3: Load template and render --- try: logger.info(f"Loading main template from: {template_path}") template_content = template_path.read_text() - template = Template(template_content, trim_blocks=True, lstrip_blocks=True) + + # Create Jinja2 environment with custom filter + def tojson_preserve_order(obj): + """Convert object to JSON string preserving dictionary order.""" + return json.dumps(obj, sort_keys=False, ensure_ascii=False) + + env = Environment(trim_blocks=True, lstrip_blocks=True) + env.filters['tojson'] = tojson_preserve_order + template = env.from_string(template_content) except FileNotFoundError as e: logger.error(f"Error loading template file: {e}. 
Aborting.") - raise # Re-raise the exception to be caught in main() + raise # --- Render the template in one pass with all the data --- logger.info("Rendering final output...") - # The template expects a variable named 'log_data' - final_output = template.render(log_data=log_data_list) - - # --- Step 3: Write the final string to the file --- + + # Pre-calculate log types with date fields for each time step + log_types_list = [] + for idx, item in enumerate(log_data_list): + # For the last entry, use end_time to ensure it shows today's date + if idx == len(log_data_list) - 1: + dt = end_time + else: + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + + iso_year, iso_week, _ = dt.isocalendar() + day_of_year = dt.timetuple().tm_yday + + # Build date fields dynamically from config + date_field_mapping = { + "week_of_the_year": str(iso_week), + "day_of_the_year": str(day_of_year), + "month": str(dt.month), + "year": str(dt.year) + } + + date_fields = {} + for field_name in date_field_names: + if field_name in date_field_mapping: + date_fields[field_name] = date_field_mapping[field_name] + else: + logger.warning( + f"Unknown date field name in config: {field_name}" + ) + + # Create log types with date fields for this time step + log_types_with_dates = {} + for log_type_name, log_type_data in log_types.items(): + log_type_with_dates = log_type_data.copy() + log_type_with_dates["groupby"] = log_type_data["groupby"].copy() + log_type_with_dates["groupby"].update(date_fields) + log_types_with_dates[log_type_name] = log_type_with_dates + + log_types_list.append(log_types_with_dates) + + # Get loki_stream configuration + loki_stream = config.get("loki_stream", {}) + if not loki_stream: + logger.warning("No loki_stream configuration found, using defaults") + loki_stream = {"service": "cloudkitty"} + + # Build template context with generic log type information + template_context = { + "log_data": log_data_list, + 
"log_type_names": list(log_types.keys()), + "all_log_entries": log_types_list, + "loki_stream": loki_stream + } + + final_output = template.render(**template_context) + + # --- Step 4: Write the final string to the file --- try: with output_path.open('w') as f_out: f_out.write(final_output) - logger.info(f"Successfully generated synthetic data to '{output_path}'") + logger.info( + f"Successfully generated synthetic data to '{output_path}'" + ) except IOError as e: logger.error(f"Failed to write to output file '{output_path}': {e}") except Exception as e: logger.error(f"An unexpected error occurred during file write: {e}") + def main(): """Main entry point for the script.""" parser = argparse.ArgumentParser( - description="Generate synthetic Loki log data from a single main template.", + description="Generate synthetic Loki log data from a main template.", formatter_class=argparse.ArgumentDefaultsHelpFormatter ) - # --- Required File Path Arguments --- - parser.add_argument("-o", "--output", required=True, help="Path to the output file.") - # --- Only one template argument is needed now --- - parser.add_argument("--template", required=True, help="Path to the main log template file (e.g., loki_main.tmpl).") - - # --- Optional Generation Arguments --- - parser.add_argument("--days", type=int, default=30, help="How many days of data to generate, ending today.") - parser.add_argument("--step", type=int, default=300, help="Time step in seconds for each log entry.") - - # --- Optional Utility Arguments --- - parser.add_argument("--debug", action="store_true", help="Enable debug level logging for verbose output.") + # --- Required Arguments --- + parser.add_argument( + "--tmpl", + required=True, + help="Path to the main log template file." + ) + parser.add_argument( + "-t", "--test", + type=Path, + required=True, + help="Path to YAML config file (e.g., scenario.yml)." + ) + parser.add_argument( + "-o", "--output", + required=True, + help="Path to the output file." 
+ ) + + # --- Optional Utility Arguments --- + parser.add_argument( + "--debug", + action="store_true", + help="Enable debug level logging for verbose output." + ) args = parser.parse_args() if args.debug: logger.setLevel(logging.DEBUG) logger.debug("Debug mode enabled.") + # Load config first to get generation parameters + try: + config = load_config(args.test) + except (FileNotFoundError, ValueError) as e: + logger.critical(f"Failed to load config: {e}") + return + + # Get generation parameters from config + generation_config = config.get("generation", {}) + days = generation_config.get("days", 30) + step_seconds = generation_config.get("step_seconds", 300) + # Define the time range for data generation end_time_utc = datetime.now(timezone.utc) - start_time_utc = end_time_utc - timedelta(days=args.days) + start_time_utc = end_time_utc - timedelta(days=days) logger.debug(f"Time range calculated: {start_time_utc} to {end_time_utc}") # Run the generator try: generate_loki_data( - template_path=Path(args.template), + template_path=Path(args.tmpl), output_path=Path(args.output), start_time=start_time_utc, end_time=end_time_utc, - time_step_seconds=args.step + time_step_seconds=step_seconds, + config=config ) except FileNotFoundError: - logger.error("Process aborted because the template file was not found.") + logger.error( + "Process aborted because the template file was not found." 
+ ) except Exception as e: logger.critical(f"A critical, unhandled error stopped the script: {e}") diff --git a/roles/telemetry_chargeback/files/loki_data_templ.j2 b/roles/telemetry_chargeback/files/loki_data_templ.j2 index 7a676282e..e3c18e9e7 100644 --- a/roles/telemetry_chargeback/files/loki_data_templ.j2 +++ b/roles/telemetry_chargeback/files/loki_data_templ.j2 @@ -1,16 +1,32 @@ -{"streams": [{ "stream": { "service": "cloudkitty" }, "values": [ +{"streams": [{ "stream": { "service": "{{ loki_stream.service }}" }, "values": [ {%- for item in log_data %} +{%- set outer_idx = loop.index0 %} +{%- set is_last_outer = loop.last %} +{%- for log_type_name in log_type_names %} +{%- set entry_data = all_log_entries[outer_idx][log_type_name] -%} +{%- set entry = { + "start": item.start_time, + "end": item.end_time, + "type": entry_data.type, + "unit": entry_data.unit, + "description": entry_data.description, + "qty": entry_data.qty, + "price": entry_data.price, + "groupby": entry_data.groupby, + "metadata": entry_data.metadata +} -%} +{%- if loop.first %} [ "{{ item.nanoseconds }}", -"{\"start\": \"{{ item.start_time }}\", \"end\": \"{{ item.end_time }}\", \"type\": \"ceilometer_image_size\", \"unit\": \"MiB\", \"description\": null, \"qty\": 20.6875, \"price\": 0.0206875, \"groupby\": {\"id\": \"cd65d30f-8b94-4fa3-95dc-e3b429f479b2\", \"project_id\": \"0030775de80e4d84a4fd0d73e0a1b3a7\", \"user_id\": null, \"week_of_the_year\": \"37\", \"day_of_the_year\": \"258\", \"month\": \"9\", \"year\": \"2025\"}, \"metadata\": {\"container_format\": \"bare\", \"disk_format\": \"qcow2\"}}" -], +{{ entry | tojson | tojson }} +] +{%- else %} [ -"{{ item.nanoseconds }}", -"{\"start\": \"{{ item.start_time }}\", \"end\": \"{{ item.end_time }}\", \"type\": \"instance\", \"unit\": \"instance\", \"description\": null, \"qty\": 1.0, \"price\": 0.3, \"groupby\": {\"id\": \"de168c31-ed44-4a1a-a079-51bd238a91d6\", \"project_id\": \"9cf5bcfc61a24682acc448af2d062ad2\", \"user_id\": 
\"c29ab6e886354bbd88ee9899e62d1d40\", \"week_of_the_year\": \"37\", \"day_of_the_year\": \"258\", \"month\": \"9\", \"year\": \"2025\"}, \"metadata\": {\"flavor_name\": \"m1.tiny\", \"flavor_id\": \"1\", \"vcpus\": \"\"}}" +{{ entry | tojson | tojson }} ] -{#- This logic adds a comma after every pair, *except* for the very last one. #} -{%- if not loop.last -%} -, -{%- endif -%} +{%- endif %} +{%- if not (loop.last and is_last_outer) %}, +{% endif -%} +{%- endfor %} {%- endfor %} ]}]} diff --git a/roles/telemetry_chargeback/files/test_static.yml b/roles/telemetry_chargeback/files/test_static.yml new file mode 100644 index 000000000..733b7581b --- /dev/null +++ b/roles/telemetry_chargeback/files/test_static.yml @@ -0,0 +1,61 @@ +# Scenario configuration for synthetic Loki log data generation + +# Time range configuration +generation: + days: 5 + step_seconds: 300 + +# Log type definitions +log_types: + - name: ceilometer_image_size + type: ceilometer_image_size + unit: MiB + description: null + qty: 20.6 + price: 0.02 + groupby: + id: cd65d30f-8b94-4fa3-95dc-e3b429f479b2 + project_id: 0030775de80e4d84a4fd0d73e0a1b3a7 + user_id: null + metadata: + container_format: bare + disk_format: qcow2 + + - name: instance + type: instance + unit: instance + description: null + qty: 1.0 + price: 0.3 + groupby: + id: de168c31-ed44-4a1a-a079-51bd238a91d6 + project_id: 9cf5bcfc61a24682acc448af2d062ad2 + user_id: c29ab6e886354bbd88ee9899e62d1d40 + metadata: + flavor_name: m1.tiny + flavor_id: "1" + vcpus: "" + +# Required fields for validation (top-level fields only, not nested in groupby) +required_fields: + - type + - unit + - qty + - price + - groupby + - metadata + +# Date field names to add to groupby +date_fields: + - week_of_the_year + - day_of_the_year + - month + - year + +# Loki stream configuration +loki_stream: + service: cloudkitty + +# Error messages and constants +constants: + invalid_timestamp: "INVALID_TIMESTAMP" diff --git a/roles/telemetry_chargeback/meta/main.yml 
b/roles/telemetry_chargeback/meta/main.yml index 8c63de8fa..ddb049119 100644 --- a/roles/telemetry_chargeback/meta/main.yml +++ b/roles/telemetry_chargeback/meta/main.yml @@ -6,7 +6,7 @@ galaxy_info: license: Apache-2.0 - min_ansible_version: "2.1" + min_ansible_version: "2.9" galaxy_tags: [] diff --git a/roles/telemetry_chargeback/tasks/chargeback_tests.yml b/roles/telemetry_chargeback/tasks/chargeback_tests.yml index cda17be17..b121019c9 100644 --- a/roles/telemetry_chargeback/tasks/chargeback_tests.yml +++ b/roles/telemetry_chargeback/tasks/chargeback_tests.yml @@ -17,7 +17,7 @@ cmd: "{{ openstack_cmd }} rating module set priority hashmap 100" register: set_hashmap_priority when: get_hashmap_priority.stdout | trim != '100' - failed_when: set_hashmap_priority.rc >= 1 or get_hashmap_priority.stdout == "" + failed_when: (set_hashmap_priority.rc | default(0)) >= 1 or get_hashmap_priority.stdout == "" changed_when: True - name: Get status of all CloudKitty rating modules diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml index 64e91169b..ae467dd9f 100644 --- a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -4,27 +4,26 @@ - name: Check for preexisting output file ansible.builtin.stat: - path: "{{ output_file_local }}" + path: "{{ ck_output_file_local }}" register: file_preexists - name: TEST Generate Synthetic Data ansible.builtin.command: cmd: > - python3 "{{ ck_py_script_path }}" - --template "{{ ck_data_template_path }}" - -o "{{ output_file_local }}" - --days "{{ ck_days }}" - --step "{{ ck_step }}" + python3 "{{ ck_synth_script }}" + --tmpl "{{ ck_data_template }}" + -t "{{ ck_data_config }}" + -o "{{ ck_output_file_local }}" register: script_output when: not file_preexists.stat.exists | bool changed_when: script_output.rc == 0 - name: Read the content of the file ansible.builtin.slurp: - src: "{{ 
output_file_local }}" + src: "{{ ck_output_file_local }}" register: slurped_file -- name: TEST Validate JSON format of syntheticc data file +- name: TEST Validate JSON format of synthetic data file ansible.builtin.assert: that: # This filter will trigger a task failure if the string isn't valid JSON @@ -34,10 +33,10 @@ - name: Print output_file_remote path ansible.builtin.debug: - msg: "Sythetic data file: {{ output_file_remote }}" + msg: "Synthetic data file: {{ output_file_remote }}" - name: Copy output file to remote host ansible.builtin.copy: - src: "{{ output_file_local }}" + src: "{{ ck_output_file_local }}" dest: "{{ output_file_remote }}" mode: '0644' From 09fd308902d434be445f9d2e953c1caf21b7c243 Mon Sep 17 00:00:00 2001 From: Muneesha Yadla Date: Fri, 23 Jan 2026 10:47:49 -0500 Subject: [PATCH 3/7] Create a playbook that pushes CK data file to loki storage --- roles/telemetry_chargeback/defaults/main.yml | 16 +++- .../tasks/ingest_loki_data.yml | 74 +++++++++++++++++++ roles/telemetry_chargeback/tasks/main.yml | 3 + 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 roles/telemetry_chargeback/tasks/ingest_loki_data.yml diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 3895bd967..ad1996a73 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -7,4 +7,18 @@ ck_data_config: "{{ role_path }}/files/test_static.yml" ck_output_file_local: "{{ role_path }}/files/loki_synth_data.json" # Output directory for test artifacts -logs_dir: "/home/zuul/ci-framework-data/tests/feature-verification-tests" +logs_dir: "/home/zuul/ci-framework-data/tests/feature-verification-tests" + +# loki url's +loki_base_url: "https://cloudkitty-lokistack-openstack.apps-crc.testing" +loki_push_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/push" +loki_query_url: "{{ loki_base_url }}/api/logs/v1/cloudkitty/loki/api/v1/query_range" + +# 
Cloudkitty certificates +cert_secret_name: "cert-cloudkitty-client-internal" +cert_dir: "{{ ansible_user_dir }}/ck-certs" + +# Test log files: +source_log_file: "{{ logs_dir }}/gen_loki_synth_data.log" +temp_log_file: "{{ logs_dir }}/temp_log_updated_ts.json" +output_log_file: "{{ logs_dir }}/retrieve_loki_op.json" diff --git a/roles/telemetry_chargeback/tasks/ingest_loki_data.yml b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml new file mode 100644 index 000000000..e38a0e9a4 --- /dev/null +++ b/roles/telemetry_chargeback/tasks/ingest_loki_data.yml @@ -0,0 +1,74 @@ +--- +# Ingest data log to Loki that is generated from gen_synth_loki_data.yml" + +# Create a directory to add certs +- name: Create certificates directory + ansible.builtin.file: + path: "{{ cert_dir }}" + state: directory + mode: '0755' + +# Extract certificates +- name: Extract certificates from openshift secret + ansible.builtin.command: + cmd: > + oc extract secret/{{ cert_secret_name }} + --to={{ cert_dir }} + --confirm + changed_when: true + +# Copy file +- name: Copy source log file to temp location + ansible.builtin.copy: + src: "{{ source_log_file }}" + dest: "{{ temp_log_file }}" + mode: '0644' + +# Get time in nanoseconds +- name: Get Current time in Nanoseconds + ansible.builtin.set_fact: + current_nano: "{{ lookup('pipe', 'date +%s%N') }}" + +# Update time to current +- name: Update timestamp in the temp log file + ansible.builtin.replace: + path: "{{ temp_log_file }}" + regexp: '[0-9]{19}' + replace: "{{ current_nano }}" + +# Push the json format data log to loki +- name: Ingest data log to Loki via API + block: + - name: Push data to Loki + ansible.builtin.uri: + url: "{{ loki_push_url }}" + method: POST + body: "{{ lookup('file', temp_log_file) }}" + body_format: json + client_cert: "{{ cert_dir }}/tls.crt" + client_key: "{{ cert_dir }}/tls.key" + validate_certs: false + status_code: 204 + return_content: yes + register: loki_response + + # Success + - name: Confirm Success + 
ansible.builtin.debug: + msg: "Ingestion Successful! Timestamp used: {{ current_nano }}" + + rescue: + # Failure + - name: Report Ingestion Failure + ansible.builtin.fail: + msg: > + Ingestion Failed. + Status: {{ loki_response.status | default('Unknown') }} + Response: {{ loki_response.msg | default('No response') }} + + always: + # Cleanup temp file + - name: Clean up temp log file + ansible.builtin.file: + path: "{{ temp_log_file }}" + state: absent diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 98a94b233..f0b240e90 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -4,3 +4,6 @@ - name: "Generate Synthetic Data" ansible.builtin.include_tasks: "gen_synth_loki_data.yml" + +- name: "Ingest Data log to loki" + ansible.builtin.include_tasks: "ingest_loki_data.yml" \ No newline at end of file From 6ee107b7919d1bb47dc0e502829c7e95f25055eb Mon Sep 17 00:00:00 2001 From: Muneesha Yadla Date: Tue, 18 Nov 2025 09:57:40 -0500 Subject: [PATCH 4/7] Add synthetic date generation to telemetry_chargeback role in FVT repo Generate synthetic data and write results to a json file Push json file to log directory so that during debug file is available for review Controller/ci-framework-data/tests/feature-verification-tests Do not overwrite syth data json if it already exists Using Gemini and Cursor Closes https://issues.redhat.com/browse/OSPRH-23746 Update the synthetic data generation process to ensure that existing JSON files are not overwritten. This change allows for better debugging by preserving previous data outputs. 
Closes https://issues.redhat.com/browse/OSPRH-23746 --- ci/run_chargeback_tests.yml | 3 +- roles/telemetry_chargeback/defaults/main.yml | 8 + .../files/gen_synth_loki_data.py | 377 ++++++++++++++++++ .../files/loki_data_templ.j2 | 32 ++ .../files/test_static.yml | 61 +++ roles/telemetry_chargeback/meta/main.yml | 2 +- .../tasks/chargeback_tests.yml | 2 +- .../tasks/gen_synth_loki_data.yml | 42 ++ roles/telemetry_chargeback/tasks/main.yml | 3 + 9 files changed, 527 insertions(+), 3 deletions(-) create mode 100755 roles/telemetry_chargeback/files/gen_synth_loki_data.py create mode 100644 roles/telemetry_chargeback/files/loki_data_templ.j2 create mode 100644 roles/telemetry_chargeback/files/test_static.yml create mode 100644 roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml diff --git a/ci/run_chargeback_tests.yml b/ci/run_chargeback_tests.yml index 8bfd9bd09..bfa3a4bbb 100644 --- a/ci/run_chargeback_tests.yml +++ b/ci/run_chargeback_tests.yml @@ -1,12 +1,13 @@ --- - name: "Verify all the applicable projects, endpoints, pods & services for cloudkitty" hosts: "{{ cifmw_target_hook_host | default('localhost') }}" - gather_facts: no + gather_facts: true ignore_errors: true environment: KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" PATH: "{{ cifmw_path }}" vars_files: + - vars/common.yml - vars/osp18_env.yml vars: common_pod_status_str: "Running" diff --git a/roles/telemetry_chargeback/defaults/main.yml b/roles/telemetry_chargeback/defaults/main.yml index 64f07b7a1..3895bd967 100644 --- a/roles/telemetry_chargeback/defaults/main.yml +++ b/roles/telemetry_chargeback/defaults/main.yml @@ -1,2 +1,10 @@ --- openstack_cmd: "openstack" + +ck_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" +ck_data_template: "{{ role_path }}/files/loki_data_templ.j2" +ck_data_config: "{{ role_path }}/files/test_static.yml" +ck_output_file_local: "{{ role_path }}/files/loki_synth_data.json" + +# Output directory for test artifacts +logs_dir: 
"/home/zuul/ci-framework-data/tests/feature-verification-tests" diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py new file mode 100755 index 000000000..1d8bcaf93 --- /dev/null +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -0,0 +1,377 @@ +"""Generate synthetic Loki log data from a Jinja2 template.""" +import logging +import argparse +import json +import yaml +from datetime import datetime, timezone, timedelta +from pathlib import Path +from typing import Dict, Any +from jinja2 import Environment + + +# --- Configure logging with a default level that can be changed --- +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S+00:00' +) +logger = logging.getLogger() + + +def _format_timestamp(epoch_seconds: float, invalid_timestamp: str) -> str: + """ + Convert an epoch timestamp into a human-readable UTC string. + + Args: + epoch_seconds (float): The timestamp in seconds since the epoch. + invalid_timestamp (str): String to return for invalid timestamps. + + Returns: + str: The formatted datetime string (e.g., "2023-10-26T14:30:00+00:00"). + """ + try: + dt_object = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + return dt_object.isoformat() + except (ValueError, TypeError): + logger.warning(f"Invalid epoch value provided: {epoch_seconds}") + return invalid_timestamp + + +def load_config(config_path: Path) -> Dict[str, Any]: + """ + Load configuration from YAML file. + + Args: + config_path: Path to the config file. + + Returns: + Dict containing configuration values. + + Raises: + FileNotFoundError: If config file does not exist. + ValueError: If config file cannot be parsed. 
+ """ + if not config_path.exists(): + logger.error(f"Config file not found at {config_path}") + raise FileNotFoundError(f"Config file not found: {config_path}") + + try: + with config_path.open('r') as f: + config = yaml.safe_load(f) + logger.debug(f"Loaded config from {config_path}") + if not config: + raise ValueError(f"Config file {config_path} is empty") + return config + except yaml.YAMLError as e: + logger.error(f"Error parsing config file {config_path}: {e}") + raise ValueError(f"Error parsing config file: {e}") + + +def generate_loki_data( + template_path: Path, + output_path: Path, + start_time: datetime, + end_time: datetime, + time_step_seconds: int, + config: Dict[str, Any] +): + """ + Generate synthetic Loki log data by preparing a data list and rendering. + + Args: + template_path (Path): Path to the main log template file. + output_path (Path): Path for the generated output JSON file. + start_time (datetime): The start time for data generation. + end_time (datetime): The end time for data generation. + time_step_seconds (int): The duration of each log entry in seconds. + config (Dict[str, Any]): Configuration dictionary loaded from file. + """ + # Get constants from config + constants = config.get("constants", {}) + invalid_timestamp = constants.get("invalid_timestamp", "INVALID_TIMESTAMP") + + # --- Step 1: Generate the data structure first --- + logger.info( + f"Generating data from {start_time.strftime('%Y-%m-%d')} to " + f"{end_time.strftime('%Y-%m-%d')} with a {time_step_seconds}s step." 
+ ) + start_epoch = int(start_time.timestamp()) + end_epoch = int(end_time.timestamp()) + logger.debug(f"Time range in epoch seconds: {start_epoch} to {end_epoch}") + + log_data_list = [] # This list will hold all our data points + + # Loop through the time range and generate data points + for current_epoch in range( + start_epoch, + end_epoch - time_step_seconds, + time_step_seconds + ): + end_of_step_epoch = min( + current_epoch + time_step_seconds - 1, end_epoch - 1) + + # Prepare replacement values + nanoseconds = int(current_epoch * 1_000_000_000) + start_str = _format_timestamp(current_epoch, invalid_timestamp) + end_str = _format_timestamp(end_of_step_epoch, invalid_timestamp) + + logger.debug( + f"Processing epoch: {current_epoch} -> nanoseconds: {nanoseconds}" + ) + + # Create a dictionary for this time step and add it to the list + log_data_list.append({ + "nanoseconds": nanoseconds, + "start_time": start_str, + "end_time": end_str + }) + + # Add final entry that ends at end_epoch (current time) + if log_data_list and end_epoch > start_epoch: + # Calculate start of final entry based on end of last generated entry + last_entry_end = log_data_list[-1]["end_time"] + # Parse the last entry's end time to get the epoch + last_end_dt = datetime.fromisoformat(last_entry_end) + final_start_epoch = int(last_end_dt.timestamp()) + 1 + final_nanoseconds = int(final_start_epoch * 1_000_000_000) + + # Only add if the final entry would have a valid duration + if final_start_epoch < end_epoch: + log_data_list.append({ + "nanoseconds": final_nanoseconds, + "start_time": _format_timestamp( + final_start_epoch, invalid_timestamp + ), + "end_time": _format_timestamp(end_epoch - 1, invalid_timestamp) + }) + + logger.info(f"Generated {len(log_data_list)} data points to be rendered.") + + # --- Step 2: Load log type configurations from config --- + log_types_config = config.get("log_types", []) + if not log_types_config: + logger.error("No log_types configuration found in 
config.") + raise ValueError("log_types section is required in config") + + if not isinstance(log_types_config, list): + logger.error("log_types must be a list in config") + raise ValueError("log_types must be a list") + + # Get required fields from config + required_fields = config.get("required_fields", []) + if not required_fields: + logger.error("No required_fields configuration found in config") + raise ValueError("required_fields section is required in config") + + # Get date field names from config + date_field_names = config.get("date_fields", []) + if not date_field_names: + logger.error("No date_fields configuration found in config") + raise ValueError("date_fields section is required in config") + + # Build log_types dictionary from config + log_types = {} + for log_type_config in log_types_config: + if not isinstance(log_type_config, dict): + logger.error(f"Invalid log type configuration: {log_type_config}") + raise ValueError("Each log type in log_types must be a dictionary") + + log_type_name = log_type_config.get("name") + if not log_type_name: + logger.error("Each log type must have a 'name' field") + raise ValueError("Each log type must have a 'name' field") + + # Validate required fields + missing = [f for f in required_fields if f not in log_type_config] + if missing: + logger.error( + f"Missing required fields in {log_type_name} config: {missing}" + ) + raise ValueError( + f"Missing required fields in {log_type_name}: {missing}" + ) + + # Build groupby from config + groupby = log_type_config.get("groupby", {}) + if not isinstance(groupby, dict): + logger.error( + f"groupby must be a dictionary for {log_type_name}" + ) + raise ValueError( + f"groupby must be a dictionary for {log_type_name}" + ) + + log_types[log_type_name] = { + "type": log_type_config["type"], + "unit": log_type_config["unit"], + "description": log_type_config.get("description"), + "qty": log_type_config["qty"], + "price": log_type_config["price"], + "groupby": groupby.copy(), 
+ "metadata": log_type_config.get("metadata", {}) + } + + # --- Step 3: Load template and render --- + try: + logger.info(f"Loading main template from: {template_path}") + template_content = template_path.read_text() + + # Create Jinja2 environment with custom filter + def tojson_preserve_order(obj): + """Convert object to JSON string preserving dictionary order.""" + return json.dumps(obj, sort_keys=False, ensure_ascii=False) + + env = Environment(trim_blocks=True, lstrip_blocks=True) + env.filters['tojson'] = tojson_preserve_order + template = env.from_string(template_content) + + except FileNotFoundError as e: + logger.error(f"Error loading template file: {e}. Aborting.") + raise + + # --- Render the template in one pass with all the data --- + logger.info("Rendering final output...") + + # Pre-calculate log types with date fields for each time step + log_types_list = [] + for idx, item in enumerate(log_data_list): + # For the last entry, use end_time to ensure it shows today's date + if idx == len(log_data_list) - 1: + dt = end_time + else: + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + + iso_year, iso_week, _ = dt.isocalendar() + day_of_year = dt.timetuple().tm_yday + + # Build date fields dynamically from config + date_field_mapping = { + "week_of_the_year": str(iso_week), + "day_of_the_year": str(day_of_year), + "month": str(dt.month), + "year": str(dt.year) + } + + date_fields = {} + for field_name in date_field_names: + if field_name in date_field_mapping: + date_fields[field_name] = date_field_mapping[field_name] + else: + logger.warning( + f"Unknown date field name in config: {field_name}" + ) + + # Create log types with date fields for this time step + log_types_with_dates = {} + for log_type_name, log_type_data in log_types.items(): + log_type_with_dates = log_type_data.copy() + log_type_with_dates["groupby"] = log_type_data["groupby"].copy() + 
def main():
    """
    Parse command-line arguments and generate the synthetic Loki data file.

    The generation window is read from the ``generation`` section of the
    YAML config passed with ``-t/--test`` (``days``, default 30, and
    ``step_seconds``, default 300). The window ends at the current UTC time.

    Raises:
        SystemExit: With exit status 1 when the config cannot be loaded,
            when generation fails, or when no output file exists
            afterwards. A non-zero status is what lets the calling
            automation detect the failure.
    """
    parser = argparse.ArgumentParser(
        description="Generate synthetic Loki log data from a main template.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # --- Required Arguments ---
    parser.add_argument(
        "--tmpl",
        required=True,
        help="Path to the main log template file."
    )
    parser.add_argument(
        "-t", "--test",
        type=Path,
        required=True,
        help="Path to YAML config file (e.g., scenario.yml)."
    )
    parser.add_argument(
        "-o", "--output",
        required=True,
        help="Path to the output file."
    )

    # --- Optional Utility Arguments ---
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable debug level logging for verbose output."
    )
    args = parser.parse_args()

    if args.debug:
        logger.setLevel(logging.DEBUG)
        logger.debug("Debug mode enabled.")

    # Load config first to get generation parameters
    try:
        config = load_config(args.test)
    except (FileNotFoundError, ValueError) as e:
        logger.critical("Failed to load config: %s", e)
        # Exit non-zero: a plain return would report success to the caller.
        raise SystemExit(1) from e

    # An empty "generation:" key loads as None, so fall back to defaults.
    generation_config = config.get("generation") or {}
    days = generation_config.get("days", 30)
    step_seconds = generation_config.get("step_seconds", 300)

    # Define the time range for data generation
    end_time_utc = datetime.now(timezone.utc)
    start_time_utc = end_time_utc - timedelta(days=days)
    logger.debug(
        "Time range calculated: %s to %s", start_time_utc, end_time_utc
    )

    # Run the generator
    output_path = Path(args.output)
    try:
        generate_loki_data(
            template_path=Path(args.tmpl),
            output_path=output_path,
            start_time=start_time_utc,
            end_time=end_time_utc,
            time_step_seconds=step_seconds,
            config=config
        )
    except FileNotFoundError as e:
        logger.error(
            "Process aborted because the template file was not found."
        )
        raise SystemExit(1) from e
    except Exception as e:
        logger.critical(
            "A critical, unhandled error stopped the script: %s", e
        )
        raise SystemExit(1) from e

    # generate_loki_data only logs write errors, so confirm that a file
    # is actually there before reporting success.
    if not output_path.is_file():
        logger.critical("No output file was produced at '%s'", output_path)
        raise SystemExit(1)


if __name__ == "__main__":
    main()
unit: instance + description: null + qty: 1.0 + price: 0.3 + groupby: + id: de168c31-ed44-4a1a-a079-51bd238a91d6 + project_id: 9cf5bcfc61a24682acc448af2d062ad2 + user_id: c29ab6e886354bbd88ee9899e62d1d40 + metadata: + flavor_name: m1.tiny + flavor_id: "1" + vcpus: "" + +# Required fields for validation (top-level fields only, not nested in groupby) +required_fields: + - type + - unit + - qty + - price + - groupby + - metadata + +# Date field names to add to groupby +date_fields: + - week_of_the_year + - day_of_the_year + - month + - year + +# Loki stream configuration +loki_stream: + service: cloudkitty + +# Error messages and constants +constants: + invalid_timestamp: "INVALID_TIMESTAMP" diff --git a/roles/telemetry_chargeback/meta/main.yml b/roles/telemetry_chargeback/meta/main.yml index 8c63de8fa..ddb049119 100644 --- a/roles/telemetry_chargeback/meta/main.yml +++ b/roles/telemetry_chargeback/meta/main.yml @@ -6,7 +6,7 @@ galaxy_info: license: Apache-2.0 - min_ansible_version: "2.1" + min_ansible_version: "2.9" galaxy_tags: [] diff --git a/roles/telemetry_chargeback/tasks/chargeback_tests.yml b/roles/telemetry_chargeback/tasks/chargeback_tests.yml index cda17be17..b121019c9 100644 --- a/roles/telemetry_chargeback/tasks/chargeback_tests.yml +++ b/roles/telemetry_chargeback/tasks/chargeback_tests.yml @@ -17,7 +17,7 @@ cmd: "{{ openstack_cmd }} rating module set priority hashmap 100" register: set_hashmap_priority when: get_hashmap_priority.stdout | trim != '100' - failed_when: set_hashmap_priority.rc >= 1 or get_hashmap_priority.stdout == "" + failed_when: (set_hashmap_priority.rc | default(0)) >= 1 or get_hashmap_priority.stdout == "" changed_when: True - name: Get status of all CloudKitty rating modules diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml new file mode 100644 index 000000000..ae467dd9f --- /dev/null +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -0,0 
+1,42 @@ +- name: Define Synthetic Data Variables + ansible.builtin.set_fact: + output_file_remote: "{{ logs_dir }}/gen_loki_synth_data.log" + +- name: Check for preexisting output file + ansible.builtin.stat: + path: "{{ ck_output_file_local }}" + register: file_preexists + +- name: TEST Generate Synthetic Data + ansible.builtin.command: + cmd: > + python3 "{{ ck_synth_script }}" + --tmpl "{{ ck_data_template }}" + -t "{{ ck_data_config }}" + -o "{{ ck_output_file_local }}" + register: script_output + when: not file_preexists.stat.exists | bool + changed_when: script_output.rc == 0 + +- name: Read the content of the file + ansible.builtin.slurp: + src: "{{ ck_output_file_local }}" + register: slurped_file + +- name: TEST Validate JSON format of synthetic data file + ansible.builtin.assert: + that: + # This filter will trigger a task failure if the string isn't valid JSON + - slurped_file.content | b64decode | from_json is defined + fail_msg: "The file does not contain valid JSON format." + success_msg: "JSON format validated successfully." 
+ +- name: Print output_file_remote path + ansible.builtin.debug: + msg: "Synthetic data file: {{ output_file_remote }}" + +- name: Copy output file to remote host + ansible.builtin.copy: + src: "{{ ck_output_file_local }}" + dest: "{{ output_file_remote }}" + mode: '0644' diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 969188b71..98a94b233 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -1,3 +1,6 @@ --- - name: "Validate Chargeback Feature" ansible.builtin.include_tasks: "chargeback_tests.yml" + +- name: "Generate Synthetic Data" + ansible.builtin.include_tasks: "gen_synth_loki_data.yml" From b099f106fde02a92a804afa16fab80fef402fecf Mon Sep 17 00:00:00 2001 From: Muneesha Yadla Date: Tue, 18 Nov 2025 09:57:40 -0500 Subject: [PATCH 5/7] Add synthetic date generation to telemetry_chargeback role in FVT repo Generate synthetic data and write results to a json file Push json file to log directory so that during debug file is available for review Controller/ci-framework-data/tests/feature-verification-tests Do not overwrite syth data json if it already exists Using Gemini and Cursor Closes https://issues.redhat.com/browse/OSPRH-23746 Update the synthetic data generation process to ensure that existing JSON files are not overwritten. This change allows for better debugging by preserving previous data outputs. 
Closes https://issues.redhat.com/browse/OSPRH-23746 --- ci/run_chargeback_tests.yml | 3 +- roles/telemetry_chargeback/README.md | 2 +- .../files/gen_synth_loki_data.py | 376 ++++++++++++++++++ .../files/test_static.yml | 57 +++ roles/telemetry_chargeback/meta/main.yml | 2 +- .../tasks/chargeback_tests.yml | 2 +- .../tasks/gen_synth_loki_data.yml | 39 ++ roles/telemetry_chargeback/tasks/main.yml | 3 + .../template/loki_data_templ.j2 | 26 ++ roles/telemetry_chargeback/vars/main.yml | 9 + 10 files changed, 515 insertions(+), 4 deletions(-) create mode 100755 roles/telemetry_chargeback/files/gen_synth_loki_data.py create mode 100644 roles/telemetry_chargeback/files/test_static.yml create mode 100644 roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml create mode 100644 roles/telemetry_chargeback/template/loki_data_templ.j2 create mode 100644 roles/telemetry_chargeback/vars/main.yml diff --git a/ci/run_chargeback_tests.yml b/ci/run_chargeback_tests.yml index 8bfd9bd09..bfa3a4bbb 100644 --- a/ci/run_chargeback_tests.yml +++ b/ci/run_chargeback_tests.yml @@ -1,12 +1,13 @@ --- - name: "Verify all the applicable projects, endpoints, pods & services for cloudkitty" hosts: "{{ cifmw_target_hook_host | default('localhost') }}" - gather_facts: no + gather_facts: true ignore_errors: true environment: KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" PATH: "{{ cifmw_path }}" vars_files: + - vars/common.yml - vars/osp18_env.yml vars: common_pod_status_str: "Running" diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index f999de857..1272792e9 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -18,7 +18,7 @@ It is expected to be run **after** a successful deployment and configuration of Role Variables -------------- -The role uses a few primary variables to control the testing environment and execution. +The role uses the following variables to control the testing environment and execution. 
| Variable | Default Value | Description | |----------|---------------|-------------| diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py new file mode 100755 index 000000000..f05796e29 --- /dev/null +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -0,0 +1,376 @@ +"""Generate synthetic Loki log data from a Jinja2 template.""" +import logging +import argparse +import json +import yaml +from datetime import datetime, timezone, timedelta +from pathlib import Path +from typing import Dict, Any +from jinja2 import Environment + + +# --- Configure logging with a default level that can be changed --- +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S+00:00' +) +logger = logging.getLogger() + + +def _format_timestamp(epoch_seconds: float, invalid_timestamp: str) -> str: + """ + Convert an epoch timestamp into a human-readable UTC string. + + Args: + epoch_seconds (float): The timestamp in seconds since the epoch. + invalid_timestamp (str): String to return for invalid timestamps. + + Returns: + str: The formatted datetime string (e.g., "2023-10-26T14:30:00+00:00"). + """ + try: + dt_object = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + return dt_object.isoformat() + except (ValueError, TypeError): + logger.warning(f"Invalid epoch value provided: {epoch_seconds}") + return invalid_timestamp + + +def load_config(config_path: Path) -> Dict[str, Any]: + """ + Load configuration from YAML file. + + Args: + config_path: Path to the config file. + + Returns: + Dict containing configuration values. + + Raises: + FileNotFoundError: If config file does not exist. + ValueError: If config file cannot be parsed. 
+ """ + if not config_path.exists(): + logger.error(f"Config file not found at {config_path}") + raise FileNotFoundError(f"Config file not found: {config_path}") + + try: + with config_path.open('r') as f: + config = yaml.safe_load(f) + logger.debug(f"Loaded config from {config_path}") + if not config: + raise ValueError(f"Config file {config_path} is empty") + return config + except yaml.YAMLError as e: + logger.error(f"Error parsing config file {config_path}: {e}") + raise ValueError(f"Error parsing config file: {e}") + + +def generate_loki_data( + template_path: Path, + output_path: Path, + start_time: datetime, + end_time: datetime, + time_step_seconds: int, + config: Dict[str, Any] +): + """ + Generate synthetic Loki log data by preparing a data list and rendering. + + Args: + template_path (Path): Path to the main log template file. + output_path (Path): Path for the generated output JSON file. + start_time (datetime): The start time for data generation. + end_time (datetime): The end time for data generation. + time_step_seconds (int): The duration of each log entry in seconds. + config (Dict[str, Any]): Configuration dictionary loaded from file. + """ + # Hardcoded constant for invalid timestamps + invalid_timestamp = "INVALID_TIMESTAMP" + + # --- Step 1: Generate the data structure first --- + logger.info( + f"Generating data from {start_time.strftime('%Y-%m-%d')} to " + f"{end_time.strftime('%Y-%m-%d')} with a {time_step_seconds}s step." 
+ ) + start_epoch = int(start_time.timestamp()) + end_epoch = int(end_time.timestamp()) + logger.debug(f"Time range in epoch seconds: {start_epoch} to {end_epoch}") + + log_data_list = [] # This list will hold all our data points + + # Loop through the time range and generate data points + for current_epoch in range( + start_epoch, + end_epoch - time_step_seconds, + time_step_seconds + ): + end_of_step_epoch = min( + current_epoch + time_step_seconds - 1, end_epoch - 1) + + # Prepare replacement values + nanoseconds = int(current_epoch * 1_000_000_000) + start_str = _format_timestamp(current_epoch, invalid_timestamp) + end_str = _format_timestamp(end_of_step_epoch, invalid_timestamp) + + logger.debug( + f"Processing epoch: {current_epoch} -> nanoseconds: {nanoseconds}" + ) + + # Create a dictionary for this time step and add it to the list + log_data_list.append({ + "nanoseconds": nanoseconds, + "start_time": start_str, + "end_time": end_str + }) + + # Add final entry that ends at end_epoch (current time) + if log_data_list and end_epoch > start_epoch: + # Calculate start of final entry based on end of last generated entry + last_entry_end = log_data_list[-1]["end_time"] + # Parse the last entry's end time to get the epoch + last_end_dt = datetime.fromisoformat(last_entry_end) + final_start_epoch = int(last_end_dt.timestamp()) + 1 + final_nanoseconds = int(final_start_epoch * 1_000_000_000) + + # Only add if the final entry would have a valid duration + if final_start_epoch < end_epoch: + log_data_list.append({ + "nanoseconds": final_nanoseconds, + "start_time": _format_timestamp( + final_start_epoch, invalid_timestamp + ), + "end_time": _format_timestamp(end_epoch - 1, invalid_timestamp) + }) + + logger.info(f"Generated {len(log_data_list)} data points to be rendered.") + + # --- Step 2: Load log type configurations from config --- + log_types_config = config.get("log_types", []) + if not log_types_config: + logger.error("No log_types configuration found in 
config.") + raise ValueError("log_types section is required in config") + + if not isinstance(log_types_config, list): + logger.error("log_types must be a list in config") + raise ValueError("log_types must be a list") + + # Get required fields from config + required_fields = config.get("required_fields", []) + if not required_fields: + logger.error("No required_fields configuration found in config") + raise ValueError("required_fields section is required in config") + + # Get date field names from config + date_field_names = config.get("date_fields", []) + if not date_field_names: + logger.error("No date_fields configuration found in config") + raise ValueError("date_fields section is required in config") + + # Build log_types dictionary from config + log_types = {} + for log_type_config in log_types_config: + if not isinstance(log_type_config, dict): + logger.error(f"Invalid log type configuration: {log_type_config}") + raise ValueError("Each log type in log_types must be a dictionary") + + log_type_name = log_type_config.get("name") + if not log_type_name: + logger.error("Each log type must have a 'name' field") + raise ValueError("Each log type must have a 'name' field") + + # Validate required fields + missing = [f for f in required_fields if f not in log_type_config] + if missing: + logger.error( + f"Missing required fields in {log_type_name} config: {missing}" + ) + raise ValueError( + f"Missing required fields in {log_type_name}: {missing}" + ) + + # Build groupby from config + groupby = log_type_config.get("groupby", {}) + if not isinstance(groupby, dict): + logger.error( + f"groupby must be a dictionary for {log_type_name}" + ) + raise ValueError( + f"groupby must be a dictionary for {log_type_name}" + ) + + log_types[log_type_name] = { + "type": log_type_config["type"], + "unit": log_type_config["unit"], + "description": log_type_config.get("description"), + "qty": log_type_config["qty"], + "price": log_type_config["price"], + "groupby": groupby.copy(), 
+ "metadata": log_type_config.get("metadata", {}) + } + + # --- Step 3: Load template and render --- + try: + logger.info(f"Loading main template from: {template_path}") + template_content = template_path.read_text() + + # Create Jinja2 environment with custom filter + def tojson_preserve_order(obj): + """Convert object to JSON string preserving dictionary order.""" + return json.dumps(obj, sort_keys=False, ensure_ascii=False) + + env = Environment(trim_blocks=True, lstrip_blocks=True) + env.filters['tojson'] = tojson_preserve_order + template = env.from_string(template_content) + + except FileNotFoundError as e: + logger.error(f"Error loading template file: {e}. Aborting.") + raise + + # --- Render the template in one pass with all the data --- + logger.info("Rendering final output...") + + # Pre-calculate log types with date fields for each time step + log_types_list = [] + for idx, item in enumerate(log_data_list): + # For the last entry, use end_time to ensure it shows today's date + if idx == len(log_data_list) - 1: + dt = end_time + else: + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + + iso_year, iso_week, _ = dt.isocalendar() + day_of_year = dt.timetuple().tm_yday + + # Build date fields dynamically from config + date_field_mapping = { + "week_of_the_year": str(iso_week), + "day_of_the_year": str(day_of_year), + "month": str(dt.month), + "year": str(dt.year) + } + + date_fields = {} + for field_name in date_field_names: + if field_name in date_field_mapping: + date_fields[field_name] = date_field_mapping[field_name] + else: + logger.warning( + f"Unknown date field name in config: {field_name}" + ) + + # Create log types with date fields for this time step + log_types_with_dates = {} + for log_type_name, log_type_data in log_types.items(): + log_type_with_dates = log_type_data.copy() + log_type_with_dates["groupby"] = log_type_data["groupby"].copy() + 
log_type_with_dates["groupby"].update(date_fields) + log_types_with_dates[log_type_name] = log_type_with_dates + + log_types_list.append(log_types_with_dates) + + # Get loki_stream configuration + loki_stream = config.get("loki_stream", {}) + if not loki_stream: + logger.warning("No loki_stream configuration found, using defaults") + loki_stream = {"service": "cloudkitty"} + + # Build template context with generic log type information + template_context = { + "log_data": log_data_list, + "log_type_names": list(log_types.keys()), + "all_log_entries": log_types_list, + "loki_stream": loki_stream + } + + final_output = template.render(**template_context) + + # --- Step 4: Write the final string to the file --- + try: + with output_path.open('w') as f_out: + f_out.write(final_output) + logger.info( + f"Successfully generated synthetic data to '{output_path}'" + ) + except IOError as e: + logger.error(f"Failed to write to output file '{output_path}': {e}") + except Exception as e: + logger.error(f"An unexpected error occurred during file write: {e}") + + +def main(): + """Main entry point for the script.""" + parser = argparse.ArgumentParser( + description="Generate synthetic Loki log data from a main template.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + # --- Required Arguments --- + parser.add_argument( + "--tmpl", + required=True, + help="Path to the main log template file." + ) + parser.add_argument( + "-t", "--test", + type=Path, + required=True, + help="Path to YAML config file (e.g., scenario.yml)." + ) + parser.add_argument( + "-o", "--output", + required=True, + help="Path to the output file." + ) + + # --- Optional Utility Arguments --- + parser.add_argument( + "--debug", + action="store_true", + help="Enable debug level logging for verbose output." 
+ ) + args = parser.parse_args() + + if args.debug: + logger.setLevel(logging.DEBUG) + logger.debug("Debug mode enabled.") + + # Load config first to get generation parameters + try: + config = load_config(args.test) + except (FileNotFoundError, ValueError) as e: + logger.critical(f"Failed to load config: {e}") + return + + # Get generation parameters from config + generation_config = config.get("generation", {}) + days = generation_config.get("days", 30) + step_seconds = generation_config.get("step_seconds", 300) + + # Define the time range for data generation + end_time_utc = datetime.now(timezone.utc) + start_time_utc = end_time_utc - timedelta(days=days) + logger.debug(f"Time range calculated: {start_time_utc} to {end_time_utc}") + + # Run the generator + try: + generate_loki_data( + template_path=Path(args.tmpl), + output_path=Path(args.output), + start_time=start_time_utc, + end_time=end_time_utc, + time_step_seconds=step_seconds, + config=config + ) + except FileNotFoundError: + logger.error( + "Process aborted because the template file was not found." 
+ ) + except Exception as e: + logger.critical(f"A critical, unhandled error stopped the script: {e}") + + +if __name__ == "__main__": + main() diff --git a/roles/telemetry_chargeback/files/test_static.yml b/roles/telemetry_chargeback/files/test_static.yml new file mode 100644 index 000000000..f94a3c1d2 --- /dev/null +++ b/roles/telemetry_chargeback/files/test_static.yml @@ -0,0 +1,57 @@ +# Scenario configuration for synthetic Loki log data generation + +# Time range configuration +generation: + days: 1 + step_seconds: 7200 + +# Log type definitions +log_types: + - name: ceilometer_image_size + type: ceilometer_image_size + unit: MiB + description: null + qty: 20.6 + price: 0.02 + groupby: + id: cd65d30f-8b94-4fa3-95dc-e3b429f479b2 + project_id: 0030775de80e4d84a4fd0d73e0a1b3a7 + user_id: null + metadata: + container_format: bare + disk_format: qcow2 + + - name: instance + type: instance + unit: instance + description: null + qty: 1.0 + price: 0.3 + groupby: + id: de168c31-ed44-4a1a-a079-51bd238a91d6 + project_id: 9cf5bcfc61a24682acc448af2d062ad2 + user_id: c29ab6e886354bbd88ee9899e62d1d40 + metadata: + flavor_name: m1.tiny + flavor_id: "1" + vcpus: "" + +# Required fields for validation (top-level fields only, not nested in groupby) +required_fields: + - type + - unit + - qty + - price + - groupby + - metadata + +# Date field names to add to groupby +date_fields: + - week_of_the_year + - day_of_the_year + - month + - year + +# Loki stream configuration +loki_stream: + service: cloudkitty diff --git a/roles/telemetry_chargeback/meta/main.yml b/roles/telemetry_chargeback/meta/main.yml index 8c63de8fa..ddb049119 100644 --- a/roles/telemetry_chargeback/meta/main.yml +++ b/roles/telemetry_chargeback/meta/main.yml @@ -6,7 +6,7 @@ galaxy_info: license: Apache-2.0 - min_ansible_version: "2.1" + min_ansible_version: "2.9" galaxy_tags: [] diff --git a/roles/telemetry_chargeback/tasks/chargeback_tests.yml b/roles/telemetry_chargeback/tasks/chargeback_tests.yml index 
cda17be17..df07fb503 100644 --- a/roles/telemetry_chargeback/tasks/chargeback_tests.yml +++ b/roles/telemetry_chargeback/tasks/chargeback_tests.yml @@ -17,7 +17,7 @@ cmd: "{{ openstack_cmd }} rating module set priority hashmap 100" register: set_hashmap_priority when: get_hashmap_priority.stdout | trim != '100' - failed_when: set_hashmap_priority.rc >= 1 or get_hashmap_priority.stdout == "" + failed_when: (set_hashmap_priority.rc | default(42)) >= 1 or get_hashmap_priority.stdout == "" changed_when: True - name: Get status of all CloudKitty rating modules diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml new file mode 100644 index 000000000..e37b54c6b --- /dev/null +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -0,0 +1,39 @@ +--- +- name: Check for preexisting output file + ansible.builtin.stat: + path: "{{ ck_output_file_local }}" + register: file_preexists + +- name: TEST Generate Synthetic Data + ansible.builtin.command: + cmd: > + python3 "{{ ck_synth_script }}" + --tmpl "{{ ck_data_template }}" + -t "{{ ck_data_config }}" + -o "{{ ck_output_file_local }}" + register: script_output + when: not file_preexists.stat.exists | bool + changed_when: script_output.rc == 0 + +- name: Read the content of the file + ansible.builtin.slurp: + src: "{{ ck_output_file_local }}" + register: slurped_file + +- name: TEST Validate JSON format of synthetic data file + ansible.builtin.assert: + that: + # This filter will trigger a task failure if the string isn't valid JSON + - slurped_file.content | b64decode | from_json is defined + fail_msg: "The file does not contain valid JSON format." + success_msg: "JSON format validated successfully." 
+ +- name: Print output_file_remote path + ansible.builtin.debug: + msg: "Synthetic data file: {{ ck_output_file_remote }}" + +- name: Copy output file to remote host + ansible.builtin.copy: + src: "{{ ck_output_file_local }}" + dest: "{{ ck_output_file_remote }}" + mode: '0644' diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml index 969188b71..98a94b233 100644 --- a/roles/telemetry_chargeback/tasks/main.yml +++ b/roles/telemetry_chargeback/tasks/main.yml @@ -1,3 +1,6 @@ --- - name: "Validate Chargeback Feature" ansible.builtin.include_tasks: "chargeback_tests.yml" + +- name: "Generate Synthetic Data" + ansible.builtin.include_tasks: "gen_synth_loki_data.yml" diff --git a/roles/telemetry_chargeback/template/loki_data_templ.j2 b/roles/telemetry_chargeback/template/loki_data_templ.j2 new file mode 100644 index 000000000..b676f3013 --- /dev/null +++ b/roles/telemetry_chargeback/template/loki_data_templ.j2 @@ -0,0 +1,26 @@ +{"streams": [{ "stream": { "service": "{{ loki_stream.service }}" }, "values": [ +{%- for item in log_data %} +{%- set outer_idx = loop.index0 %} +{%- set is_last_outer = loop.last %} +{%- for log_type_name in log_type_names %} +{%- set entry_data = all_log_entries[outer_idx][log_type_name] -%} +{%- set entry = { + "start": item.start_time, + "end": item.end_time, + "type": entry_data.type, + "unit": entry_data.unit, + "description": entry_data.description, + "qty": entry_data.qty, + "price": entry_data.price, + "groupby": entry_data.groupby, + "metadata": entry_data.metadata +} -%} +[ +"{{ item.nanoseconds }}", +{{ entry | tojson | tojson }} +] +{%- if not (loop.last and is_last_outer) %}, +{% endif -%} +{%- endfor %} +{%- endfor %} +]}]} diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml new file mode 100644 index 000000000..fbd089244 --- /dev/null +++ b/roles/telemetry_chargeback/vars/main.yml @@ -0,0 +1,9 @@ +--- +logs_dir: 
"/home/zuul/ci-framework-data/logs" +output_file_remote: "{{ logs_dir_zuul }}/gen_loki_synth_data.log" + +ck_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py" +ck_data_template: "{{ role_path }}/template/loki_data_templ.j2" +ck_data_config: "{{ role_path }}/files/test_static.yml" +ck_output_file_local: "{{ role_path }}/files/loki_synth_data.json" +ck_output_file_remote: "{{ logs_dir_zuul }}/gen_loki_synth_data.log" From 9e8924e92be9709f0517650a635d99821eedc8e5 Mon Sep 17 00:00:00 2001 From: Muneesha Yadla Date: Tue, 18 Nov 2025 09:57:40 -0500 Subject: [PATCH 6/7] Add synthetic data generation to telemetry_chargeback role in FVT repo Generate synthetic data and write results to a json file Push json file to log directory so that during debug file is available for review Controller/ci-framework-data/tests/feature-verification-tests Do not overwrite synth data json if it already exists Using Gemini and Cursor Closes https://issues.redhat.com/browse/OSPRH-23746 Update the synthetic data generation process to ensure that existing JSON files are not overwritten. This change allows for better debugging by preserving previous data outputs.
Closes https://issues.redhat.com/browse/OSPRH-23746 --- ci/run_chargeback_tests.yml | 3 +- roles/telemetry_chargeback/README.md | 38 +- .../files/gen_synth_loki_data.py | 376 ++++++++++++++++++ .../files/test_static.yml | 57 +++ roles/telemetry_chargeback/meta/main.yml | 2 +- .../tasks/chargeback_tests.yml | 2 +- .../tasks/gen_synth_loki_data.yml | 39 ++ roles/telemetry_chargeback/tasks/main.yml | 3 + .../template/loki_data_templ.j2 | 26 ++ roles/telemetry_chargeback/vars/main.yml | 9 + 10 files changed, 550 insertions(+), 5 deletions(-) create mode 100755 roles/telemetry_chargeback/files/gen_synth_loki_data.py create mode 100644 roles/telemetry_chargeback/files/test_static.yml create mode 100644 roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml create mode 100644 roles/telemetry_chargeback/template/loki_data_templ.j2 create mode 100644 roles/telemetry_chargeback/vars/main.yml diff --git a/ci/run_chargeback_tests.yml b/ci/run_chargeback_tests.yml index 8bfd9bd09..bfa3a4bbb 100644 --- a/ci/run_chargeback_tests.yml +++ b/ci/run_chargeback_tests.yml @@ -1,12 +1,13 @@ --- - name: "Verify all the applicable projects, endpoints, pods & services for cloudkitty" hosts: "{{ cifmw_target_hook_host | default('localhost') }}" - gather_facts: no + gather_facts: true ignore_errors: true environment: KUBECONFIG: "{{ cifmw_openshift_kubeconfig }}" PATH: "{{ cifmw_path }}" vars_files: + - vars/common.yml - vars/osp18_env.yml vars: common_pod_status_str: "Running" diff --git a/roles/telemetry_chargeback/README.md b/roles/telemetry_chargeback/README.md index f999de857..192b72a3d 100644 --- a/roles/telemetry_chargeback/README.md +++ b/roles/telemetry_chargeback/README.md @@ -2,6 +2,11 @@ telemetry_chargeback ========= The **`telemetry_chargeback`** role is designed to test the **RHOSO Cloudkitty** feature. These tests are specific to the Cloudkitty feature. 
Tests that are not specific to this feature (e.g., standard OpenStack deployment validation, basic networking) should be added to a common role. +The role performs two main functions: + +1. **CloudKitty Validation** - Enables and configures the CloudKitty hashmap rating module, then validates its state. +2. **Synthetic Data Generation** - Generates synthetic Loki log data for testing chargeback scenarios using a Python script and Jinja2 template. + Requirements ------------ It relies on the following being available on the target or control host: @@ -10,6 +15,9 @@ It relies on the following being available on the target or control host: * The **OpenStack CLI client** must be installed and configured with administrative credentials. * Required Python libraries for the `openstack` CLI (e.g., `python3-openstackclient`). * Connectivity to the OpenStack API endpoint. +* **Python 3** with the following libraries for synthetic data generation: + * `PyYAML` + * `Jinja2` It is expected to be run **after** a successful deployment and configuration of the following components: @@ -18,12 +26,38 @@ It is expected to be run **after** a successful deployment and configuration of Role Variables -------------- -The role uses a few primary variables to control the testing environment and execution. +The role uses the following variables to control the testing environment and execution. + +### User-Configurable Variables (defaults/main.yml) | Variable | Default Value | Description | |----------|---------------|-------------| | `openstack_cmd` | `openstack` | The command used to execute OpenStack CLI calls. This can be customized if the binary is not in the standard PATH. | +### Internal Variables (vars/main.yml) + +These variables are used internally by the role and typically do not need to be modified. + +| Variable | Default Value | Description | +|----------|---------------|-------------| +| `logs_dir_zuul` | `/home/zuul/ci-framework-data/logs` | Remote directory for log files. 
| +| `artifacts_dir_zuul` | `/home/zuul/ci-framework-data/artifacts` | Directory for generated artifacts. | +| `ck_synth_script` | `{{ role_path }}/files/gen_synth_loki_data.py` | Path to the synthetic data generation script. | +| `ck_data_template` | `{{ role_path }}/template/loki_data_templ.j2` | Path to the Jinja2 template for Loki data format. | +| `ck_data_config` | `{{ role_path }}/files/test_static.yml` | Path to the scenario configuration file. | +| `ck_output_file_local` | `{{ artifacts_dir_zuul }}/loki_synth_data.json` | Local path for generated synthetic data. | +| `ck_output_file_remote` | `{{ logs_dir_zuul }}/gen_loki_synth_data.log` | Remote destination for synthetic data. | + +Scenario Configuration +---------------------- +The synthetic data generation is controlled by a YAML configuration file (`files/test_static.yml`). This file defines: + +* **generation** - Time range configuration (days, step_seconds) +* **log_types** - List of log type definitions with name, type, unit, qty, price, groupby, and metadata +* **required_fields** - Fields required for validation +* **date_fields** - Date fields to add to groupby (week_of_the_year, day_of_the_year, month, year) +* **loki_stream** - Loki stream configuration (service name) + Dependencies ------------ This role has no direct hard dependencies on other Ansible roles. 
@@ -36,7 +70,7 @@ Example Playbook gather_facts: no tasks: - - name: "Run chargeback specific tests" + - name: "Run chargeback specific tests" ansible.builtin.import_role: name: telemetry_chargeback ``` diff --git a/roles/telemetry_chargeback/files/gen_synth_loki_data.py b/roles/telemetry_chargeback/files/gen_synth_loki_data.py new file mode 100755 index 000000000..f05796e29 --- /dev/null +++ b/roles/telemetry_chargeback/files/gen_synth_loki_data.py @@ -0,0 +1,376 @@ +"""Generate synthetic Loki log data from a Jinja2 template.""" +import logging +import argparse +import json +import yaml +from datetime import datetime, timezone, timedelta +from pathlib import Path +from typing import Dict, Any +from jinja2 import Environment + + +# --- Configure logging with a default level that can be changed --- +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S+00:00' +) +logger = logging.getLogger() + + +def _format_timestamp(epoch_seconds: float, invalid_timestamp: str) -> str: + """ + Convert an epoch timestamp into a human-readable UTC string. + + Args: + epoch_seconds (float): The timestamp in seconds since the epoch. + invalid_timestamp (str): String to return for invalid timestamps. + + Returns: + str: The formatted datetime string (e.g., "2023-10-26T14:30:00+00:00"). + """ + try: + dt_object = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + return dt_object.isoformat() + except (ValueError, TypeError): + logger.warning(f"Invalid epoch value provided: {epoch_seconds}") + return invalid_timestamp + + +def load_config(config_path: Path) -> Dict[str, Any]: + """ + Load configuration from YAML file. + + Args: + config_path: Path to the config file. + + Returns: + Dict containing configuration values. + + Raises: + FileNotFoundError: If config file does not exist. + ValueError: If config file cannot be parsed. 
+ """ + if not config_path.exists(): + logger.error(f"Config file not found at {config_path}") + raise FileNotFoundError(f"Config file not found: {config_path}") + + try: + with config_path.open('r') as f: + config = yaml.safe_load(f) + logger.debug(f"Loaded config from {config_path}") + if not config: + raise ValueError(f"Config file {config_path} is empty") + return config + except yaml.YAMLError as e: + logger.error(f"Error parsing config file {config_path}: {e}") + raise ValueError(f"Error parsing config file: {e}") + + +def generate_loki_data( + template_path: Path, + output_path: Path, + start_time: datetime, + end_time: datetime, + time_step_seconds: int, + config: Dict[str, Any] +): + """ + Generate synthetic Loki log data by preparing a data list and rendering. + + Args: + template_path (Path): Path to the main log template file. + output_path (Path): Path for the generated output JSON file. + start_time (datetime): The start time for data generation. + end_time (datetime): The end time for data generation. + time_step_seconds (int): The duration of each log entry in seconds. + config (Dict[str, Any]): Configuration dictionary loaded from file. + """ + # Hardcoded constant for invalid timestamps + invalid_timestamp = "INVALID_TIMESTAMP" + + # --- Step 1: Generate the data structure first --- + logger.info( + f"Generating data from {start_time.strftime('%Y-%m-%d')} to " + f"{end_time.strftime('%Y-%m-%d')} with a {time_step_seconds}s step." 
+ ) + start_epoch = int(start_time.timestamp()) + end_epoch = int(end_time.timestamp()) + logger.debug(f"Time range in epoch seconds: {start_epoch} to {end_epoch}") + + log_data_list = [] # This list will hold all our data points + + # Loop through the time range and generate data points + for current_epoch in range( + start_epoch, + end_epoch - time_step_seconds, + time_step_seconds + ): + end_of_step_epoch = min( + current_epoch + time_step_seconds - 1, end_epoch - 1) + + # Prepare replacement values + nanoseconds = int(current_epoch * 1_000_000_000) + start_str = _format_timestamp(current_epoch, invalid_timestamp) + end_str = _format_timestamp(end_of_step_epoch, invalid_timestamp) + + logger.debug( + f"Processing epoch: {current_epoch} -> nanoseconds: {nanoseconds}" + ) + + # Create a dictionary for this time step and add it to the list + log_data_list.append({ + "nanoseconds": nanoseconds, + "start_time": start_str, + "end_time": end_str + }) + + # Add final entry that ends at end_epoch (current time) + if log_data_list and end_epoch > start_epoch: + # Calculate start of final entry based on end of last generated entry + last_entry_end = log_data_list[-1]["end_time"] + # Parse the last entry's end time to get the epoch + last_end_dt = datetime.fromisoformat(last_entry_end) + final_start_epoch = int(last_end_dt.timestamp()) + 1 + final_nanoseconds = int(final_start_epoch * 1_000_000_000) + + # Only add if the final entry would have a valid duration + if final_start_epoch < end_epoch: + log_data_list.append({ + "nanoseconds": final_nanoseconds, + "start_time": _format_timestamp( + final_start_epoch, invalid_timestamp + ), + "end_time": _format_timestamp(end_epoch - 1, invalid_timestamp) + }) + + logger.info(f"Generated {len(log_data_list)} data points to be rendered.") + + # --- Step 2: Load log type configurations from config --- + log_types_config = config.get("log_types", []) + if not log_types_config: + logger.error("No log_types configuration found in 
config.") + raise ValueError("log_types section is required in config") + + if not isinstance(log_types_config, list): + logger.error("log_types must be a list in config") + raise ValueError("log_types must be a list") + + # Get required fields from config + required_fields = config.get("required_fields", []) + if not required_fields: + logger.error("No required_fields configuration found in config") + raise ValueError("required_fields section is required in config") + + # Get date field names from config + date_field_names = config.get("date_fields", []) + if not date_field_names: + logger.error("No date_fields configuration found in config") + raise ValueError("date_fields section is required in config") + + # Build log_types dictionary from config + log_types = {} + for log_type_config in log_types_config: + if not isinstance(log_type_config, dict): + logger.error(f"Invalid log type configuration: {log_type_config}") + raise ValueError("Each log type in log_types must be a dictionary") + + log_type_name = log_type_config.get("name") + if not log_type_name: + logger.error("Each log type must have a 'name' field") + raise ValueError("Each log type must have a 'name' field") + + # Validate required fields + missing = [f for f in required_fields if f not in log_type_config] + if missing: + logger.error( + f"Missing required fields in {log_type_name} config: {missing}" + ) + raise ValueError( + f"Missing required fields in {log_type_name}: {missing}" + ) + + # Build groupby from config + groupby = log_type_config.get("groupby", {}) + if not isinstance(groupby, dict): + logger.error( + f"groupby must be a dictionary for {log_type_name}" + ) + raise ValueError( + f"groupby must be a dictionary for {log_type_name}" + ) + + log_types[log_type_name] = { + "type": log_type_config["type"], + "unit": log_type_config["unit"], + "description": log_type_config.get("description"), + "qty": log_type_config["qty"], + "price": log_type_config["price"], + "groupby": groupby.copy(), 
+ "metadata": log_type_config.get("metadata", {}) + } + + # --- Step 3: Load template and render --- + try: + logger.info(f"Loading main template from: {template_path}") + template_content = template_path.read_text() + + # Create Jinja2 environment with custom filter + def tojson_preserve_order(obj): + """Convert object to JSON string preserving dictionary order.""" + return json.dumps(obj, sort_keys=False, ensure_ascii=False) + + env = Environment(trim_blocks=True, lstrip_blocks=True) + env.filters['tojson'] = tojson_preserve_order + template = env.from_string(template_content) + + except FileNotFoundError as e: + logger.error(f"Error loading template file: {e}. Aborting.") + raise + + # --- Render the template in one pass with all the data --- + logger.info("Rendering final output...") + + # Pre-calculate log types with date fields for each time step + log_types_list = [] + for idx, item in enumerate(log_data_list): + # For the last entry, use end_time to ensure it shows today's date + if idx == len(log_data_list) - 1: + dt = end_time + else: + epoch_seconds = item["nanoseconds"] / 1_000_000_000 + dt = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc) + + iso_year, iso_week, _ = dt.isocalendar() + day_of_year = dt.timetuple().tm_yday + + # Build date fields dynamically from config + date_field_mapping = { + "week_of_the_year": str(iso_week), + "day_of_the_year": str(day_of_year), + "month": str(dt.month), + "year": str(dt.year) + } + + date_fields = {} + for field_name in date_field_names: + if field_name in date_field_mapping: + date_fields[field_name] = date_field_mapping[field_name] + else: + logger.warning( + f"Unknown date field name in config: {field_name}" + ) + + # Create log types with date fields for this time step + log_types_with_dates = {} + for log_type_name, log_type_data in log_types.items(): + log_type_with_dates = log_type_data.copy() + log_type_with_dates["groupby"] = log_type_data["groupby"].copy() + 
log_type_with_dates["groupby"].update(date_fields) + log_types_with_dates[log_type_name] = log_type_with_dates + + log_types_list.append(log_types_with_dates) + + # Get loki_stream configuration + loki_stream = config.get("loki_stream", {}) + if not loki_stream: + logger.warning("No loki_stream configuration found, using defaults") + loki_stream = {"service": "cloudkitty"} + + # Build template context with generic log type information + template_context = { + "log_data": log_data_list, + "log_type_names": list(log_types.keys()), + "all_log_entries": log_types_list, + "loki_stream": loki_stream + } + + final_output = template.render(**template_context) + + # --- Step 4: Write the final string to the file --- + try: + with output_path.open('w') as f_out: + f_out.write(final_output) + logger.info( + f"Successfully generated synthetic data to '{output_path}'" + ) + except IOError as e: + logger.error(f"Failed to write to output file '{output_path}': {e}") + except Exception as e: + logger.error(f"An unexpected error occurred during file write: {e}") + + +def main(): + """Main entry point for the script.""" + parser = argparse.ArgumentParser( + description="Generate synthetic Loki log data from a main template.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + + # --- Required Arguments --- + parser.add_argument( + "--tmpl", + required=True, + help="Path to the main log template file." + ) + parser.add_argument( + "-t", "--test", + type=Path, + required=True, + help="Path to YAML config file (e.g., scenario.yml)." + ) + parser.add_argument( + "-o", "--output", + required=True, + help="Path to the output file." + ) + + # --- Optional Utility Arguments --- + parser.add_argument( + "--debug", + action="store_true", + help="Enable debug level logging for verbose output." 
+ ) + args = parser.parse_args() + + if args.debug: + logger.setLevel(logging.DEBUG) + logger.debug("Debug mode enabled.") + + # Load config first to get generation parameters + try: + config = load_config(args.test) + except (FileNotFoundError, ValueError) as e: + logger.critical(f"Failed to load config: {e}") + return + + # Get generation parameters from config + generation_config = config.get("generation", {}) + days = generation_config.get("days", 30) + step_seconds = generation_config.get("step_seconds", 300) + + # Define the time range for data generation + end_time_utc = datetime.now(timezone.utc) + start_time_utc = end_time_utc - timedelta(days=days) + logger.debug(f"Time range calculated: {start_time_utc} to {end_time_utc}") + + # Run the generator + try: + generate_loki_data( + template_path=Path(args.tmpl), + output_path=Path(args.output), + start_time=start_time_utc, + end_time=end_time_utc, + time_step_seconds=step_seconds, + config=config + ) + except FileNotFoundError: + logger.error( + "Process aborted because the template file was not found." 
+ ) + except Exception as e: + logger.critical(f"A critical, unhandled error stopped the script: {e}") + + +if __name__ == "__main__": + main() diff --git a/roles/telemetry_chargeback/files/test_static.yml b/roles/telemetry_chargeback/files/test_static.yml new file mode 100644 index 000000000..f94a3c1d2 --- /dev/null +++ b/roles/telemetry_chargeback/files/test_static.yml @@ -0,0 +1,57 @@ +# Scenario configuration for synthetic Loki log data generation + +# Time range configuration +generation: + days: 1 + step_seconds: 7200 + +# Log type definitions +log_types: + - name: ceilometer_image_size + type: ceilometer_image_size + unit: MiB + description: null + qty: 20.6 + price: 0.02 + groupby: + id: cd65d30f-8b94-4fa3-95dc-e3b429f479b2 + project_id: 0030775de80e4d84a4fd0d73e0a1b3a7 + user_id: null + metadata: + container_format: bare + disk_format: qcow2 + + - name: instance + type: instance + unit: instance + description: null + qty: 1.0 + price: 0.3 + groupby: + id: de168c31-ed44-4a1a-a079-51bd238a91d6 + project_id: 9cf5bcfc61a24682acc448af2d062ad2 + user_id: c29ab6e886354bbd88ee9899e62d1d40 + metadata: + flavor_name: m1.tiny + flavor_id: "1" + vcpus: "" + +# Required fields for validation (top-level fields only, not nested in groupby) +required_fields: + - type + - unit + - qty + - price + - groupby + - metadata + +# Date field names to add to groupby +date_fields: + - week_of_the_year + - day_of_the_year + - month + - year + +# Loki stream configuration +loki_stream: + service: cloudkitty diff --git a/roles/telemetry_chargeback/meta/main.yml b/roles/telemetry_chargeback/meta/main.yml index 8c63de8fa..ddb049119 100644 --- a/roles/telemetry_chargeback/meta/main.yml +++ b/roles/telemetry_chargeback/meta/main.yml @@ -6,7 +6,7 @@ galaxy_info: license: Apache-2.0 - min_ansible_version: "2.1" + min_ansible_version: "2.9" galaxy_tags: [] diff --git a/roles/telemetry_chargeback/tasks/chargeback_tests.yml b/roles/telemetry_chargeback/tasks/chargeback_tests.yml index 
cda17be17..df07fb503 100644 --- a/roles/telemetry_chargeback/tasks/chargeback_tests.yml +++ b/roles/telemetry_chargeback/tasks/chargeback_tests.yml @@ -17,7 +17,7 @@ cmd: "{{ openstack_cmd }} rating module set priority hashmap 100" register: set_hashmap_priority when: get_hashmap_priority.stdout | trim != '100' - failed_when: set_hashmap_priority.rc >= 1 or get_hashmap_priority.stdout == "" + failed_when: (set_hashmap_priority.rc | default(42)) >= 1 or get_hashmap_priority.stdout == "" changed_when: True - name: Get status of all CloudKitty rating modules diff --git a/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml new file mode 100644 index 000000000..e37b54c6b --- /dev/null +++ b/roles/telemetry_chargeback/tasks/gen_synth_loki_data.yml @@ -0,0 +1,39 @@ +--- +- name: Check for preexisting output file + ansible.builtin.stat: + path: "{{ ck_output_file_local }}" + register: file_preexists + +- name: TEST Generate Synthetic Data + ansible.builtin.command: + cmd: > + python3 "{{ ck_synth_script }}" + --tmpl "{{ ck_data_template }}" + -t "{{ ck_data_config }}" + -o "{{ ck_output_file_local }}" + register: script_output + when: not file_preexists.stat.exists | bool + changed_when: script_output.rc == 0 + +- name: Read the content of the file + ansible.builtin.slurp: + src: "{{ ck_output_file_local }}" + register: slurped_file + +- name: TEST Validate JSON format of synthetic data file + ansible.builtin.assert: + that: + # This filter will trigger a task failure if the string isn't valid JSON + - slurped_file.content | b64decode | from_json is defined + fail_msg: "The file does not contain valid JSON format." + success_msg: "JSON format validated successfully." 
+
+- name: Print output_file_remote path
+  ansible.builtin.debug:
+    msg: "Synthetic data file: {{ ck_output_file_remote }}"
+
+- name: Copy output file to remote host
+  ansible.builtin.copy:
+    src: "{{ ck_output_file_local }}"
+    dest: "{{ ck_output_file_remote }}"
+    mode: '0644'
diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml
index 969188b71..98a94b233 100644
--- a/roles/telemetry_chargeback/tasks/main.yml
+++ b/roles/telemetry_chargeback/tasks/main.yml
@@ -1,3 +1,6 @@
 ---
 - name: "Validate Chargeback Feature"
   ansible.builtin.include_tasks: "chargeback_tests.yml"
+
+- name: "Generate Synthetic Data"
+  ansible.builtin.include_tasks: "gen_synth_loki_data.yml"
diff --git a/roles/telemetry_chargeback/template/loki_data_templ.j2 b/roles/telemetry_chargeback/template/loki_data_templ.j2
new file mode 100644
index 000000000..b676f3013
--- /dev/null
+++ b/roles/telemetry_chargeback/template/loki_data_templ.j2
@@ -0,0 +1,26 @@
+{"streams": [{ "stream": { "service": "{{ loki_stream.service }}" }, "values": [
+{%- for item in log_data %}
+{%- set outer_idx = loop.index0 %}
+{%- set is_last_outer = loop.last %}
+{%- for log_type_name in log_type_names %}
+{%- set entry_data = all_log_entries[outer_idx][log_type_name] -%}
+{%- set entry = {
+    "start": item.start_time,
+    "end": item.end_time,
+    "type": entry_data.type,
+    "unit": entry_data.unit,
+    "description": entry_data.description,
+    "qty": entry_data.qty,
+    "price": entry_data.price,
+    "groupby": entry_data.groupby,
+    "metadata": entry_data.metadata
+} -%}
+[
+"{{ item.nanoseconds }}",
+{{ entry | tojson | tojson }}
+]
+{%- if not (loop.last and is_last_outer) %},
+{% endif -%}
+{%- endfor %}
+{%- endfor %}
+]}]}
diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml
new file mode 100644
index 000000000..1014a6a9e
--- /dev/null
+++ b/roles/telemetry_chargeback/vars/main.yml
@@ -0,0 +1,9 @@
+---
+logs_dir_zuul: "/home/zuul/ci-framework-data/logs"
+artifacts_dir_zuul: "/home/zuul/ci-framework-data/artifacts"
+
+ck_synth_script: "{{ role_path }}/files/gen_synth_loki_data.py"
+ck_data_template: "{{ role_path }}/template/loki_data_templ.j2"
+ck_data_config: "{{ role_path }}/files/test_static.yml"
+ck_output_file_local: "{{ artifacts_dir_zuul }}/loki_synth_data.json"
+ck_output_file_remote: "{{ logs_dir_zuul }}/gen_loki_synth_data.log"
From 68751caf74149aca9ae9c77a9b5a96f1339e3289 Mon Sep 17 00:00:00 2001
From: Muneesha Yadla
Date: Mon, 2 Feb 2026 15:34:59 -0500
Subject: [PATCH 7/7] Added 2 playbooks to push and retreive ck data to/from loki

---
 roles/telemetry_chargeback/tasks/main.yml     |  3 +
 .../tasks/retreive_loki_data.yaml             | 80 +++++++++++++++++++
 roles/telemetry_chargeback/vars/main.yml      |  1 +
 3 files changed, 84 insertions(+)
 create mode 100644 roles/telemetry_chargeback/tasks/retreive_loki_data.yaml

diff --git a/roles/telemetry_chargeback/tasks/main.yml b/roles/telemetry_chargeback/tasks/main.yml
index 301b1aecb..f7501a407 100644
--- a/roles/telemetry_chargeback/tasks/main.yml
+++ b/roles/telemetry_chargeback/tasks/main.yml
@@ -7,3 +7,6 @@
 
 - name: "Ingest Data log to loki"
   ansible.builtin.include_tasks: "ingest_loki_data.yml"
+
+- name: "Retreive Data log from loki"
+  ansible.builtin.include_tasks: "retreive_loki_data.yaml"
diff --git a/roles/telemetry_chargeback/tasks/retreive_loki_data.yaml b/roles/telemetry_chargeback/tasks/retreive_loki_data.yaml
new file mode 100644
index 000000000..6cb714c09
--- /dev/null
+++ b/roles/telemetry_chargeback/tasks/retreive_loki_data.yaml
@@ -0,0 +1,80 @@
+---
+# Task list loaded by include_tasks from tasks/main.yml. Play-level keys
+# (hosts, connection, gather_facts, tasks) are rejected there, so a block is used.
+# NOTE(review): runs on the including play's host; delegate to localhost if needed.
+- name: Automate Loki Log Retrieval
+  vars:
+    # LogQL Query
+    logql_query: "{{ loki_query | default('{test=\"chargeback\"}') }}"
+
+    # Time window settings
+    lookback_days: "{{ lookback | default(6) }}"
+    limit_logs: "{{ limit | default(50) }}"
+
+  block:
+    # Calculate Time
+    - name: Calculate Start Time in nanoseconds
+      ansible.builtin.command: date -d "{{ lookback_days }} days ago" +%s000000000
+      register: nano_time
+      changed_when: false
+
+    - name: Set Start Time
+      ansible.builtin.set_fact:
+        start_time: "{{ nano_time.stdout }}"
+
+    - name: Display Query Parameters
+      ansible.builtin.debug:
+        msg:
+          - "Query: {{ logql_query }}"
+          - "Start Time: {{ start_time }}"
+          - "Limit: {{ limit_logs }}"
+
+    # Query Loki
+    - name: Retrieve Logs from Loki via API
+      block:
+        - name: Query Loki API
+          ansible.builtin.uri:
+            url: "{{ loki_query_url }}?query={{ logql_query | urlencode }}&start={{ start_time }}&limit={{ limit_logs }}"
+            method: GET
+            client_cert: "{{ cert_dir }}/tls.crt"
+            client_key: "{{ cert_dir }}/tls.key"
+            ca_path: "{{ cert_dir }}/ca.crt"
+            validate_certs: false
+            status_code: 200
+            return_content: true
+            body_format: json
+          register: loki_response
+
+        # Save data
+        - name: Save Loki Data to JSON file
+          ansible.builtin.copy:
+            content: "{{ loki_response.json | to_nice_json }}"
+            dest: "{{ ck_loki_retreive_file }}"
+            mode: '0644'
+
+        # Validate
+        - name: Verify Data Integrity
+          ansible.builtin.assert:
+            that:
+              - loki_response.json.status == 'success'
+              - loki_response.json.data.result | length > 0
+            fail_msg: "Query returned success but found ZERO logs. Check ingestion pipeline."
+            success_msg: "SUCCESS: Found {{ loki_response.json.data.result | length }} log streams."
+
+        # Success
+        - name: Confirm Success
+          ansible.builtin.debug:
+            msg: "Retrieval Successful! Data saved to {{ ck_loki_retreive_file }}"
+
+      rescue:
+        - name: Debug failure
+          ansible.builtin.debug:
+            msg:
+              - "Status: {{ loki_response.status | default('Unknown') }}"
+              - "Body: {{ loki_response.content | default('No Content') }}"
+              - "Msg: {{ loki_response.msg | default('Request failed') }}"
+
+        # Failure
+        - name: Report Retrieval Failure
+          ansible.builtin.fail:
+            msg: "Retrieval Failed! See debug output for details."
diff --git a/roles/telemetry_chargeback/vars/main.yml b/roles/telemetry_chargeback/vars/main.yml
index 1014a6a9e..2a06a2ee7 100644
--- a/roles/telemetry_chargeback/vars/main.yml
+++ b/roles/telemetry_chargeback/vars/main.yml
@@ -7,3 +7,4 @@ ck_data_template: "{{ role_path }}/template/loki_data_templ.j2"
 ck_data_config: "{{ role_path }}/files/test_static.yml"
 ck_output_file_local: "{{ artifacts_dir_zuul }}/loki_synth_data.json"
 ck_output_file_remote: "{{ logs_dir_zuul }}/gen_loki_synth_data.log"
+ck_loki_retreive_file: "{{ logs_dir_zuul }}/reteive_loki_op.json"