diff --git a/roles/setup/defaults/main.yml b/roles/setup/defaults/main.yml index 1e716e7..b7a6098 100644 --- a/roles/setup/defaults/main.yml +++ b/roles/setup/defaults/main.yml @@ -5,4 +5,27 @@ current_year: '{{ ansible_date_time.year | int }}' setup_template_path: '/tmp/cost-mgmt-operator-collect' setup_template_dir: "{{ lookup('password', '/dev/null chars=ascii_letters') }}" setup_delete_after: 'true' -upload_cycle_seconds: 21600 \ No newline at end of file +upload_cycle_seconds: 21600 +collect_format: 'csv' +collect_manifest_uuid: '{{ 99999999999999999999 | random | to_uuid }}' +collect_archive_name: cost-mgmt +ocp_validate_cert: 'true' +ocp_cluster_id: '' +reporting_operator_token_name: '' +collect_reports: + - 'cm-openshift-usage-lookback-' + - 'cm-openshift-persistentvolumeclaim-lookback-' + - 'cm-openshift-node-labels-lookback-' +collect_download_path: '/tmp/cost-mgmt-operator-collect' +collect_delete_after: 'true' +collect_ocp_report_timeout: 60 +collect_max_csvfile_size: 99 +api_prefix: 'https://' +ingress_url: 'https://cloud.redhat.com/api/ingress/v1/upload' +authentication: 'token' +authentication_token: '' +username: '' +password: '' +cacert_path: '{{ collect_download_path }}/ca-bundle.crt' +debug: 'true' +collect_upload_wait: '{{ 2100 | random(step=10) }}' diff --git a/roles/setup/files/package_report.py b/roles/setup/files/package_report.py new file mode 100755 index 0000000..6ec0d88 --- /dev/null +++ b/roles/setup/files/package_report.py @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Red Hat, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. 
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see https://www.gnu.org/licenses/.
#
"""Create a tarball for metering reports downloaded from an OpenShift cluster."""

import argparse
import csv
import itertools
import logging
import json
import os
import sys
import tarfile
from datetime import datetime, timezone
from uuid import uuid4

DEFAULT_MAX_SIZE = 100
MEGABYTE = 1024 * 1024

# Skeleton of the manifest written next to the reports. It is never mutated;
# render_manifest() works on a copy and fills in every value (including the
# date, so the timestamp reflects when the manifest was rendered).
TEMPLATE = {
    "files": None,
    "date": None,
    "uuid": None,
    "cluster_id": None,
}


# the csv module doesn't expose the bytes-offset of the
# underlying file object.
#
# instead, the script estimates the size of the data as VARIANCE percent larger than a
# naive string concatenation of the CSV fields to cover the overhead of quoting
# and delimiters. This gets close enough for now.
VARIANCE = 0.03

# Flag to use when writing to a file. Changed to "w" by the -o flag.
FILE_FLAG = "x"

# if we're creating more than 1k files, something is probably wrong.
MAX_SPLITS = 1000

# logging
# NOTE: log records go to stdout, the same stream the resulting tarball names
# are printed on. At the default level only ERROR/CRITICAL records are emitted.
LOG = logging.getLogger(__name__)
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
LOG_VERBOSITY = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
logging.basicConfig(format=LOG_FORMAT, level=logging.ERROR, stream=sys.stdout)


def parse_args(argv=None):
    """Handle CLI arg parsing.

    Args:
        argv (list) optional argument list; defaults to ``sys.argv[1:]``

    Returns:
        (Namespace) the parsed command line arguments

    """
    parser = argparse.ArgumentParser(
        description="Cost Management CSV file packaging script", prog=sys.argv[0])

    # required args
    parser.add_argument("-f", "--filepath", required=True,
                        help="path to files to package")
    parser.add_argument(
        "-s",
        "--max-size",
        type=int,
        default=DEFAULT_MAX_SIZE,
        help=f"Maximum size of packages in MiB. (Default: {DEFAULT_MAX_SIZE} MiB)",
    )
    parser.add_argument(
        "-o", "--overwrite", action="store_true", default=False, help="whether to overwrite existing files."
    )
    parser.add_argument("--ocp-cluster-id", required=True,
                        help="OCP Cluster ID")
    parser.add_argument("-v", "--verbosity", action="count",
                        default=0, help="increase verbosity (up to -vvv)")
    return parser.parse_args(argv)


def write_part(filename, csvreader, header, num=0, size=(DEFAULT_MAX_SIZE * MEGABYTE)):
    """Split a part of the file into a new file.

    Rows are consumed from ``csvreader`` until it is exhausted or the
    estimated size of the written rows reaches ``size``.

    Args:
        filename (str) name of original file
        csvreader (iterator) iterator over the remaining rows of the original file
        header (list) the CSV file's header list
        num (int) the current split file index
        size (int) the maximum size of the split file in bytes

    Returns:
        (str) the name of the new split file
        (bool) whether the split reached the end of the csvreader

    Exits with status 2 when the split file cannot be written (including
    when it already exists and overwriting was not requested).

    """
    fname_part, ext = os.path.splitext(filename)
    size_estimate = 0
    split_filename = f"{fname_part}_{num}{ext}"
    try:
        # newline="" is what the csv module requires so that it controls
        # the line terminators itself.
        with open(split_filename, FILE_FLAG, newline="") as split_part:
            LOG.info("Writing new file: %s", split_filename)
            csvwriter = csv.writer(split_part)
            csvwriter.writerow(header)
            for row in csvreader:
                csvwriter.writerow(row)

                row_len = len(",".join(row))
                size_estimate += row_len + (row_len * VARIANCE)

                LOG.debug("file size (est): %s", size_estimate)
                if size_estimate >= size:
                    return (split_filename, False)
    except OSError as exc:  # FileExistsError is an OSError subclass
        LOG.critical("Fatal error: %s", exc)
        sys.exit(2)
    return (split_filename, True)


def need_split(filepath, max_size):
    """Determine whether to split up the CSV files.

    Args:
        filepath (str) a directory
        max_size (int) maximum split size in MiB

    Returns:
        True if any single file OR the total sum of files exceeds the MAX_SIZE
        False if each single file AND the total file size is below MAX_SIZE

    """
    total_size = 0
    max_bytes = max_size * MEGABYTE
    for filename in os.listdir(filepath):
        total_size += os.stat(os.path.join(filepath, filename)).st_size
        # The running total is never smaller than any single file, so this
        # one comparison covers both conditions.
        if total_size >= max_bytes:
            return True
    return False


def split_files(filepath, max_size):
    """Split any CSV files that exceed the file size threshold.

    Each oversized ``name.csv`` is replaced by ``name_1.csv``, ``name_2.csv``,
    ... which all repeat the original header. Directories, non-CSV files and
    files without any data rows are left untouched.

    Args:
        filepath (str) file path containing the CSV files
        max_size (int) the maximum size in MiB for each file

    """
    max_bytes = max_size * MEGABYTE
    for filename in sorted(os.listdir(filepath)):
        abspath = os.path.join(filepath, filename)
        if not (filename.endswith(".csv") and os.path.isfile(abspath)):
            continue
        if os.stat(abspath).st_size < max_bytes:
            continue

        parts = []
        with open(abspath, "r", newline="") as fhandle:
            csvreader = csv.reader(fhandle)
            csvheader = next(csvreader, None)
            LOG.debug("Header: %s", csvheader)

            part = 1
            # Peek at the next row before starting a part, so that a part
            # holding nothing but the header is never created.
            pending = next(csvreader, None) if csvheader is not None else None
            while pending is not None and part < MAX_SPLITS:
                newfile, _ = write_part(
                    abspath,
                    itertools.chain([pending], csvreader),
                    csvheader,
                    num=part,
                    size=max_bytes,
                )
                parts.append(newfile)
                part += 1
                pending = next(csvreader, None)

            if pending is not None:
                LOG.warning(
                    "Reached the limit of %s split files for %s; remaining rows were not written",
                    MAX_SPLITS, abspath)

        if not parts:
            # nothing was split off (empty or header-only file); keep it.
            continue

        os.remove(abspath)

        # report the list of split files
        LOG.info("Split files: %s", parts)


def render_manifest(args, archivefiles=None):
    """Render the manifest template and write it to a file.

    The manifest lists one upload name per CSV file in ``archivefiles``,
    using the same naming scheme and ``.csv`` filter as write_tarball().

    Args:
        args (Namespace) an ArgumentParser Namespace object
        archivefiles (list) the files that will be archived

    Returns:
        (str) the rendered manifest file name
        (str) the manifest uuid

    Exits with status 2 when the manifest cannot be written (including
    when it already exists and overwriting was not requested).
    """
    archivefiles = [] if archivefiles is None else archivefiles
    manifest_uuid = str(uuid4())

    # work on a copy so the module-level template is never modified
    manifest = dict(TEMPLATE)
    # naive UTC timestamp, same serialized format as datetime.utcnow().isoformat()
    manifest["date"] = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
    manifest["cluster_id"] = args.ocp_cluster_id
    manifest["uuid"] = manifest_uuid
    csv_count = sum(1 for fname in archivefiles if fname.endswith(".csv"))
    manifest["files"] = [
        f"{manifest_uuid}_openshift_usage_report.{idx}.csv" for idx in range(csv_count)
    ]
    LOG.debug("rendered manifest: %s", manifest)
    manifest_filename = f"{args.filepath}/manifest.json"

    if not os.path.exists(args.filepath):
        os.makedirs(args.filepath)
        LOG.info("Created dirs: %s", args.filepath)

    try:
        with open(manifest_filename, FILE_FLAG) as mfile:
            json.dump(manifest, mfile)
    except OSError as exc:  # FileExistsError is an OSError subclass
        LOG.critical("Fatal error: %s", exc)
        sys.exit(2)
    LOG.info("manifest generated")
    return (manifest_filename, manifest_uuid)


def write_tarball(args, tarfilename, manifest_filename, manifest_uuid, archivefiles, file_count=0):
    """Write a tarball, adding the given files to the archive.

    Only files ending in ``.csv`` are added; each one is stored under the
    upload name ``<manifest_uuid>_openshift_usage_report.<n>.csv``. The
    manifest is always added as ``manifest.json``.

    Args:
        args (Namespace) an ArgumentParser Namespace object
        tarfilename (str) the name of the tarball to create
        manifest_filename (str) the name of the report manifest
        manifest_uuid (str) the unique identifier of the manifest
        archivefiles (list) the list of files to include in the archive
        file_count (int) file number initializer

    Returns:
        (str) full filepath of the created tarball, or None when
              ``archivefiles`` is empty and nothing was written
        (int) the file number to use for the next CSV file

    Exits with status 2 when ``tarfilename`` already exists and overwriting
    was not requested.
    """
    if not archivefiles:
        # always hand back a pair: callers unpack the result
        return None, file_count

    try:
        with tarfile.open(tarfilename, f"{FILE_FLAG}:gz") as tarball:
            for fname in archivefiles:
                LOG.debug("Adding %s to %s: ", fname, tarfilename)
                if fname.endswith(".csv"):
                    upload_name = f"{manifest_uuid}_openshift_usage_report.{file_count}.csv"
                    tarball.add(fname, arcname=upload_name)
                    file_count += 1
            tarball.add(manifest_filename, arcname="manifest.json")
    except FileExistsError as exc:
        LOG.critical(exc)
        sys.exit(2)
    LOG.info("Wrote: %s", tarfilename)
    return f"{tarfilename}", file_count


def build_local_csv_file_list(staging_directory):
    """Build a sorted list of all report csv files in staging directory.

    Args:
        staging_directory (str) the directory holding the downloaded reports

    Returns:
        (list) paths of the files whose name ends in ``.csv``
    """
    return [
        os.path.join(staging_directory, csv_file)
        for csv_file in sorted(os.listdir(staging_directory))
        if csv_file.endswith(".csv")
    ]


def main(argv=None):
    """Package the reports found in the staging directory into tarballs.

    The tarballs are written to the parent of the staging directory and
    their absolute paths are printed to stdout, one per line.

    Args:
        argv (list) optional argument list; defaults to ``sys.argv[1:]``

    Returns:
        (list) the absolute paths of the tarballs that were written
    """
    global FILE_FLAG

    args = parse_args(argv)
    if args.verbosity:
        # clamp so that -vvvv and beyond behave like -vvv
        level_idx = min(args.verbosity, len(LOG_VERBOSITY) - 1)
        LOG.setLevel(LOG_VERBOSITY[level_idx])
    LOG.debug("CLI Args: %s", args)

    if args.overwrite:
        FILE_FLAG = "w"

    out_files = []
    parent_dir = os.path.join(args.filepath, os.pardir)

    if need_split(args.filepath, args.max_size):
        # one tarball per (split) CSV file, all sharing a single manifest
        split_files(args.filepath, args.max_size)
        file_list = build_local_csv_file_list(args.filepath)
        if file_list:
            manifest_filename, manifest_uuid = render_manifest(args, file_list)
            file_count = 0
            for idx, filename in enumerate(file_list):
                tarfilename = os.path.abspath(
                    os.path.join(parent_dir, f"cost-mgmt{idx}.tar.gz"))
                output_tar, file_count = write_tarball(
                    args, tarfilename, manifest_filename, manifest_uuid, [filename], file_count)
                if output_tar:
                    out_files.append(output_tar)
    else:
        # everything fits in a single tarball
        tarfilename = os.path.abspath(os.path.join(parent_dir, "cost-mgmt.tar.gz"))
        file_list = build_local_csv_file_list(args.filepath)
        if file_list:
            manifest_filename, manifest_uuid = render_manifest(args, file_list)
            output_tar, _ = write_tarball(
                args, tarfilename, manifest_filename, manifest_uuid, file_list)
            if output_tar:
                out_files.append(output_tar)

    for fname in out_files:
        print(fname)
    return out_files


if __name__ == "__main__":
    main()
token name + debug: msg='Reporting Operator service account token name is not defined' + when: not reporting_operator_token_name + +- name: Fail if the clusterID or service token are not defined + fail: + msg: 'The CostManagement custom resource requires the clusterID and reporting_operator_token_name to be defined.' + when: not ocp_cluster_id or not reporting_operator_token_name + +- name: Set upload_wait + set_fact: + collect_upload_wait: "{{ current_cr_spec.upload_wait | int }}" + when: current_cr_spec.upload_wait + ignore_errors: true + +- name: Format current_month string if less than 10 + set_fact: + current_month: '{{ "0" + (current_month | string) }}' + when: + - (current_month | int) < 10 + +- name: Set monthly suffix for reports + set_fact: + current_year_month: '{{ (current_year | string ) + (current_month | string) }}' + +- name: Obtain metering api info + community.kubernetes.k8s_info: + api_version: v1 + kind: Route + namespace: "{{ namespace }}" + register: metering_route + +- name: Set metering api route + set_fact: + metering_api_route: "{{ api_prefix }}{{ metering_route.resources[0].spec.host }}/api/v1/reports/get" + when: metering_route.resources + +- name: Get the service account token + community.kubernetes.k8s_info: + api_version: v1 + kind: Secret + namespace: "{{ namespace }}" + name: "{{ reporting_operator_token_name }}" + register: reporting_token + +- name: Set authentication_secret name + set_fact: + authentication_secret_name: "{{ current_cr_spec.authentication_secret_name }}" + when: current_cr_spec.authentication_secret_name + +- name: debug auth secret name + debug: + var: authentication_secret_name + when: debug + +- name: Set the authentication method + set_fact: + authentication: "{{ current_cr_spec.authentication }}" + when: current_cr_spec.authentication + ignore_errors: true + +- name: debug auth method + debug: + var: authentication + when: debug + +- name: Set the ingress URL + set_fact: + ingress_url: "{{ 
current_cr_spec.ingress_url }}" + when: current_cr_spec.ingress_url + ignore_errors: true + +- name: debug ingress URL + debug: + var: ingress_url + when: debug + +- name: Fail if auth secret is not set + fail: + msg: 'The cost-mgmt-setup custom resource requires the authentication_secret_name to be defined.' + when: not authentication_secret_name + +- name: Get the authentication secret + community.kubernetes.k8s_info: + api_version: v1 + kind: Secret + namespace: "{{ namespace }}" + name: "{{ authentication_secret_name }}" + register: authentication_secret + +- name: Decode the service account token + set_fact: + reporting_operator_token: "{{ reporting_token.resources[0].data.token | b64decode }}" + when: reporting_token.resources + +- name: Fail when reporting_operator_token not defined + fail: + msg: 'Reporting Operator token does not exist' + when: not reporting_operator_token + +- name: Fail if the authentication secret could not be found + fail: + msg: 'The authentication secret could not be found.' + when: not authentication_secret.resources + +- name: If authentication is set to token, get the auth token + set_fact: + authentication_token: "{{ authentication_secret.resources[0].data.token }}" + when: authentication_secret.resources and authentication == 'token' + +- name: If authentication is set to basic then grab username and password + set_fact: + username: "{{ authentication_secret.resources[0].data.username | b64decode }}" + password: "{{ authentication_secret.resources[0].data.password | b64decode }}" + when: authentication_secret.resources and authentication == 'basic' + +- name: Fail if no token but token is specified + fail: + msg: 'The authentication method was set to token but the authentication secret did not contain a token.' 
+ when: authentication == 'token' and not authentication_token + +- name: Fail if no username but basic authentication is specified + fail: + msg: 'The authentication method was set to basic but the authentication secret did not contain a username.' + when: authentication == 'basic' and not username + +- name: Fail if no password but basic authentication is specified + fail: + msg: 'The authentication method was set to basic but the authentication secret did not contain a password.' + when: authentication == 'basic' and not password + +- name: Check if cert file exists + stat: + path: "{{ cacert_path }}" + register: trusted_cert + +- name: Fail if the trusted cert does not exist + fail: + msg: 'Failing because the ssl certificate does not exist.' + when: not trusted_cert + +- name: Obtain the source commit from file + set_fact: + source_commit: "{{ lookup('file', ansible_env.HOME + '/commit') }}" + +- name: debug the source_commit + debug: + var: source_commit + when: debug + +- name: Create trusted-ca-bundle if it doesn't exist + community.kubernetes.k8s: + namespace: "{{ namespace }}" + state: present + src: '{{ ansible_env.HOME }}/roles/collect/files/trusted_ca_certmap.yaml' + +- name: Get the trusted-ca-bundle + community.kubernetes.k8s_info: + api_version: v1 + kind: ConfigMap + namespace: "{{ namespace }}" + name: "trusted-ca-bundle" + register: trusted_ca_bundle + +- name: Set the trusted-ca-bundle crt contents + set_fact: + trusted_ca_bundle_contents: "{{ trusted_ca_bundle.resources[0].data['ca-bundle.crt'] }}" + when: trusted_ca_bundle.resources + +- name: Write the trusted-ca-bundle contents to a file + copy: content="{{ trusted_ca_bundle_contents }}" dest="{{ cacert_path }}" + when: trusted_ca_bundle_contents is defined + +- name: Fail the trusted ca certificate could not be found and certificate validation is enabled + fail: + msg: 'The trusted ca certificate could not be found and certificate validation is enabled.' 
+ when: trusted_ca_bundle_contents is not defined + +- name: Set download request facts + set_fact: + collect_file_prefix: '{{ collect_manifest_uuid }}' + format: "&format={{ collect_format }}" + namespace: "&namespace={{ namespace }}" + +# getting a little clever to build lists to append into +- name: initialize fact lists + set_fact: + api_params: [] + api_urls: [] + csv_files: [] + +# this appends the string inside the brackets to the 'api_params' list. +- name: compile URL query params, append to param list + set_fact: + api_params: "{{ api_params + ['?name='+item+current_year_month+format+namespace] }}" + with_items: "{{ collect_reports }}" + +# this appends the string inside the brackets to the 'api_urls' list. +- name: assemble compiled URL facts, append to list. + set_fact: + api_urls: "{{ api_urls + [metering_api_route+item] }}" + with_items: "{{ api_params }}" + +- name: Set download_path + set_fact: + collect_cluster_download_path: '{{ collect_download_path }}/{{ ocp_cluster_id }}' + +- name: Remove temp files + file: + path: '{{ collect_cluster_download_path }}' + state: absent + when: collect_delete_after | bool + +- name: Create temp dir for downloaded files + file: + path: '{{ collect_cluster_download_path }}' + state: directory + mode: 0777 + +- name: Download OCP report from endpoint + get_url: + url: '{{ item }}' + headers: + Authorization: "Bearer {{ reporting_operator_token }}" + dest: '{{ collect_cluster_download_path }}/{{ collect_file_prefix }}_openshift_usage_report.{{ idx }}.{{ collect_format }}' + validate_certs: '{{ ocp_validate_cert | bool }}' + timeout: '{{ collect_ocp_report_timeout }}' + with_items: "{{ api_urls }}" + loop_control: + index_var: idx + register: download_result + +- name: debug download result + debug: + var: download_result + when: debug + +- name: append filename to fact list + set_fact: + csv_files: "{{ csv_files + [item.dest | basename] }}" + with_items: "{{ download_result.results }}" + +- name: debug csv_files + 
debug: + var: csv_files + when: debug + +- name: Check that required files exist + stat: + path: '{{ collect_cluster_download_path + "/" + item }}' + register: csv_stat_result + with_items: + - '{{ csv_files }}' + +- name: debug the csv_stat_result + debug: + var: csv_stat_result + when: debug + +- name: Check for empty download results + fail: + msg: 'Downloaded file {{ item }} has no content or could not be found: {{ item.stat }}.' + when: not item.stat.exists or (item.stat.exists and item.stat.size <= 0) + with_items: + - '{{ csv_stat_result.results }}' + +- name: Run packaging script to prepare reports for sending to Insights + script: package_report.py --filepath {{ ocp_cluster_id }} --max-size {{ collect_max_csvfile_size }} --ocp-cluster-id {{ ocp_cluster_id }} --overwrite + args: + chdir: '{{ collect_download_path }}' + register: packaged_reports + +- name: Wait time before upload in seconds + debug: + var: collect_upload_wait + +- name: Wait before upload to space out metric delivery + wait_for: + timeout: '{{ collect_upload_wait }}' + delegate_to: localhost + +- name: Upload the cost report to ingress using basic auth + shell: + cmd: 'curl -vvvv -F "file=@{{ item }};type=application/vnd.redhat.hccm.tar+tgz" {{ ingress_url }} -u {{ username }}:{{ password }} --cacert {{ cacert_path }}' + chdir: '{{ collect_download_path }}' + with_items: + - '{{ packaged_reports.stdout_lines }}' + when: authentication == 'basic' + +- name: Upload the cost report to ingress using token auth + shell: + cmd: 'curl -vvvv -F "file=@{{ item }};type=application/vnd.redhat.hccm.tar+tgz" {{ ingress_url }} -H "Authorization: Bearer {{ authentication_token }}" -H "User-Agent: cost-mgmt-operator/{{ source_commit }} cluster/{{ ocp_cluster_id }}" --cacert {{ cacert_path }}' + chdir: '{{ collect_download_path }}' + with_items: + - '{{ packaged_reports.stdout_lines }}' + when: authentication == 'token' + +- name: Remove upload files + file: + path: '{{ collect_download_path }}/{{ item }}' 
+ state: absent + with_items: + - '{{ packaged_reports.stdout_lines }}' + when: collect_delete_after | bool + +- include_tasks: update-status.yml + vars: + status_vars: + upload_attempt_time: "{{ ansible_date_time.iso8601 }}" diff --git a/roles/setup/tasks/main.yml b/roles/setup/tasks/main.yml index fd524d6..840ffa3 100644 --- a/roles/setup/tasks/main.yml +++ b/roles/setup/tasks/main.yml @@ -164,10 +164,14 @@ state: 'absent' when: (current_day | int) < 3 -- include_tasks: update-status.yml +- name: Upload metric data + import_tasks: collect.yml vars: - status_vars: - upload_attempt_time: "{{ ansible_date_time.iso8601 }}" + current_month: '{{ current_month }}' + current_year: '{{ current_year }}' + namespace: "{{ meta.namespace }}" + current_cr: "{{ current_cr }}" + current_cr_spec: "{{ current_cr_spec }}" when: - collect_data is defined - collect_data | bool