Commit

Run data collection based on elapsed time.
chambridge committed Sep 3, 2020
1 parent 0ca561a commit 9db3f6d
Showing 5 changed files with 676 additions and 4 deletions.
25 changes: 24 additions & 1 deletion roles/setup/defaults/main.yml
@@ -5,4 +5,27 @@ current_year: '{{ ansible_date_time.year | int }}'
setup_template_path: '/tmp/cost-mgmt-operator-collect'
setup_template_dir: "{{ lookup('password', '/dev/null chars=ascii_letters') }}"
setup_delete_after: 'true'
upload_cycle_seconds: 21600
collect_format: 'csv'
collect_manifest_uuid: '{{ 99999999999999999999 | random | to_uuid }}'
collect_archive_name: cost-mgmt
ocp_validate_cert: 'true'
ocp_cluster_id: ''
reporting_operator_token_name: ''
collect_reports:
- 'cm-openshift-usage-lookback-'
- 'cm-openshift-persistentvolumeclaim-lookback-'
- 'cm-openshift-node-labels-lookback-'
collect_download_path: '/tmp/cost-mgmt-operator-collect'
collect_delete_after: 'true'
collect_ocp_report_timeout: 60
collect_max_csvfile_size: 99
api_prefix: 'https://'
ingress_url: 'https://cloud.redhat.com/api/ingress/v1/upload'
authentication: 'token'
authentication_token: ''
username: ''
password: ''
cacert_path: '{{ collect_download_path }}/ca-bundle.crt'
debug: 'true'
collect_upload_wait: '{{ 2100 | random(step=10) }}'
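
Note: collect_upload_wait is what staggers collection runs across clusters.
The Jinja2 expression '{{ 2100 | random(step=10) }}' evaluates to a random
multiple of 10 up to 2100 seconds. A rough Python equivalent, for
illustration only (the function name here is ours, not part of the commit):

import random

def upload_wait_seconds(end=2100, step=10):
    # Approximates Ansible's random filter with a step argument:
    # a random multiple of `step` between 0 and `end`.
    return random.randrange(0, end + 1, step)

print(upload_wait_seconds())  # e.g. 1340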
300 changes: 300 additions & 0 deletions roles/setup/files/package_report.py
@@ -0,0 +1,300 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2020 Red Hat, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Create a tarball for metering reports downloaded from an OpenShift cluster."""

import argparse
import csv
import json
import logging
import os
import sys
import tarfile
from datetime import datetime
from uuid import uuid4

DEFAULT_MAX_SIZE = 100
MEGABYTE = 1024 * 1024

TEMPLATE = {
"files": None,
"date": datetime.utcnow().isoformat(),
"uuid": None,
"cluster_id": None
}


# the csv module doesn't expose the bytes-offset of the
# underlying file object.
#
# instead, the script estimates the size of the data as VARIANCE percent larger than a
# naïve string concatenation of the CSV fields to cover the overhead of quoting
# and delimiters. This gets close enough for now.
VARIANCE = 0.03
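# Example: a 120-byte joined row is booked as 120 * 1.03 (roughly 124 bytes),
# so each part file is closed slightly before its true size reaches the cap.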

# Flag to use when writing to a file. Changed to "w" by the -o flag.
FILE_FLAG = "x"

# if we're creating more than 1k files, something is probably wrong.
MAX_SPLITS = 1000

# logging
LOG = logging.getLogger(__name__)
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
LOG_VERBOSITY = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
logging.basicConfig(format=LOG_FORMAT, level=logging.ERROR, stream=sys.stdout)


def parse_args():
"""Handle CLI arg parsing."""
parser = argparse.ArgumentParser(
description="Cost Management CSV file packaging script", prog=sys.argv[0])

# required args
parser.add_argument("-f", "--filepath", required=True,
help="path to files to package")
parser.add_argument(
"-s",
"--max-size",
type=int,
default=DEFAULT_MAX_SIZE,
help=f"Maximum size of packages in MiB. (Default: {DEFAULT_MAX_SIZE} MiB)",
)
parser.add_argument(
"-o", "--overwrite", action="store_true", default=False, help="whether to overwrite existing files."
)
parser.add_argument("--ocp-cluster-id", required=True,
help="OCP Cluster ID")
parser.add_argument("-v", "--verbosity", action="count",
default=0, help="increase verbosity (up to -vvv)")
return parser.parse_args()


def write_part(filename, csvreader, header, num=0, size=(DEFAULT_MAX_SIZE * MEGABYTE)):
"""Split a part of the file into a new file.
Args:
filename (str) name of original file
csvreader (CSVReader) the csvreader object of original file
header (list) the CSV file's header list
num (int) the current split file index
size (int) the maximum size of the split file in bytes
Returns:
(str) the name of the new split file
(bool) whether the split reached the end of the csvreader
"""
fname_part, ext = os.path.splitext(filename)
size_estimate = 0
split_filename = f"{fname_part}_{num}{ext}"
try:
with open(split_filename, FILE_FLAG) as split_part:
LOG.info(f"Writing new file: {split_filename}")
csvwriter = csv.writer(split_part)
csvwriter.writerow(header)
for row in csvreader:
csvwriter.writerow(row)

row_len = len(",".join(row))
size_estimate += row_len + (row_len * VARIANCE)

LOG.debug(f"file size (est): {size_estimate}")
if size_estimate >= size:
return (split_filename, False)
except (IOError, FileExistsError) as exc:
LOG.critical(f"Fatal error: {exc}")
sys.exit(2)
return (split_filename, True)


def need_split(filepath, max_size):
"""Determine whether to split up the CSV files.
Args:
filepath (str) a directory
max_size (int) maximum split size in MiB
Returns:
        True if any single file OR the running total exceeds max_size
        False if every file AND the total size stay below max_size
"""
total_size = 0
max_bytes = max_size * MEGABYTE
for filename in os.listdir(filepath):
this_size = os.stat(f"{filepath}/{filename}").st_size
total_size += this_size
if this_size >= max_bytes or total_size >= max_bytes:
return True
return False


def split_files(filepath, max_size):
"""Split any files that exceed the file size threshold.
Args:
filepath (str) file path containing the CSV files
max_size (int) the maximum size in MiB for each file
"""
    for filename in os.listdir(filepath):
        abspath = f"{filepath}/{filename}"
        if os.stat(abspath).st_size >= max_size * MEGABYTE:
            csvheader = None
            split_names = []  # renamed so the list doesn't shadow this function
            with open(abspath, "r") as fhandle:
                csvreader = csv.reader(fhandle)
                csvheader = next(csvreader)
                LOG.debug(f"Header: {csvheader}")

                part = 1
                while True:
                    newfile, eof = write_part(
                        abspath, csvreader, csvheader, num=part, size=(max_size * MEGABYTE))
                    split_names.append(newfile)
                    part += 1
                    if eof or part >= MAX_SPLITS:
                        break

            os.remove(abspath)

            # log the split file names (the root logger streams to stdout)
            LOG.info(f"Split files: {split_names}")


def render_manifest(args, archivefiles=None):
"""Render the manifest template and write it to a file.
Args:
        args (Namespace) an ArgumentParser Namespace object
        archivefiles (list) the files to record in the manifest
Returns:
(str) the rendered manifest file name
(str) the manifest uuid
"""
    archivefiles = archivefiles or []
    manifest = TEMPLATE.copy()  # copy so the module-level template is not mutated
manifest_uuid = str(uuid4())
manifest["cluster_id"] = args.ocp_cluster_id
manifest["uuid"] = manifest_uuid
manifest_files = []
for idx in range(len(archivefiles)):
upload_name = f"{manifest_uuid}_openshift_usage_report.{idx}.csv"
manifest_files.append(upload_name)
manifest["files"] = manifest_files
LOG.debug(f"rendered manifest: {manifest}")
manifest_filename = f"{args.filepath}/manifest.json"

if not os.path.exists(args.filepath):
os.makedirs(args.filepath)
LOG.info(f"Created dirs: {args.filepath}")

try:
with open(manifest_filename, FILE_FLAG) as mfile:
json.dump(manifest, mfile)
except FileExistsError as exc:
LOG.critical(f"Fatal error: {exc}")
sys.exit(2)
    LOG.info("manifest generated")
return (manifest_filename, manifest_uuid)


def write_tarball(args, tarfilename, manifest_filename, manifest_uuid, archivefiles, file_count=0):
"""Write a tarball, adding the given files to the archive.
Args:
args (Namespace) an ArgumentParser Namespace object
tarfilename (str) the name of the tarball to create
manifest_filename (str) the name of the report manifest
manifest_uuid (str) the unique identifier of the manifest
archivefiles (list) the list of files to include in the archive
file_count (int) file number initializer
    Returns:
        (str) full filepath of the created tarball, or None when there is nothing to archive
        (int) the updated running file count
    Exits:
        with status 2 if tarfilename already exists and -o/--overwrite was not given
"""
    if not archivefiles:
        return None, file_count

try:
with tarfile.open(tarfilename, f"{FILE_FLAG}:gz") as tarball:
for fname in archivefiles:
LOG.debug(f"Adding {fname} to {tarfilename}: ")
if fname.endswith(".csv"):
upload_name = f"{manifest_uuid}_openshift_usage_report.{file_count}.csv"
tarball.add(fname, arcname=upload_name)
file_count += 1
tarball.add(manifest_filename, arcname="manifest.json")
except FileExistsError as exc:
LOG.critical(exc)
sys.exit(2)
LOG.info(f"Wrote: {tarfilename}")
    return tarfilename, file_count


def build_local_csv_file_list(staging_directory):
"""Build a list of all report csv files in staging directory."""
file_list = []
for csv_file in os.listdir(staging_directory):
if ".csv" in csv_file:
file_list.append(f"{staging_directory}/{csv_file}")
return file_list

if "__main__" in __name__:
args = parse_args()
    if args.verbosity:
        # clamp so anything past -vvv still maps to DEBUG instead of raising IndexError
        LOG.setLevel(LOG_VERBOSITY[min(args.verbosity, len(LOG_VERBOSITY) - 1)])
LOG.debug("CLI Args: %s", args)

if args.overwrite:
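        # rebinding the module-level flag switches every later open() and
        # tarfile.open() from exclusive create ("x") to overwrite ("w")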
FILE_FLAG = "w"

out_files = []
    should_split = need_split(args.filepath, args.max_size)  # avoid shadowing the function name
    if should_split:
        split_files(args.filepath, args.max_size)
tarpath = args.filepath + "/../"
tarfiletmpl = "cost-mgmt{}.tar.gz"

file_list = build_local_csv_file_list(args.filepath)
manifest_filename, manifest_uuid = render_manifest(args, file_list)
file_count = 0
for idx, filename in enumerate(file_list):
if ".csv" in filename:
                tarfilename = os.path.abspath(tarpath + tarfiletmpl.format(idx))
                output_tar, file_count = write_tarball(
                    args, tarfilename, manifest_filename, manifest_uuid, [filename], file_count)
if output_tar:
out_files.append(output_tar)

else:
tarfilename = os.path.abspath(args.filepath + "/../cost-mgmt.tar.gz")

file_list = build_local_csv_file_list(args.filepath)
if file_list:
manifest_filename, manifest_uuid = render_manifest(args, file_list)
output_tar, _ = write_tarball(args, tarfilename, manifest_filename, manifest_uuid, file_list)
if output_tar:
out_files.append(output_tar)

for fname in out_files:
print(fname)
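
For context, a minimal sketch of how the playbook might drive this script
(the paths and cluster id are illustrative assumptions; only the flags come
from the argparse definition above):

import subprocess
import sys

result = subprocess.run(
    [
        sys.executable, "package_report.py",
        "--filepath", "/tmp/cost-mgmt-operator-collect",
        "--ocp-cluster-id", "example-cluster-id",
        "--max-size", "99",  # mirrors collect_max_csvfile_size in defaults/main.yml
        "-vv",
    ],
    capture_output=True, text=True, check=True,
)
# the script prints one tarball path per line on stdout
tarballs = result.stdout.splitlines()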
11 changes: 11 additions & 0 deletions roles/setup/files/trusted_ca_certmap.yaml
@@ -0,0 +1,11 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: openshift-metering
  name: trusted-ca-bundle
  annotations:
    release.openshift.io/create-only: "true"
  labels:
    config.openshift.io/inject-trusted-cabundle: "true"
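
The config.openshift.io/inject-trusted-cabundle label asks OpenShift to
populate this ConfigMap with the cluster-wide CA bundle, which the role
evidently writes to cacert_path ('{{ collect_download_path }}/ca-bundle.crt'
in defaults/main.yml). A hedged sketch of how an upload step could use it
(the upload code is not part of this commit, and the content type shown is
an assumption):

import requests

INGRESS_URL = "https://cloud.redhat.com/api/ingress/v1/upload"
CACERT_PATH = "/tmp/cost-mgmt-operator-collect/ca-bundle.crt"

def upload_tarball(tarball_path, token):
    # POST the packaged tarball, verifying TLS against the injected CA bundle
    with open(tarball_path, "rb") as payload:
        resp = requests.post(
            INGRESS_URL,
            headers={"Authorization": f"Bearer {token}"},
            files={"file": (tarball_path, payload,
                            "application/vnd.redhat.hccm.tar+tgz")},
            verify=CACERT_PATH,
        )
    resp.raise_for_status()
    return resp.status_code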