Commit

Run data collection based on elapsed time.
chambridge committed Sep 3, 2020
1 parent 0ca561a commit 9db3f6d
Showing 5 changed files with 676 additions and 4 deletions.
25 changes: 24 additions & 1 deletion roles/setup/defaults/main.yml
@@ -5,4 +5,27 @@ current_year: '{{ ansible_date_time.year | int }}'
setup_template_path: '/tmp/cost-mgmt-operator-collect'
setup_template_dir: "{{ lookup('password', '/dev/null chars=ascii_letters') }}"
setup_delete_after: 'true'
upload_cycle_seconds: 21600
collect_format: 'csv'
collect_manifest_uuid: '{{ 99999999999999999999 | random | to_uuid }}'
collect_archive_name: cost-mgmt
ocp_validate_cert: 'true'
ocp_cluster_id: ''
reporting_operator_token_name: ''
collect_reports:
- 'cm-openshift-usage-lookback-'
- 'cm-openshift-persistentvolumeclaim-lookback-'
- 'cm-openshift-node-labels-lookback-'
collect_download_path: '/tmp/cost-mgmt-operator-collect'
collect_delete_after: 'true'
collect_ocp_report_timeout: 60
collect_max_csvfile_size: 99
api_prefix: 'https://'
ingress_url: 'https://cloud.redhat.com/api/ingress/v1/upload'
authentication: 'token'
authentication_token: ''
username: ''
password: ''
cacert_path: '{{ collect_download_path }}/ca-bundle.crt'
debug: 'true'
collect_upload_wait: '{{ 2100 | random(step=10) }}'
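
Note: collect_upload_wait is what staggers collection runs across clusters.
The Jinja2 expression '{{ 2100 | random(step=10) }}' evaluates to a random
multiple of 10 up to 2100 seconds. A rough Python equivalent, for
illustration only (the function name here is ours, not part of the commit):

import random

def upload_wait_seconds(end=2100, step=10):
    # Approximates Ansible's random filter with a step argument:
    # a random multiple of `step` between 0 and `end`.
    return random.randrange(0, end + 1, step)

print(upload_wait_seconds())  # e.g. 1340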
300 changes: 300 additions & 0 deletions roles/setup/files/package_report.py
@@ -0,0 +1,300 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright 2020 Red Hat, Inc.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""Create a tarball for metering reports downloaded from an OpenShift cluster."""

import argparse
import csv
import json
import logging
import os
import sys
import tarfile
from datetime import datetime
from uuid import uuid4

DEFAULT_MAX_SIZE = 100
MEGABYTE = 1024 * 1024

TEMPLATE = {
"files": None,
"date": datetime.utcnow().isoformat(),
"uuid": None,
"cluster_id": None
}


# the csv module doesn't expose the bytes-offset of the
# underlying file object.
#
# instead, the script estimates the size of the data as VARIANCE percent larger than a
# naïve string concatenation of the CSV fields to cover the overhead of quoting
# and delimiters. This gets close enough for now.
VARIANCE = 0.03
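# Example: a 120-byte joined row is booked as 120 * 1.03 (roughly 124 bytes),
# so each part file is closed slightly before its true size reaches the cap.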

# Flag to use when writing to a file. Changed to "w" by the -o flag.
FILE_FLAG = "x"

# if we're creating more than 1k files, something is probably wrong.
MAX_SPLITS = 1000

# logging
LOG = logging.getLogger(__name__)
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
LOG_VERBOSITY = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
logging.basicConfig(format=LOG_FORMAT, level=logging.ERROR, stream=sys.stdout)


def parse_args():
"""Handle CLI arg parsing."""
parser = argparse.ArgumentParser(
description="Cost Management CSV file packaging script", prog=sys.argv[0])

# required args
parser.add_argument("-f", "--filepath", required=True,
help="path to files to package")
parser.add_argument(
"-s",
"--max-size",
type=int,
default=DEFAULT_MAX_SIZE,
help=f"Maximum size of packages in MiB. (Default: {DEFAULT_MAX_SIZE} MiB)",
)
parser.add_argument(
"-o", "--overwrite", action="store_true", default=False, help="whether to overwrite existing files."
)
parser.add_argument("--ocp-cluster-id", required=True,
help="OCP Cluster ID")
parser.add_argument("-v", "--verbosity", action="count",
default=0, help="increase verbosity (up to -vvv)")
return parser.parse_args()


def write_part(filename, csvreader, header, num=0, size=(DEFAULT_MAX_SIZE * MEGABYTE)):
"""Split a part of the file into a new file.
Args:
filename (str) name of original file
csvreader (CSVReader) the csvreader object of original file
header (list) the CSV file's header list
num (int) the current split file index
size (int) the maximum size of the split file in bytes
Returns:
(str) the name of the new split file
(bool) whether the split reached the end of the csvreader
"""
fname_part, ext = os.path.splitext(filename)
size_estimate = 0
split_filename = f"{fname_part}_{num}{ext}"
try:
with open(split_filename, FILE_FLAG) as split_part:
LOG.info(f"Writing new file: {split_filename}")
csvwriter = csv.writer(split_part)
csvwriter.writerow(header)
for row in csvreader:
csvwriter.writerow(row)

row_len = len(",".join(row))
size_estimate += row_len + (row_len * VARIANCE)

LOG.debug(f"file size (est): {size_estimate}")
if size_estimate >= size:
return (split_filename, False)
except (IOError, FileExistsError) as exc:
LOG.critical(f"Fatal error: {exc}")
sys.exit(2)
return (split_filename, True)


def need_split(filepath, max_size):
"""Determine whether to split up the CSV files.
Args:
filepath (str) a directory
max_size (int) maximum split size in MiB
Returns:
        True if any single file OR the running total exceeds max_size
        False if every file AND the total size stay below max_size
"""
total_size = 0
max_bytes = max_size * MEGABYTE
for filename in os.listdir(filepath):
this_size = os.stat(f"{filepath}/{filename}").st_size
total_size += this_size
if this_size >= max_bytes or total_size >= max_bytes:
return True
return False


def split_files(filepath, max_size):
"""Split any files that exceed the file size threshold.
Args:
filepath (str) file path containing the CSV files
max_size (int) the maximum size in MiB for each file
"""
    for filename in os.listdir(filepath):
        abspath = f"{filepath}/{filename}"
        if os.stat(abspath).st_size >= max_size * MEGABYTE:
            csvheader = None
            split_names = []  # renamed so the list doesn't shadow this function
            with open(abspath, "r") as fhandle:
                csvreader = csv.reader(fhandle)
                csvheader = next(csvreader)
                LOG.debug(f"Header: {csvheader}")

                part = 1
                while True:
                    newfile, eof = write_part(
                        abspath, csvreader, csvheader, num=part, size=(max_size * MEGABYTE))
                    split_names.append(newfile)
                    part += 1
                    if eof or part >= MAX_SPLITS:
                        break

            os.remove(abspath)

            # log the split file names (the root logger streams to stdout)
            LOG.info(f"Split files: {split_names}")


def render_manifest(args, archivefiles=None):
"""Render the manifest template and write it to a file.
Args:
        args (Namespace) an ArgumentParser Namespace object
        archivefiles (list) the files to record in the manifest
Returns:
(str) the rendered manifest file name
(str) the manifest uuid
"""
    archivefiles = archivefiles or []
    manifest = TEMPLATE.copy()  # copy so the module-level template is not mutated
manifest_uuid = str(uuid4())
manifest["cluster_id"] = args.ocp_cluster_id
manifest["uuid"] = manifest_uuid
manifest_files = []
for idx in range(len(archivefiles)):
upload_name = f"{manifest_uuid}_openshift_usage_report.{idx}.csv"
manifest_files.append(upload_name)
manifest["files"] = manifest_files
LOG.debug(f"rendered manifest: {manifest}")
manifest_filename = f"{args.filepath}/manifest.json"

if not os.path.exists(args.filepath):
os.makedirs(args.filepath)
LOG.info(f"Created dirs: {args.filepath}")

try:
with open(manifest_filename, FILE_FLAG) as mfile:
json.dump(manifest, mfile)
except FileExistsError as exc:
LOG.critical(f"Fatal error: {exc}")
sys.exit(2)
    LOG.info("manifest generated")
return (manifest_filename, manifest_uuid)


def write_tarball(args, tarfilename, manifest_filename, manifest_uuid, archivefiles, file_count=0):
"""Write a tarball, adding the given files to the archive.
Args:
args (Namespace) an ArgumentParser Namespace object
tarfilename (str) the name of the tarball to create
manifest_filename (str) the name of the report manifest
manifest_uuid (str) the unique identifier of the manifest
archivefiles (list) the list of files to include in the archive
file_count (int) file number initializer
    Returns:
        (str) full filepath of the created tarball, or None when there is nothing to archive
        (int) the updated running file count
    Exits:
        with status 2 if tarfilename already exists and -o/--overwrite was not given
"""
    if not archivefiles:
        return None, file_count

try:
with tarfile.open(tarfilename, f"{FILE_FLAG}:gz") as tarball:
for fname in archivefiles:
LOG.debug(f"Adding {fname} to {tarfilename}: ")
if fname.endswith(".csv"):
upload_name = f"{manifest_uuid}_openshift_usage_report.{file_count}.csv"
tarball.add(fname, arcname=upload_name)
file_count += 1
tarball.add(manifest_filename, arcname="manifest.json")
except FileExistsError as exc:
LOG.critical(exc)
sys.exit(2)
LOG.info(f"Wrote: {tarfilename}")
    return tarfilename, file_count


def build_local_csv_file_list(staging_directory):
"""Build a list of all report csv files in staging directory."""
file_list = []
for csv_file in os.listdir(staging_directory):
if ".csv" in csv_file:
file_list.append(f"{staging_directory}/{csv_file}")
return file_list

if "__main__" in __name__:
args = parse_args()
    if args.verbosity:
        # clamp so anything past -vvv still maps to DEBUG instead of raising IndexError
        LOG.setLevel(LOG_VERBOSITY[min(args.verbosity, len(LOG_VERBOSITY) - 1)])
LOG.debug("CLI Args: %s", args)

if args.overwrite:
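        # rebinding the module-level flag switches every later open() and
        # tarfile.open() from exclusive create ("x") to overwrite ("w")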
FILE_FLAG = "w"

out_files = []
    should_split = need_split(args.filepath, args.max_size)  # avoid shadowing the function name
    if should_split:
        split_files(args.filepath, args.max_size)
tarpath = args.filepath + "/../"
tarfiletmpl = "cost-mgmt{}.tar.gz"

file_list = build_local_csv_file_list(args.filepath)
manifest_filename, manifest_uuid = render_manifest(args, file_list)
file_count = 0
for idx, filename in enumerate(file_list):
if ".csv" in filename:
                tarfilename = os.path.abspath(tarpath + tarfiletmpl.format(idx))
                output_tar, file_count = write_tarball(
                    args, tarfilename, manifest_filename, manifest_uuid, [filename], file_count)
if output_tar:
out_files.append(output_tar)

else:
tarfilename = os.path.abspath(args.filepath + "/../cost-mgmt.tar.gz")

file_list = build_local_csv_file_list(args.filepath)
if file_list:
manifest_filename, manifest_uuid = render_manifest(args, file_list)
output_tar, _ = write_tarball(args, tarfilename, manifest_filename, manifest_uuid, file_list)
if output_tar:
out_files.append(output_tar)

for fname in out_files:
print(fname)
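
For context, a minimal sketch of how the playbook might drive this script
(the paths and cluster id are illustrative assumptions; only the flags come
from the argparse definition above):

import subprocess
import sys

result = subprocess.run(
    [
        sys.executable, "package_report.py",
        "--filepath", "/tmp/cost-mgmt-operator-collect",
        "--ocp-cluster-id", "example-cluster-id",
        "--max-size", "99",  # mirrors collect_max_csvfile_size in defaults/main.yml
        "-vv",
    ],
    capture_output=True, text=True, check=True,
)
# the script prints one tarball path per line on stdout
tarballs = result.stdout.splitlines()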
11 changes: 11 additions & 0 deletions roles/setup/files/trusted_ca_certmap.yaml
@@ -0,0 +1,11 @@
---
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: openshift-metering
  name: trusted-ca-bundle
  annotations:
    release.openshift.io/create-only: "true"
  labels:
    config.openshift.io/inject-trusted-cabundle: "true"
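
The config.openshift.io/inject-trusted-cabundle label asks OpenShift to
populate this ConfigMap with the cluster-wide CA bundle, which the role
evidently writes to cacert_path ('{{ collect_download_path }}/ca-bundle.crt'
in defaults/main.yml). A hedged sketch of how an upload step could use it
(the upload code is not part of this commit, and the content type shown is
an assumption):

import requests

INGRESS_URL = "https://cloud.redhat.com/api/ingress/v1/upload"
CACERT_PATH = "/tmp/cost-mgmt-operator-collect/ca-bundle.crt"

def upload_tarball(tarball_path, token):
    # POST the packaged tarball, verifying TLS against the injected CA bundle
    with open(tarball_path, "rb") as payload:
        resp = requests.post(
            INGRESS_URL,
            headers={"Authorization": f"Bearer {token}"},
            files={"file": (tarball_path, payload,
                            "application/vnd.redhat.hccm.tar+tgz")},
            verify=CACERT_PATH,
        )
    resp.raise_for_status()
    return resp.status_code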