Merge pull request #36 from getwilds/billing

Billing
getwilds · Mar 7, 2024 · 33bcced · 33bcced
2 parents 59c977c + 8b0b334
commit 33bcced
Show file tree

Hide file tree

Showing 19 changed files with 642 additions and 46 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -8,3 +8,4 @@
 ^.lintr$
 ^README\.Rmd$
 ^data-raw$
+vignettes/figure
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -14,6 +14,7 @@ VignetteBuilder: knitr
 Roxygen: list(markdown = TRUE, roclets = c("collate", "namespace", "rd",
     "roxyglobals::global_roclet"))
 RoxygenNote: 7.2.3
+LazyData: true
 Depends:
     R (>= 2.10)
 Imports: 
@@ -41,7 +42,8 @@ Suggests:
     RMariaDB,
     testthat (>= 3.0.0),
     vcr (>= 0.6.0),
-    withr
+    withr,
+    ggplot2
 Config/roxyglobals/filename: globals.R
 Config/roxyglobals/unique: FALSE
 Config/testthat/edition: 3

diff --git a/Makefile b/Makefile
@@ -2,6 +2,8 @@ PACKAGE := $(shell grep '^Package:' DESCRIPTION | sed -E 's/^Package:[[:space:]]
 RSCRIPT = Rscript --no-init-file
 FILE_TARGET := "R/${FILE}"
 
+# Every rule visible here is a command, not a file to build, so declare them
+# phony: a same-named file or directory must never make a target look
+# "up to date". NOTE(review): pkgdown presumably writes the site into a
+# `docs/` directory, which is why `docs` needs this most - confirm.
+.PHONY: install build doc docs eg vign_getting_started vign_billing test \
+  style_file style_package update_data scan_secrets
+
 install: doc build
 	R CMD INSTALL . && rm *.tar.gz
 
@@ -11,6 +13,9 @@ build:
 doc:
 	${RSCRIPT} -e "devtools::document()"
 
+# Build the pkgdown site, then open a preview of it
+docs:
+	${RSCRIPT} -e "pkgdown::build_site(); pkgdown::preview_site(preview=TRUE)"
+
 eg:
 	${RSCRIPT} -e "devtools::run_examples(run_dontrun = TRUE)"
 
@@ -24,6 +29,11 @@ vign_getting_started:
 	${RSCRIPT} -e "Sys.setenv(NOT_CRAN='true'); knitr::knit('sixtyfour.Rmd.og', output = 'sixtyfour.Rmd')";\
 	cd ..
 
+# Pre-knit the billing vignette (billing.Rmd.og -> billing.Rmd).
+# `&&` makes a failed `cd` or a failed knit fail the target; with `;` and a
+# trailing `cd ..` the recipe's exit status was always that of `cd ..`.
+# No `cd ..` is needed: the recipe's shell exits once the command finishes.
+vign_billing:
+	cd vignettes && \
+	${RSCRIPT} -e "Sys.setenv(NOT_CRAN='true'); knitr::knit('billing.Rmd.og', output = 'billing.Rmd')"
+
 test:
 	${RSCRIPT} -e "devtools::test()"
 
@@ -41,6 +51,9 @@ style_file:
 style_package:
 	${RSCRIPT} -e "styler::style_pkg()"
 
+# Run the data-preparation script data-raw/service-mapping.R
+update_data:
+	${RSCRIPT} -e "source('data-raw/service-mapping.R')"
+
 scan_secrets:
 	@echo "scanning for leaks in commits\n"
 	gitleaks detect --source . -v

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
 export(as_policy_arn)
+export(aws_billing)
+export(aws_billing_raw)
 export(aws_bucket_create)
 export(aws_bucket_delete)
 export(aws_bucket_download)
@@ -73,7 +75,11 @@ importFrom(cli,cli_progress_update)
 importFrom(cli,pb_spin)
 importFrom(curl,curl_fetch_memory)
 importFrom(dplyr,bind_rows)
+importFrom(dplyr,coalesce)
 importFrom(dplyr,filter)
+importFrom(dplyr,left_join)
+importFrom(dplyr,rename)
+importFrom(dplyr,rename_with)
 importFrom(dplyr,last_col)
 importFrom(dplyr,mutate)
 importFrom(dplyr,pull)

diff --git a/R/billing.R b/R/billing.R
@@ -1,34 +1,88 @@
-#' Fetch billing data
+#' Fetch billing data - with some internal munging for ease of use
 #'
 #' @export
 #' @importFrom tibble tibble
 #' @importFrom purrr map map_chr list_rbind
 #' @importFrom rlang :=
+#' @importFrom dplyr rename rename_with left_join coalesce
 #' @param date_start,date_end Start and end date to get billing data for.
-#' Date format expected: `YYYY-MM-DD`
-#' @examples \dontrun{
-#' billing(date_start = "2023-01-01")
-#' }
-billing <- function(date_start, date_end = as.character(Sys.Date())) {
-  # TODO: assertions on date formats, and possibly max date - check to
-  # see if paws does any date validation first
-  list(
-    unblended = billing_unblended(date_start, date_end),
-    blended = billing_blended(date_start, date_end)
-  )
+#' Date format expected: `yyyy-MM-dd`. `date_start` is required; `date_end`
+#' defaults to today's date.
+#' @autoglobal
+#' @references <https://www.paws-r-sdk.com/docs/costexplorer/>
+#' @family billing
+#' @section Blended vs. Unblended:
+#' - Unblended: Unblended costs represent your usage costs on the day
+#' they are charged to you
+#' - Blended: Blended costs are calculated by multiplying each account’s
+#' service usage against something called a blended rate. A blended rate
+#' is the average rate of on-demand usage, as well as Savings Plans- and
+#' reservation-related usage, that is consumed by member accounts in an
+#' organization for a particular service.
+#' @section Historical data:
+#' If you supply a `date_start` older than 14 months prior to today's date
+#' you will likely see an error like "You haven't enabled historical data
+#' beyond 14 months". See
+#' <https://docs.aws.amazon.com/cost-management/latest/userguide/ce-advanced-cost-analysis.html> #nolint
+#' for help
+#' @return tibble with columns:
+#' - id: "blended", "unblended"
+#' - date: date, in format `yyyy-MM-dd`
+#' - service: AWS service name, spelled out in full
+#' - linked_account: account number
+#' - cost: cost in USD
+#' - acronym: short code for the service; if none known, this row
+#' will have the value in `service`
+#' @examplesIf interactive()
+#' library(lubridate)
+#' library(dplyr)
+#'
+#' start_date <- today() - months(13)
+#' z <- aws_billing(date_start = start_date)
+#' z %>%
+#'   filter(id == "blended") %>%
+#'   group_by(service) %>%
+#'   summarise(sum_cost = sum(cost)) %>%
+#'   filter(sum_cost > 0) %>%
+#'   arrange(desc(sum_cost))
+#'
+#' z %>%
+#'   filter(id == "blended") %>%
+#'   filter(cost > 0) %>%
+#'   arrange(service)
+#'
+#' z %>%
+#'   filter(id == "blended") %>%
+#'   group_by(service) %>%
+#'   summarise(sum_cost = sum(cost)) %>%
+#'   filter(service == "Amazon Relational Database Service")
+aws_billing <- function(date_start, date_end = as.character(Sys.Date())) {
+  # Fetch each cost flavour and give its metric column the shared name `cost`
+  unblended <- rename(
+    billing_unblended(date_start, date_end),
+    cost = UnblendedCost
+  )
+  blended <- rename(
+    billing_blended(date_start, date_end),
+    cost = BlendedCost
+  )
+  # Stack both tables, recording each row's origin in `id`, then lower-case
+  # every column name
+  stacked <- bind_rows(unblended = unblended, blended = blended, .id = "id")
+  rename_with(stacked, tolower)
 }
 
-# function factory to create functions for both blended and unblended data
+#' function factory to create functions for both blended and unblended data
+#' @autoglobal
+#' @keywords internal
+#' @noRd
 billing_factory <- function(type) {
   function(date_start, date_end) {
-    raw_billing_data <- env64$costexplorer$get_cost_and_usage(
-      TimePeriod = list(Start = date_start, End = date_end),
-      Granularity = "DAILY",
-      Metrics = type,
-      GroupBy = list(
-        list(Type = "DIMENSION", Key = "SERVICE"),
-        list(Type = "DIMENSION", Key = "LINKED_ACCOUNT")
-      )
+    groupby <- list(
+      list(Type = "DIMENSION", Key = "SERVICE"),
+      list(Type = "DIMENSION", Key = "LINKED_ACCOUNT")
+    )
+    raw_billing_data <- aws_billing_raw(date_start,
+      metrics = type,
+      granularity = "daily", group_by = groupby,
+      date_end = date_end
     )
 
     raw_billing_data$ResultsByTime %>%
@@ -53,9 +107,46 @@ billing_factory <- function(type) {
             as.double()
         )
       }) %>%
-      list_rbind()
+      list_rbind() %>%
+      left_join(service_map, by = c("Service" = "service")) %>%
+      mutate(acronym = coalesce(acronym, Service))
   }
 }
 
 billing_unblended <- billing_factory("UnblendedCost")
 billing_blended <- billing_factory("BlendedCost")
+
+#' Fetch billing data - rawest form
+#' @export
+#' @inheritParams aws_billing
+#' @param metrics (character) which metrics to return. required. One of:
+#' AmortizedCost, BlendedCost, NetAmortizedCost, NetUnblendedCost,
+#' NormalizedUsageAmount, UnblendedCost, and UsageQuantity
+#' @param granularity (character) a single value, one of: monthly, daily,
+#' hourly. Default: daily
+#' @param filter (list) filters costs by different dimensions. optional.
+#' @param group_by (list) group costs using up to two different groups,
+#' either dimensions, tag keys, cost categories, or any two group by types.
+#' optional.
+#' @family billing
+#' @return list with slots for:
+#' - NextPageToken
+#' - GroupDefinitions
+#' - ResultsByTime
+#' - DimensionValueAttributes
+#' @examplesIf interactive()
+#' aws_billing_raw(date_start = "2023-02-01", metrics = "BlendedCost")
+aws_billing_raw <- function(
+    date_start, metrics, granularity = "daily",
+    filter = NULL, group_by = NULL, date_end = as.character(Sys.Date())) {
+  grans <- c("hourly", "daily", "monthly")
+  # The length check matters: `NULL %in% grans` is logical(0), which
+  # stopifnot() treats as passing, and a longer vector would pass too
+  stopifnot(
+    "`granularity` must be one of hourly/daily/monthly" =
+      length(granularity) == 1 && granularity %in% grans
+  )
+  env64$costexplorer$get_cost_and_usage(
+    TimePeriod = list(Start = date_start, End = date_end),
+    Granularity = toupper(granularity),
+    # forward the caller's filter; it was previously accepted but dropped
+    Filter = filter,
+    Metrics = metrics,
+    GroupBy = group_by
+  )
+}
diff --git a/R/globals.R b/R/globals.R
@@ -1,6 +1,11 @@
 # Generated by roxyglobals: do not edit by hand
 
 utils::globalVariables(c(
+  "UnblendedCost", # <aws_billing>
+  "BlendedCost", # <aws_billing>
+  "service_map", # <billing_factory>
+  "acronym", # <billing_factory>
+  "Service", # <billing_factory>
   ".", # <aws_group>
   ".", # <aws_policy>
   "PolicyName", # <as_policy_arn>

diff --git a/R/sixtyfour-package.R b/R/sixtyfour-package.R
@@ -16,3 +16,15 @@
 #' @importFrom curl curl_fetch_memory
 ## usethis namespace: end
 NULL
+
+#' Mapping of full names of AWS services to acronyms
+#'
+#' @format ## `service_map`
+#' A data frame with 178 rows and 2 columns:
+#' \describe{
+#'   \item{service}{Service name in full}
+#'   \item{acronym}{The acronym, from 2 to 5 characters in length}
+#'   ...
+#' }
+#' @source <https://tommymaynard.com/aws-service-acronyms/>
+"service_map"
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -7,19 +7,20 @@ reference:
       High level overview of package
     contents:
       - sixtyfour
+  - title: Billing
+    contents:
+      - starts_with("aws_billing")
+      # dataset mapping AWS service names to acronyms; belongs with billing
+      - service_map
   - title: Files
     desc: >
       All file functions are vectorized. That is, you can pass in 1 or more local or s3 remote paths to the file functions.
     contents:
       - starts_with("aws_file")
   - title: Buckets
     desc: >
       All bucket functions are NOT vectorized.
     contents:
       - starts_with("aws_bucket")
-  - title: Billing
-    contents:
-      - billing
   - title: Users
     contents:
       - starts_with("aws_user")

diff --git a/data-raw/service-mapping.R b/data-raw/service-mapping.R
@@ -0,0 +1,6 @@
+## code to prepare the `service_map` dataset
+
+# originally from https://tommymaynard.com/aws-service-acronyms/
+
+# the csv path is relative to the package root, so run this script from
+# there (the Makefile's `update_data` target sources it that way)
+service_map <- readr::read_csv("data-raw/service-mapping.csv")
+# store the data frame as package data, replacing any existing copy
+usethis::use_data(service_map, overwrite = TRUE)