Skip to content

Commit

Permalink
DBs fxns (#17)
Browse files Browse the repository at this point in the history
* #8 working on database stuff - still experimenting

* #8 a bunch of changes to db fxns

- stop within rds con fxn as not ready yet
- redshift con fxn changes: user/pwd required and no default, get other params from cluster id
- redshift create fxn: add security group, wait until available
- many helper fxns
- improved docs

* fix line too long

* fixes

* #8 more database work

- new files for db stuff, separate files for redshift, rds and wait fxns
- add rds functions
- modify wait functions to use common plumbing with generator for each of redshift, rds
- now importing RMariaDB

* add support for mysql and postgres to aws_db_rds_con

* more database refining

- add misc file for database helpers
- add info() fxn to give back info on the newly created rds or redshift instance
- add tests for some of the db fxns
- rds: toggle which DBI driver loaded; and only check for the single DB driver pkg for the engine
- redshift: add info() usage in create fxn, and verbose param
- wait_until - add info about waiting on instance

* improvements to docs

* start to getting started vignette, still need to add files

* make readme simpler - now that we are moving learning content to vignettes

* add force arg to file copy and upload; add force arg to create bucket if doesnt exist

* finish off getting started vignette with files egs

* line too long fix in buckets.R

* remove interface mention in readme - that's an internal fxn

* make concrete in RDS that we're only supporting 3 dbs

* make clear that username and password for Redshift create are for IAM
  • Loading branch information
sckott authored Jan 9, 2024
1 parent d812a49 commit c76a9b2
Show file tree
Hide file tree
Showing 28 changed files with 1,218 additions and 2 deletions.
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sixtyfour
Title: Humane Interface to AWS
Version: 0.0.0.91
Version: 0.0.0.93
Authors@R: c(
person("Sean", "Kross", role = "aut"),
person("Scott", "Chamberlain", role = c("aut", "cre"), email = "sachamber@fredhutch.org")
Expand Down Expand Up @@ -33,6 +33,9 @@ Suggests:
knitr,
rmarkdown,
roxyglobals,
DBI,
RPostgres,
RMariaDB,
testthat (>= 3.0.0),
vcr (>= 0.6.0),
withr
Expand Down
13 changes: 13 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ export(aws_bucket_list_objects)
export(aws_bucket_tree)
export(aws_bucket_upload)
export(aws_buckets)
export(aws_db_cluster_status)
export(aws_db_instance_status)
export(aws_db_rds_client)
export(aws_db_rds_con)
export(aws_db_rds_create)
export(aws_db_redshift_client)
export(aws_db_redshift_con)
export(aws_db_redshift_create)
export(aws_file_attr)
export(aws_file_copy)
export(aws_file_delete)
Expand Down Expand Up @@ -43,6 +51,9 @@ export(billing)
export(s3_path)
export(set_s3_interface)
importFrom(cli,cli_inform)
importFrom(cli,cli_progress_bar)
importFrom(cli,cli_progress_update)
importFrom(cli,pb_spin)
importFrom(dplyr,bind_rows)
importFrom(dplyr,filter)
importFrom(dplyr,mutate)
Expand All @@ -54,6 +65,8 @@ importFrom(lubridate,as_datetime)
importFrom(magrittr,"%>%")
importFrom(paws,costexplorer)
importFrom(paws,iam)
importFrom(paws,rds)
importFrom(paws,redshift)
importFrom(paws,s3)
importFrom(purrr,flatten)
importFrom(purrr,list_rbind)
Expand Down
34 changes: 34 additions & 0 deletions R/database-misc.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#' Print a summary for a database instance or cluster (internal helper)
#'
#' Emits a success alert, a pointer to `aws_db_rds_con`, and then one
#' informational line per element returned by `fun(id)`, skipping the
#' element named `status`.
#' @param id an RDS instance ID or Redshift cluster ID
#' @param fun a function that takes an ID for an AWS RDS instance
#' or Redshift cluster, and returns a named list of details (for example
#' `instance_con_info`)
#' @return called for its side effect of printing messages
#' @noRd
#' @keywords internal
info <- function(id, fun) {
  cli::cli_alert_success("Instance is up!")
  cli::cli_alert_info("See `aws_db_rds_con` for connection info")
  cli::cli_alert_info("Instance details:")
  con_info <- fun(id)
  for (i in seq_along(con_info)) {
    # every entry except the status is reported
    if (names(con_info)[i] != "status") {
      cli::cli_alert_info(" {names(con_info)[i]}: {con_info[[i]]}")
    }
  }
}

#' Pick the DBI driver for a database engine (internal helper)
#'
#' Only the driver package needed for the requested engine is checked for:
#' RMariaDB for "mariadb" and "mysql", RPostgres for "postgres".
#' @param engine (character) one of "mariadb", "mysql" or "postgres"
#' @return a driver object from `RMariaDB::MariaDB()` or
#' `RPostgres::Postgres()`; errors for any other engine
#' @noRd
#' @keywords internal
which_driver <- function(engine) {
  switch(engine,
    # MariaDB and MySQL share the RMariaDB driver (empty arm falls through)
    mariadb = ,
    mysql = {
      check_for_pkg("RMariaDB")
      RMariaDB::MariaDB()
    },
    postgres = {
      check_for_pkg("RPostgres")
      RPostgres::Postgres()
    },
    stop(glue::glue("{engine} not currently supported"))
  )
}
180 changes: 180 additions & 0 deletions R/database-rds.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
#' Connect to a database hosted on Amazon RDS
#'
#' Supported engines: MariaDB, MySQL, and PostgreSQL
#'
#' @export
#' @inheritParams aws_db_redshift_con
#' @param engine (character) The engine to use. optional if `user`, `pwd`, and
#' `id` are supplied - otherwise required
#' @details RDS offers many database engines, but this function only
#' supports MariaDB, MySQL, and PostgreSQL.
#'
#' If the `engine` of your RDS instance is not supported by this function,
#' you can likely connect to it on your own.
#'
#' When `id` is supplied, `host`, `port`, `dbname` and `engine` are looked
#' up from that instance and replace any values passed in for them.
#' @return the connection object returned by `DBI::dbConnect()`
#' @examples \dontrun{
#' con_rds <- aws_db_rds_con("<define all params here>")
#' con_rds
#'
#' library(DBI)
#' library(RMariaDB)
#' dbListTables(con_rds)
#' dbWriteTable(con_rds, "mtcars", mtcars)
#' dbListTables(con_rds)
#' dbReadTable(con_rds, "mtcars")
#'
#' library(dplyr)
#' tbl(con_rds, "mtcars")
#' }
aws_db_rds_con <- function(user, pwd, id = NULL, host = NULL, port = NULL,
                           dbname = NULL, engine = NULL, ...) {
  check_for_pkg("DBI")

  stopifnot(
    "user is required" = !missing(user),
    "pwd is required" = !missing(pwd)
  )

  # an instance id takes precedence: pull the connection details from AWS
  if (!is.null(id)) {
    looked_up <- instance_con_info(id)
    host <- looked_up$host
    port <- looked_up$port
    dbname <- looked_up$dbname
    engine <- looked_up$engine
  }

  incomplete <- is.null(host) || is.null(port) ||
    is.null(dbname) || is.null(engine)
  if (incomplete) {
    stop("`host`, `port`, `dbname`, and `engine` can not be NULL",
      call. = FALSE
    )
  }

  DBI::dbConnect(
    which_driver(engine),
    host = host,
    port = port,
    dbname = dbname,
    user = user,
    password = pwd,
    ...
  )
}

#' Create an RDS instance
#'
#' @export
#' @importFrom paws rds
#' @param id (character) required. instance identifier. The identifier for
#' this DB instance. This parameter is stored as a lowercase string.
#' Constraints: must contain from 1 to 63 letters, numbers, or hyphens; first
#' character must be a letter; can't end with a hyphen or contain two
#' consecutive hyphens.
#' @param class (character) required. The compute and memory capacity of the
#' DB instance, for example `db.m5.large`.
#' @param user (character) User name associated with the admin user account for
#' the instance that is being created.
#' @param pwd (character) Password associated with the admin user account for
#' the instance that is being created.
#' @param dbname (character) The name of the first database to be created when
#' the instance is created. default: "dev". additional databases can be
#' created within the instance
#' @param engine (character) The engine to use. default: "mariadb"
#' @param storage (numeric) The amount of storage in gibibytes (GiB) to
#' allocate for the DB instance. default: 20
#' @param storage_encrypted (logical) Whether the DB instance is encrypted.
#' default: `TRUE`
#' @param security_group_ids (character) VPC security group identifiers; one
#' or more. If none are supplied, you should go into your AWS RDS
#' dashboard and add the appropriate VPC security group.
#' @param wait (logical) wait for instance to initialize? default: `TRUE`. If
#' you don't wait (`FALSE`) then there's many operations you can not do
#' until the instance is available. If `wait=FALSE` use
#' `aws_db_instance_status()` to check on the instance status.
#' @param verbose (logical) verbose informational output? default: `TRUE`.
#' Instance details are only printed when `wait = TRUE`, i.e. after the
#' instance has become available
#' @param ... named parameters passed on to
#' [create_db_instance](https://www.paws-r-sdk.com/docs/rds_create_db_instance/)
#' @details See above link to `create_db_instance` docs for details on
#' requirements for each parameter
#'
#' Note that even though you can use any option for `engine` in this function,
#' we may not provide the ability to connect to the chosen data source
#' in this package.
#' @section Waiting:
#' Note that with `wait = TRUE` this function waits for the instance to be
#' available before returning. That wait can be around 5 - 7 minutes. You can
#' instead set `wait = FALSE` and then check on the status of the instance
#' yourself in the AWS dashboard or with `aws_db_instance_status()`.
#' @return a list with methods for interfacing with RDS;
#' see <https://www.paws-r-sdk.com/docs/rds/>. also prints useful
#' connection information after instance is available.
aws_db_rds_create <-
  function(id, class, user, pwd, dbname = "dev",
           engine = "mariadb", storage = 20,
           storage_encrypted = TRUE, security_group_ids = NULL,
           wait = TRUE, verbose = TRUE, ...) {
    client <- aws_db_rds_client()
    client$create_db_instance(
      DBName = dbname, DBInstanceIdentifier = id,
      Engine = engine, DBInstanceClass = class,
      AllocatedStorage = storage,
      MasterUsername = user, MasterUserPassword = pwd,
      VpcSecurityGroupIds = security_group_ids,
      StorageEncrypted = storage_encrypted,
      ...
    )
    if (wait) {
      wait_for_instance(id)
      # info() announces "Instance is up!", so it must only run once we
      # have actually waited for the instance to become available
      if (verbose) info(id, instance_con_info)
    }
    client
  }

#' Get the `paws` RDS client
#'
#' The client is cached in `env64$rds`: it is created with `paws::rds()` the
#' first time it is needed and reused afterwards.
#' @export
#' @note returns existing client if found; a new client otherwise
#' @return a list with methods for interfacing with RDS;
#' see <https://www.paws-r-sdk.com/docs/rds/>
aws_db_rds_client <- function() {
  if (!is.null(env64$rds)) {
    return(env64$rds)
  }
  env64$rds <- paws::rds()
  env64$rds
}

#' Get information for all RDS instances
#'
#' Calls `describe_db_instances()` on the RDS client and keeps requesting
#' further pages while the response carries a non-empty `Marker` pagination
#' token, so that instances beyond the first page are included.
#' @return the last `describe_db_instances()` response, with its
#' `DBInstances` element replaced by the instances collected from all pages
#' @keywords internal
instance_details <- function() {
  client <- aws_db_rds_client()
  page <- client$describe_db_instances()
  instances <- page$DBInstances
  seen <- character(0)
  repeat {
    marker <- page$Marker
    has_more <- length(marker) == 1 && !is.na(marker) && nzchar(marker)
    # also stop if a token repeats, to rule out looping forever
    if (!has_more || marker %in% seen) break
    seen <- c(seen, marker)
    page <- client$describe_db_instances(Marker = marker)
    instances <- c(instances, page$DBInstances)
  }
  page$DBInstances <- instances
  page
}

#' Get connection information for a single RDS instance
#'
#' Looks the instance up by identifier among the instances returned by
#' `instance_details()`. The identifier is matched case-insensitively
#' because RDS stores instance identifiers as lowercase strings.
#' @inheritParams aws_db_rds_create
#' @return a named list with elements `host`, `port`, `dbname`, `engine`,
#' `class` and `status` for the matching instance. Errors if no instance
#' with that identifier is found.
#' @keywords internal
instance_con_info <- function(id) {
  stopifnot(
    "id must be a single string" = is.character(id) && length(id) == 1
  )
  deets <- instance_details()$DBInstances
  target <- tolower(id)
  matches <- Filter(
    function(x) identical(tolower(x$DBInstanceIdentifier), target),
    deets
  )
  if (length(matches) == 0) {
    # fail with a clear message rather than "subscript out of bounds"
    stop(sprintf("instance id '%s' not found", id), call. = FALSE)
  }
  z <- matches[[1]]
  list(
    host = z$Endpoint$Address,
    port = z$Endpoint$Port,
    dbname = z$DBName,
    engine = z$Engine,
    class = z$DBInstanceClass,
    status = z$DBInstanceStatus
  )
}

#' Get instance status
#'
#' The identifier is matched case-insensitively because RDS stores instance
#' identifiers as lowercase strings.
#' @export
#' @inheritParams aws_db_rds_create
#' @return (character) the status of the instance, e.g., "creating",
#' "available", "not found". A warning is raised when the instance is
#' not found.
#' @examples \dontrun{
#' aws_db_instance_status(id = "thedbinstance")
#' }
aws_db_instance_status <- function(id) {
  deets <- instance_details()$DBInstances
  target <- tolower(id)
  # identical() always yields a single TRUE/FALSE, even when an entry has no
  # identifier, which keeps Filter() aligned with `deets`
  instance <- Filter(
    function(x) identical(tolower(x$DBInstanceIdentifier), target),
    deets
  )
  if (length(instance) == 0) {
    warning(glue::glue("instance id '{id}' not found"))
    return("not found")
  }
  instance[[1]]$DBInstanceStatus
}
Loading

0 comments on commit c76a9b2

Please sign in to comment.