Update rc.cmpd.get.pubchem.R
add pathway retrieval function
cbroeckl committed Dec 29, 2022
1 parent 5c2ff6e commit 36fce5b
Showing 1 changed file with 47 additions and 21 deletions.
68 changes: 47 additions & 21 deletions R/rc.cmpd.get.pubchem.R
@@ -19,6 +19,7 @@
#' @param assign.short.name = TRUE. If TRUE, short names derived via find.short.lipid.name and/or find.short.synonym are assigned as the default annotation name ($ann slot), and the original annotations are moved to the $long.name slot.
#' @param all.props logical. If TRUE, all pubchem properties (https://pubchemdocs.ncbi.nlm.nih.gov/pug-rest$_Toc494865567) are returned. If FALSE, only a subset is returned (faster).
#' @param get.bioassays logical. If TRUE, return a table summarizing existing bioassay data for that CID.
#' @param get.pathways logical. If TRUE, return a table of metabolic pathways for that CID.
#' @param write.csv logical. If TRUE, write csv files of all returned pubchem data.
#' @param search.name character. optional name to assign to pubchem search to name output .csv files.
#' @return returns a list with one or more of $pubchem (compound name and identifiers) - one row in dataframe per CID; $properties contains physicochemical properties - one row in dataframe per CID; $vendors contains the number of vendors for a given compound and selects a vendor based on 'priority.vendors' supplied, or randomly chooses a vendor with an HTML link - one row in dataframe per CID; $bioassays contains a summary of bioassay activity data from pubchem - zero to many rows in dataframe per CID; $pathways contains a table of metabolic pathways - zero to many rows in dataframe per CID
@@ -28,27 +29,28 @@
#'

rc.cmpd.get.pubchem <- function(
ramclustObj = NULL,
search.name = NULL,
cmpd.names = NULL,
cmpd.cid = NULL,
cmpd.inchikey = NULL,
cmpd.smiles = NULL,
use.parent.cid = FALSE,
manual.entry = FALSE,
get.vendors = FALSE,
priority.vendors = c("Sigma Aldrich", "Alfa Chemistry", "Acros Organics", "VWR",
"Alfa Aesar", "molport", "Key Organics", "BLD Pharm"),
get.properties = TRUE,
all.props = FALSE,
get.synonyms = TRUE,
find.short.lipid.name = TRUE,
find.short.synonym = TRUE,
max.name.length = 30,
assign.short.name = TRUE,
get.bioassays = FALSE,
write.csv = TRUE

ramclustObj = NULL,
search.name = NULL,
cmpd.names = NULL,
cmpd.cid = NULL,
cmpd.inchikey = NULL,
cmpd.smiles = NULL,
use.parent.cid = FALSE,
manual.entry = FALSE,
get.vendors = FALSE,
priority.vendors = c("Sigma Aldrich", "Alfa Chemistry", "Acros Organics", "VWR",
"Alfa Aesar", "molport", "Key Organics", "BLD Pharm"),
get.properties = TRUE,
all.props = FALSE,
get.synonyms = TRUE,
find.short.lipid.name = TRUE,
find.short.synonym = TRUE,
max.name.length = 30,
assign.short.name = TRUE,
get.bioassays = TRUE,
get.pathways = TRUE,
write.csv = TRUE

) {

## function to close failed pubchem queries to prevent
@@ -752,6 +754,30 @@ rc.cmpd.get.pubchem <- function(
}
}

  if(get.pathways) {
    cat("getting pathways from cid", '\n')

    ## helper to retrieve the PubChem pathway table for a single CID;
    ## defined once, outside the loop, and renamed so that it does not
    ## shadow the 'get.pathways' logical argument of this function
    fetch.pathways <- function(cid = 5793) {
      url.pre <- "https://pubchem.ncbi.nlm.nih.gov/sdq/sdqagent.cgi?infmt=json&outfmt=csv&query={%22download%22:%22*%22,%22collection%22:%22pathway%22,%22where%22:{%22ands%22:[{%22cid%22:%22"
      url.mid <- "%22},{%22core%22:%221%22}]},%22order%22:[%22taxname,asc%22],%22start%22:1,%22limit%22:10000000,%22downloadfilename%22:%22CID_"
      url.post <- "_pathway%22}"

      d <- suppressWarnings(utils::read.csv(paste0(url.pre, cid, url.mid, cid, url.post)))

      if(!is.data.frame(d)) {cat("not a data.frame", '\n')}
      return(d)
    }

    ## 'cids' is assumed to be the vector of PubChem CIDs assembled earlier
    ## in this function (the original loop mixed 'cid' and 'cids')
    for(i in 1:length(cids)) {
      pathway <- fetch.pathways(cid = cids[i])
      if(!exists("pathways", inherits = FALSE)) {
        pathways <- cbind("cid" = cids[0], pathway[0,])
      }
      if(nrow(pathway) == 0) next
      if(is.na(pathway[1,'name'])) next
      pathway <- cbind("cid" = rep(cids[i], nrow(pathway)), pathway)
      pathways <- rbind(pathways, pathway)
    }
    pubchem$pathways <- pathways
  }
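  ## for reference: the pathway request above calls PubChem's SDQ agent with a
  ## URL-encoded JSON query; decoded for the default cid = 5793 it reads
  ##   {"download":"*","collection":"pathway",
  ##    "where":{"ands":[{"cid":"5793"},{"core":"1"}]},
  ##    "order":["taxname,asc"],"start":1,"limit":10000000,
  ##    "downloadfilename":"CID_5793_pathway"}
  ## outfmt=csv in the URL makes the endpoint return the table as CSV, which is
  ## why utils::read.csv() can consume it directly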

for(i in 1:length(pubchem)) {
if(!is.data.frame(pubchem[[i]])) next
if(nrow(pubchem[[i]]) == length(ramclustObj$cmpd)) {
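For orientation, a minimal usage sketch of the new get.pathways option (not part of the commit): the example CIDs and the direct cmpd.cid call are assumptions based on the argument list above, and the $pathways element follows the pubchem$pathways assignment in the diff.

pc <- rc.cmpd.get.pubchem(
  cmpd.cid = c(5793, 2244),   # D-glucose and aspirin as example CIDs
  get.properties = TRUE,
  get.bioassays = FALSE,
  get.pathways = TRUE,
  write.csv = FALSE
)
head(pc$pathways)             # pathway table; zero to many rows per CID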
