Commit 44989f1

0.0.11 release. Multiple group membership delimited by ';'. --threads option. Rewriting data structures for time efficiency, etc.

reality committed Aug 23, 2021 · 1 parent 19a8b8a
Showing 5 changed files with 71 additions and 25 deletions.
klarigi/build.gradle (2 changes: 1 addition & 1 deletion)
@@ -88,4 +88,4 @@ jacocoTestReport {
}
}

version = '0.0.10'
version = '0.0.11'
klarigi/src/main/groovy/klarigi/App.groovy (16 changes: 14 additions & 2 deletions)
@@ -48,6 +48,8 @@ class App {
_ longOpt: 'output-classification-scores', 'Output classification scores and true/false labels for each group into files. Useful for generating AUCs.', type: Boolean
_ longOpt: 'output-exp-dataframe', "Output a TSV describing a 'data-frame' of categorical values for each term appearing in derived explanations. Easy to load into R and do stuff with.", type: Boolean

_ longOpt: 'threads', 'Number of threads to use, particularly for calculating scoring. This should speed things up a lot with larger datasets.', args: 1

_ longOpt: 'output', 'File to output results to. If not given, will print to stdout', args: 1
_ longOpt: 'print-members', 'Print members of groups by label (first column of data file). Only works with standard output (not LaTeX)', type: Boolean

@@ -66,10 +68,20 @@ class App {
cliBuilder.usage()
}

def threads = 1
if(o['threads']) {
try {
threads = Integer.parseInt(o['threads'])
} catch(e) {
println 'Warning: Could not parse --threads argument. Defaulting to 1.'
threads = 1
}
}

def k = new Klarigi(o)
if(!o['similarity-mode']) {
if(!o['group'] || (o['group'] && o['group'] == '*')) {
def allExplanations = k.explainAllClusters(o['output-scores'], o['power'])
def allExplanations = k.explainAllClusters(o['output-scores'], o['power'], threads)
allExplanations.each {
k.output(it.cluster, it.results, o['output-type'], o['print-members'], o['output'])
}
@@ -89,7 +101,7 @@
}
}
} else {
def r = k.explainCluster(o['group'], o['power'], o['output-scores'])
def r = k.explainCluster(o['group'], o['power'], o['output-scores'], threads)
k.output(o['group'], r, o['output-type'], o['print-members'], o['output'])

if(o['reclassify'] || o['output-exp-dataframe']) {
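For reference, a stand-alone sketch of the behaviour the new --threads handling gives: CliBuilder hands the option value back as a string, so App.groovy parses it defensively and anything unparseable falls back to one thread. The parseThreads helper below is illustrative only and is not part of App.groovy.

// Illustrative helper (not in App.groovy): parse a --threads value,
// defaulting to 1 when it is not a valid integer.
int parseThreads(String value) {
  try {
    return Integer.parseInt(value)
  } catch(NumberFormatException e) {
    println 'Warning: Could not parse --threads argument. Defaulting to 1.'
    return 1
  }
}

assert parseThreads('8') == 8
assert parseThreads('not-a-number') == 1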
klarigi/src/main/groovy/klarigi/Klarigi.groovy (32 changes: 24 additions & 8 deletions)
@@ -30,11 +30,12 @@ public class Klarigi {
def icFactory

Klarigi(o) {
verbose = o['verbose']

loadData(o['data'])
loadOntology(o['ontology'])
loadIc(o['ic'], o['ontology'], o['data'], o['resnik-ic'], o['save-ic'], o['turtle'])
coefficients = Coefficients.Generate(o)
verbose = o['verbose']

if(o['output']) { // blank the output file, since we will subsequently append to it. all the output stuff could probs be better abstracted.
new File(o['output']).text = ''
@@ -64,14 +65,20 @@ public class Klarigi {
data.associations[entity][it] = true
}

if(!data.groupings.containsKey(group)) {
data.groupings[group] = []
group.tokenize(';').each { g ->
if(!data.groupings.containsKey(g)) {
data.groupings[g] = []
}
data.groupings[g] << entity
}
data.groupings[group] << entity
}
} catch(e) {
HandleError(e, verbose, "Error loading data file ($dataFile)")
}

if(verbose) {
println "Done loading dataset"
}
}

def loadIc(icFile, ontologyFile, annotFile, resnikIc, saveIc, turtle) {
@@ -94,6 +101,10 @@
}
}

if(verbose) {
println "Done loading IC values"
}

if(saveIc) {
try {
InformationContent.Write(data.ic, saveIc)
@@ -140,11 +151,15 @@
ontoHelper.dataFactory = manager.getOWLDataFactory()
ontoHelper.reasoner = elkFactory.createReasoner(ontology, config)
ontoHelper.labels = labels

if(verbose) {
println "Done loading the ontology"
}
}

def explainCluster(cid, powerMode, outputScores) {
def explainCluster(cid, powerMode, outputScores, threads) {
def scorer = new Scorer(ontoHelper, data)
def candidates = scorer.scoreClasses(cid)
def candidates = scorer.scoreClasses(cid, threads)

println "$cid: Scoring completed. Candidates: ${candidates.size()}"

@@ -156,16 +171,17 @@
}
}

// TODO: now we have to, ah, add the multiprocessing
if(powerMode) {
StepDown.RunNewAlgorithm(coefficients, cid, candidates, data)
} else {
StepDown.Run(coefficients, cid, candidates, data)
}
}

def explainAllClusters(outputScores, powerMode) {
def explainAllClusters(outputScores, powerMode, threads) {
data.groupings.collect { g, v ->
[ cluster: g, results: explainCluster(g, powerMode, outputScores) ]
[ cluster: g, results: explainCluster(g, powerMode, outputScores, threads) ]
}
}

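To make the loadData change concrete: the group field of a data row may now list several groups separated by ';', and the entity is added to each resulting grouping. A minimal sketch, using hypothetical entity and group identifiers rather than anything from the repository:

def groupings = [:]
def entity = 'patient-1'                 // hypothetical entity label
def group = 'OMIM:604271;OMIM:143100'    // one row, two group memberships

group.tokenize(';').each { g ->
  if(!groupings.containsKey(g)) {
    groupings[g] = []
  }
  groupings[g] << entity
}

assert groupings == ['OMIM:604271': ['patient-1'], 'OMIM:143100': ['patient-1']]

A group field without any ';' behaves exactly as before, since tokenize returns a single-element list.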
klarigi/src/main/groovy/klarigi/Scorer.groovy (44 changes: 31 additions & 13 deletions)
@@ -14,6 +14,7 @@ public class Scorer {
this.data = data
}

// so what if we could build a list of the classes of interest and their subclasses, and then go through it once
private def processClass(explainers, cid, c) {
if(explainers.containsKey(c)) { return; }
def ce = ontoHelper.dataFactory.getOWLClass(IRI.create(c))
@@ -31,13 +32,6 @@
]
explainers[c].inclusion = explainers[c].internalIncluded.size()
explainers[c].exclusion = explainers[c].externalIncluded.size()

ontoHelper.reasoner.getSuperClasses(ce, true).each { n ->
n.getEntities().each { sc ->
def strc = sc.getIRI().toString()
processClass(explainers, cid, strc)
}
}
}

private def normalise(explainers, cid) {
@@ -58,14 +52,38 @@
}
}

def scoreClasses(cid) {
def classList = data.associations.collect { k, v -> v.collect { kk, vv -> kk } }.flatten().unique(false) // all classes used in entity descriptions
def explainers = [:]
//GParsPool.withPool(4) { p ->
classList.each {
private def findRelevantClasses(relevant, c) {
if(relevant.contains(c)) { return; }
relevant << c

def ce = ontoHelper.dataFactory.getOWLClass(IRI.create(c))
ontoHelper.reasoner.getSuperClasses(ce, false).each { n ->
n.getEntities().each { sc ->
def strc = sc.getIRI().toString()
findRelevantClasses(relevant, strc)
}
}

return relevant
}

def scoreClasses(cid, threads) {
// quick, though it should probably be built elsewhere
def classMap = [:]
data.associations.each { k, v -> v.collect { kk, vv -> classMap[kk] = true }}
def classList = classMap.collect { k, v -> k }

def relevant = []
classList.each {
findRelevantClasses(relevant, it)
}

def explainers = new ConcurrentHashMap()
GParsPool.withPool(threads) { p ->
relevant.eachParallel {
processClass(explainers, cid, it)
}
//}
}
explainers = normalise(explainers, cid) // Note, this turns it into a list rather than a hashmap
explainers
}
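The rewritten scoreClasses first collects every class reachable upward from the classes used in entity descriptions (findRelevantClasses walks the superclass hierarchy once), then scores that flat list in parallel. A minimal, self-contained sketch of the parallel pattern, with placeholder class IDs and a stand-in for processClass rather than Klarigi's real scoring:

@Grab(group='org.codehaus.gpars', module='gpars', version='1.2.1')
import groovyx.gpars.GParsPool
import java.util.concurrent.ConcurrentHashMap

def relevant = ['HP:0000001', 'HP:0004322', 'HP:0001250'] // hypothetical class IDs
def explainers = new ConcurrentHashMap()
def threads = 4

GParsPool.withPool(threads) {
  relevant.eachParallel { c ->
    explainers[c] = [inclusion: 0, exclusion: 0]  // stand-in for processClass(explainers, cid, c)
  }
}

assert explainers.size() == 3

Using a ConcurrentHashMap for explainers matters once the loop runs in parallel: a plain map ([:], as in the previous sequential version) would risk lost updates when several pool threads insert candidates at the same time.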
klarigi/src/test/groovy/klarigi/KlarigiTest.groovy (2 changes: 1 addition & 1 deletion)
@@ -66,7 +66,7 @@ class KlarigiTest extends Specification {
def "test_scoring"() {
when:
def clusterId = "OMIM:604271"
explanations = s.scoreClasses(clusterId)
explanations = s.scoreClasses(clusterId, 1)
def items = [
[
term: "http://purl.obolibrary.org/obo/HP_0004322",
