Skip to content

Commit

Permalink
Stability fixes for benchmarks/Neo4j and process monitoring
Browse files Browse the repository at this point in the history
  • Loading branch information
DavidBakerEffendi committed Jul 19, 2024
1 parent 449c605 commit df28007
Show file tree
Hide file tree
Showing 8 changed files with 165 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import overflowdb.{BatchedUpdate, DetachedNodeData}

import java.util
import java.util.concurrent.atomic.AtomicBoolean
import scala.compiletime.uninitialized
import scala.jdk.CollectionConverters
import scala.jdk.CollectionConverters.*
import scala.util.{Failure, Success, Try, Using}
Expand All @@ -27,24 +28,29 @@ final class Neo4jEmbeddedDriver(
) extends IDriver
with ISchemaSafeDriver {

private val logger = LoggerFactory.getLogger(getClass)
private val connected = new AtomicBoolean(true)
private var managementService = new DatabaseManagementServiceBuilder(databaseDir.path).build()
registerShutdownHook(managementService)
private var graphDb = managementService.database(databaseName)

private def registerShutdownHook(managementService: DatabaseManagementService): Unit = {
Runtime.getRuntime.addShutdownHook(new Thread() {
override def run(): Unit = {
managementService.shutdown()
}
})
}
private val logger = LoggerFactory.getLogger(getClass)
private val connected = new AtomicBoolean(false)
private var managementService: DatabaseManagementService = uninitialized
private var graphDb: GraphDatabaseService = uninitialized
connect()

/** @return
* a direct reference to the underlying graph database service.
*/
def graph: GraphDatabaseService = graphDb

private def connect(): Unit = {

def registerShutdownHook(managementService: DatabaseManagementService): Unit = {
Runtime.getRuntime.addShutdownHook(new Thread() {
override def run(): Unit = {
managementService.shutdown()
}
})
}

managementService = new DatabaseManagementServiceBuilder(databaseDir.path).build()
registerShutdownHook(managementService)
graphDb = managementService.database(databaseName)
connected.set(true)
}
Expand Down
91 changes: 79 additions & 12 deletions runBenchmarks.sc
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
import java.io.{BufferedReader, File, FileReader}
import scala.sys.process.*
import java.nio.file.Files
import java.nio.file.Path
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future, blocking}
import scala.jdk.CollectionConverters.*
import scala.concurrent.ExecutionContext.Implicits.global

// Combinations of driver, project, Gb mem, known to OOM
val oomCombinations: Set[(String, String, Int)] = Set(("tinkergraph", "compress", 2))
val drivers = Seq(
"overflowdb",
"tinkergraph",
"neo4j-embedded"
)

@main def main(): Unit = {
println("[info] Ensuring compilation status and benchmark dataset availability...")
Expand All @@ -10,39 +22,94 @@ import scala.jdk.CollectionConverters.*
val datasetDir = Path.of("workspace", "defects4j")
val resultsDir = Path.of("results").createIfNotExists

def benchmarkArgs(driver: String, project: String, memGb: Int): String = {
val projectDir = Path.of(datasetDir.toString, project)
val projectName = project.toLowerCase.stripSuffix(".jar")
def benchmarkArgs(driver: String, project: String, memGb: Int): JmhProcessInfo = {
val projectDir = Path.of(datasetDir.toString, project)
val projectName = project.toLowerCase.stripSuffix(".jar")
val driverResultsDir = Path.of(resultsDir.toString, driver, projectName).createIfNotExists
val resultsPath = Path.of(driverResultsDir.toString, s"results-Xmx${memGb}G")
val outputPath = Path.of(driverResultsDir.toString, s"output-Xmx${memGb}G")
s"Jmh/runMain com.github.plume.oss.Benchmark $driver $projectDir -o ${outputPath.toAbsolutePath} -r ${resultsPath.toAbsolutePath} -m $memGb"
val resultsPath = Path.of(driverResultsDir.toString, s"results-Xmx${memGb}G")
val outputPath = Path.of(driverResultsDir.toString, s"output-Xmx${memGb}G")
val resultsExist =
Path.of(s"$resultsPath-read.csv").toFile.exists() && Path.of(s"$outputPath-read.txt").toFile.exists()
val cmd =
s"Jmh/runMain com.github.plume.oss.Benchmark $driver $projectDir -o ${outputPath.toAbsolutePath} -r ${resultsPath.toAbsolutePath} -m $memGb"
JmhProcessInfo(cmd, resultsExist, outputPath.toFile)
}

println("[info] Available projects:")
val projects = Files.list(datasetDir).filter(_.toString.endsWith(".jar")).toList.asScala.toList
projects.foreach(p => println(s" - ${p.getFileName.toString}"))

println("[info] Available drivers:")
val drivers = Seq("overflowdb", "tinkergraph", "neo4j-embedded")
drivers.foreach(d => println(s" - $d"))

val memoryConfigs = Seq(2, 4, 6, 8)

memoryConfigs.foreach { memConfig =>
memoryConfigs.reverse.foreach { memConfig =>
drivers.foreach { driver =>
projects.foreach { project =>
val cmd = benchmarkArgs(driver, project.getFileName.toString, memConfig)
println(s"[info] Benchmarking '$driver' on project '$project' with `-Xmx${memConfig}G`")
s"sbt \"$cmd\"".!
val projectName = project.getFileName.toString.toLowerCase.stripSuffix(".jar")
if (oomCombinations.contains(driver, projectName, memConfig)) {
println(
s"[info] '$driver' on project '$project' with `-Xmx${memConfig}G` will cause an OutOfMemoryException. Skipping..."
)
} else {
val JmhProcessInfo(cmd, resultsExist, outputFile) =
benchmarkArgs(driver, project.getFileName.toString, memConfig)
if (resultsExist) {
println(
s"[info] Results for '$driver' on project '$project' with `-Xmx${memConfig}G` already exist. Skipping..."
)
} else {
println(s"[info] Benchmarking '$driver' on project '$project' with `-Xmx${memConfig}G`")
runAndMonitorBenchmarkProcess(cmd, outputFile)
}
}
}
}
}
}

def runAndMonitorBenchmarkProcess(cmd: String, outputFile: File): Unit = {
val processBuilder = Process(s"sbt \"$cmd\"")

// Start the process
val process = processBuilder.#>(outputFile).run()

// Monitor the output file for timeout messages
val timeoutFuture = Future {
blocking {
val reader = new BufferedReader(new FileReader(outputFile))
try {
var line: String = null
while ({
line = reader.readLine(); line != null
}) {
println(line) // Log the output
if (line.contains("benchmark timed out")) {
println("Timeout detected. Terminating process...")
process.destroy()
return
} else if (line.contains("java.lang.OutOfMemoryError")) {
println("OutOfMemory error detected. Terminating process...")
process.destroy()
return
}
}
} finally {
reader.close()
}
}
}

// Wait for the process to finish or timeout monitoring to detect a timeout
Await.result(timeoutFuture, Duration.Inf)
}

implicit class PathExt(x: Path) {
def createIfNotExists: Path = {
if (!Files.exists(x)) Files.createDirectories(x)
x
}
}
}

case class JmhProcessInfo(cmd: String, resultsExist: Boolean, outputFile: File)
24 changes: 11 additions & 13 deletions src/main/scala/com/github/plume/oss/Benchmark.scala
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,12 @@ import com.github.plume.oss.benchmarking.{
TinkerGraphReadBenchmark
}
import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver}
import org.cache2k.benchmark.jmh.ForcedGcMemoryProfiler
import org.cache2k.benchmark.jmh.HeapProfiler
import org.openjdk.jmh.annotations.Mode
import org.openjdk.jmh.runner.Runner
import org.openjdk.jmh.runner.options.{ChainedOptionsBuilder, OptionsBuilder, TimeValue}
import upickle.default.*

import java.util
import java.util.concurrent.TimeUnit

object Benchmark {

def main(args: Array[String]): Unit = {
Expand All @@ -27,6 +24,7 @@ object Benchmark {
.foreach { config =>
val writeOptsBenchmark = createOptionsBoilerPlate(config, WRITE)
.include(classOf[GraphWriteBenchmark].getSimpleName)
.warmupIterations(5)
.build()
new Runner(writeOptsBenchmark).run()
println(
Expand All @@ -38,18 +36,21 @@ object Benchmark {
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[TinkerGraphReadBenchmark].getSimpleName)
.warmupIterations(1)
.build()
)
case _: OverflowDbConfig =>
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[OverflowDbReadBenchmark].getSimpleName)
.warmupIterations(1)
.build()
)
case _: Neo4jEmbeddedConfig =>
Option(
createOptionsBoilerPlate(config, READ)
.include(classOf[Neo4jEmbedReadBenchmark].getSimpleName)
.warmupIterations(1)
.build()
)
case x =>
Expand All @@ -68,16 +69,13 @@ object Benchmark {

private def createOptionsBoilerPlate(config: PlumeConfig, benchmarkType: BenchmarkType): ChainedOptionsBuilder = {
new OptionsBuilder()
.addProfiler(classOf[ForcedGcMemoryProfiler])
.warmupIterations(1)
.warmupTime(TimeValue.seconds(1))
.measurementTime(TimeValue.seconds(2))
.measurementIterations(5)
.addProfiler(classOf[HeapProfiler])
.warmupTime(TimeValue.seconds(30))
.measurementIterations(3)
.mode(Mode.AverageTime)
.timeUnit(TimeUnit.NANOSECONDS)
.forks(2)
.output(s"${config.jmhOutputFile}-$benchmarkType.txt")
.result(s"${config.jmhResultFile}-$benchmarkType.csv")
.forks(1)
.output(s"${config.jmhOutputFile}-${benchmarkType.toString.toLowerCase}.txt")
.result(s"${config.jmhResultFile}-${benchmarkType.toString.toLowerCase}.csv")
.param("configStr", write(config))
.jvmArgsAppend(s"-Xmx${config.jmhMemoryGb}G", "-XX:+UseZGC")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,27 @@ import com.github.plume.oss
import com.github.plume.oss.{Benchmark, JimpleAst2Database, PlumeConfig, TinkerGraphConfig}
import com.github.plume.oss.drivers.{IDriver, TinkerGraphDriver}
import io.joern.jimple2cpg.Config
import org.openjdk.jmh.annotations.{Benchmark, Level, Param, Scope, Setup, State, TearDown, Timeout}
import io.shiftleft.codepropertygraph.generated.{NodeTypes, PropertyNames}
import org.openjdk.jmh.annotations.{
Benchmark,
Level,
Measurement,
OutputTimeUnit,
Param,
Scope,
Setup,
State,
TearDown,
Timeout
}
import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}

import java.util.concurrent.TimeUnit
import scala.compiletime.uninitialized

@State(Scope.Benchmark)
@Timeout(2, TimeUnit.MINUTES)
@Timeout(5, TimeUnit.MINUTES)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
trait GraphReadBenchmark {

@Param(Array(""))
Expand Down Expand Up @@ -40,7 +53,7 @@ trait GraphReadBenchmark {
val (driver_, config_) = oss.Benchmark.initializeDriverAndInputDir(configStr, useCachedGraph = true)
driver = driver_
config = config_
if (!driver.exists(1L)) {
if (driver.propertyFromNodes(NodeTypes.FILE, PropertyNames.NAME).isEmpty) {
JimpleAst2Database(driver).createAst(Config().withInputPath(config_.inputDir))
config.dbConfig match {
case TinkerGraphConfig(_, Some(exportPath)) => driver.asInstanceOf[TinkerGraphDriver].exportGraph(exportPath)
Expand All @@ -60,29 +73,41 @@ trait GraphReadBenchmark {
protected def setUpMethodFullName(): Array[String]

@Benchmark
@Measurement(time = 5, timeUnit = TimeUnit.SECONDS)
def astDFS(blackhole: Blackhole): Int

@Benchmark
@Measurement(time = 5, timeUnit = TimeUnit.SECONDS)
def astUp(blackhole: Blackhole): Int

@Benchmark
@Measurement(time = 5, timeUnit = TimeUnit.SECONDS)
def orderSum(blackhole: Blackhole): Int

@Benchmark
@Measurement(time = 5, timeUnit = TimeUnit.SECONDS)
def callOrderTrav(blackhole: Blackhole): Int

@Benchmark
@Measurement(time = 5, timeUnit = TimeUnit.SECONDS)
def callOrderExplicit(blackhole: Blackhole): Int

@Benchmark
@Measurement(time = 5, timeUnit = TimeUnit.SECONDS)
def indexedMethodFullName(bh: Blackhole): Unit

@Benchmark
@Measurement(time = 5, timeUnit = TimeUnit.SECONDS)
def unindexedMethodFullName(bh: Blackhole): Unit

@TearDown
def cleanupBenchmark(): Unit = {
driver.close()
}

@TearDown(Level.Iteration)
def teardown(): Unit = {
System.gc()
}

}
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
package com.github.plume.oss.benchmarking

import com.github.plume.oss
import com.github.plume.oss.{Benchmark, JimpleAst2Database}
import com.github.plume.oss.drivers.IDriver
import com.github.plume.oss.{Benchmark, JimpleAst2Database}
import io.joern.jimple2cpg.Config
import org.openjdk.jmh.annotations.{Benchmark, Level, Param, Scope, Setup, State, TearDown, Timeout}
import org.openjdk.jmh.annotations.*
import org.openjdk.jmh.infra.{BenchmarkParams, Blackhole}

import java.util.concurrent.TimeUnit
import scala.compiletime.uninitialized

@State(Scope.Benchmark)
@Timeout(5, TimeUnit.MINUTES)
@Timeout(6, TimeUnit.MINUTES)
@OutputTimeUnit(TimeUnit.SECONDS)
class GraphWriteBenchmark {

@Param(Array(""))
Expand All @@ -32,9 +33,12 @@ class GraphWriteBenchmark {
}

@Benchmark
def createAst(blackhole: Blackhole): Unit = {
@Measurement(time = 10, timeUnit = TimeUnit.SECONDS)
def createAst(blackhole: Blackhole): Unit = try {
JimpleAst2Database(driver).createAst(Config().withInputPath(inputDir))
Option(blackhole).foreach(_.consume(driver))
} catch {
case e: Throwable => Option(blackhole).foreach(_.consume(e))
}

@TearDown
Expand All @@ -43,4 +47,9 @@ class GraphWriteBenchmark {
driver.close()
}

@TearDown(Level.Iteration)
def teardown(): Unit = {
System.gc()
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
new util.HashMap[String, Object](1) {
put("nodeIds", nodeStart.toList.asJava.asInstanceOf[Object])
}
).map(_.get("SIZE").asInstanceOf[Int])
).map(_.get("SIZE").asInstanceOf[Long].toInt)
.next()
}
Option(blackhole).foreach(_.consume(res))
Expand Down Expand Up @@ -239,7 +239,7 @@ class Neo4jEmbedReadBenchmark extends GraphReadBenchmark {
tx.execute(
s"""
|MATCH (n)
|WHERE n.$FULL_NAME = $$fullName and $METHOD IN labels(n)
|WHERE n.$FULL_NAME = $$fullName and \"$METHOD\" IN labels(n)
|RETURN n AS NODE
|""".stripMargin,
new util.HashMap[String, Object](1) {
Expand Down
Loading

0 comments on commit df28007

Please sign in to comment.