diff --git a/framework/codemodder-base/src/main/java/io/codemodder/CLI.java b/framework/codemodder-base/src/main/java/io/codemodder/CLI.java index f0297d0dd..c5b6bf5f5 100644 --- a/framework/codemodder-base/src/main/java/io/codemodder/CLI.java +++ b/framework/codemodder-base/src/main/java/io/codemodder/CLI.java @@ -366,10 +366,15 @@ public Integer call() throws IOException { log.debug("excluding paths: {}", pathExcludes); // get all files that match + log.debug("Listing source directories"); List sourceDirectories = sourceDirectoryLister.listJavaSourceDirectories(List.of(projectDirectory)); + + log.debug("Listing files"); List filePaths = fileFinder.findFiles(projectPath, includesExcludes); + log.debug("Creating codemod regulator"); + // get codemod includes/excludes final CodemodRegulator regulator; if (codemodIncludes != null && codemodExcludes != null) { @@ -386,10 +391,13 @@ public Integer call() throws IOException { } // create the loader + log.debug("Loading input files"); CodeDirectory codeDirectory = new DefaultCodeDirectory(projectPath); List sarifFiles = convertToPaths(sarifs); List sonarIssuesJsonFiles = convertToPaths(sonarIssuesJsonFilePaths); List sonarHotspotJsonFiles = convertToPaths(sonarHotspotsJsonFilePaths); + + log.debug("Parsing SARIFs"); Map> pathSarifMap = SarifParser.create().parseIntoMap(sarifFiles, codeDirectory); List codemodParameters = diff --git a/framework/codemodder-base/src/main/java/io/codemodder/CodemodLoader.java b/framework/codemodder-base/src/main/java/io/codemodder/CodemodLoader.java index 44ce46684..75640b1b6 100644 --- a/framework/codemodder-base/src/main/java/io/codemodder/CodemodLoader.java +++ b/framework/codemodder-base/src/main/java/io/codemodder/CodemodLoader.java @@ -13,6 +13,8 @@ import java.util.regex.Pattern; import javax.inject.Inject; import org.jetbrains.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** This type is responsible for loading codemods and the surrounding subsystem. */ public final class CodemodLoader { @@ -33,6 +35,8 @@ public CodemodLoader( final Path defectDojoFindingsJsonFile, final Path contrastVulnerabilitiesXmlFilePath) { + log.debug("Loading providers"); + // get all the providers ready for dependency injection & codemod instantiation final List providers = ServiceLoader.load(CodemodProvider.class).stream() @@ -106,6 +110,7 @@ public CodemodLoader( wantsSarif.stream() .flatMap(toolName -> ruleSarifByTool.getOrDefault(toolName, List.of()).stream()) .toList(); + log.debug("Loading modules from provider: {}", provider.getClass().getSimpleName()); final Set modules = provider.getModules( repositoryDir, @@ -125,7 +130,9 @@ public CodemodLoader( final List codemods = new ArrayList<>(); // validate and instantiate the codemods + log.debug("Instantiating codemods"); final Injector injector = Guice.createInjector(allModules); + log.debug("Codemods instantiated"); final Set codemodIds = new HashSet<>(); for (final Class type : orderedCodemodTypes) { final Codemod codemodAnnotation = type.getAnnotation(Codemod.class); @@ -164,6 +171,8 @@ static boolean isValidCodemodId(final String codemodId) { return codemodIdPattern.matcher(codemodId).matches(); } + private static final Logger log = LoggerFactory.getLogger(CodemodLoader.class); + private static final Pattern codemodIdPattern = Pattern.compile("^([A-Za-z0-9]+):(([A-Za-z0-9]+)/)+([A-Za-z0-9\\-\\.]+)$"); } diff --git a/framework/codemodder-base/src/main/java/io/codemodder/DefaultSarifParser.java b/framework/codemodder-base/src/main/java/io/codemodder/DefaultSarifParser.java index d1a6ba370..0c7b5b4fc 100644 --- a/framework/codemodder-base/src/main/java/io/codemodder/DefaultSarifParser.java +++ b/framework/codemodder-base/src/main/java/io/codemodder/DefaultSarifParser.java @@ -5,6 +5,7 @@ import com.contrastsecurity.sarif.SarifSchema210; import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; import java.util.*; @@ -17,10 +18,14 @@ final class DefaultSarifParser implements SarifParser { private Optional readSarifFile(final Path sarifFile) { try { - return Optional.of( - new ObjectMapper().readValue(Files.newInputStream(sarifFile), SarifSchema210.class)); + log.debug("Reading input file: {}", sarifFile); + InputStream stream = Files.newInputStream(sarifFile); + log.debug("Parsing to SARIF input files"); + SarifSchema210 sarif = new ObjectMapper().readValue(stream, SarifSchema210.class); + log.debug("Parsed SARIF input files"); + return Optional.of(sarif); } catch (final IOException e) { - LOG.error("Problem deserializing SARIF file: {}", sarifFile, e); + log.error("Problem deserializing SARIF file: {}", sarifFile, e); return Optional.empty(); } } @@ -33,6 +38,7 @@ private Optional> tryToBuild( final CodeDirectory codeDirectory, final List factories) { for (final var factory : factories) { + log.debug("Building SARIF: {}", factory.getClass().getSimpleName()); final var maybeRuleSarif = factory.build(toolName, rule.ruleId, rule.messageText, sarif, codeDirectory); if (maybeRuleSarif.isPresent()) { @@ -40,7 +46,7 @@ private Optional> tryToBuild( } } - LOG.info("Found SARIF from unsupported tool: {}", toolName); + log.info("Found SARIF from unsupported tool: {}", toolName); return Optional.empty(); } @@ -60,7 +66,7 @@ private RuleDescriptor extractRuleId(final Result result, final Run run) { if (maybeRule.isPresent()) { return maybeRule.get(); } else { - LOG.info("Could not find rule id for result."); + log.info("Could not find rule id for result."); return null; } } @@ -72,10 +78,12 @@ private Stream> fromSarif( final Run run, final SarifSchema210 sarif, final CodeDirectory codeDirectory) { // driver name final var toolName = run.getTool().getDriver().getName(); + log.debug("Loading SARIF rule factories"); final List factories = ServiceLoader.load(RuleSarifFactory.class).stream() .map(ServiceLoader.Provider::get) .toList(); + log.debug("Done loading SARIF rule factories"); final var runResults = run.getResults(); final var allResults = runResults != null @@ -105,16 +113,18 @@ public Map> parseIntoMap( .flatMap( sarif -> sarif.getRuns().stream().flatMap(run -> fromSarif(run, sarif, codeDirectory))) .forEach( - p -> - map.merge( - p.getKey(), - new ArrayList<>(Collections.singletonList(p.getValue())), - (l1, l2) -> { - l1.add(l2.get(0)); - return l1; - })); + p -> { + log.debug("Merging SARIF results"); + map.merge( + p.getKey(), + new ArrayList<>(Collections.singletonList(p.getValue())), + (l1, l2) -> { + l1.add(l2.get(0)); + return l1; + }); + }); return map; } - private static final Logger LOG = LoggerFactory.getLogger(DefaultSarifParser.class); + private static final Logger log = LoggerFactory.getLogger(DefaultSarifParser.class); } diff --git a/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanRuleSarif.java b/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanRuleSarif.java index 93fd6ebab..016c0d047 100644 --- a/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanRuleSarif.java +++ b/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanRuleSarif.java @@ -1,12 +1,11 @@ package io.codemodder.providers.sarif.appscan; import com.contrastsecurity.sarif.*; -import io.codemodder.CodeDirectory; import io.codemodder.RuleSarif; -import java.io.IOException; -import java.io.UncheckedIOException; import java.nio.file.Path; import java.util.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** A {@link RuleSarif} for AppScan results. */ final class AppScanRuleSarif implements RuleSarif { @@ -14,45 +13,20 @@ final class AppScanRuleSarif implements RuleSarif { private final SarifSchema210 sarif; private final String messageText; private final Map> resultsCache; - private final List locations; - - /** A map of a AppScan SARIF "location" URIs mapped to their respective file paths. */ - private final Map> artifactLocationIndices; + private final AppScanSarifLocationData sarifLocationData; /** * Creates an {@link AppScanRuleSarif} that has already done the work of mapping AppScan SARIF * locations, which are strange combinations of class name and file path, into predictable paths. */ AppScanRuleSarif( - final String messageText, final SarifSchema210 sarif, final CodeDirectory codeDirectory) { + final String messageText, + final SarifSchema210 sarif, + final AppScanSarifLocationData sarifLocationData) { this.sarif = Objects.requireNonNull(sarif); this.messageText = Objects.requireNonNull(messageText); this.resultsCache = new HashMap<>(); - this.locations = - sarif.getRuns().get(0).getArtifacts().stream() - .map(Artifact::getLocation) - .map(ArtifactLocation::getUri) - .map(u -> u.substring(8)) // trim the file:/// of all results - .toList(); - Map> artifactLocationIndicesMap = new HashMap<>(); - - for (int i = 0; i < locations.size(); i++) { - final Integer index = i; - String path = locations.get(i); - path = path.replace('\\', '/'); - // we have a real but partial path, now we have to find it in the repository - Optional existingRealPath; - try { - existingRealPath = codeDirectory.findFilesWithTrailingPath(path); - } catch (IOException e) { - throw new UncheckedIOException(e); - } - - // add to the map if we found a matching file - existingRealPath.ifPresent( - p -> artifactLocationIndicesMap.computeIfAbsent(p, k -> new HashSet<>()).add(index)); - } - this.artifactLocationIndices = Map.copyOf(artifactLocationIndicesMap); + this.sarifLocationData = Objects.requireNonNull(sarifLocationData); } @Override @@ -71,6 +45,9 @@ public List getRegionsFromResultsByRule(final Path path) { */ @Override public List getResultsByLocationPath(final Path path) { + + Map> artifactLocationIndices = + sarifLocationData.getArtifactLocationIndices(); return resultsCache.computeIfAbsent( path, p -> @@ -117,4 +94,6 @@ public String getRule() { } static final String toolName = "HCL AppScan Static Analyzer"; + + private static final Logger log = LoggerFactory.getLogger(AppScanRuleSarif.class); } diff --git a/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanRuleSarifFactory.java b/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanRuleSarifFactory.java index 3f733a786..8710d244c 100644 --- a/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanRuleSarifFactory.java +++ b/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanRuleSarifFactory.java @@ -4,11 +4,18 @@ import io.codemodder.CodeDirectory; import io.codemodder.RuleSarif; import io.codemodder.RuleSarifFactory; -import java.util.Optional; +import java.util.*; /** A factory for building {@link AppScanRuleSarif}s. */ public final class AppScanRuleSarifFactory implements RuleSarifFactory { + /** A map of a AppScan SARIF "location" URIs mapped to their respective file paths. */ + private final Map sarifLocationDataCache; + + public AppScanRuleSarifFactory() { + this.sarifLocationDataCache = new HashMap<>(); + } + @Override public Optional build( final String toolName, @@ -17,7 +24,12 @@ public Optional build( final SarifSchema210 sarif, final CodeDirectory codeDirectory) { if (AppScanRuleSarif.toolName.equals(toolName)) { - return Optional.of(new AppScanRuleSarif(messageText, sarif, codeDirectory)); + AppScanSarifLocationData sarifLocationData = sarifLocationDataCache.get(sarif); + if (sarifLocationData == null) { + sarifLocationData = new AppScanSarifLocationData(sarif, codeDirectory); + sarifLocationDataCache.put(sarif, sarifLocationData); + } + return Optional.of(new AppScanRuleSarif(messageText, sarif, sarifLocationData)); } return Optional.empty(); } diff --git a/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanSarifLocationData.java b/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanSarifLocationData.java new file mode 100644 index 000000000..ca2acc11a --- /dev/null +++ b/plugins/codemodder-plugin-appscan/src/main/java/io/codemodder/providers/sarif/appscan/AppScanSarifLocationData.java @@ -0,0 +1,55 @@ +package io.codemodder.providers.sarif.appscan; + +import com.contrastsecurity.sarif.Artifact; +import com.contrastsecurity.sarif.ArtifactLocation; +import com.contrastsecurity.sarif.SarifSchema210; +import io.codemodder.CodeDirectory; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Path; +import java.util.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Holds the locations of the artifacts in the SARIF file. */ +final class AppScanSarifLocationData { + + private final Map> artifactLocationIndices; + + AppScanSarifLocationData(final SarifSchema210 sarif, final CodeDirectory codeDirectory) { + log.debug("Cleaning locations"); + List locations = + sarif.getRuns().get(0).getArtifacts().stream() + .map(Artifact::getLocation) + .map(ArtifactLocation::getUri) + .map(u -> u.substring(8)) // trim the file:/// of all results + .toList(); + + Map> artifactLocationIndicesMap = new HashMap<>(); + + log.debug("Calculating locations in project dir"); + for (int i = 0; i < locations.size(); i++) { + final Integer index = i; + String path = locations.get(i); + path = path.replace('\\', '/'); + // we have a real but partial path, now we have to find it in the repository + Optional existingRealPath; + try { + existingRealPath = codeDirectory.findFilesWithTrailingPath(path); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + // add to the map if we found a matching file + existingRealPath.ifPresent( + p -> artifactLocationIndicesMap.computeIfAbsent(p, k -> new HashSet<>()).add(index)); + } + log.debug("Done calculating locations"); + this.artifactLocationIndices = Map.copyOf(artifactLocationIndicesMap); + } + + Map> getArtifactLocationIndices() { + return artifactLocationIndices; + } + + private static final Logger log = LoggerFactory.getLogger(AppScanSarifLocationData.class); +} diff --git a/plugins/codemodder-plugin-appscan/src/test/java/io/codemodder/providers/sarif/appscan/AppScanModuleTest.java b/plugins/codemodder-plugin-appscan/src/test/java/io/codemodder/providers/sarif/appscan/AppScanModuleTest.java index 90a92285b..029eeed58 100644 --- a/plugins/codemodder-plugin-appscan/src/test/java/io/codemodder/providers/sarif/appscan/AppScanModuleTest.java +++ b/plugins/codemodder-plugin-appscan/src/test/java/io/codemodder/providers/sarif/appscan/AppScanModuleTest.java @@ -21,7 +21,7 @@ final class AppScanModuleTest { @Codemod( - id = "appscan-test:java/finds-stuff", + id = "appscan:java/finds-stuff", importance = Importance.LOW, reviewGuidance = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW) static class AppScanSarifTestCodemod implements CodeChanger { diff --git a/plugins/codemodder-plugin-pmd/src/main/java/io/codemodder/providers/sarif/pmd/PmdModule.java b/plugins/codemodder-plugin-pmd/src/main/java/io/codemodder/providers/sarif/pmd/PmdModule.java index 51afaa1ca..45ab4d437 100644 --- a/plugins/codemodder-plugin-pmd/src/main/java/io/codemodder/providers/sarif/pmd/PmdModule.java +++ b/plugins/codemodder-plugin-pmd/src/main/java/io/codemodder/providers/sarif/pmd/PmdModule.java @@ -3,9 +3,7 @@ import com.contrastsecurity.sarif.Result; import com.contrastsecurity.sarif.SarifSchema210; import com.google.inject.AbstractModule; -import io.codemodder.CodeChanger; -import io.codemodder.LazyLoadingRuleSarif; -import io.codemodder.RuleSarif; +import io.codemodder.*; import io.github.classgraph.*; import java.lang.reflect.Executable; import java.lang.reflect.Parameter; @@ -83,6 +81,11 @@ protected void configure() { } } + if (scanTargets.isEmpty()) { + LOG.trace("No @PmdScan annotations found, skipping"); + return; + } + SarifSchema210 allRulesBatchedRun = pmdRunner.run( scanTargets.stream().map(PmdScanTarget::pmdScan).map(PmdScan::ruleId).toList(), diff --git a/plugins/codemodder-plugin-pmd/src/test/java/io/codemodder/providers/sarif/pmd/PmdModuleTest.java b/plugins/codemodder-plugin-pmd/src/test/java/io/codemodder/providers/sarif/pmd/PmdModuleTest.java index 8d635de2e..97ff63c3c 100644 --- a/plugins/codemodder-plugin-pmd/src/test/java/io/codemodder/providers/sarif/pmd/PmdModuleTest.java +++ b/plugins/codemodder-plugin-pmd/src/test/java/io/codemodder/providers/sarif/pmd/PmdModuleTest.java @@ -46,7 +46,7 @@ public abstract class MultipleDeclarations { } @Codemod( - id = "pmd-test:java/my-pmd-codemod", + id = "pmd:java/my-pmd-codemod", importance = Importance.HIGH, reviewGuidance = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW) static class UsesPmdCodemod extends SarifPluginJavaParserChanger { diff --git a/plugins/codemodder-plugin-semgrep/src/main/java/io/codemodder/providers/sarif/semgrep/SemgrepModule.java b/plugins/codemodder-plugin-semgrep/src/main/java/io/codemodder/providers/sarif/semgrep/SemgrepModule.java index 3f086d991..9a5b8fefd 100644 --- a/plugins/codemodder-plugin-semgrep/src/main/java/io/codemodder/providers/sarif/semgrep/SemgrepModule.java +++ b/plugins/codemodder-plugin-semgrep/src/main/java/io/codemodder/providers/sarif/semgrep/SemgrepModule.java @@ -3,9 +3,7 @@ import com.contrastsecurity.sarif.Result; import com.contrastsecurity.sarif.SarifSchema210; import com.google.inject.AbstractModule; -import io.codemodder.CodeChanger; -import io.codemodder.LazyLoadingRuleSarif; -import io.codemodder.RuleSarif; +import io.codemodder.*; import io.github.classgraph.*; import java.io.IOException; import java.io.UncheckedIOException; @@ -44,7 +42,7 @@ public SemgrepModule( new DefaultSemgrepRuleFactory()); } - public SemgrepModule( + SemgrepModule( final Path codeDirectory, final List includePatterns, final List excludePatterns, @@ -143,6 +141,14 @@ protected void configure() { } } + if (rules.isEmpty()) { + // no active codemods have rules to bind for, so we can return + LOG.info("No active codemods have Semgrep rules to bind for JIT scanning"); + return; + } else { + LOG.info("Found {} Semgrep rules to bind for JIT scanning", rules.size()); + } + /* * To avoid running semgrep and eating heavy, redundant file I/O for every codemod, we'll run it once with all rules, calculate which rules didn't "hit", and then storing an empty result for them. This will allow us to only run Semgrep on the rules for which we have evidence will hit. Given that we don't expect most projects to hit most codemods, this is a big time-savings. */