Skip to content

Commit

Permalink
Support use of Maven to resolve all dependencies.
Browse files Browse the repository at this point in the history
Signed-off-by: Gijs Calis <gijs.calis@triopsys.nl>

Signed-off-by: Gijs Calis <51088038+GijsCalis@users.noreply.github.com>
  • Loading branch information
GijsCalis committed Feb 24, 2024
1 parent 170ac07 commit ab5525f
Show file tree
Hide file tree
Showing 20 changed files with 933 additions and 4 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,14 @@ java:
# this option is helpful for when the parent pom has more data,
# that is not accessible from within the final built artifact
use-network: false
# Enables use of the Maven application to resolve any pom.xml files that are found, ensuring that all properties are resolved
# and that all configuration is inherited from parent poms.
# When `use-network` is false, Maven will work in offline mode. If a Maven build has been run before
# the scan, all required pom files will be available locally.
# If Maven is not available, a warning will be logged and the unresolved pom.xml will be used.
use-maven: true
# Command used to run Maven. May include full path.
maven-command: "mvn"

linux-kernel:
# whether to catalog linux kernel modules found within lib/modules/** directories
Expand Down
2 changes: 2 additions & 0 deletions cmd/syft/internal/options/catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
},
JavaArchive: java.DefaultArchiveCatalogerConfig().
WithUseNetwork(cfg.Java.UseNetwork).
WithUseMaven(cfg.Java.UseMaven).
WithMavenCommand(cfg.Java.MavenCommand).
WithMavenBaseURL(cfg.Java.MavenURL).
WithArchiveTraversal(archiveSearch, cfg.Java.MaxParentRecursiveDepth),
}
Expand Down
2 changes: 2 additions & 0 deletions cmd/syft/internal/options/java.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package options

// javaConfig holds the user-facing options of the Java catalogers. Its values are
// copied onto the Java archive cataloger configuration when the package
// cataloging config is built.
type javaConfig struct {
// UseNetwork allows parent pom data to be looked up over the network; when it is
// false and Maven is used, Maven is run in offline mode.
UseNetwork bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"`
// UseMaven enables running the Maven application to resolve the pom.xml files
// that are found.
UseMaven bool `yaml:"use-maven" json:"use-maven" mapstructure:"use-maven"`
// MavenCommand is the command used to run Maven; it may include a full path.
MavenCommand string `yaml:"maven-command" json:"maven-command" mapstructure:"maven-command"`
// MavenURL is passed on as the Maven base URL of the archive cataloger.
MavenURL string `yaml:"maven-url" json:"maven-url" mapstructure:"maven-url"`
// MaxParentRecursiveDepth is passed on to the archive cataloger's traversal settings.
MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"`
}
2 changes: 1 addition & 1 deletion syft/file/license.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ type LicenseEvidence struct {
func NewLicense(value string) License {
spdxExpression, err := license.ParseExpression(value)
if err != nil {
log.Trace("unable to parse license expression: %s, %w", value, err)
log.Tracef("unable to parse license expression: %s, %w", value, err)
}

return License{
Expand Down
7 changes: 7 additions & 0 deletions syft/pkg/cataloger/java/archive_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ func newGenericArchiveParserAdapter(cfg ArchiveCatalogerConfig) genericArchivePa

// parseJavaArchive is a parser function for java archive contents, returning all Java libraries and nested archives.
func (gap genericArchiveParserAdapter) parseJavaArchive(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// Get full path
archiveFilename := string(reader.Reference().RealPath.Normalize())
// Get path to file within the archive
if archiveFilename == "/" {
archiveFilename = reader.AccessPath
}
log.Tracef("Processing Java archive: '%q'", archiveFilename)
parser, cleanupFn, err := newJavaArchiveParser(reader, true, gap.cfg)
// note: even on error, we should always run cleanup functions
defer cleanupFn()
Expand Down
3 changes: 3 additions & 0 deletions syft/pkg/cataloger/java/archive_parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ func TestSearchMavenForLicenses(t *testing.T) {
detectNested: false,
config: ArchiveCatalogerConfig{
UseNetwork: true,
UseMaven: false,
MavenBaseURL: url,
MaxParentRecursiveDepth: 2,
},
Expand Down Expand Up @@ -1337,7 +1338,9 @@ func Test_parseJavaArchive_regressions(t *testing.T) {
},
PomProject: &pkg.JavaPomProject{
Path: "META-INF/maven/org.apache.directory.api/api-asn1-api/pom.xml",
GroupID: "org.apache.directory.api",
ArtifactID: "api-asn1-api",
Version: "2.0.0",
Name: "Apache Directory API ASN.1 API",
Description: "ASN.1 API",
Parent: &pkg.JavaPomParent{
Expand Down
17 changes: 17 additions & 0 deletions syft/pkg/cataloger/java/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ package java
import "github.com/anchore/syft/syft/cataloging"

// Defaults applied by the Java archive cataloger configuration.
const (
	// mavenBaseURL is the default base URL of the Maven repository.
	mavenBaseURL = "https://repo1.maven.org/maven2"
	// mavenCommand is the default command used to run Maven.
	mavenCommand = "mvn"
)

// ArchiveCatalogerConfig is the configuration of the Java archive cataloger.
type ArchiveCatalogerConfig struct {
// ArchiveSearchConfig is embedded inline (squashed) into this config.
cataloging.ArchiveSearchConfig `yaml:",inline" json:"" mapstructure:",squash"`
// UseNetwork allows network access; when it is false and Maven is used, Maven is
// run in offline mode.
UseNetwork bool `yaml:"use-network" json:"use-network" mapstructure:"use-network"`
// UseMaven enables running Maven to generate an effective pom for each pom.xml
// that is found, provided the Maven command is available.
UseMaven bool `yaml:"use-maven" json:"use-maven" mapstructure:"use-maven"`
// MavenCommand is the command used to run Maven; it may include a full path.
MavenCommand string `yaml:"maven-command" json:"maven-command" mapstructure:"maven-command"`
// MavenBaseURL is the base URL of the Maven repository that poms are requested from.
MavenBaseURL string `yaml:"maven-base-url" json:"maven-base-url" mapstructure:"maven-base-url"`
// MaxParentRecursiveDepth bounds how many nested parent poms are followed.
MaxParentRecursiveDepth int `yaml:"max-parent-recursive-depth" json:"max-parent-recursive-depth" mapstructure:"max-parent-recursive-depth"`
}
Expand All @@ -15,6 +18,8 @@ func DefaultArchiveCatalogerConfig() ArchiveCatalogerConfig {
return ArchiveCatalogerConfig{
ArchiveSearchConfig: cataloging.DefaultArchiveSearchConfig(),
UseNetwork: false,
UseMaven: true,
MavenCommand: mavenCommand,
MavenBaseURL: mavenBaseURL,
MaxParentRecursiveDepth: 5,
}
Expand All @@ -25,6 +30,18 @@ func (j ArchiveCatalogerConfig) WithUseNetwork(input bool) ArchiveCatalogerConfi
return j
}

// WithUseMaven returns a copy of the config with UseMaven set to input.
// The receiver is a value, so the caller's config is left untouched.
func (j ArchiveCatalogerConfig) WithUseMaven(input bool) ArchiveCatalogerConfig {
j.UseMaven = input
return j
}

// WithMavenCommand returns a copy of the config that uses input as the Maven
// command. An empty input leaves the currently configured command in place.
func (j ArchiveCatalogerConfig) WithMavenCommand(input string) ArchiveCatalogerConfig {
	if input == "" {
		// Nothing to override: keep whatever command is already set.
		return j
	}
	j.MavenCommand = input
	return j
}

func (j ArchiveCatalogerConfig) WithMavenBaseURL(input string) ArchiveCatalogerConfig {
if input != "" {
j.MavenBaseURL = input
Expand Down
7 changes: 7 additions & 0 deletions syft/pkg/cataloger/java/maven_repo_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package java

import (
"context"
"errors"
"fmt"
"io"
"net/http"
Expand Down Expand Up @@ -30,6 +31,8 @@ func formatMavenPomURL(groupID, artifactID, version, mavenBaseURL string) (reque

// An artifact can have its version defined in a parent's DependencyManagement section
func recursivelyFindVersionFromParentPom(ctx context.Context, groupID, artifactID, parentGroupID, parentArtifactID, parentVersion string, cfg ArchiveCatalogerConfig) string {
log.Debugf("recursively finding version from parent Pom for artifact [%v:%v], using parent pom: [%v:%v:%v]",
groupID, artifactID, parentGroupID, parentArtifactID, parentVersion)
// As there can be nested parent poms, we'll recursively check for the version until we reach the max depth
for i := 0; i < cfg.MaxParentRecursiveDepth; i++ {
parentPom, err := getPomFromMavenRepo(ctx, parentGroupID, parentArtifactID, parentVersion, cfg.MavenBaseURL)
Expand Down Expand Up @@ -80,7 +83,11 @@ func recursivelyFindLicensesFromParentPom(ctx context.Context, groupID, artifact
}

func getPomFromMavenRepo(ctx context.Context, groupID, artifactID, version, mavenBaseURL string) (*gopom.Project, error) {
if len(groupID) == 0 || len(artifactID) == 0 || len(version) == 0 {
return nil, errors.New("missing/incomplete maven artiface coordinates, cannot download pom from repository")
}
requestURL, err := formatMavenPomURL(groupID, artifactID, version, mavenBaseURL)
log.Tracef("Requesting pom for artifact %s:%s:%s", groupID, artifactID, version)
if err != nil {
return nil, err
}
Expand Down
122 changes: 120 additions & 2 deletions syft/pkg/cataloger/java/parse_pom_xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ import (
"encoding/xml"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"reflect"
"regexp"
"strings"
Expand All @@ -23,9 +26,50 @@ import (

const pomXMLGlob = "*pom.xml"

// State shared by isMavenAvailable, which probes for Maven only once.
// Both flags rely on the zero value (false) instead of an explicit, typed
// initializer, which is what the static analysis (revive var-declaration) requires.
var (
	// checkedForMaven is set once the probe for the Maven command has run.
	checkedForMaven bool
	// mavenAvailable holds the outcome of that probe.
	mavenAvailable bool
)

// propertyMatcher matches a "${...}" property reference with a non-empty name.
var propertyMatcher = regexp.MustCompile("[$][{][^}]+[}]")

func (gap genericArchiveParserAdapter) parserPomXML(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {

Check failure on line 34 in syft/pkg/cataloger/java/parse_pom_xml.go

View workflow job for this annotation

GitHub Actions / Static analysis

unnecessary leading newline (whitespace)

var pom string
// try to get absolute path first. This fails for tests, so fall back to AccessPath
if reader.Reference().RealPath != "" {
pom = string(reader.Reference().RealPath.Normalize())
} else {
pom = reader.AccessPath
}

var effectivePom string = filepath.Join(filepath.Dir(pom), "target", "effective-pom.xml")

Check failure on line 44 in syft/pkg/cataloger/java/parse_pom_xml.go

View workflow job for this annotation

GitHub Actions / Static analysis

var-declaration: should omit type string from declaration of var effectivePom; it will be inferred from the right-hand side (revive)

log.Tracef("Found POM in dir: %q", filepath.Dir(pom))

trueLocation := reader.Location

if gap.cfg.UseMaven && isMavenAvailable(gap.cfg.MavenCommand) {
generateEffectivePom(pom, effectivePom, gap.cfg.MavenCommand, gap.cfg.UseNetwork)

var pomReader io.ReadCloser
pomReader, err := os.Open(effectivePom)

if err == nil {
var pomLocation file.Location = file.NewLocation(effectivePom)

reader = file.NewLocationReadCloser(pomLocation, pomReader)
log.Debugf("Parsing effective POM: %q", effectivePom)
} else {
log.Errorf("Could not open file %q : %w", effectivePom, err)
}
} else {
log.Debugf("Parsing unresolved POM: %q", pom)
}

return parserPomXML(ctx, reader, gap, trueLocation)
}

// Parse pom file, when an effective pom file was generated, originalPom points to the original pom file
func parserPomXML(ctx context.Context, reader file.LocationReadCloser, gap genericArchiveParserAdapter, originalPom file.Location) ([]pkg.Package, []artifact.Relationship, error) {
pom, err := decodePomXML(reader)
if err != nil {
return nil, nil, err
Expand All @@ -34,24 +78,78 @@ func (gap genericArchiveParserAdapter) parserPomXML(ctx context.Context, _ file.
var pkgs []pkg.Package
if pom.Dependencies != nil {
for _, dep := range *pom.Dependencies {
var location file.Location
if originalPom.Coordinates != reader.Location.Coordinates {
location = originalPom.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)
} else {
location = reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)
}

p := newPackageFromPom(
ctx,
pom,
dep,
gap.cfg,
reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
location,
)
if p.Name == "" {
continue
}

pkgs = append(pkgs, p)

if len(p.Version) == 0 || strings.HasPrefix(p.Version, "${") {
groupId := *dep.GroupID

Check failure on line 102 in syft/pkg/cataloger/java/parse_pom_xml.go

View workflow job for this annotation

GitHub Actions / Static analysis

var-naming: var groupId should be groupID (revive)

Check failure on line 102 in syft/pkg/cataloger/java/parse_pom_xml.go

View workflow job for this annotation

GitHub Actions / Static analysis

ST1003: var groupId should be groupID (stylecheck)
artifactId := *dep.ArtifactID

Check failure on line 103 in syft/pkg/cataloger/java/parse_pom_xml.go

View workflow job for this annotation

GitHub Actions / Static analysis

var-naming: var artifactId should be artifactID (revive)

Check failure on line 103 in syft/pkg/cataloger/java/parse_pom_xml.go

View workflow job for this annotation

GitHub Actions / Static analysis

ST1003: var artifactId should be artifactID (stylecheck)
artifact := groupId + ":" + artifactId
log.Infof("Found artifact without version: %q, version: %q", artifact, p.Version)
}
}
}

return pkgs, nil, nil
}

func isMavenAvailable(mvnCommand string) bool {

Check failure on line 112 in syft/pkg/cataloger/java/parse_pom_xml.go

View workflow job for this annotation

GitHub Actions / Static analysis

unnecessary leading newline (whitespace)

// Only check for Maven on first call
if !checkedForMaven {
log.Tracef("Running command: %q -v", mvnCommand)

cmd := exec.Command(mvnCommand, "-v")
_, err := cmd.Output()

if err == nil {
log.Trace("Maven is available.")
mavenAvailable = true
} else {
log.Warnf("Maven is not available java pom.xml file analysis might be incomplete/incorrect! %+v", err)
}
checkedForMaven = true
}
return mavenAvailable
}

// generateEffectivePom runs the Maven goal help:effective-pom, non-recursively,
// against pomFile and asks Maven to write the result to effectivePomFile.
// Maven is run with --offline unless useNetwork is true. A failure is logged
// and not returned; the command output is always logged at trace level.
func generateEffectivePom(pomFile string, effectivePomFile string, mvnCommand string, useNetwork bool) {
	log.Debugf("Generating effective POM for: %q", pomFile)

	mvnArgs := make([]string, 0, 6)
	mvnArgs = append(mvnArgs, "help:effective-pom", "--non-recursive")
	if !useNetwork {
		mvnArgs = append(mvnArgs, "--offline")
	}
	mvnArgs = append(mvnArgs, "-Doutput="+effectivePomFile, "--file", pomFile)

	cmd := exec.Command(mvnCommand, mvnArgs...) // #nosec G204
	stdout, runErr := cmd.Output()
	mvnOutput := string(stdout)

	if runErr != nil {
		log.Errorf("failed to execute command: %q: %+v", cmd, runErr)
		log.Debug(mvnOutput)
	}
	log.Trace(mvnOutput)
}

func parsePomXMLProject(path string, reader io.Reader, location file.Location) (*parsedPomProject, error) {
project, err := decodePomXML(reader)
if err != nil {
Expand Down Expand Up @@ -164,6 +262,26 @@ func decodePomXML(content io.Reader) (project gopom.Project, err error) {
return project, fmt.Errorf("unable to unmarshal pom.xml: %w", err)
}

// For modules groupID and version are almost always inherited from parent pom
if project.GroupID == nil && project.Parent != nil {
project.GroupID = project.Parent.GroupID
}
if project.Version == nil && project.Parent != nil {
project.Version = project.Parent.Version
}

// If missing, add maven built-in version property often used in multi-module projects
if project.Version != nil {
if project.Properties == nil {
var props gopom.Properties
props.Entries = make(map[string]string)
props.Entries["project.version"] = *project.Version
project.Properties = &props
} else {
project.Properties.Entries["project.version"] = *project.Version
}
}

return project, nil
}

Expand Down
Loading

0 comments on commit ab5525f

Please sign in to comment.