Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict fetching PRs and issues only to time range needed #103

Merged
merged 1 commit into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions chronicle/release/releasers/github/gh_issue.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,18 @@ func issuesWithoutLabels() issueFilter {
}

//nolint:funlen
func fetchClosedIssues(user, repo string) ([]ghIssue, error) {
func fetchClosedIssues(user, repo string, since *time.Time) ([]ghIssue, error) {
src := oauth2.StaticTokenSource(
// TODO: DI this
&oauth2.Token{AccessToken: os.Getenv("GITHUB_TOKEN")},
)
httpClient := oauth2.NewClient(context.Background(), src)
client := githubv4.NewClient(httpClient)
var allIssues []ghIssue
var (
pages = 0
saw = 0
allIssues []ghIssue
)

{
// TODO: act on hitting a rate limit
Expand Down Expand Up @@ -192,6 +196,7 @@ func fetchClosedIssues(user, repo string) ([]ghIssue, error) {
}
Closed githubv4.Boolean
ClosedAt githubv4.DateTime
UpdatedAt githubv4.DateTime
StateReason githubv4.String
Labels struct {
Edges []struct {
Expand All @@ -202,7 +207,7 @@ func fetchClosedIssues(user, repo string) ([]ghIssue, error) {
} `graphql:"labels(first:100)"`
}
}
} `graphql:"issues(first:100, states:CLOSED, after:$issuesCursor)"`
} `graphql:"issues(first:100, states:CLOSED, after:$issuesCursor, orderBy:{field: UPDATED_AT, direction: DESC})"`
} `graphql:"repository(owner:$repositoryOwner, name:$repositoryName)"`

RateLimit rateLimit
Expand All @@ -214,14 +219,28 @@ func fetchClosedIssues(user, repo string) ([]ghIssue, error) {
}

// var limit rateLimit
for {
var (
process bool
terminate = false
)

for !terminate {
log.WithFields("user", user, "repo", repo, "page", pages).Trace("fetching closed issues from github.com")

err := client.Query(context.Background(), &query, variables)
if err != nil {
return nil, err
}

// limit = query.RateLimit

for _, iEdge := range query.Repository.Issues.Edges {
saw++
process, terminate = checkSearchTermination(since, &iEdge.Node.UpdatedAt, &iEdge.Node.ClosedAt)
if !process || terminate {
continue
}

var labels []string
for _, lEdge := range iEdge.Node.Labels.Edges {
labels = append(labels, string(lEdge.Node.Name))
Expand All @@ -242,6 +261,7 @@ func fetchClosedIssues(user, repo string) ([]ghIssue, error) {
break
}
variables["issuesCursor"] = githubv4.NewString(query.Repository.Issues.PageInfo.EndCursor)
pages++
}

// for idx, is := range allIssues {
Expand All @@ -250,5 +270,7 @@ func fetchClosedIssues(user, repo string) ([]ghIssue, error) {
// printJSON(limit)
}

log.WithFields("kept", len(allIssues), "saw", saw, "pages", pages, "since", since).Trace("closed PRs fetched from github.com")

return allIssues, nil
}
51 changes: 45 additions & 6 deletions chronicle/release/releasers/github/gh_pull_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -238,14 +238,18 @@ func keepPRsWithCommits(prs []ghPullRequest, commits []string, filters ...prFilt
}

//nolint:funlen
func fetchMergedPRs(user, repo string) ([]ghPullRequest, error) {
func fetchMergedPRs(user, repo string, since *time.Time) ([]ghPullRequest, error) {
src := oauth2.StaticTokenSource(
// TODO: DI this
&oauth2.Token{AccessToken: os.Getenv("GITHUB_TOKEN")},
)
httpClient := oauth2.NewClient(context.Background(), src)
client := githubv4.NewClient(httpClient)
var allPRs []ghPullRequest
var (
pages = 0
saw = 0
allPRs []ghPullRequest
)

{
// TODO: act on hitting a rate limit
Expand Down Expand Up @@ -276,8 +280,9 @@ func fetchMergedPRs(user, repo string) ([]ghPullRequest, error) {
MergeCommit struct {
OID githubv4.String
}
MergedAt githubv4.DateTime
Labels struct {
UpdatedAt githubv4.DateTime
MergedAt githubv4.DateTime
Labels struct {
Edges []struct {
Node struct {
Name githubv4.String
Expand Down Expand Up @@ -305,7 +310,7 @@ func fetchMergedPRs(user, repo string) ([]ghPullRequest, error) {
} `graphql:"closingIssuesReferences(last:10)"`
}
}
} `graphql:"pullRequests(first:100, states:MERGED, after:$prCursor)"`
} `graphql:"pullRequests(first:100, states:MERGED, after:$prCursor, orderBy:{field: UPDATED_AT, direction: DESC})"`
} `graphql:"repository(owner:$repositoryOwner, name:$repositoryName)"`

RateLimit rateLimit
Expand All @@ -317,14 +322,28 @@ func fetchMergedPRs(user, repo string) ([]ghPullRequest, error) {
}

// var limit rateLimit
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this meant to be commented out? (I see it's not part of this diff, so not a blocking comment; just surprised.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, this is a partial implementation of #9

for {
var (
process bool
terminate = false
)

for !terminate {
log.WithFields("user", user, "repo", repo, "page", pages).Trace("fetching merged PRs from github.com")

err := client.Query(context.Background(), &query, variables)
if err != nil {
return nil, err
}

// limit = query.RateLimit

for _, prEdge := range query.Repository.PullRequests.Edges {
saw++
process, terminate = checkSearchTermination(since, &prEdge.Node.UpdatedAt, &prEdge.Node.MergedAt)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to understand the logic here. What I think is happening:

  1. We've asked GH for relevant PRs, with more recently updated PRs first.
  2. For each PR, we check two things: Should we process the current PR (process) and should we break off running additional queries (terminate).
  3. If an PR was merged before the last release, we don't process it (because it was presumably part of the last release).
  4. If a PR was last updated before the last release, we break off searching, because we're ordering GH events by last update, and we got to one before the ones we care about.

Is that right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup yup -- right on. The go case is that a PR isn't updated after it's merged, so merge-timestamp == update-timestamp, but it is possible to update a PR that may or may not be part of this release after it's been merged (e.g. adding a label to the issue, adding a comment, etc)... we want to curtail the search to save on time but not include PRs that were merged before the last release.

if !process || terminate {
continue
}

var labels []string
for _, lEdge := range prEdge.Node.Labels.Edges {
labels = append(labels, string(lEdge.Node.Name))
Expand Down Expand Up @@ -359,8 +378,28 @@ func fetchMergedPRs(user, repo string) ([]ghPullRequest, error) {
break
}
variables["prCursor"] = githubv4.NewString(query.Repository.PullRequests.PageInfo.EndCursor)
pages++
}
}

log.WithFields("kept", len(allPRs), "saw", saw, "pages", pages, "since", since).Trace("merged PRs fetched from github.com")

return allPRs, nil
}

func checkSearchTermination(since *time.Time, updatedAt, closedAt *githubv4.DateTime) (process bool, terminate bool) {
process = true
if since == nil {
return
}

if closedAt.Before(*since) {
process = false
}

if updatedAt.Before(*since) {
terminate = true
}

return
}
78 changes: 78 additions & 0 deletions chronicle/release/releasers/github/gh_pull_request_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"testing"
"time"

"github.com/shurcooL/githubv4"
"github.com/stretchr/testify/assert"

"github.com/anchore/chronicle/chronicle/release/change"
Expand Down Expand Up @@ -471,3 +472,80 @@ func Test_prsWithoutLinkedIssues(t *testing.T) {
})
}
}

func Test_checkSearchTermination(t *testing.T) {
since := githubv4.DateTime{Time: time.Date(1987, time.September, 16, 19, 34, 0, 0, time.UTC)}
hourAfter := &githubv4.DateTime{Time: since.Add(time.Hour)}
minuteAfter := &githubv4.DateTime{Time: since.Add(time.Minute)}
minuteBefore := &githubv4.DateTime{Time: since.Add(-time.Minute)}

type args struct {
since *time.Time
updatedAt *githubv4.DateTime
mergedAt *githubv4.DateTime
}
tests := []struct {
name string
args args
wantProcess bool
wantTerminate bool
}{
{
name: "go case candidate",
args: args{
since: &since.Time,
updatedAt: hourAfter,
mergedAt: hourAfter,
},
wantProcess: true,
wantTerminate: false,
},
{
name: "candidate updated after the merge, and merged after the compare date",
args: args{
since: &since.Time,
updatedAt: hourAfter,
mergedAt: minuteAfter,
},
wantProcess: true,
wantTerminate: false,
},
{
name: "candidate updated after the merge, but merged before the compare date",
args: args{
since: &since.Time,
updatedAt: hourAfter,
mergedAt: minuteBefore,
},
wantProcess: false,
wantTerminate: false,
},
{
name: "candidate updated before the merge, and merged before the compare date",
args: args{
since: &since.Time,
updatedAt: minuteBefore,
mergedAt: minuteBefore,
},
wantProcess: false,
wantTerminate: true,
},
{
name: "impossible: candidate updated before the merge, but merged after the compare date",
args: args{
since: &since.Time,
updatedAt: minuteBefore,
mergedAt: minuteAfter,
},
wantProcess: true,
wantTerminate: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotProcess, gotTerminate := checkSearchTermination(tt.args.since, tt.args.updatedAt, tt.args.mergedAt)
assert.Equalf(t, tt.wantProcess, gotProcess, "wantProcess: checkSearchTermination(%v, %v, %v)", tt.args.since, tt.args.updatedAt, tt.args.mergedAt)
assert.Equalf(t, tt.wantTerminate, gotTerminate, "wantTerminate: checkSearchTermination(%v, %v, %v)", tt.args.since, tt.args.updatedAt, tt.args.mergedAt)
})
}
}
20 changes: 15 additions & 5 deletions chronicle/release/releasers/github/summarizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"net/url"
"strings"
"time"

"github.com/anchore/chronicle/chronicle/release"
"github.com/anchore/chronicle/chronicle/release/change"
Expand Down Expand Up @@ -118,6 +119,15 @@ func (s *Summarizer) Changes(sinceRef, untilRef string) ([]change.Change, error)
includeStart = true
}

var sinceTime *time.Time
if sinceTag != nil {
sinceRelease, err := fetchRelease(s.userName, s.repoName, sinceTag.Name)
if err != nil {
return nil, fmt.Errorf("unable to fetch release %q: %w", sinceTag.Name, err)
}
sinceTime = &sinceRelease.Date
}

var untilTag *git.Tag
untilHash := untilRef
if untilRef != "" {
Expand Down Expand Up @@ -150,18 +160,18 @@ func (s *Summarizer) Changes(sinceRef, untilRef string) ([]change.Change, error)
logCommits(includeCommits)
}

allMergedPRs, err := fetchMergedPRs(s.userName, s.repoName)
allMergedPRs, err := fetchMergedPRs(s.userName, s.repoName, sinceTime)
if err != nil {
return nil, err
}

log.Debugf("total merged PRs discovered: %d", len(allMergedPRs))
log.WithFields("since", sinceTime).Debugf("total merged PRs discovered: %d", len(allMergedPRs))

if s.config.IncludePRs {
changes = append(changes, changesFromStandardPRFilters(s.config, allMergedPRs, sinceTag, untilTag, includeCommits)...)
}

allClosedIssues, err := fetchClosedIssues(s.userName, s.repoName)
allClosedIssues, err := fetchClosedIssues(s.userName, s.repoName, sinceTime)
if err != nil {
return nil, err
}
Expand All @@ -170,7 +180,7 @@ func (s *Summarizer) Changes(sinceRef, untilRef string) ([]change.Change, error)
allClosedIssues = filterIssues(allClosedIssues, excludeIssuesNotPlanned(allMergedPRs))
}

log.Debugf("total closed issues discovered: %d", len(allClosedIssues))
log.WithFields("since", sinceTime).Debugf("total closed issues discovered: %d", len(allClosedIssues))

if s.config.IncludeIssues {
if s.config.IssuesRequireLinkedPR {
Expand Down Expand Up @@ -342,7 +352,7 @@ func changesFromUnlabeledPRs(config Config, allMergedPRs []ghPullRequest, sinceT

filteredIssues, _ := filterPRs(allMergedPRs, filters...)

log.Debugf("unlabeled prs contributing to changelog: %d", len(filteredIssues))
log.Debugf("unlabeled PRs contributing to changelog: %d", len(filteredIssues))

return createChangesFromPRs(config, filteredIssues)
}
Expand Down