fix: token counting issue
sshivaditya committed Jan 14, 2025
1 parent f4e80ea commit 8411227
Showing 4 changed files with 66 additions and 11 deletions.
5 changes: 3 additions & 2 deletions src/helpers/format-chat-history.ts
@@ -180,7 +180,6 @@ async function buildTree(
try {
const [owner, repo, issueNum] = splitKey(key);
const response = await fetchIssueComments({ context, owner, repo, issueNum: parseInt(issueNum) }, tokenLimit);
- logger.debug(`Tokens: ${tokenLimit.runningTokenCount}/${tokenLimit.tokensRemaining}`);
const issue = response.issue;

if (!issue) {
@@ -252,6 +251,7 @@ async function buildTree(

try {
const tree = await createNode(mainIssueKey);
+ console.log(`Map size: ${JSON.stringify(Array.from(processedNodes.keys()))}`);
return { tree };
} catch (error) {
logger.error("Error building tree", { error: error as Error });
@@ -377,6 +377,8 @@ export async function formatChatHistory(context: Context, maxDepth: number = 2,
return ["No main issue found."];
}

+ logger.debug(`Tokens: ${fetchTokenLimits.runningTokenCount}/${fetchTokenLimits.tokensRemaining}`);

if ("pull_request" in context.payload) {
const { diff_hunk, position, original_position, path, body } = context.payload.comment || {};
if (diff_hunk) {
@@ -393,6 +395,5 @@ export async function formatChatHistory(context: Context, maxDepth: number = 2,
const formatTokenLimits = createDefaultTokenLimits(context);
await processTreeNode(tree, "", treeOutput, formatTokenLimits);
logger.debug(`Final tokens: ${formatTokenLimits.runningTokenCount}/${formatTokenLimits.tokensRemaining}`);

return treeOutput;
}
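With the per-node token log removed from buildTree, formatChatHistory now reports usage once per phase, fetching and formatting, each against its own TokenLimits. A condensed sketch of the resulting flow, with buildTree's argument list elided since this diff does not show it:

const fetchTokenLimits = createDefaultTokenLimits(context); // budget charged while fetching the tree
const { tree } = await buildTree(/* arguments not shown in this diff */);
logger.debug(`Tokens: ${fetchTokenLimits.runningTokenCount}/${fetchTokenLimits.tokensRemaining}`);

const formatTokenLimits = createDefaultTokenLimits(context); // fresh budget for rendering
const treeOutput: string[] = [];
await processTreeNode(tree, "", treeOutput, formatTokenLimits);
logger.debug(`Final tokens: ${formatTokenLimits.runningTokenCount}/${formatTokenLimits.tokensRemaining}`);
return treeOutput;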
60 changes: 53 additions & 7 deletions src/helpers/issue-fetching.ts
@@ -3,7 +3,7 @@ import { TokenLimits } from "../types/llm";
import { logger } from "./errors";
import { idIssueFromComment } from "./issue";
import { fetchPullRequestComments, fetchPullRequestDetails } from "./pull-request-fetching";
- import { createDefaultTokenLimits } from "./token-utils";
+ import { createDefaultTokenLimits, updateTokenCount } from "./token-utils";

/**
* Create a unique key for an issue based on its URL and optional issue number
@@ -84,6 +84,21 @@ export async function fetchIssue(params: FetchParams, tokenLimits?: TokenLimits)

const issue: Issue = response.data;

+ if (tokenLimits) {
+   updateTokenCount(
+     JSON.stringify({
+       issue: issue.body,
+       comments: issue.comments,
+     }),
+     tokenLimits
+   );
+   if (issue.pull_request) {
+     logger.debug(`Fetched PR #${targetIssueNum} and updated token count`);
+   } else {
+     logger.debug(`Fetched issue #${targetIssueNum} and updated token count`);
+   }
+ }

// If this is a PR, fetch additional details
if (issue.pull_request) {
tokenLimits = tokenLimits || createDefaultTokenLimits(params.context);
@@ -151,17 +166,34 @@ export async function fetchIssueComments(params: FetchParams, tokenLimits?: TokenLimits)
issueNum: targetIssueNum,
});

+ // Update token count
+ updateTokenCount(
+   JSON.stringify(
+     prData.comments.map((comment: SimplifiedComment) => {
+       return {
+         id: comment.id,
+         body: comment.body,
+         user: comment.user,
+         ...(comment.referencedCode ? { referencedCode: comment.referencedCode } : {}),
+       };
+     })
+   ),
+   currentTokenLimits
+ );
comments = prData.comments;

// Process linked issues from PR with their full content
for (const linked of prData.linkedIssues) {
// First fetch the issue/PR to determine its type
- const linkedIssue = await fetchIssue({
-   ...params,
-   owner: linked.owner,
-   repo: linked.repo,
-   issueNum: linked.number,
- });
+ const linkedIssue = await fetchIssue(
+   {
+     ...params,
+     owner: linked.owner,
+     repo: linked.repo,
+     issueNum: linked.number,
+   },
+   currentTokenLimits
+ );

if (linkedIssue) {
const linkedComments = await fetchIssueComments(
@@ -211,6 +243,20 @@ export async function fetchIssueComments(params: FetchParams, tokenLimits?: TokenLimits)
commentType: "issue_comment",
}));

+ // Update token count
+ updateTokenCount(
+   JSON.stringify(
+     comments.map((comment: SimplifiedComment) => {
+       return {
+         body: comment.body,
+         id: comment.id,
+         user: comment.user,
+       };
+     })
+   ),
+   currentTokenLimits
+ );

// Process any linked issues found in comments
const linkedIssuesFromComments = comments
.map((comment) => idIssueFromComment(comment.body, params))
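Every payload destined for the prompt in this file is now serialized and charged against one shared TokenLimits before use: PR comments right after fetching, linked issues via the currentTokenLimits threaded into fetchIssue, and plain issue comments after mapping. A minimal sketch of the recurring pattern, with countComment as an illustrative name that is not part of the diff:

// Hypothetical helper showing the pattern used above: serialize only the fields
// that will reach the prompt, then charge them against the shared budget.
function countComment(comment: SimplifiedComment, limits: TokenLimits): boolean {
  const payload = JSON.stringify({ id: comment.id, body: comment.body, user: comment.user });
  return updateTokenCount(payload, limits); // false: counting it would exceed the budget, so nothing was recorded
}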
10 changes: 8 additions & 2 deletions src/helpers/token-utils.ts
@@ -3,22 +3,28 @@ import { TokenLimits } from "../types/llm";
import { encode } from "gpt-tokenizer";

export function createDefaultTokenLimits(context: Context): TokenLimits {
- const modelMaxTokenLimit = context.adapters.openai.completions.getModelMaxTokenLimit(context.config.model);
- const maxCompletionTokens = context.adapters.openai.completions.getModelMaxOutputLimit(context.config.model);
+ // const modelMaxTokenLimit = context.adapters.openai.completions.getModelMaxTokenLimit(context.config.model);
+ // const maxCompletionTokens = context.adapters.openai.completions.getModelMaxOutputLimit(context.config.model);
+
+ const modelMaxTokenLimit = 128_000;
+ const maxCompletionTokens = 16_384;
return {
modelMaxTokenLimit,
maxCompletionTokens,
runningTokenCount: 0,
+ context,
tokensRemaining: modelMaxTokenLimit - maxCompletionTokens,
};
}

export function updateTokenCount(text: string, tokenLimits: TokenLimits): boolean {
const tokenCount = encode(text, { disallowedSpecial: new Set() }).length;
if (tokenLimits.runningTokenCount + tokenCount > tokenLimits.tokensRemaining) {
+ tokenLimits.context.logger.debug(`Skipping ${text} to stay within token limits.`);
return false;
}
+ tokenLimits.context.logger.debug(`Added ${tokenCount} tokens. Running total: ${tokenLimits.runningTokenCount}. Remaining: ${tokenLimits.tokensRemaining}`);
tokenLimits.runningTokenCount += tokenCount;
tokenLimits.tokensRemaining -= tokenCount;
return true;
}
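updateTokenCount mutates the TokenLimits in place and reports whether the text was counted: it returns false, recording nothing, when adding the text would exceed tokensRemaining. Note the model-specific adapter lookups are commented out here in favor of fixed values. A minimal usage sketch, assuming a context wired as the rest of the plugin expects:

const limits = createDefaultTokenLimits(context); // 128_000 window minus 16_384 reserved for completion
if (updateTokenCount(JSON.stringify({ body: "example comment" }), limits)) {
  // counted: runningTokenCount grew and tokensRemaining shrank by the same amount
} else {
  // over budget: leave this text out of the prompt
}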
2 changes: 2 additions & 0 deletions src/types/llm.ts
@@ -1,3 +1,4 @@
+ import { Context } from "@ubiquity-os/plugin-sdk";
import { GROUND_TRUTHS_SYSTEM_MESSAGES } from "../handlers/ground-truths/prompts";

export type ModelApplications = "code-review" | "chat-bot";
@@ -54,6 +55,7 @@ export type StreamlinedComment = {

export type TokenLimits = {
modelMaxTokenLimit: number;
+ context: Context;
maxCompletionTokens: number;
runningTokenCount: number;
tokensRemaining: number;
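TokenLimits now carries the plugin Context so updateTokenCount can log through context.logger without importing a logger of its own. A hypothetical hand-built value, say for a unit test, stubbing everything except the logger that updateTokenCount actually touches:

// Assumes only `context.logger.debug` is exercised, as in token-utils.ts above.
const testLimits: TokenLimits = {
  modelMaxTokenLimit: 128_000,
  maxCompletionTokens: 16_384,
  runningTokenCount: 0,
  tokensRemaining: 128_000 - 16_384,
  context: { logger: console } as unknown as Context, // stub; production code passes the SDK Context
};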
