-
Notifications
You must be signed in to change notification settings - Fork 146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: fix parser #95
feat: fix parser #95
Changes from 12 commits
d26e185
e9075c6
4868a48
fb45e89
8b04c0b
5a9e5aa
80436e3
6449f69
e0de7af
1362575
8fe2d31
f79735c
65d3533
bb0baf3
3ea28cd
e7b394e
0ff12b6
f7e801f
f6719f1
444eeef
7b1abaf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ export async function parseStreamedMessages({ | |
let isProcessingStep = false; | ||
let isLastStep = false; | ||
let isFollowupQuestion = false; | ||
let followUpQuestionIndex = 0; | ||
let stepIndex = 0; | ||
let textBlockIndex = 0; | ||
const result = { | ||
|
@@ -41,7 +42,7 @@ export async function parseStreamedMessages({ | |
|
||
// we use numeric values to identify the beginning of a step | ||
// if we match a number, store it in the buffer and move on to the next iteration | ||
const LIST_ITEM_NUMBER = /(\d+)/; | ||
const LIST_ITEM_NUMBER: RegExp = /(\d+)/; | ||
let matchedStepIndex = chunkValue.match(LIST_ITEM_NUMBER)?.[0]; | ||
if (matchedStepIndex) { | ||
stepsBuffer.push(matchedStepIndex); | ||
|
@@ -50,15 +51,31 @@ export async function parseStreamedMessages({ | |
|
||
// followup questions are marked either with the word 'Next Questions:' or '<<text>>' or both at the same time | ||
// these markers may be split across multiple chunks, so we need to buffer them! | ||
// TODO: support followup questions wrapped in <<text>> markers | ||
const matchedFollowupQuestionMarker = !isFollowupQuestion && chunkValue.includes('Next'); | ||
// TODO: remove all this logic from the frontend and implement a solution on the backend or with TypeChat | ||
// we start by creating a buffer when we match the first marker | ||
const matchedFollowupQuestionMarker = | ||
(!isFollowupQuestion && chunkValue.includes('Next')) || chunkValue.includes('<<'); | ||
// once we do, we can assume that we are processing a followup question and set the flag to true | ||
if (matchedFollowupQuestionMarker) { | ||
isFollowupQuestion = true; | ||
followupQuestionsBuffer.push(chunkValue); | ||
continue; | ||
} else if (followupQuestionsBuffer.length > 0 && chunkValue.includes('Question')) { | ||
isFollowupQuestion = true; | ||
followupQuestionsBuffer.push(chunkValue); | ||
continue; | ||
// if we're already processing questions, we don't need to check for the marker again | ||
// but we need to check if we reached the end of the followup questions | ||
} else if (chunkValue.includes('<<') && isFollowupQuestion) { | ||
isFollowupQuestion = true; | ||
continue; | ||
// this updates the index, so we add each question to a different array entry | ||
// to simplify styling | ||
} else if (chunkValue.includes('?>') || chunkValue.includes('>')) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. why are we checking for `?>` or `>`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because in all tests I ran, `?>` and the last `>` were returned as part of 2 different chunks, and there was never a match. |
||
followUpQuestionIndex = followUpQuestionIndex + 1; | ||
isFollowupQuestion = true; | ||
continue; | ||
// additional returns need to be removed, but only after we have processed the whole set of chunks | ||
} else if (isFollowupQuestion) { | ||
isFollowupQuestion = true; | ||
chunkValue = chunkValue.replace(/:?\n/, ''); | ||
manekinekko marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
@@ -87,18 +104,18 @@ export async function parseStreamedMessages({ | |
|
||
// if we are at the beginning of a step, we need to remove the step number and dot from the chunk value | ||
// we simply clear the current chunk value | ||
if (matchedStepIndex || isProcessingStep) { | ||
if (matchedStepIndex || isProcessingStep || isFollowupQuestion) { | ||
if (matchedStepIndex) { | ||
chunkValue = ''; | ||
} | ||
|
||
// set the step index that is needed to update the correct step entry | ||
stepIndex = matchedStepIndex ? Number(matchedStepIndex) - 1 : stepIndex; | ||
updateFollowingStepOrFollowupQuestionEntry({ | ||
chunkValue, | ||
textBlockIndex, | ||
stepIndex, | ||
isFollowupQuestion, | ||
followUpQuestionIndex, | ||
chatThread, | ||
}); | ||
|
||
|
@@ -136,7 +153,7 @@ export function updateCitationsEntry({ | |
const updateCitationReference = (match, capture) => { | ||
const citation = citations.find((citation) => citation.text === capture); | ||
if (citation) { | ||
return `<sup>[${citation.ref}]</sup>`; | ||
return `<sup class="citation">${citation.ref}</sup>`; | ||
} | ||
return match; | ||
}; | ||
|
@@ -202,18 +219,20 @@ export function updateFollowingStepOrFollowupQuestionEntry({ | |
textBlockIndex, | ||
stepIndex, | ||
isFollowupQuestion, | ||
followUpQuestionIndex, | ||
chatThread, | ||
}: { | ||
chunkValue: string; | ||
textBlockIndex: number; | ||
stepIndex: number; | ||
isFollowupQuestion: boolean; | ||
followUpQuestionIndex: number; | ||
chatThread: ChatThreadEntry[]; | ||
}) { | ||
// following steps and followup questions are treated the same way. They are just stored in different arrays | ||
const { followupQuestions, text: lastChatMessageTextEntry } = chatThread.at(-1) as ChatThreadEntry; | ||
if (isFollowupQuestion && followupQuestions) { | ||
followupQuestions[stepIndex] = (followupQuestions[stepIndex] || '') + chunkValue; | ||
followupQuestions[followUpQuestionIndex] = (followupQuestions[followUpQuestionIndex] || '') + chunkValue; | ||
return; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
btw should we still parse `Next` markers? Is the new model still using `Next` to format followup questions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the updated model uses only `<<` and `>>` to format followup questions, then I'd suggest changing the condition to:
`const matchedFollowupQuestionMarker = !isFollowupQuestion && chunkValue.includes('<<');`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good question. @sinedied can you clarify?