Skip to content

Commit

Permalink
fix: html preview with citations (cohere-ai#200)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomtobac authored Jun 13, 2024
1 parent 5155eee commit 644567c
Show file tree
Hide file tree
Showing 6 changed files with 49 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import { visit } from 'unist-util-visit';

export const renderRemarkCites: Plugin<void[], Root> = () => {
return (tree, file) => {
visit(tree, (node: any, index, parent) => {
visit(tree, (node: any) => {
if (
node.type === 'textDirective' ||
node.type === 'leafDirective' ||
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import type { Component, ExtraProps } from 'hast-util-to-jsx-runtime/lib/components';
import type { Component } from 'hast-util-to-jsx-runtime/lib/components';
import { useEffect } from 'react';
import { useRef } from 'react';
import { useState } from 'react';
Expand Down
17 changes: 10 additions & 7 deletions src/interfaces/coral_web/src/hooks/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,30 +333,33 @@ export const useChat = (config?: { onSend?: (msg: string) => void }) => {
saveCitations(generationId, citations, documentsMap);
saveOutputFiles({ ...savedOutputFiles, ...outputFiles });

const outputText =
data?.finish_reason === FinishReason.FINISH_REASON_MAX_TOKENS
? botResponse
: responseText;

// Replace HTML code blocks with iframes
const transformedText = replaceCodeBlockWithIframe(outputText);

const finalText = isRAGOn
? replaceTextWithCitations(
// TODO(@wujessica): temporarily use the text generated from the stream when MAX_TOKENS
// because the final response doesn't give us the full text yet. Note - this means that
// citations will only appear for the first 'block' of text generated.
data?.finish_reason === FinishReason.FINISH_REASON_MAX_TOKENS
? botResponse
: responseText,
transformedText,
citations,
generationId
)
: botResponse;

// Replace HTML code blocks with iframes
const text = replaceCodeBlockWithIframe(finalText);

setStreamingMessage({
type: MessageType.BOT,
state: BotState.FULFILLED,
generationId,
// TODO(@wujessica): TEMPORARY - we don't pass citations for langchain multihop right now
// so we need to manually apply this fix. Otherwise, this comes for free when we call
// `replaceTextWithCitations`.
text: citations.length > 0 ? text : fixMarkdownImagesInText(text),
text: citations.length > 0 ? finalText : fixMarkdownImagesInText(transformedText),
citations,
isRAGOn,
originalText: isRAGOn ? responseText : botResponse,
Expand Down
9 changes: 2 additions & 7 deletions src/interfaces/coral_web/src/pages/c/[id]/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import { appSSR } from '@/pages/_app';
import { useCitationsStore, useConversationStore, useParamsStore } from '@/stores';
import { OutputFiles } from '@/stores/slices/citationsSlice';
import { createStartEndKey, mapHistoryToMessages } from '@/utils';
import { replaceCodeBlockWithIframe } from '@/utils/preview';
import { parsePythonInterpreterToolFields } from '@/utils/tools';

type Props = {
Expand Down Expand Up @@ -77,13 +76,9 @@ const ConversationPage: NextPage<Props> = () => {
if (!conversation) return;

const messages = mapHistoryToMessages(
conversation?.messages
?.sort((a, b) => a.position - b.position)
.map((message) => ({
...message,
text: replaceCodeBlockWithIframe(message.text),
}))
conversation?.messages?.sort((a, b) => a.position - b.position)
);

setConversation({ name: conversation.title, messages });

let documentsMap: { [documentId: string]: Document } = {};
Expand Down
28 changes: 27 additions & 1 deletion src/interfaces/coral_web/src/utils/citations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ export const fixMarkdownImagesInText = (text: string) => {
return text.replace('! [', '![');
};

// Shared English list formatter: joins items as a long-form conjunction (e.g. "a, b, and c").
// Used further down to build the "From: ..." line out of citations that could not be placed inline.
const formatter = new Intl.ListFormat('en', { style: 'long', type: 'conjunction' });

/**
* Replace text string with citations following the format:
* :cite[<text>]{generationId="<generationId>" start="<startIndex>" end="<endIndex>"}
Expand All @@ -26,10 +28,20 @@ export const replaceTextWithCitations = (
let replacedText = text;

let lengthDifference = 0; // Track the cumulative length difference
citations.forEach(({ start = 0, end = 0, text: citationText }) => {
let notFoundReferences: string[] = [];
citations.forEach(({ start = 0, end = 0, text: citationText }, index) => {
const citeStart = start + lengthDifference;
const citeEnd = end + lengthDifference;

// If the citation starts at or past the end of the text, or falls inside an iframe, collect it as a "Reference #n" cite; these are prepended to the text in a "From: ..." line.
if (start >= text.length || isReferenceBetweenIframes(replacedText, start)) {
const ref = `Reference #${index + 1}`;
notFoundReferences.push(
`:cite[${ref}]{generationId="${generationId}" start="${start}" end="${end}"}`
);
return;
}

const fixedText = fixMarkdownImagesInText(citationText);

// Encode the citationText in case there are any weird characters or unclosed brackets that will
Expand All @@ -42,8 +54,22 @@ export const replaceTextWithCitations = (
replacedText = replacedText.slice(0, citeStart) + citationId + replacedText.slice(citeEnd);
lengthDifference += citationId.length - (citeEnd - citeStart);
});

const references = 'From: ' + formatter.format(notFoundReferences);
if (notFoundReferences.length > 0) {
return references + '\n' + replacedText;
}
return replacedText;
};

/**
 * Builds a lookup key for a span by joining its start and end with a hyphen.
 *
 * @param start - Start of the span, as a number or an already-stringified value.
 * @param end - End of the span, as a number or an already-stringified value.
 * @returns The key in the form `<start>-<end>`.
 */
export const createStartEndKey = (start: number | string, end: number | string) => {
  const startPart = String(start);
  const endPart = String(end);
  return startPart + '-' + endPart;
};

/**
 * Reports whether `citeStart` lies strictly inside any `<iframe ...</iframe>` span of
 * `replacedText`.
 *
 * Every iframe span in the text is examined, not just the first one, so a position inside a
 * second or later iframe is detected as well.
 *
 * @param replacedText - Text that may contain one or more `<iframe>...</iframe>` snippets.
 * @param citeStart - Character offset into `replacedText` to test.
 * @returns `true` when `citeStart` is after the first character and before the end of some
 *   iframe span; `false` when there is no iframe or the offset is outside all of them.
 */
function isReferenceBetweenIframes(replacedText: string, citeStart: number): boolean {
  // NOTE(review): `.*` is greedy and does not cross newlines, so several iframes on the same
  // line are treated as one span and an iframe split across lines is not matched — confirm
  // this fits the markup produced for previews.
  const iframePattern = /<iframe.*<\/iframe>/g;

  // With the global flag, each `exec` call resumes after the previous match, which lets us
  // walk every iframe span in order.
  let match: RegExpExecArray | null;
  while ((match = iframePattern.exec(replacedText)) !== null) {
    const spanStart = match.index;
    const spanEnd = spanStart + match[0].length;

    if (spanStart < citeStart && citeStart < spanEnd) return true;

    // Matches arrive in ascending order; once a span starts at or after the offset,
    // no later span can contain it.
    if (spanStart >= citeStart) break;
  }

  return false;
}
10 changes: 8 additions & 2 deletions src/interfaces/coral_web/src/utils/conversation.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
import { Message, MessageAgent } from '@/cohere-client';
import { BotMessage, BotState, MessageType, UserMessage } from '@/types/message';
import { replaceTextWithCitations } from '@/utils/citations';
import { replaceCodeBlockWithIframe } from '@/utils/preview';

export const mapHistoryToMessages = (history?: Message[]) => {
return history
? history.map<UserMessage | BotMessage>((message) => {
const isBotMessage = message.agent === MessageAgent.CHATBOT;
let text = message.text;
if (isBotMessage) {
text = replaceCodeBlockWithIframe(message.text);
}
return {
...(message.agent === MessageAgent.CHATBOT
...(isBotMessage
? { type: MessageType.BOT, state: BotState.FULFILLED, originalText: message.text ?? '' }
: { type: MessageType.USER }),
text: replaceTextWithCitations(
message.text ?? '',
text ?? '',
message.citations ?? [],
message.generation_id ?? ''
),
Expand Down

0 comments on commit 644567c

Please sign in to comment.