From f1f6147a9287efa21791ccac04d9c1f7bfe65cd6 Mon Sep 17 00:00:00 2001 From: charlesLoder Date: Tue, 18 Nov 2025 15:19:52 -0500 Subject: [PATCH 01/10] Update Button to be default button --- src/components/Button/index.tsx | 2 ++ .../Panel/ChatInput/SelectedMedia/index.tsx | 1 - src/plugin/Panel/ChatInput/index.tsx | 3 +-- src/providers/mediaPipeProvider/index.tsx | 15 +++------------ .../components/ModelSelection/index.tsx | 2 +- src/providers/userTokenProvider/index.tsx | 4 ++-- 6 files changed, 9 insertions(+), 18 deletions(-) diff --git a/src/components/Button/index.tsx b/src/components/Button/index.tsx index ac2f4a0..dff78be 100644 --- a/src/components/Button/index.tsx +++ b/src/components/Button/index.tsx @@ -19,6 +19,7 @@ export const Button: FC = ({ state = "idle", children, disabled, + type = "button", ...props }) => { return ( @@ -30,6 +31,7 @@ export const Button: FC = ({ data-state={state} data-variant={variant} disabled={state === "loading" || disabled} + type={type} > {children} diff --git a/src/plugin/Panel/ChatInput/SelectedMedia/index.tsx b/src/plugin/Panel/ChatInput/SelectedMedia/index.tsx index 81ae75f..84ac1e9 100644 --- a/src/plugin/Panel/ChatInput/SelectedMedia/index.tsx +++ b/src/plugin/Panel/ChatInput/SelectedMedia/index.tsx @@ -22,7 +22,6 @@ export const SelectedMedia: React.FC = ({ media }) => { shape="circle" size="small" title="Remove media" - type="button" variant="secondary" onClick={() => handleClick(media.id)} > diff --git a/src/plugin/Panel/ChatInput/index.tsx b/src/plugin/Panel/ChatInput/index.tsx index 8eaca1c..15a2a16 100644 --- a/src/plugin/Panel/ChatInput/index.tsx +++ b/src/plugin/Panel/ChatInput/index.tsx @@ -114,7 +114,6 @@ export const ChatInput: FC = () => { size="small" state={formState !== "success" ? formState : undefined} title="Clear conversation" - type="button" onClick={clearConversation} > @@ -125,7 +124,6 @@ export const ChatInput: FC = () => { size="small" state={formState !== "success" ? 
formState : undefined} title="Add media" - type="button" onClick={openDialog} > @@ -136,6 +134,7 @@ export const ChatInput: FC = () => { size="small" state={formState !== "success" ? formState : undefined} title="Submit question" + type="submit" > diff --git a/src/providers/mediaPipeProvider/index.tsx b/src/providers/mediaPipeProvider/index.tsx index 89cf91f..d34f4cb 100644 --- a/src/providers/mediaPipeProvider/index.tsx +++ b/src/providers/mediaPipeProvider/index.tsx @@ -354,15 +354,8 @@ export class MediaPipeProvider extends BaseProvider { .

- - + @@ -378,9 +371,7 @@ export class MediaPipeProvider extends BaseProvider {

Check the browser console for detailed error information.

- + ); } diff --git a/src/providers/userTokenProvider/components/ModelSelection/index.tsx b/src/providers/userTokenProvider/components/ModelSelection/index.tsx index 45a15a3..518aa27 100644 --- a/src/providers/userTokenProvider/components/ModelSelection/index.tsx +++ b/src/providers/userTokenProvider/components/ModelSelection/index.tsx @@ -19,7 +19,7 @@ export function ModelSelection({ handleBack, handleClick, models }: Props) { ))}
-
diff --git a/src/providers/userTokenProvider/index.tsx b/src/providers/userTokenProvider/index.tsx index 78219e5..9953055 100644 --- a/src/providers/userTokenProvider/index.tsx +++ b/src/providers/userTokenProvider/index.tsx @@ -375,8 +375,8 @@ export class UserTokenProvider extends BaseProvider { onChange={(e) => setInputValue(e.target.value)} />
- - +
From 131d02f0339f8d7e4c8a22ad08dc80c08e2a753a Mon Sep 17 00:00:00 2001 From: charlesLoder Date: Tue, 18 Nov 2025 15:20:20 -0500 Subject: [PATCH 02/10] Update PromptInput with default aria label --- src/components/PromptInput/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/PromptInput/index.tsx b/src/components/PromptInput/index.tsx index ba4338c..a729a69 100644 --- a/src/components/PromptInput/index.tsx +++ b/src/components/PromptInput/index.tsx @@ -19,7 +19,7 @@ export const PromptInput: FC = ({ {error &&
{error}
}
From 7a4f6c87dc9b86258ac5589356c0716fbe729eb1 Mon Sep 17 00:00:00 2001 From: charlesLoder Date: Tue, 18 Nov 2025 15:38:20 -0500 Subject: [PATCH 03/10] Add scaffold for prompt input buttons --- src/plugin/Panel/ChatInput/index.tsx | 2 ++ src/plugin/base_provider.tsx | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/src/plugin/Panel/ChatInput/index.tsx b/src/plugin/Panel/ChatInput/index.tsx index 15a2a16..cf5c790 100644 --- a/src/plugin/Panel/ChatInput/index.tsx +++ b/src/plugin/Panel/ChatInput/index.tsx @@ -84,6 +84,7 @@ export const ChatInput: FC = () => { dispatch({ type: "setMediaDialogState", state: "open" }); } + const PromptInputButtons = state?.provider?.PromptInputButtons?.bind(state.provider) ?? null; // bind `this`; return (
{ @@ -108,6 +109,7 @@ export const ChatInput: FC = () => { )}
+ {PromptInputButtons && } + ))} +
+
+ {!SelectedTaskComponent ? ( +
Choose a task to see more details
+ ) : ( + + )} +
+ + + + + ); + } + setup_model(provider: Provider, token: string, modelName: string) { switch (provider) { case "google": { @@ -383,4 +539,434 @@ export class UserTokenProvider extends BaseProvider { ); } + + TaskCreateAnnotation() { + /* eslint-disable react-hooks/rules-of-hooks */ + const { state: pluginState } = usePlugin(); + const [state, setState] = useState<"info" | "processing" | "error">("info"); + const [conversationState, setConversationState] = useState("idle"); + const [isButtonDisabled, setIsButtonDisabled] = useState(true); + const [updateViewerButton, setUpdateViewerButton] = useState<"hidden" | "visible">("hidden"); + const [encodedContentState, setEncodedContentState] = useState(""); + const [errorText, setErrorText] = useState(""); + const [inputValue, setInputValue] = useState(""); + const [messages, setMessages] = useState([]); + /* eslint-enable react-hooks/rules-of-hooks */ + + function encodeContentState(plainContentState: string): string { + const uriEncoded = encodeURIComponent(plainContentState); // using built in function + const base64 = btoa(uriEncoded); // using built in function + const base64url = base64.replace(/\+/g, "-").replace(/\//g, "_"); + const base64urlNoPadding = base64url.replace(/=/g, ""); + return base64urlNoPadding; + } + + const startTask = async () => { + try { + setState("processing"); + setConversationState("assistant_responding"); + + const first_tool_message: Message = { + role: "assistant", + type: "tool-call", + content: { + type: "text", + tool_name: "CreateAnnotation", + content: "Getting current canvas content for transcription.", + }, + }; + setMessages([first_tool_message]); + + // step 1: get the current canvas from the plugin state + const canvas: Canvas = pluginState.vault.serialize( + { + type: "Canvas", + id: pluginState.activeCanvas.id, + }, + serializeConfigPresentation3, + ); + + // step 2: get the first painting from the canvas + const paintings: ContentResource[] = []; + const traverse = new 
Traverse({ + contentResource: [ + (resource) => { + if (resource.type === "Image") { + paintings.push(resource); + } + }, + ], + }); + traverse.traverseCanvasItems(canvas); + + const painting = paintings[0].id; + + if (!painting) { + throw new Error("No painting found on canvas"); + } + + const width = canvas.width || 0; + const height = canvas.height || 0; + + const second_tool_message: Message = { + role: "assistant", + type: "tool-call", + content: { + type: "text", + tool_name: "CreateAnnotation", + content: "Sending canvas to model.", + }, + }; + setMessages((prevMessages) => [...prevMessages, second_tool_message]); + + // step 3: set up the messages to send to the model + const systemMessage: ModelMessage = { + role: "system", + content: dedent` + You are an AI assistant that helps with creating IIIF annotations. + `, + }; + + const userMessageText = dedent` + ## Context + You will be generating a IIIF annotation for an image based on user input. + + ## Details + Here are some important details to consider when generating the annotation: + + - The "text" field should contain HTML formatted text that will be displayed in the annotation. + - The "language" field should specify the language of the text using a standard language code (e.g., "en" for English). + - The "region" defines the area on the canvas for the annotation so be VERY precise with the coordinates and size. + - The image has a width of ${width} pixels and a height of ${height} pixels. + - Ensure that the x and y coordinates, as well as the width and height of the region, fit within the dimensions of the image. + - The region should be relevant to the user input provided. + + ## Task + Here is the user input for the annotation: + ${inputValue} + Generate text and the region to be used in an annotation for the provided image. + + ## Thinking + Think about the user instructions and the image details carefully before you respond. 
+ + ## Output Format + Provide the response in JSON format as follows: + + { + "text": "

The text for the annotation.

", + "language": string (the language code you are providing the text in, e.g., "en"), + "region": { + "x": number (0 to ${width}), + "y": number (0 to ${height}), + "width": number (0 to ${width}), + "height": number (0 to ${height}) + } + } + + - Do NOT include any extra text outside the JSON object + - Only respond with the JSON object + `; + + const userMessage: ModelMessage = { + role: "user", + content: [ + { + type: "image", + image: painting, + }, + { + type: "text", + text: userMessageText, + }, + ], + }; + + // step 4: call a custom function to generate the response + const result = (await this.#task_generate_response([systemMessage, userMessage])) + .replace("```json", "") + .replace("```", "") + .trim(); + + // step 5: update the messages to show user model call is done + const modelResponseMessage: Message = { + role: "assistant", + type: "tool-call", + content: { + type: "text", + tool_name: "CreateAnnotation", + content: "Parsing model response", + }, + }; + setMessages((prevMessages) => [...prevMessages, modelResponseMessage]); + + // step 6: parse the response + const parsed = JSON.parse(result); + + if ( + !parsed.text || + !parsed.region || + typeof parsed.region.x !== "number" || + typeof parsed.region.y !== "number" || + typeof parsed.region.width !== "number" || + typeof parsed.region.height !== "number" + ) { + throw new Error("Invalid response format from model"); + } + + // step 7: create the annotation for the canvas and encode it + const annotation = { + "@context": "http://iiif.io/api/presentation/3/context.json", + id: "https://example.org/import/1", + type: "Annotation", + motivation: ["contentState"], + target: { + id: `${canvas.id}#xywh=${parsed.region.x},${parsed.region.y},${parsed.region.width},${parsed.region.height}`, + type: "Canvas", + partOf: [ + { + id: pluginState.manifest.id, + type: "Manifest", + }, + ], + }, + body: { + type: "TextualBody", + value: parsed.text, + format: "text/html", + language: [parsed.language || 
"en"], + }, + }; + + const contentState = encodeContentState(JSON.stringify(annotation)); + + setEncodedContentState(contentState); + + const contentStateResponse: Message = { + role: "assistant", + type: "response", + content: { + type: "text", + content: this.viewer_iiif_content_callback + ? "Annotation created successfully. Click the button below to update the viewer." + : `Annotation created successfully. Here is the encoded Content State annotation:\n${contentState}`, + }, + }; + setMessages((prevMessages) => [...prevMessages, contentStateResponse]); + setConversationState("idle"); + setUpdateViewerButton("visible"); + } catch (error) { + console.error(error); // eslint-disable-line no-console + setErrorText(error instanceof Error ? error.message : "An unknown error occurred."); + setState("error"); + } + }; + + if (state === "info") { + return ( + <> +
+ Create Annotation +

+ This task will create a IIIF annotation for the cuurent canvas based on the provided + user input. +

+
+ { + setInputValue(currentTarget.value.trim()); + if (currentTarget.value.trim()) { + setIsButtonDisabled(false); + } else { + setIsButtonDisabled(true); + } + }} + /> + + + ); + } + + if (state === "error") { + return ( + <> +
+ Error +

There was an error processing the annotation task:

+
+
{errorText}
+ + + ); + } + + return ( + <> + + {conversationState === "assistant_responding" ? ( +
Processing...
+ ) : null} + {updateViewerButton === "visible" && this.viewer_iiif_content_callback ? ( + + ) : null} + + ); + } + + TaskTranscribeCanvas() { + /* eslint-disable react-hooks/rules-of-hooks */ + const { state: pluginState } = usePlugin(); + const [state, setState] = useState<"info" | "processing" | "error">("info"); + const [errorText, setErrorText] = useState(""); + const [inputValue, setInputValue] = useState(""); + const [messages, setMessages] = useState([]); + /* eslint-enable react-hooks/rules-of-hooks */ + + const startTask = async () => { + try { + setState("processing"); + + const first_tool_message: Message = { + role: "assistant", + type: "tool-call", + content: { + type: "text", + tool_name: "TranscribeCanvas", + content: "Getting current canvas content for transcription.", + }, + }; + setMessages([first_tool_message]); + + // step 1: get the current canvas from the plugin state + const canvas: Canvas = pluginState.vault.serialize( + { + type: "Canvas", + id: pluginState.activeCanvas.id, + }, + serializeConfigPresentation3, + ); + + // step 2: get the first painting from the canvas + const paintings: ContentResource[] = []; + const traverse = new Traverse({ + contentResource: [ + (resource) => { + if (resource.type === "Image") { + paintings.push(resource); + } + }, + ], + }); + traverse.traverseCanvasItems(canvas); + + const painting = paintings[0].id; + + if (!painting) { + throw new Error("No painting found on canvas"); + } + + const second_tool_message: Message = { + role: "assistant", + type: "tool-call", + content: { + type: "text", + tool_name: "TranscribeCanvas", + content: "Sending painting for transcription.", + }, + }; + setMessages((prevMessages) => [...prevMessages, second_tool_message]); + + // step 3: set up the messages to send to the model + const systemMessage: ModelMessage = { + role: "system", + content: dedent` + You are an AI assistant that transcribes text from images, providing detailed and accurate transcriptions. 
+ `, + }; + + const userMessageText = dedent` + Please transcribe any text you can find in the provided image. + Provide the transcription in a clear and structured format. + ${inputValue ? `Here is some additional context to consider: ${inputValue}` : ""} + `; + const userMessage: ModelMessage = { + role: "user", + content: [ + { + type: "image", + image: painting, + }, + { + type: "text", + text: userMessageText, + }, + ], + }; + + // step 4: call a custom function to generate the response + const result = await this.#task_generate_response([systemMessage, userMessage]); + + // step 5: show the response in the messages container + const assistantMessage: Message = { + role: "assistant", + type: "response", + content: { + type: "text", + content: result, + }, + }; + setMessages((prevMessages) => [...prevMessages, assistantMessage]); + } catch (error) { + console.error(error); // eslint-disable-line no-console + setErrorText(error instanceof Error ? error.message : "An unknown error occurred."); + setState("error"); + } + }; + + if (state === "info") { + return ( + <> +
+ Transcribe Text from Canvas +

+ This task will analyze the current canvas and attempt to transcribe any text it finds + within the image. +

+
+ { + setInputValue(currentTarget.value.trim()); + }} + /> + + + ); + } + + if (state === "error") { + return ( + <> +
+ Error +

There was an error processing the transcription task:

+
+
{errorText}
+ + ); + } + + return ; + } } diff --git a/src/providers/userTokenProvider/style.module.css b/src/providers/userTokenProvider/style.module.css index 817cc78..9d90661 100644 --- a/src/providers/userTokenProvider/style.module.css +++ b/src/providers/userTokenProvider/style.module.css @@ -8,3 +8,45 @@ display: flex; gap: var(--clover-ai-sizes-2); } + +.tasksContainer { + display: grid; + grid-template-columns: auto 1fr; +} + +.tasksList { + display: flex; + flex-direction: column; + gap: var(--clover-ai-space-2); + padding-inline-end: var(--clover-ai-space-4); +} + +.taskDescription { + margin-top: 0; + margin-bottom: 0; + color: var(--clover-ai-colors-text-secondary); + font-style: italic; +} + +.taskDetails { + display: flex; + flex-direction: column; + gap: var(--clover-ai-space-4); + border-left: 1px solid var(--clover-ai-colors-primary); + padding-inline: var(--clover-ai-space-4); + overflow-wrap: anywhere; + + * { + margin: 0; + } +} + +.taskPlaceholder { + display: flex; + flex-direction: column; + justify-content: center; + align-items: center; + height: 100%; + color: var(--clover-ai-colors-text-secondary); + font-style: italic; +} From d80800d3680229e7e9c9e89d508ab688e5b433be Mon Sep 17 00:00:00 2001 From: charlesLoder Date: Thu, 20 Nov 2025 12:47:37 -0500 Subject: [PATCH 09/10] Update docs --- stories/docs/creating-a-provider.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stories/docs/creating-a-provider.mdx b/stories/docs/creating-a-provider.mdx index 21f4c62..11c91c7 100644 --- a/stories/docs/creating-a-provider.mdx +++ b/stories/docs/creating-a-provider.mdx @@ -74,7 +74,7 @@ export class MyCustomProvider extends BaseProvider {
Welcome to My Custom Provider

Please acknowledge that you won't use this tool for evil.

-
@@ -131,7 +131,7 @@ export class MyCustomProvider extends BaseProvider { ); } - async send_messages(messages: Message[], conversationHistory: Message[]): Promise { + async generate_response(messages: Message[], conversationHistory: Message[]): Promise { this.set_conversation_state("assistant_responding"); const mockWebSocket = new WebSocket("ws://mock-websocket-server"); @@ -163,7 +163,7 @@ export class MyCustomProvider extends BaseProvider { } ``` -In this step, we implement the `send_messages` method, which is responsible for sending messages to the mock LLM provider. +In this step, we implement the `generate_response` method, which is responsible for sending messages to the mock LLM provider. First, we set the conversation state to `"assistant_responding"` to indicate that the assistant is processing the request. From 76420c42cfa68618457230bf724644fb11b20a98 Mon Sep 17 00:00:00 2001 From: Charles Loder Date: Thu, 20 Nov 2025 12:52:56 -0500 Subject: [PATCH 10/10] Fix typo Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/providers/userTokenProvider/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/providers/userTokenProvider/index.tsx b/src/providers/userTokenProvider/index.tsx index 35377ed..16d5463 100644 --- a/src/providers/userTokenProvider/index.tsx +++ b/src/providers/userTokenProvider/index.tsx @@ -767,7 +767,7 @@ export class UserTokenProvider extends BaseProvider {
Create Annotation

- This task will create a IIIF annotation for the cuurent canvas based on the provided + This task will create a IIIF annotation for the current canvas based on the provided user input.