add more features

This commit is contained in:
ropzislaw
2026-02-15 20:25:18 +08:00
committed by GitHub
89 changed files with 9259 additions and 378 deletions

View File

@@ -1,6 +1,6 @@
{
"root": true,
"$schema": "https://biomejs.dev/schemas/2.3.13/schema.json",
"$schema": "https://biomejs.dev/schemas/2.3.14/schema.json",
"formatter": {
"enabled": true,
"indentStyle": "space",

View File

@@ -1,5 +1,11 @@
import type { AgentEvent } from "@aipexstudio/aipex-core";
import { generateId } from "@aipexstudio/aipex-core";
import { ScreenshotStorage } from "../lib/screenshot-storage";
import {
extractScreenshotFromToolResult,
isCaptureScreenshotTool,
type ScreenshotExtraction,
} from "../lib/screenshot-utils";
import type {
ChatAdapterOptions,
ChatAdapterState,
@@ -412,6 +418,15 @@ export class ChatAdapter {
return;
}
// Extract screenshot data from screenshot tools
if (isCaptureScreenshotTool(toolName)) {
const screenshotInfo = extractScreenshotFromToolResult(toolName, result);
if (screenshotInfo) {
this.applyScreenshotToolResult(callId, result, screenshotInfo);
return;
}
}
this.updateToolPart(callId, (toolPart) => ({
...toolPart,
state: "completed",
@@ -419,6 +434,57 @@ export class ChatAdapter {
}));
}
/**
 * Handle a completed screenshot tool result.
 *
 * Uses the tool-provided screenshotUid (the tool already saved to IndexedDB)
 * rather than generating a new one. Falls back to UI-side storage only if
 * screenshotUid is missing (e.g., IndexedDB save failed in the tool).
 *
 * @param callId - Id of the tool call whose UI part is updated.
 * @param result - Raw tool output, stored verbatim as the part's `output`.
 * @param info - Screenshot fields extracted from the tool result.
 */
private applyScreenshotToolResult(
callId: string,
result: unknown,
info: ScreenshotExtraction,
): void {
if (info.screenshotUid) {
// Tool already saved to IndexedDB — use its uid directly
this.updateToolPart(callId, (toolPart) => ({
...toolPart,
state: "completed",
output: result,
screenshotUid: info.screenshotUid!,
// Keep inline screenshot for immediate rendering if base64 is present
...(info.imageData ? { screenshot: info.imageData } : {}),
}));
} else if (info.imageData) {
// Fallback: tool didn't provide a uid (storage failure) — save in UI
this.updateToolPart(callId, (toolPart) => ({
...toolPart,
state: "completed",
output: result,
screenshot: info.imageData!,
}));
// Fire-and-forget: the part is patched with the uid if/when the save lands.
ScreenshotStorage.saveScreenshot(info.imageData)
.then((uid) => {
this.updateToolPart(callId, (toolPart) => ({
...toolPart,
screenshotUid: uid,
}));
})
.catch(() => {
// Storage failed — screenshot still visible via inline data
});
} else {
// No image data at all (sendToLLM=false path) — just complete
// (the uid spread is effectively always empty here, since the first
// branch already handled the uid-present case).
this.updateToolPart(callId, (toolPart) => ({
...toolPart,
state: "completed",
output: result,
...(info.screenshotUid ? { screenshotUid: info.screenshotUid } : {}),
}));
}
}
/**
* Check if a tool result indicates a business-level failure.
* Many tools return { success: false, error: "..." } instead of throwing.

View File

@@ -12,6 +12,7 @@ import {
Loader2Icon,
PaperclipIcon,
PlusIcon,
PuzzleIcon,
SendIcon,
SquareIcon,
XIcon,
@@ -211,6 +212,35 @@ export const usePromptInputContexts = () => {
return context;
};
// ============ Skill Items Context ============
/** A skill that can be attached to a prompt via the "/" slash command. */
export type SkillItem = {
id: string;
name: string;
description?: string;
};
/** Selected skills plus mutation helpers, provided by PromptInput. */
type SkillItemsContext = {
items: SkillItem[];
add: (item: SkillItem) => void;
remove: (id: string) => void;
clear: () => void;
// Full catalog shown in the slash-command menu.
availableSkills: SkillItem[];
setAvailableSkills: (items: SkillItem[]) => void;
};
const SkillItemsContext = createContext<SkillItemsContext | null>(null);
/**
 * Access the skill items context.
 * @throws Error when used outside of a PromptInput provider.
 */
export const usePromptInputSkills = () => {
const context = useContext(SkillItemsContext);
if (!context) {
throw new Error("usePromptInputSkills must be used within a PromptInput");
}
return context;
};
export type PromptInputAttachmentProps = HTMLAttributes<HTMLDivElement> & {
data: FileUIPart & { id: string };
className?: string;
@@ -421,10 +451,122 @@ export function PromptInputContextTags({
);
}
// ============ Skill Items Components ============
/** Props for a single removable skill tag shown in the prompt input. */
export type PromptInputSkillTagProps = HTMLAttributes<HTMLDivElement> & {
data: SkillItem;
className?: string;
};
/**
 * Renders one selected skill as a pill: a clickable label that opens the
 * extension options page on the skills tab, plus a hover-revealed remove button.
 */
export function PromptInputSkillTag({
data,
className,
...props
}: PromptInputSkillTagProps) {
const skills = usePromptInputSkills();
const handleLabelClick = () => {
// Open options page with skills tab
// Guarded so the component also renders outside a Chrome-extension context.
if (typeof chrome !== "undefined" && chrome.tabs?.create) {
// Cap the skill name at 200 chars to keep the URL reasonable.
const skillParam = encodeURIComponent(data.name.slice(0, 200));
chrome.tabs.create({
url: chrome.runtime.getURL(
`src/pages/options/index.html?tab=skills&skill=${skillParam}`,
),
});
}
};
return (
<div
className={cn(
"group inline-flex items-center gap-1.5 px-2 py-1 text-sm rounded-md",
"bg-primary/10 hover:bg-primary/20 transition-colors",
"border border-primary/30",
className,
)}
{...props}
>
<span className="text-primary">
<PuzzleIcon className="size-4" />
</span>
<button
type="button"
className="max-w-[200px] truncate cursor-pointer hover:underline text-primary bg-transparent border-none p-0 font-inherit text-left"
onClick={handleLabelClick}
title="Click to open skill settings"
>
{data.name}
</button>
<Button
aria-label="Remove skill"
className="h-4 w-4 p-0 opacity-0 group-hover:opacity-100 transition-opacity"
onClick={() => skills.remove(data.id)}
size="icon"
type="button"
variant="ghost"
>
<XIcon className="h-3 w-3" />
</Button>
</div>
);
}
/** Props for the animated container listing all selected skill tags. */
export type PromptInputSkillTagsProps = Omit<
HTMLAttributes<HTMLDivElement>,
"children"
> & {
children?: (item: SkillItem) => ReactNode;
};
/**
 * Collapsible row of skill tags. Measures its content with a ResizeObserver
 * and animates the outer container's height between 0 (no skills) and the
 * measured content height, so tags slide open/closed smoothly.
 */
export function PromptInputSkillTags({
className,
children,
...props
}: PromptInputSkillTagsProps) {
const skills = usePromptInputSkills();
const [height, setHeight] = useState(0);
const contentRef = useRef<HTMLDivElement>(null);
useLayoutEffect(() => {
const el = contentRef.current;
if (!el) {
return;
}
// Track content size changes (tags added/removed, line wrapping).
const ro = new ResizeObserver(() => {
setHeight(el.getBoundingClientRect().height);
});
ro.observe(el);
// Initial measurement before the first observer callback fires.
setHeight(el.getBoundingClientRect().height);
return () => ro.disconnect();
}, []);
return (
<div
aria-live="polite"
className={cn(
"overflow-hidden transition-[height] duration-200 ease-out",
className,
)}
style={{ height: skills.items.length ? height : 0 }}
{...props}
>
<div className="flex flex-wrap gap-2 p-3 pb-0" ref={contentRef}>
{skills.items.map((item) => (
<Fragment key={item.id}>
{children ? children(item) : <PromptInputSkillTag data={item} />}
</Fragment>
))}
</div>
</div>
);
}
export type PromptInputMessage = {
text?: string;
files?: FileUIPart[];
contexts?: ContextItem[];
skills?: SkillItem[];
};
export type PromptInputProps = Omit<
@@ -466,6 +608,8 @@ export const PromptInput = ({
const [items, setItems] = useState<(FileUIPart & { id: string })[]>([]);
const [contextItems, setContextItems] = useState<ContextItem[]>([]);
const [availableContexts, setAvailableContexts] = useState<ContextItem[]>([]);
const [skillItems, setSkillItems] = useState<SkillItem[]>([]);
const [availableSkills, setAvailableSkills] = useState<SkillItem[]>([]);
const inputRef = useRef<HTMLInputElement | null>(null);
const anchorRef = useRef<HTMLSpanElement>(null);
const formRef = useRef<HTMLFormElement | null>(null);
@@ -586,6 +730,25 @@ export const PromptInput = ({
setContextItems([]);
}, []);
// Skill management callbacks
// Add a skill tag; no-op if a skill with the same id is already selected.
const addSkill = useCallback((skill: SkillItem) => {
setSkillItems((prev) => {
// Avoid duplicates
if (prev.some((item) => item.id === skill.id)) {
return prev;
}
return [...prev, skill];
});
}, []);
// Remove a single skill tag by id.
const removeSkill = useCallback((id: string) => {
setSkillItems((prev) => prev.filter((item) => item.id !== id));
}, []);
// Remove all skill tags (invoked after a message is submitted).
const clearSkills = useCallback(() => {
setSkillItems([]);
}, []);
// Note: File input cannot be programmatically set for security reasons
// The syncHiddenInput prop is no longer functional
useEffect(() => {
@@ -684,9 +847,13 @@ export const PromptInput = ({
return item;
}),
).then((files: FileUIPart[]) => {
onSubmit({ text, files, contexts: contextItems }, event);
onSubmit(
{ text, files, contexts: contextItems, skills: skillItems },
event,
);
clear();
clearContexts();
clearSkills();
});
};
@@ -714,28 +881,42 @@ export const PromptInput = ({
[contextItems, addContext, removeContext, clearContexts, availableContexts],
);
const skillsCtx = useMemo<SkillItemsContext>(
() => ({
items: skillItems,
add: addSkill,
remove: removeSkill,
clear: clearSkills,
availableSkills,
setAvailableSkills,
}),
[skillItems, addSkill, removeSkill, clearSkills, availableSkills],
);
return (
<AttachmentsContext.Provider value={ctx}>
<ContextItemsContext.Provider value={contextsCtx}>
<span aria-hidden="true" className="hidden" ref={anchorRef} />
<input
accept={accept}
className="hidden"
multiple={multiple}
onChange={handleChange}
ref={inputRef}
type="file"
/>
<form
className={cn(
"w-full divide-y overflow-hidden rounded-xl border bg-background shadow-sm",
className,
)}
onSubmit={handleSubmit}
{...props}
>
{children}
</form>
<SkillItemsContext.Provider value={skillsCtx}>
<span aria-hidden="true" className="hidden" ref={anchorRef} />
<input
accept={accept}
className="hidden"
multiple={multiple}
onChange={handleChange}
ref={inputRef}
type="file"
/>
<form
className={cn(
"w-full divide-y overflow-hidden rounded-xl border bg-background shadow-sm",
className,
)}
onSubmit={handleSubmit}
{...props}
>
{children}
</form>
</SkillItemsContext.Provider>
</ContextItemsContext.Provider>
</AttachmentsContext.Provider>
);
@@ -781,6 +962,7 @@ export const PromptInputTextarea = ({
}: PromptInputTextareaProps) => {
const attachments = usePromptInputAttachments();
const contexts = usePromptInputContexts();
const skills = usePromptInputSkills();
const [isFocused, setIsFocused] = useState(false);
const [hasValue, setHasValue] = useState(false);
const [showContextMenu, setShowContextMenu] = useState(false);
@@ -796,6 +978,13 @@ export const PromptInputTextarea = ({
const scrollContainerRef = useRef<HTMLDivElement>(null);
const selectedItemRef = useRef<HTMLButtonElement>(null);
// Skill slash command state
const [showSkillMenu, setShowSkillMenu] = useState(false);
const [slashSearchQuery, setSlashSearchQuery] = useState("");
const [slashPosition, setSlashPosition] = useState<number | null>(null);
const [selectedSkillIndex, setSelectedSkillIndex] = useState(0);
const selectedSkillItemRef = useRef<HTMLButtonElement>(null);
// Sync hasValue with external value prop (for controlled components)
useEffect(() => {
setHasValue(!!props.value);
@@ -820,6 +1009,40 @@ export const PromptInputTextarea = ({
: placeholder;
const handleKeyDown: KeyboardEventHandler<HTMLTextAreaElement> = (e) => {
// Handle skill menu navigation
if (showSkillMenu && filteredSkills.length > 0) {
if (e.key === "ArrowDown") {
e.preventDefault();
setSelectedSkillIndex((prev) => (prev + 1) % filteredSkills.length);
return;
}
if (e.key === "ArrowUp") {
e.preventDefault();
setSelectedSkillIndex(
(prev) => (prev - 1 + filteredSkills.length) % filteredSkills.length,
);
return;
}
if (e.key === "Enter" && !e.shiftKey) {
e.preventDefault();
const selectedSkill = filteredSkills[selectedSkillIndex];
if (selectedSkill) {
handleSkillSelect(selectedSkill);
}
return;
}
if (e.key === "Escape") {
e.preventDefault();
setShowSkillMenu(false);
setSlashSearchQuery("");
setSlashPosition(null);
return;
}
}
// Handle context menu navigation
if (showContextMenu && filteredContexts.length > 0) {
if (e.key === "ArrowDown") {
@@ -917,6 +1140,50 @@ export const PromptInputTextarea = ({
[atPosition, contexts, props.value, onChange],
);
// Handle skill selection
// Adds the chosen skill as a tag and strips the typed "/query" text from
// the textarea, mirroring the "@" context-mention handling.
const handleSkillSelect = useCallback(
(skill: SkillItem) => {
if (!textareaRef.current || slashPosition === null) return;
// Add skill to the skills context
skills.add({
id: skill.id,
name: skill.name,
description: skill.description,
});
// Remove /xxx from textarea (similar to @ handling)
// NOTE(review): uses the live selectionStart as the end of the typed
// query — assumes the caret still sits right after the query; confirm
// this holds when selection happens via mouse click on the menu.
const currentValue = String(props.value || "");
const beforeSlash = currentValue.slice(0, slashPosition);
const afterSearch = currentValue.slice(
textareaRef.current.selectionStart,
);
const newValue = beforeSlash + afterSearch;
// Trigger onChange with new value
const syntheticEvent = {
target: { value: newValue },
currentTarget: { value: newValue },
} as ChangeEvent<HTMLTextAreaElement>;
onChange?.(syntheticEvent);
// Close menu
setShowSkillMenu(false);
setSlashSearchQuery("");
setSlashPosition(null);
// Refocus textarea
// Deferred so focus/caret placement runs after the menu unmounts.
setTimeout(() => {
if (textareaRef.current) {
textareaRef.current.focus();
textareaRef.current.selectionStart = beforeSlash.length;
textareaRef.current.selectionEnd = beforeSlash.length;
}
}, 0);
},
[slashPosition, skills, props.value, onChange],
);
const handlePaste: ClipboardEventHandler<HTMLTextAreaElement> = (event) => {
const items = event.clipboardData?.items;
@@ -964,6 +1231,19 @@ export const PromptInputTextarea = ({
setAtPosition(null);
}
// Detect "/" command for skills.
// The slash must be at the start of input or preceded by whitespace so
// slashes inside URLs or paths (e.g. "https://…", "a/b") don't open the
// menu. The slash position is derived from the match itself rather than
// lastIndexOf("/"), which could point at a later slash inside the query.
const slashMatch = beforeCursor.match(/(^|\s)\/(\S*)$/);
if (slashMatch) {
  const slashQuery = slashMatch[2] ?? ""; // Text after /
  // Skip the optional leading whitespace captured by group 1.
  const slashIdx = (slashMatch.index ?? 0) + (slashMatch[1] ?? "").length;
  setSlashPosition(slashIdx);
  setSlashSearchQuery(slashQuery);
  setShowSkillMenu(true);
} else {
  setShowSkillMenu(false);
  setSlashSearchQuery("");
  setSlashPosition(null);
}
onChange?.(e);
};
@@ -1011,11 +1291,43 @@ export const PromptInputTextarea = ({
});
}, [contexts.availableContexts, searchQuery]);
// Filter skills by the current "/" query: substring match on the name or
// description, plus a subsequence ("fuzzy") match on the name.
const filteredSkills = useMemo(() => {
  if (!slashSearchQuery) return skills.availableSkills;
  const needle = slashSearchQuery.toLowerCase();
  // True when every character of `needle` appears in order in `haystack`.
  const isSubsequence = (haystack: string): boolean => {
    let matched = 0;
    for (let i = 0; i < haystack.length && matched < needle.length; i++) {
      if (haystack[i] === needle[matched]) {
        matched++;
      }
    }
    return matched === needle.length;
  };
  return skills.availableSkills.filter((skill) => {
    const name = skill.name.toLowerCase();
    return (
      name.includes(needle) ||
      (skill.description?.toLowerCase().includes(needle) ?? false) ||
      isSubsequence(name)
    );
  });
}, [skills.availableSkills, slashSearchQuery]);
// Reset selected index when filtered contexts change
useEffect(() => {
setSelectedIndex(filteredContexts.length ? 0 : -1);
}, [filteredContexts]);
// Reset selected skill index when filtered skills change
useEffect(() => {
setSelectedSkillIndex(0);
}, []);
// Auto-scroll to selected item when navigating with keyboard
useEffect(() => {
if (selectedIndex < 0) {
@@ -1030,10 +1342,20 @@ export const PromptInputTextarea = ({
}
}, [selectedIndex]);
// Calculate menu position when showing context menu
// Auto-scroll to selected skill item when navigating with keyboard
useEffect(() => {
if (selectedSkillItemRef.current) {
selectedSkillItemRef.current.scrollIntoView({
block: "nearest",
behavior: "smooth",
});
}
}, []);
// Calculate menu position when showing context menu or skill menu
useEffect(() => {
const updatePosition = () => {
if (showContextMenu && textareaRef.current) {
if ((showContextMenu || showSkillMenu) && textareaRef.current) {
const rect = textareaRef.current.getBoundingClientRect();
const windowHeight = window.innerHeight;
setMenuPosition({
@@ -1047,7 +1369,7 @@ export const PromptInputTextarea = ({
updatePosition();
// Update position on scroll and resize
if (!showContextMenu) {
if (!showContextMenu && !showSkillMenu) {
return;
}
@@ -1058,7 +1380,7 @@ export const PromptInputTextarea = ({
window.removeEventListener("scroll", updatePosition, true);
window.removeEventListener("resize", updatePosition);
};
}, [showContextMenu]);
}, [showContextMenu, showSkillMenu]);
return (
<div className="relative">
@@ -1144,6 +1466,76 @@ export const PromptInputTextarea = ({
</div>,
document.body,
)}
{/* Skill Command Menu - Portal Implementation */}
{showSkillMenu &&
filteredSkills.length > 0 &&
typeof document !== "undefined" &&
createPortal(
<div
className="fixed z-[9999] animate-in fade-in-0 zoom-in-95 slide-in-from-bottom-2"
style={{
bottom: `${menuPosition.bottom}px`,
left: `${menuPosition.left}px`,
width: `${menuPosition.width}px`,
}}
>
<div className="bg-popover border rounded-lg shadow-xl max-h-[400px] overflow-hidden mb-2">
{/* Search hint */}
<div className="px-3 py-2 text-xs text-muted-foreground border-b bg-muted/50">
{slashSearchQuery ? (
<>
Searching skills:{" "}
<span className="font-medium">/{slashSearchQuery}</span>
{filteredSkills.length > 0 && (
<span className="ml-2">
({filteredSkills.length} found)
</span>
)}
</>
) : (
<span>{filteredSkills.length} skills available</span>
)}
</div>
{/* Skills Results */}
<div className="max-h-[350px] overflow-y-auto">
{filteredSkills.map((skill, index) => (
<button
key={skill.id}
ref={
index === selectedSkillIndex ? selectedSkillItemRef : null
}
type="button"
className={cn(
"w-full flex flex-col gap-1 px-3 py-2 text-sm transition-colors text-left min-w-0 border-b last:border-b-0",
index === selectedSkillIndex
? "bg-accent text-accent-foreground"
: "hover:bg-accent/50",
)}
onClick={() => handleSkillSelect(skill)}
onMouseEnter={() => setSelectedSkillIndex(index)}
>
<div className="flex items-center gap-2 w-full">
<span className="font-medium truncate flex-1 min-w-0">
{skill.name}
</span>
<span className="text-xs text-muted-foreground shrink-0 px-1.5 py-0.5 rounded bg-background/50">
skill
</span>
</div>
{skill.description && (
<span className="text-xs text-muted-foreground line-clamp-2">
{skill.description}
</span>
)}
</button>
))}
</div>
</div>
</div>,
document.body,
)}
</div>
);
};

View File

@@ -9,7 +9,13 @@ import {
WrenchIcon,
XCircleIcon,
} from "lucide-react";
import type { ComponentProps, ReactNode } from "react";
import {
type ComponentProps,
type ReactNode,
useEffect,
useState,
} from "react";
import { ScreenshotStorage } from "../../lib/screenshot-storage";
import { cn } from "../../lib/utils";
import { Badge } from "../ui/badge";
import {
@@ -29,7 +35,8 @@ export const Tool = ({ className, ...props }: ToolProps) => (
);
export type ToolHeaderProps = {
type: ToolUIPart["type"];
/** Display label for the tool either a raw `tool-${name}` key or a translated name */
type: string;
state: ToolUIPart["state"] | "executing";
className?: string;
};
@@ -154,3 +161,76 @@ export const ToolOutput = ({
</div>
);
};
// ============ Screenshot Display ============
export type ToolScreenshotProps = ComponentProps<"div"> & {
/** Inline base64 screenshot data URL */
screenshot?: string;
/** UID referencing a screenshot stored in ScreenshotStorage (IndexedDB) */
screenshotUid?: string;
};
/**
 * ToolScreenshot renders a screenshot captured by a tool.
 * Supports both inline base64 data and IndexedDB uid references.
 *
 * Renders nothing when neither source is provided; shows a spinner while a
 * uid-referenced screenshot loads and an error message when loading fails.
 */
export const ToolScreenshot = ({
  className,
  screenshot,
  screenshotUid,
  ...props
}: ToolScreenshotProps) => {
  const [imageData, setImageData] = useState<string | null>(screenshot ?? null);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Prefer inline screenshot
    if (screenshot) {
      setImageData(screenshot);
      setError(null);
      return;
    }
    // Load from IndexedDB by uid. The cancelled flag prevents a stale async
    // result from overwriting state after the uid changes or the component
    // unmounts (the original effect had no cleanup and could race).
    if (screenshotUid) {
      let cancelled = false;
      setLoading(true);
      setError(null);
      ScreenshotStorage.getScreenshot(screenshotUid)
        .then((data) => {
          if (cancelled) return;
          setImageData(data);
          if (!data) setError("Screenshot not found");
        })
        .catch(() => {
          if (!cancelled) setError("Failed to load screenshot");
        })
        .finally(() => {
          if (!cancelled) setLoading(false);
        });
      return () => {
        cancelled = true;
      };
    }
    // Neither source present — clear any previously loaded image so stale
    // data doesn't linger if the props are later re-supplied.
    setImageData(null);
  }, [screenshot, screenshotUid]);

  if (!screenshot && !screenshotUid) return null;

  return (
    <div className={cn("space-y-2 p-4", className)} {...props}>
      <h4 className="font-medium text-muted-foreground text-xs uppercase tracking-wide">
        Screenshot
      </h4>
      {loading ? (
        <div className="flex items-center gap-2 text-muted-foreground text-sm">
          <ClockIcon className="size-4 animate-spin" />
          <span>Loading screenshot...</span>
        </div>
      ) : error ? (
        <div className="text-destructive text-sm">{error}</div>
      ) : imageData ? (
        <img
          src={imageData}
          alt="Screenshot"
          className="cursor-pointer rounded-md max-w-full"
        />
      ) : null}
    </div>
  );
};

View File

@@ -0,0 +1,139 @@
import type React from "react";
import { buildWebsiteUrl } from "../../../lib/config/website.js";
/** Props for the insufficient-credits call-to-action panel. */
interface BuyTokenPromptProps {
// Invoked when "Buy Credits" is clicked; defaults to opening pricingUrl.
onBuyTokens?: () => void;
// User's current credit balance (rendered when provided).
currentCredits?: number;
// Credits required for the blocked action (rendered when provided).
requiredCredits?: number;
// Link target for purchase/pricing; defaults to buildWebsiteUrl("/pricing").
pricingUrl?: string;
}
// Default translations for buy token prompt
// NOTE(review): keys are resolved by the local `t()` helper in the
// component; these values are the English fallback copy (no external i18n
// wiring is visible in this file).
const defaultTranslations = {
"buyTokenPrompt.title": "Insufficient Credits",
"buyTokenPrompt.description":
"You need more credits to continue. Purchase credits to keep using the AI assistant.",
"buyTokenPrompt.buyButton": "Buy Credits",
"buyTokenPrompt.viewPricing": "View Pricing",
"buyTokenPrompt.helpText": "Credits are used for AI model usage costs.",
};
/**
 * Insufficient-credits banner with "Buy Credits" and "View Pricing" actions.
 * Optionally shows the user's current and required credit counts.
 */
export const BuyTokenPrompt: React.FC<BuyTokenPromptProps> = ({
  onBuyTokens,
  currentCredits,
  requiredCredits,
  pricingUrl = buildWebsiteUrl("/pricing"),
}) => {
  // Simple translation function — falls back to the key itself when missing.
  const t = (key: string): string => {
    return defaultTranslations[key as keyof typeof defaultTranslations] || key;
  };

  // Prefer the caller-supplied handler; otherwise open the pricing page.
  const handleBuyTokens = () => {
    if (onBuyTokens) {
      onBuyTokens();
    } else {
      // Default action: open ClaudeChrome website
      window.open(pricingUrl, "_blank");
    }
  };

  const handleViewPricing = () => {
    window.open(pricingUrl, "_blank");
  };

  return (
    <div className="bg-orange-50 border border-orange-200 rounded-lg p-4 mb-4">
      <div className="flex items-start">
        <div className="flex-shrink-0">
          <svg
            className="h-5 w-5 text-orange-400"
            viewBox="0 0 20 20"
            fill="currentColor"
            aria-hidden="true"
          >
            <path
              fillRule="evenodd"
              d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z"
              clipRule="evenodd"
            />
          </svg>
        </div>
        <div className="ml-3 flex-1">
          <h3 className="text-sm font-medium text-orange-800">
            {t("buyTokenPrompt.title")}
          </h3>
          <div className="mt-2 text-sm text-orange-700">
            <p className="mb-3">{t("buyTokenPrompt.description")}</p>
            {/* Credits information */}
            {(currentCredits !== undefined ||
              requiredCredits !== undefined) && (
              <div className="mb-3 p-3 bg-orange-100 rounded-md">
                <div className="flex items-center justify-between text-sm">
                  <span className="font-medium">Current Credits:</span>
                  {/* ?? instead of ||: both render 0 for a zero balance, but
                      ?? states the intent (default only for null/undefined) */}
                  <span className="font-mono">{currentCredits ?? 0}</span>
                </div>
                {/* Explicit undefined check: `requiredCredits &&` would
                    render a literal "0" when requiredCredits === 0 */}
                {requiredCredits !== undefined && (
                  <div className="flex items-center justify-between text-sm mt-1">
                    <span className="font-medium">Required:</span>
                    <span className="font-mono">{requiredCredits}</span>
                  </div>
                )}
              </div>
            )}
            <div className="flex space-x-3">
              <button
                type="button"
                onClick={handleBuyTokens}
                className="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md text-white bg-orange-600 hover:bg-orange-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-orange-500 transition-colors"
              >
                <svg
                  className="w-4 h-4 mr-2"
                  fill="none"
                  stroke="currentColor"
                  viewBox="0 0 24 24"
                  aria-hidden="true"
                >
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    strokeWidth={2}
                    d="M12 8c-1.657 0-3 .895-3 2s1.343 2 3 2 3 .895 3 2-1.343 2-3 2m0-8c1.11 0 2.08.402 2.599 1M12 8V7m0 1v8m0 0v1m0-1c-1.11 0-2.08-.402-2.599-1"
                  />
                </svg>
                {t("buyTokenPrompt.buyButton")}
              </button>
              <button
                type="button"
                onClick={handleViewPricing}
                className="inline-flex items-center px-3 py-2 border border-orange-300 text-sm font-medium rounded-md text-orange-700 bg-white hover:bg-orange-50 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-orange-500 transition-colors"
              >
                <svg
                  className="w-4 h-4 mr-2"
                  fill="none"
                  stroke="currentColor"
                  viewBox="0 0 24 24"
                  aria-hidden="true"
                >
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    strokeWidth={2}
                    d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z"
                  />
                </svg>
                {t("buyTokenPrompt.viewPricing")}
              </button>
            </div>
            <div className="mt-3 text-xs text-orange-600">
              {t("buyTokenPrompt.helpText")}
            </div>
          </div>
        </div>
      </div>
    </div>
  );
};

View File

@@ -104,7 +104,7 @@ describe("Chatbot Component", () => {
it("should render welcome screen when no messages", async () => {
await renderWithAct(<Chatbot agent={mockAgent} />);
expect(screen.getByText("Welcome to AIPex")).toBeInTheDocument();
expect(screen.getByText("Welcome to AIpex")).toBeInTheDocument();
});
});
@@ -233,14 +233,14 @@ describe("Chatbot Component", () => {
fireEvent.click(newChatButton);
// Verify the chat was reset (no messages)
expect(screen.getByText("Welcome to AIPex")).toBeInTheDocument();
expect(screen.getByText("Welcome to AIpex")).toBeInTheDocument();
});
it("should send suggestion text when welcome suggestion is clicked", async () => {
await renderWithAct(<Chatbot agent={mockAgent} />);
const suggestion = screen.getByText(
"Help me organize my browser tabs by topic",
"Please organize my open tabs by topic and purpose",
);
fireEvent.click(suggestion);
@@ -408,7 +408,7 @@ describe("Chatbot State Management", () => {
await renderWithAct(<Chatbot agent={mockAgent} />);
// Initially should show welcome screen
expect(screen.getByText("Welcome to AIPex")).toBeInTheDocument();
expect(screen.getByText("Welcome to AIpex")).toBeInTheDocument();
});
it("should preserve state across re-renders", async () => {
@@ -429,7 +429,7 @@ describe("Chatbot State Management", () => {
});
// Should still show the same content
expect(screen.getByText("Welcome to AIPex")).toBeInTheDocument();
expect(screen.getByText("Welcome to AIpex")).toBeInTheDocument();
});
});

View File

@@ -1,5 +1,6 @@
import { useCallback, useContext, useMemo, useState } from "react";
import { useChat, useChatConfig } from "../../../hooks";
import { useTranslation } from "../../../i18n/context";
import { cn } from "../../../lib/utils";
import type { ChatbotThemeVariables, ContextItem } from "../../../types";
import { DEFAULT_MODELS } from "../constants";
@@ -15,6 +16,10 @@ import { ConfigurationGuide } from "./configuration-guide";
import { Header } from "./header";
import { InputArea } from "./input-area";
import { MessageList } from "./message-list";
import {
type UxAuditFormData,
UxAuditGoalDialog,
} from "./ux-audit-goal-dialog";
/**
* Convert theme variables to CSS style object
@@ -139,6 +144,8 @@ export interface ChatbotProps extends Omit<ChatbotProviderProps, "children"> {
placeholderTexts?: string[];
/** Header title */
title?: string;
/** Initial input value to pre-fill the text area */
initialInput?: string;
}
/**
@@ -177,6 +184,7 @@ export function Chatbot({
models = DEFAULT_MODELS,
placeholderTexts,
title = "AIPex",
initialInput,
}: ChatbotProps) {
return (
<ChatbotProvider
@@ -195,6 +203,7 @@ export function Chatbot({
models={models}
placeholderTexts={placeholderTexts}
title={title}
initialInput={initialInput}
/>
</ChatbotProvider>
);
@@ -207,10 +216,12 @@ function ChatbotContent({
models,
placeholderTexts,
title,
initialInput: initialInputProp,
}: {
models: Array<{ name: string; value: string }>;
placeholderTexts?: string[];
title: string;
initialInput?: string;
}) {
const themeCtx = useContext(ThemeContext);
const chatCtx = useContext(ChatContext);
@@ -221,8 +232,10 @@ function ChatbotContent({
chatCtx || {};
const { isReady: isAgentReady } = agentCtx || {};
const [input, setInput] = useState("");
const { t } = useTranslation();
const [input, setInput] = useState(initialInputProp ?? "");
const [inputResetCount, setInputResetCount] = useState(0);
const [isUxAuditDialogOpen, setIsUxAuditDialogOpen] = useState(false);
const handleSubmit = useCallback(
(text: string, files?: File[], contexts?: ContextItem[]) => {
@@ -239,6 +252,28 @@ function ChatbotContent({
[sendMessage],
);
const handleUxAuditClick = useCallback(() => {
setIsUxAuditDialogOpen(true);
}, []);
const handleUxAuditSubmit = useCallback(
(formData: UxAuditFormData) => {
const platformDisplay = t(`uxAuditGoal.platform.${formData.platform}`);
const targetUsersLine = formData.targetUsers
? `\n**Target Users:** ${formData.targetUsers}`
: "";
const messageText = t("uxAuditGoal.messageTemplate")
.replace("{{url}}", formData.targetLink)
.replace("{{platform}}", platformDisplay)
.replace("{{jtbd}}", formData.jtbd)
.replace("{{targetUsersLine}}", targetUsersLine);
void sendMessage?.(messageText);
},
[t, sendMessage],
);
const handleCopy = useCallback((text: string) => {
navigator.clipboard.writeText(text);
}, []);
@@ -272,6 +307,7 @@ function ChatbotContent({
onRegenerate={regenerate}
onCopy={handleCopy}
onSuggestionClick={handleSuggestion}
onUxAuditClick={handleUxAuditClick}
/>
{/* Input Area */}
@@ -287,6 +323,13 @@ function ChatbotContent({
/>
</>
)}
{/* UX Audit Goal Dialog */}
<UxAuditGoalDialog
open={isUxAuditDialogOpen}
onOpenChange={setIsUxAuditDialogOpen}
onSubmit={handleUxAuditSubmit}
/>
</div>
);
}

View File

@@ -1,6 +1,7 @@
// Main component
export { Chatbot, type ChatbotProps, ChatbotProvider } from "./chatbot";
export { BuyTokenPrompt } from "./buy-token-prompt";
export { Chatbot, type ChatbotProps, ChatbotProvider } from "./chatbot";
// Individual components
export {
ConfigurationGuide,
@@ -12,10 +13,29 @@ export {
type ExtendedInputAreaProps,
InputArea,
} from "./input-area";
export { LoginPrompt } from "./login-prompt";
export { DefaultMessageItem, MessageItem } from "./message-item";
export { DefaultMessageList, MessageList } from "./message-list";
// Mode and state components
export {
type AutomationModeValue,
ModeIndicator,
} from "./mode-indicator";
export {
ModelChangePrompt,
type ModelInfo,
} from "./model-change-prompt";
export {
default as StreamingStateManager,
type StreamChunk,
type StreamingState,
type StreamingStateManagerProps,
useStreamingState,
} from "./streaming-state-manager";
export {
TokenUsageIndicator,
type TokenUsageIndicatorProps,
} from "./token-usage-indicator";
// Prompt components
export { UpdateBanner, type VersionCheckResult } from "./update-banner";
export { DefaultWelcomeScreen, WelcomeScreen } from "./welcome-screen";

View File

@@ -3,6 +3,7 @@ import type { ChatStatus } from "ai";
import { ClockIcon } from "lucide-react";
import { useCallback, useEffect, useMemo, useState } from "react";
import { useTranslation } from "../../../i18n/context";
import { fetchModelsForSelector } from "../../../lib/models";
import { cn } from "../../../lib/utils";
import type { ContextItem, InputAreaProps } from "../../../types";
import {
@@ -22,6 +23,8 @@ import {
PromptInputModelSelectItem,
PromptInputModelSelectTrigger,
PromptInputModelSelectValue,
PromptInputSkillTag,
PromptInputSkillTags,
PromptInputSubmit,
PromptInputTextarea,
PromptInputToolbar,
@@ -31,7 +34,7 @@ import { DEFAULT_MODELS } from "../constants";
import { useComponentsContext, useConfigContext } from "../context";
export interface ExtendedInputAreaProps extends InputAreaProps {
/** Available models for selection */
/** Available models for selection (used as fallback if API fetch fails) */
models?: Array<{ name: string; value: string }>;
/** Placeholder texts for typing animation */
placeholderTexts?: string[];
@@ -58,16 +61,48 @@ export function DefaultInputArea({
}: ExtendedInputAreaProps) {
const { t } = useTranslation();
const { slots } = useComponentsContext();
const { settings } = useConfigContext();
const { settings, updateSetting, updateSettings } = useConfigContext();
const effectivePlaceholder = placeholder ?? t("input.placeholder1");
// Fetch model list from API on mount (self-contained, no prop dependency)
const [fetchedModels, setFetchedModels] = useState<Array<{
name: string;
value: string;
}> | null>(null);
const [isLoadingModels, setIsLoadingModels] = useState(false);
useEffect(() => {
let cancelled = false;
setIsLoadingModels(true);
fetchModelsForSelector()
.then((serverModels) => {
if (!cancelled && serverModels.length > 0) {
setFetchedModels(serverModels);
}
})
.catch(() => {
// Fallback to prop-provided models (used via `models` below)
})
.finally(() => {
if (!cancelled) {
setIsLoadingModels(false);
}
});
return () => {
cancelled = true;
};
}, []);
const enabledCustomModels = useMemo(() => {
if (!settings.byokEnabled) return [] as CustomModelConfig[];
return (settings.customModels ?? []).filter((model) => model.enabled);
}, [settings.byokEnabled, settings.customModels]);
// Compute effective models list - only show enabled custom models when BYOK is enabled
// Compute effective models list:
// 1. BYOK enabled with custom models → show only custom models
// 2. Otherwise → prefer API-fetched models, fall back to prop-provided models
// 3. If current aiModel is not in the list, prepend it as a custom entry
const effectiveModels = useMemo(() => {
if (settings.byokEnabled && enabledCustomModels.length > 0) {
// When BYOK is enabled, only show enabled custom models
@@ -79,9 +114,26 @@ export function DefaultInputArea({
}));
}
// When BYOK is disabled, show default models
return models;
}, [settings.byokEnabled, enabledCustomModels, models]);
// Prefer API-fetched models, fall back to prop-provided models
const base = fetchedModels ?? models;
// If the user's current model is not in the list, prepend it as a custom entry
const currentModel = settings.aiModel?.trim();
if (currentModel && !base.some((m) => m.value === currentModel)) {
return [
{ name: `${currentModel} (Custom)`, value: currentModel },
...base,
];
}
return base;
}, [
settings.byokEnabled,
enabledCustomModels,
fetchedModels,
models,
settings.aiModel,
]);
const resolvedDefaultModel = useMemo(() => {
const candidates = [
@@ -131,11 +183,44 @@ export function DefaultInputArea({
[onSubmit],
);
const handleModelChange = useCallback((newModel: string) => {
const trimmed = newModel?.trim();
if (!trimmed) return;
setSelectedModel(trimmed);
}, []);
const handleModelChange = useCallback(
(newModel: string) => {
const trimmed = newModel?.trim();
if (!trimmed) return;
// Skip if unchanged
if (trimmed === selectedModel) return;
setSelectedModel(trimmed);
// Persist the model selection to settings so the agent recreates with the new model
if (settings.byokEnabled && enabledCustomModels.length > 0) {
// BYOK mode: find the matching custom model config and update all provider settings
const customConfig = enabledCustomModels.find(
(m) => m.aiModel === trimmed,
);
if (customConfig) {
void updateSettings({
aiModel: trimmed,
aiToken: customConfig.aiToken,
aiHost: customConfig.aiHost ?? "",
providerType: customConfig.providerType,
});
return;
}
}
// Non-BYOK mode (or custom model not found): just update aiModel
void updateSetting("aiModel", trimmed);
},
[
selectedModel,
settings.byokEnabled,
enabledCustomModels,
updateSetting,
updateSettings,
],
);
// Map status to ChatStatus type
const submitStatus: ChatStatus | undefined =
@@ -150,6 +235,14 @@ export function DefaultInputArea({
{(context) => <PromptInputContextTag data={context} />}
</PromptInputContextTags>
{/* Skill Tags */}
<PromptInputSkillTags>
{(skill) => <PromptInputSkillTag data={skill} />}
</PromptInputSkillTags>
{/* Platform-specific extras (e.g., context/skill data loaders) */}
{slots.promptExtras?.()}
{/* Attachments */}
<PromptInputAttachments>
{(attachment) => <PromptInputAttachment data={attachment} />}
@@ -197,19 +290,30 @@ export function DefaultInputArea({
<PromptInputModelSelect
onValueChange={handleModelChange}
value={selectedModel}
disabled={isLoadingModels}
>
<PromptInputModelSelectTrigger>
<PromptInputModelSelectValue />
</PromptInputModelSelectTrigger>
<PromptInputModelSelectContent>
{effectiveModels.map((model) => (
<PromptInputModelSelectItem
key={model.value}
value={model.value}
>
{model.name}
</PromptInputModelSelectItem>
))}
{isLoadingModels ? (
<div className="px-2 py-1.5 text-sm text-muted-foreground">
Loading...
</div>
) : effectiveModels.length > 0 ? (
effectiveModels.map((model) => (
<PromptInputModelSelectItem
key={model.value}
value={model.value}
>
{model.name}
</PromptInputModelSelectItem>
))
) : (
<div className="px-2 py-1.5 text-sm text-muted-foreground">
No models available
</div>
)}
</PromptInputModelSelectContent>
</PromptInputModelSelect>
)}

View File

@@ -0,0 +1,119 @@
import type React from "react";
/** Props for the {@link LoginPrompt} component. */
interface LoginPromptProps {
  /** Invoked when the user clicks the login button. */
  onLogin?: () => void;
  /** Whether to offer the "use own API key" alternative (defaults to true). */
  showByokOption?: boolean;
  /** Invoked when the user chooses to configure their own API key. */
  onOpenSettings?: () => void;
}
// Default translations for login prompt
// Built-in English fallback strings; keys mirror the app's i18n catalog so
// this component can render without a translation provider being mounted.
const defaultTranslations = {
  "loginPrompt.title": "Login Required",
  "loginPrompt.description":
    "Please login to continue using the AI assistant, or configure your own API key.",
  "loginPrompt.loginButton": "Login",
  "loginPrompt.configureByok": "Use Own Key",
};
/**
 * LoginPrompt — inline alert shown when the backend requires authentication.
 *
 * Offers a primary login action and, optionally, a shortcut to configure a
 * user-supplied API key (BYOK) instead. Both actions simply delegate to the
 * optional callbacks; leftover debug `console.log` calls have been removed.
 */
export const LoginPrompt: React.FC<LoginPromptProps> = ({
  onLogin,
  showByokOption = true,
  onOpenSettings,
}) => {
  // Minimal translation lookup over the built-in fallback strings; unknown
  // keys fall through unchanged so missing entries stay visible in the UI.
  const t = (key: string): string => {
    return defaultTranslations[key as keyof typeof defaultTranslations] || key;
  };

  // Delegate to the optional callbacks (no-ops when not provided).
  const handleLogin = () => {
    onLogin?.();
  };

  const handleOpenSettings = () => {
    onOpenSettings?.();
  };

  return (
    <div className="bg-red-50 border border-red-200 rounded-lg p-4 mb-4">
      <div className="flex items-start">
        <div className="flex-shrink-0">
          <svg
            className="h-5 w-5 text-red-400"
            viewBox="0 0 20 20"
            fill="currentColor"
            aria-hidden="true"
          >
            <path
              fillRule="evenodd"
              d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z"
              clipRule="evenodd"
            />
          </svg>
        </div>
        <div className="ml-3 flex-1">
          <h3 className="text-sm font-medium text-red-800">
            {t("loginPrompt.title")}
          </h3>
          <div className="mt-2 text-sm text-red-700">
            <p className="mb-3">{t("loginPrompt.description")}</p>
            <div className="flex flex-wrap gap-3">
              <button
                type="button"
                onClick={handleLogin}
                className="inline-flex items-center px-4 py-2 border border-transparent text-sm font-medium rounded-md text-white bg-red-600 hover:bg-red-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-red-500 transition-colors"
              >
                <svg
                  className="w-4 h-4 mr-2"
                  fill="none"
                  stroke="currentColor"
                  viewBox="0 0 24 24"
                  aria-hidden="true"
                >
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    strokeWidth={2}
                    d="M11 16l-4-4m0 0l4-4m-4 4h14m-5 4v1a3 3 0 01-3 3H6a3 3 0 01-3-3V7a3 3 0 013-3h7a3 3 0 013 3v1"
                  />
                </svg>
                {t("loginPrompt.loginButton")}
              </button>
              {/* BYOK shortcut only renders when both enabled and wired up */}
              {showByokOption && onOpenSettings && (
                <button
                  type="button"
                  onClick={handleOpenSettings}
                  className="inline-flex items-center px-4 py-2 border border-red-300 text-sm font-medium rounded-md text-red-700 bg-white hover:bg-red-50 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-red-500 transition-colors"
                >
                  <svg
                    className="w-4 h-4 mr-2"
                    fill="none"
                    stroke="currentColor"
                    viewBox="0 0 24 24"
                    aria-hidden="true"
                  >
                    <path
                      strokeLinecap="round"
                      strokeLinejoin="round"
                      strokeWidth={2}
                      d="M10.325 4.317c.426-1.756 2.924-1.756 3.35 0a1.724 1.724 0 002.573 1.066c1.543-.94 3.31.826 2.37 2.37a1.724 1.724 0 001.065 2.572c1.756.426 1.756 2.924 0 3.35a1.724 1.724 0 00-1.066 2.573c.94 1.543-.826 3.31-2.37 2.37a1.724 1.724 0 00-2.572 1.065c-.426 1.756-2.924 1.756-3.35 0a1.724 1.724 0 00-2.573-1.066c-1.543.94-3.31-.826-2.37-2.37a1.724 1.724 0 00-1.065-2.572c-1.756-.426-1.756-2.924 0-3.35a1.724 1.724 0 001.066-2.573c-.94-1.543.826-3.31 2.37-2.37.996.608 2.296.07 2.572-1.065z"
                    />
                    <path
                      strokeLinecap="round"
                      strokeLinejoin="round"
                      strokeWidth={2}
                      d="M15 12a3 3 0 11-6 0 3 3 0 016 0z"
                    />
                  </svg>
                  {t("loginPrompt.configureByok")}
                </button>
              )}
            </div>
          </div>
        </div>
      </div>
    </div>
  );
};

View File

@@ -1,7 +1,15 @@
import { CopyIcon, RefreshCcwIcon } from "lucide-react";
import { Fragment } from "react";
import { CopyIcon, RefreshCcwIcon, WrenchIcon } from "lucide-react";
import { Fragment, useMemo } from "react";
import { useTranslation } from "../../../i18n/context";
import { translatedToolName } from "../../../i18n/tool-names";
import { transformScreenshotPlaceholders } from "../../../lib/screenshot-utils";
import { cn } from "../../../lib/utils";
import type { MessageItemProps, UISourceUrlPart } from "../../../types";
import type {
MessageItemProps,
UIMessage,
UISourceUrlPart,
UIToolPart,
} from "../../../types";
import { Action, Actions } from "../../ai-elements/actions";
import { Message, MessageContent } from "../../ai-elements/message";
import {
@@ -17,6 +25,9 @@ import {
SourcesTrigger,
} from "../../ai-elements/sources";
import { useComponentsContext } from "../context";
import { BuyTokenPrompt } from "./buy-token-prompt";
import { LoginPrompt } from "./login-prompt";
import { ModelChangePrompt } from "./model-change-prompt";
import { DefaultToolDisplay } from "./slots/tool-display";
/**
@@ -47,6 +58,21 @@ export function DefaultMessageItem({
}: MessageItemProps) {
const { slots } = useComponentsContext();
// Collect screenshot data from tool parts for placeholder resolution
const { screenshotUidList, screenshotDataMap } = useMemo(() => {
const uids: string[] = [];
const dataMap = new Map<string, string>();
for (const p of message.parts) {
if (p.type === "tool" && p.screenshotUid) {
uids.push(p.screenshotUid);
if (p.screenshot) {
dataMap.set(p.screenshotUid, p.screenshot);
}
}
}
return { screenshotUidList: uids, screenshotDataMap: dataMap };
}, [message.parts]);
// Filter out system messages
if (message.role === "system") {
return null;
@@ -76,12 +102,27 @@ export function DefaultMessageItem({
const key = `${message.id}-${i}`;
switch (part.type) {
case "text":
case "text": {
// Transform [[screenshot:...]] placeholders to markdown images.
// First resolve to special URLs, then replace with actual
// base64 data URLs when available for inline rendering.
let processedText = part.text;
if (screenshotUidList.length > 0) {
processedText = transformScreenshotPlaceholders(
processedText,
screenshotUidList,
);
// Replace aipex-screenshot.invalid URLs with actual data
for (const [uid, data] of screenshotDataMap) {
const placeholder = `https://aipex-screenshot.invalid/${uid}`;
processedText = processedText.split(placeholder).join(data);
}
}
return (
<Fragment key={key}>
<Message from={message.role as "user" | "assistant" | "system"}>
<MessageContent>
<Response>{part.text}</Response>
<Response>{processedText}</Response>
</MessageContent>
</Message>
{/* Actions for last assistant message */}
@@ -109,6 +150,7 @@ export function DefaultMessageItem({
))}
</Fragment>
);
}
case "file":
return (
@@ -208,6 +250,85 @@ export function DefaultMessageItem({
return null;
}
})}
{/* Metadata-driven prompts for assistant error messages */}
{message.role === "assistant" && message.metadata && (
<>
{message.metadata.needLogin && (
<LoginPrompt
showByokOption
onOpenSettings={() => chrome.runtime?.openOptionsPage?.()}
/>
)}
{message.metadata.needBuyToken && (
<BuyTokenPrompt
currentCredits={message.metadata.currentCredits}
requiredCredits={message.metadata.requiredCredits}
/>
)}
{message.metadata.needChangeModel && (
<ModelChangePrompt
supportedModels={message.metadata.supportedModels || []}
onModelChange={(modelId) => {
chrome.storage?.local?.set?.({ aiModel: modelId });
}}
/>
)}
</>
)}
</div>
);
}
// ============ Collapsed tool display for folded messages ============
/**
 * Compact single-line rendering of a tool invocation for the folded
 * "thinking details" section: a wrench icon plus the tool's translated
 * display name. No input/output/state details are shown here.
 */
function CollapsedToolDisplay({ tool }: { tool: UIToolPart }) {
  const { t } = useTranslation();
  // Map the internal tool name to a localized, human-readable label.
  const displayName = translatedToolName(t, tool.toolName);
  return (
    <div className="text-xs text-muted-foreground py-1 px-2 flex items-center gap-1.5">
      <WrenchIcon className="size-3" />
      {displayName}
    </div>
  );
}
// ============ Collapsed message item for intermediate assistant messages ============
/**
* CollapsedMessageItem simplified rendering for intermediate assistant
* messages inside a folded "thinking details" section.
* Shows text as bullet points and tools as compact single-line displays.
*/
export function CollapsedMessageItem({ message }: { message: UIMessage }) {
  return (
    <div>
      {message.parts.map((part, i) => {
        // Stable key per part within this message.
        const key = `${message.id}-collapsed-${i}`;
        switch (part.type) {
          case "text":
            // Text parts render as dash-prefixed bullet lines.
            return (
              <div key={key} className="text-sm text-muted-foreground py-1">
                - {part.text}
              </div>
            );
          case "tool":
            return <CollapsedToolDisplay key={key} tool={part} />;
          case "reasoning":
            // Truncate long reasoning to 120 chars and append an ellipsis so
            // the folded view stays compact while signaling that text was cut.
            // (Fix: the original truncated without any truncation marker.)
            return (
              <div
                key={key}
                className="text-xs text-muted-foreground/70 py-0.5 italic"
              >
                {part.text.length > 120
                  ? `${part.text.slice(0, 120)}…`
                  : part.text}
              </div>
            );
          default:
            // Other part kinds (files, sources, …) are omitted when folded.
            return null;
        }
      })}
    </div>
  );
}

View File

@@ -1,15 +1,55 @@
import { BrainIcon, ChevronDownIcon } from "lucide-react";
import { useMemo } from "react";
import { useTranslation } from "../../../i18n/context";
import { cn } from "../../../lib/utils";
import type { MessageListProps } from "../../../types";
import type { MessageListProps, UIMessage } from "../../../types";
import {
Conversation,
ConversationContent,
ConversationScrollButton,
} from "../../ai-elements/conversation";
import { Loader } from "../../ai-elements/loader";
import {
Collapsible,
CollapsibleContent,
CollapsibleTrigger,
} from "../../ui/collapsible";
import { useComponentsContext } from "../context";
import { MessageItem } from "./message-item";
import { CollapsedMessageItem, MessageItem } from "./message-item";
import { WelcomeScreen } from "./welcome-screen";
/**
* A conversation turn: one optional user message followed by one or more
* assistant messages produced before the next user message.
*/
interface ConversationTurn {
  /** User message that opened this turn; absent when the conversation begins with an assistant message. */
  userMessage?: UIMessage;
  /** Assistant messages produced before the next user message (may be empty). */
  assistantMessages: UIMessage[];
}
/**
* Group a flat message list into conversation turns so we can collapse
* intermediate assistant messages (thinking / tool-call steps).
*/
function groupIntoTurns(messages: UIMessage[]): ConversationTurn[] {
  // Completed turns, plus the turn currently being filled (`open`).
  const turns: ConversationTurn[] = [];
  let open: ConversationTurn | null = null;

  for (const msg of messages) {
    switch (msg.role) {
      case "user": {
        // A user message always opens a fresh turn; seal the previous one.
        if (open !== null) {
          turns.push(open);
        }
        open = { userMessage: msg, assistantMessages: [] };
        break;
      }
      case "assistant": {
        // Assistant replies attach to the open turn. If the conversation
        // starts with an assistant message, open a user-less turn for it.
        open = open ?? { assistantMessages: [] };
        open.assistantMessages.push(msg);
        break;
      }
      default:
        // Other roles (e.g. "system") are ignored; callers filter them out.
        break;
    }
  }

  // Seal the trailing turn, if any.
  if (open !== null) {
    turns.push(open);
  }
  return turns;
}
/**
* Default MessageList component
*/
@@ -19,37 +59,120 @@ export function DefaultMessageList({
onRegenerate,
onCopy,
onSuggestionClick,
onUxAuditClick,
className,
...props
}: MessageListProps & {
onSuggestionClick?: (text: string) => void;
onUxAuditClick?: () => void;
}) {
const { slots } = useComponentsContext();
const { t } = useTranslation();
// Filter out system messages for display
const displayMessages = messages.filter((m) => m.role !== "system");
// Group into conversation turns for folding
const turns = useMemo(
() => groupIntoTurns(displayMessages),
[displayMessages],
);
// Determine if a message is the very last display message
const lastMessage = displayMessages[displayMessages.length - 1];
const lastMessageId = lastMessage?.id ?? null;
return (
<div className={cn("flex-1 overflow-hidden", className)} {...props}>
<Conversation className="h-full">
<ConversationContent>
{/* Before messages slot - for banners, announcements */}
{slots.beforeMessages?.()}
{displayMessages.length === 0 ? (
<WelcomeScreen
onSuggestionClick={(text) => {
onSuggestionClick?.(text);
}}
onUxAuditClick={onUxAuditClick}
/>
) : (
displayMessages.map((message, index) => (
<MessageItem
key={message.id}
message={message}
isLast={index === displayMessages.length - 1}
isStreaming={status === "streaming"}
onRegenerate={onRegenerate}
onCopy={onCopy}
/>
))
turns.map((turn) => {
// Generate stable key from message IDs
const turnKey = turn.userMessage
? turn.userMessage.id
: (turn.assistantMessages[0]?.id ?? "");
return (
<div key={turnKey}>
{/* Render user message */}
{turn.userMessage && (
<MessageItem
key={turn.userMessage.id}
message={turn.userMessage}
isLast={turn.userMessage.id === lastMessageId}
isStreaming={status === "streaming"}
onRegenerate={onRegenerate}
onCopy={onCopy}
/>
)}
{/* Render assistant messages with folding */}
{turn.assistantMessages.length > 1
? (() => {
const finalMsg =
turn.assistantMessages[
turn.assistantMessages.length - 1
]!;
return (
<>
{/* Intermediate messages collapsed by default */}
<Collapsible defaultOpen={false} className="mb-2">
<CollapsibleTrigger className="flex w-full cursor-pointer items-center gap-2 rounded-md border border-muted bg-muted/30 px-3 py-2 text-sm text-muted-foreground transition-colors hover:bg-muted/50 hover:text-foreground">
<BrainIcon className="size-4" />
<span className="flex-1 text-left">
{t("common.showThinkingDetails")}
</span>
<ChevronDownIcon className="size-4 transition-transform [[data-state=open]>&]:rotate-180" />
</CollapsibleTrigger>
<CollapsibleContent className="mt-2">
<div className="rounded-md border border-muted/50 bg-muted/10 p-3 space-y-2">
{turn.assistantMessages
.slice(0, -1)
.map((msg) => (
<CollapsedMessageItem
key={msg.id}
message={msg}
/>
))}
</div>
</CollapsibleContent>
</Collapsible>
{/* Final assistant message always expanded */}
<MessageItem
key={finalMsg.id}
message={finalMsg}
isLast={finalMsg.id === lastMessageId}
isStreaming={status === "streaming"}
onRegenerate={onRegenerate}
onCopy={onCopy}
/>
</>
);
})()
: // Single assistant message render normally
turn.assistantMessages.map((msg) => (
<MessageItem
key={msg.id}
message={msg}
isLast={msg.id === lastMessageId}
isStreaming={status === "streaming"}
onRegenerate={onRegenerate}
onCopy={onCopy}
/>
))}
</div>
);
})
)}
{/* Loading indicator */}
{status === "submitted" &&
@@ -67,7 +190,10 @@ export function DefaultMessageList({
* MessageList - Renders either custom or default message list
*/
export function MessageList(
props: MessageListProps & { onSuggestionClick?: (text: string) => void },
props: MessageListProps & {
onSuggestionClick?: (text: string) => void;
onUxAuditClick?: () => void;
},
) {
const { components } = useComponentsContext();

View File

@@ -0,0 +1,159 @@
/**
* Mode Indicator Component
*
* Displays the current automation mode (immersive/background) with animated transitions.
* Provides visual feedback when the system switches between modes.
*/
import { EyeIcon, MoonIcon } from "lucide-react";
import { cn } from "../../../lib/utils";
import { Tooltip, TooltipContent, TooltipTrigger } from "../../ui/tooltip";
/** The two automation modes the indicator can display. */
export type AutomationModeValue = "immersive" | "background";

/** Props for {@link ModeIndicator}. */
interface ModeIndicatorProps {
  /** Current automation mode to display. */
  mode: AutomationModeValue;
  /** When true, plays the mode-enter transition animation. */
  isTransitioning?: boolean;
  /** Extra class names merged onto the root element. */
  className?: string;
}
/**
* Mode Indicator Component
*
* Shows current automation mode with icon, label, and transition animations
*/
// Default translations for mode indicator
// Built-in English fallback strings; keys mirror the app's i18n catalog so
// the component works without a translation provider.
const defaultTranslations = {
  "mode.immersive": "Focus Mode",
  "mode.background": "Background Mode",
  "mode.immersiveDescription": "Visual feedback and window focus enabled",
  "mode.backgroundDescription": "Silent operation, no window focus changes",
};
/**
 * Pill-shaped status badge showing the current automation mode.
 *
 * Immersive mode renders a glowing blue gradient with a pulse animation;
 * background mode renders a muted gray pill. A tooltip explains the active
 * mode, and `isTransitioning` triggers a one-shot enter animation.
 */
export function ModeIndicator({
  mode,
  isTransitioning = false,
  className,
}: ModeIndicatorProps) {
  // Simple translation function over the built-in fallback strings;
  // unknown keys fall through unchanged.
  const t = (key: string): string => {
    return defaultTranslations[key as keyof typeof defaultTranslations] || key;
  };
  const isImmersive = mode === "immersive";
  const modeLabel = t(isImmersive ? "mode.immersive" : "mode.background");
  const modeDescription = t(
    isImmersive ? "mode.immersiveDescription" : "mode.backgroundDescription",
  );
  // Tooltip body: mode name plus a one-line description.
  const tooltipContent = (
    <div className="max-w-xs space-y-1">
      <p className="font-semibold">{modeLabel}</p>
      <p className="text-xs opacity-90">{modeDescription}</p>
    </div>
  );
  return (
    <Tooltip>
      <TooltipTrigger asChild>
        <div
          className={cn(
            // Base styles
            "flex items-center gap-1.5 px-2.5 py-1.5 rounded-full text-xs font-medium",
            // NOTE(review): "duration-600" is not in Tailwind's default
            // transition-duration scale — confirm the theme defines it,
            // otherwise this utility emits nothing (use 500/700).
            "transition-all duration-600 ease-out",
            "select-none cursor-help",
            // Mode-specific styles
            isImmersive
              ? [
                  // Immersive mode: blue gradient with glow
                  "bg-gradient-to-r from-blue-600 to-blue-500",
                  "text-white shadow-lg shadow-blue-500/40",
                  "animate-mode-pulse",
                ]
              : [
                  // Background mode: gray, subtle
                  "bg-gray-600/80 text-gray-200",
                  "opacity-70",
                ],
            // Transition animation (one-shot, keyed on target mode)
            isTransitioning &&
              (isImmersive
                ? "animate-transition-enter-immersive"
                : "animate-transition-enter-background"),
            className,
          )}
          role="status"
          aria-live="polite"
          aria-label={`Current mode: ${isImmersive ? "Immersive" : "Background"}`}
        >
          {/* Icon */}
          <span className="flex-shrink-0">
            {isImmersive ? (
              <EyeIcon className="size-3.5" />
            ) : (
              <MoonIcon className="size-3.5" />
            )}
          </span>
          {/* Label */}
          <span className="whitespace-nowrap">{modeLabel}</span>
        </div>
      </TooltipTrigger>
      <TooltipContent side="bottom" align="start">
        {tooltipContent}
      </TooltipContent>
    </Tooltip>
  );
}
/**
* Add custom animations to global CSS
* (This should be added to your global stylesheet or tailwind config)
*
* @keyframes mode-pulse {
* 0%, 100% {
* box-shadow: 0 0 20px rgba(59, 130, 246, 0.4);
* }
* 50% {
* box-shadow: 0 0 30px rgba(59, 130, 246, 0.8);
* }
* }
*
* @keyframes transition-enter-immersive {
* 0% {
* transform: scale(0.9);
* opacity: 0.5;
* }
* 50% {
* transform: scale(1.1);
* }
* 100% {
* transform: scale(1);
* opacity: 1;
* }
* }
*
* @keyframes transition-enter-background {
* 0% {
* transform: scale(1.05);
* opacity: 1;
* }
* 100% {
* transform: scale(1);
* opacity: 0.7;
* }
* }
*
* .animate-mode-pulse {
* animation: mode-pulse 2s ease-in-out infinite;
* }
*
* .animate-transition-enter-immersive {
* animation: transition-enter-immersive 0.6s ease-out;
* }
*
* .animate-transition-enter-background {
* animation: transition-enter-background 0.4s ease-out;
* }
*/

View File

@@ -0,0 +1,227 @@
import type React from "react";
import { useCallback, useEffect, useState } from "react";
import { fetchModelsForPrompt } from "../../../lib/models";
/** Metadata describing one selectable AI model. */
export interface ModelInfo {
  /** Stable model identifier, passed to `onModelChange` on selection. */
  id: string;
  /** Human-readable display name. */
  name: string;
  /** Short description shown under the name. */
  description: string;
  /** Relative cost tier used for the colored price badge. */
  priceLevel: "cheap" | "normal" | "expensive";
}
/** Props for {@link ModelChangePrompt}. */
interface ModelChangePromptProps {
  /** Model IDs/names the backend reports as acceptable alternatives. */
  supportedModels: string[];
  /** Invoked with the chosen model's ID when the user picks one. */
  onModelChange: (modelId: string) => void;
  /** Currently selected model ID; highlights the matching entry. */
  currentModel?: string;
  /** Available models to choose from */
  availableModels?: ModelInfo[];
  /** Function to fetch models from API */
  onFetchModels?: () => Promise<ModelInfo[]>;
}
// Default translations for model change prompt
// Built-in English fallback strings; keys mirror the app's i18n catalog.
const defaultTranslations = {
  "modelChangePrompt.noMatchingModels": "No Matching Models",
  "modelChangePrompt.contactSupport":
    "None of the requested models are available. Please contact support.",
  "modelChangePrompt.title": "Model Selection",
  "modelChangePrompt.description":
    "The requested model is not available. Please select an alternative:",
  "modelChangePrompt.clickToSwitch": "Click on a model to switch",
  "modelSelector.priceLevel.cheap": "Economy",
  "modelSelector.priceLevel.normal": "Standard",
  "modelSelector.priceLevel.expensive": "Premium",
};
/**
 * Prompt shown when the requested AI model is unavailable.
 *
 * Fetches the model catalog (via `onFetchModels` or the built-in
 * `fetchModelsForPrompt`), intersects it with `supportedModels` by exact ID
 * or case-insensitive substring match, and renders the candidates as
 * clickable cards with a price-tier badge. Falls back to `availableModels`
 * when the fetch fails, and shows a warning card when nothing matches.
 */
export const ModelChangePrompt: React.FC<ModelChangePromptProps> = ({
  supportedModels,
  onModelChange,
  currentModel,
  availableModels = [],
  onFetchModels,
}) => {
  // Simple translation function over the built-in fallback strings;
  // unknown keys fall through unchanged.
  const t = (key: string): string => {
    return defaultTranslations[key as keyof typeof defaultTranslations] || key;
  };
  // Catalog of all known models; seeded from the prop, replaced by fetch.
  const [allModels, setAllModels] = useState<ModelInfo[]>(availableModels);
  const [isLoadingModels, setIsLoadingModels] = useState(false);
  // Resolve the fetch function: use the provided callback or fall back to
  // the built-in fetchModelsForPrompt so models are always loaded.
  const resolvedFetch = useCallback(
    () => (onFetchModels ? onFetchModels() : fetchModelsForPrompt()),
    [onFetchModels],
  );
  // Fetch models from API (always runs — no longer gated on onFetchModels)
  // The `cancelled` flag prevents setState after unmount.
  useEffect(() => {
    let cancelled = false;
    const loadModels = async () => {
      setIsLoadingModels(true);
      try {
        const fetched = await resolvedFetch();
        if (!cancelled) {
          setAllModels(fetched);
        }
      } catch (_error) {
        // Keep using availableModels as fallback
      } finally {
        if (!cancelled) {
          setIsLoadingModels(false);
        }
      }
    };
    loadModels();
    return () => {
      cancelled = true;
    };
  }, [resolvedFetch]);
  // Update models when availableModels prop changes
  // NOTE(review): this effect keys on array identity — if the parent passes a
  // non-memoized inline array, it re-runs (and overwrites fetched models)
  // every render; confirm callers memoize or accept fetch being overridden.
  useEffect(() => {
    if (availableModels.length > 0) {
      setAllModels(availableModels);
    }
  }, [availableModels]);
  // Find matching models from supported models list
  // Matches by exact ID or bidirectional case-insensitive substring on name.
  const getMatchingModels = (): ModelInfo[] => {
    return allModels.filter((model: ModelInfo) =>
      supportedModels.some(
        (supportedModel) =>
          model.id === supportedModel ||
          model.name.toLowerCase().includes(supportedModel.toLowerCase()) ||
          supportedModel.toLowerCase().includes(model.name.toLowerCase()),
      ),
    );
  };
  const matchingModels = getMatchingModels();
  // While fetching: spinner card (replaces any fallback content).
  if (isLoadingModels) {
    return (
      <div className="bg-blue-50 border border-blue-200 rounded-lg p-4 mb-4">
        <div className="flex items-center">
          <div className="animate-spin rounded-full h-4 w-4 border-b-2 border-blue-600 mr-3"></div>
          <span className="text-sm text-blue-700">
            Loading available models...
          </span>
        </div>
      </div>
    );
  }
  // No candidate matched the supported list: warning card.
  if (matchingModels.length === 0) {
    return (
      <div className="bg-yellow-50 border border-yellow-200 rounded-lg p-4 mb-4">
        <div className="flex items-start">
          <div className="flex-shrink-0">
            <svg
              className="h-5 w-5 text-yellow-400"
              viewBox="0 0 20 20"
              fill="currentColor"
              aria-hidden="true"
            >
              <path
                fillRule="evenodd"
                d="M8.257 3.099c.765-1.36 2.722-1.36 3.486 0l5.58 9.92c.75 1.334-.213 2.98-1.742 2.98H4.42c-1.53 0-2.493-1.646-1.743-2.98l5.58-9.92zM11 13a1 1 0 11-2 0 1 1 0 012 0zm-1-8a1 1 0 00-1 1v3a1 1 0 002 0V6a1 1 0 00-1-1z"
                clipRule="evenodd"
              />
            </svg>
          </div>
          <div className="ml-3">
            <h3 className="text-sm font-medium text-yellow-800">
              {t("modelChangePrompt.noMatchingModels")}
            </h3>
            <div className="mt-2 text-sm text-yellow-700">
              <p>{t("modelChangePrompt.contactSupport")}</p>
            </div>
          </div>
        </div>
      </div>
    );
  }
  // Main case: selectable model cards with price badge and current-model check.
  return (
    <div className="bg-blue-50 border border-blue-200 rounded-lg p-4 mb-4">
      <div className="flex items-start">
        <div className="flex-shrink-0">
          <svg
            className="h-5 w-5 text-blue-400"
            viewBox="0 0 20 20"
            fill="currentColor"
            aria-hidden="true"
          >
            <path
              fillRule="evenodd"
              d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z"
              clipRule="evenodd"
            />
          </svg>
        </div>
        <div className="ml-3 flex-1">
          <h3 className="text-sm font-medium text-blue-800">
            {t("modelChangePrompt.title")}
          </h3>
          <div className="mt-2 text-sm text-blue-700">
            <p className="mb-3">{t("modelChangePrompt.description")}</p>
            <div className="space-y-2">
              {matchingModels.map((model) => (
                <button
                  type="button"
                  key={model.id}
                  onClick={() => onModelChange(model.id)}
                  className={`w-full text-left p-3 rounded-md border transition-colors ${
                    currentModel === model.id
                      ? "bg-blue-100 border-blue-300 text-blue-900"
                      : "bg-white border-blue-200 hover:bg-blue-50 hover:border-blue-300"
                  }`}
                >
                  <div className="flex items-center justify-between">
                    <div>
                      <div className="font-medium text-sm">{model.name}</div>
                      <div className="text-xs text-gray-600 mt-1">
                        {model.description}
                      </div>
                    </div>
                    <div className="flex items-center space-x-2">
                      {/* Price-tier badge, color-coded by level */}
                      <span
                        className={`px-2 py-1 rounded text-xs font-medium ${
                          model.priceLevel === "cheap"
                            ? "bg-green-100 text-green-800"
                            : model.priceLevel === "normal"
                              ? "bg-yellow-100 text-yellow-800"
                              : "bg-red-100 text-red-800"
                        }`}
                      >
                        {t(`modelSelector.priceLevel.${model.priceLevel}`)}
                      </span>
                      {/* Checkmark on the currently-selected model */}
                      {currentModel === model.id && (
                        <svg
                          className="h-4 w-4 text-blue-600"
                          fill="currentColor"
                          viewBox="0 0 20 20"
                          aria-hidden="true"
                        >
                          <path
                            fillRule="evenodd"
                            d="M16.707 5.293a1 1 0 010 1.414l-8 8a1 1 0 01-1.414 0l-4-4a1 1 0 011.414-1.414L8 12.586l7.293-7.293a1 1 0 011.414 0z"
                            clipRule="evenodd"
                          />
                        </svg>
                      )}
                    </div>
                  </div>
                </button>
              ))}
            </div>
            <div className="mt-3 text-xs text-blue-600">
              {t("modelChangePrompt.clickToSwitch")}
            </div>
          </div>
        </div>
      </div>
    </div>
  );
};

View File

@@ -4,6 +4,8 @@ import {
WrenchIcon,
XCircleIcon,
} from "lucide-react";
import { useTranslation } from "../../../../i18n/context";
import { translatedToolName } from "../../../../i18n/tool-names";
import { cn } from "../../../../lib/utils";
import type { ToolDisplaySlotProps } from "../../../../types";
import { Response } from "../../../ai-elements/response";
@@ -13,6 +15,7 @@ import {
ToolHeader,
ToolInput,
ToolOutput,
ToolScreenshot,
} from "../../../ai-elements/tool";
import {
Collapsible,
@@ -26,15 +29,14 @@ import { formatToolOutput, mapToolState } from "../../tools";
* Opens by default when there's an error so users can see the failure reason
*/
export function DefaultToolDisplay({ tool }: ToolDisplaySlotProps) {
const { t } = useTranslation();
const displayName = translatedToolName(t, tool.toolName);
// Expand by default when in error state to make failure reasons visible
const shouldExpandByDefault = tool.state === "error";
return (
<Tool defaultOpen={shouldExpandByDefault}>
<ToolHeader
type={`tool-${tool.toolName}`}
state={mapToolState(tool.state)}
/>
<ToolHeader type={displayName} state={mapToolState(tool.state)} />
<ToolContent>
<ToolInput input={tool.input} />
<ToolOutput
@@ -45,6 +47,10 @@ export function DefaultToolDisplay({ tool }: ToolDisplaySlotProps) {
}
errorText={tool.errorText}
/>
<ToolScreenshot
screenshot={tool.screenshot}
screenshotUid={tool.screenshotUid}
/>
</ToolContent>
</Tool>
);
@@ -55,6 +61,8 @@ export function DefaultToolDisplay({ tool }: ToolDisplaySlotProps) {
* Opens by default when there's an error so users can see the failure reason
*/
export function CompactToolDisplay({ tool }: ToolDisplaySlotProps) {
const { t } = useTranslation();
const displayName = translatedToolName(t, tool.toolName);
const getStatusIcon = () => {
switch (tool.state) {
case "pending":
@@ -75,7 +83,7 @@ export function CompactToolDisplay({ tool }: ToolDisplaySlotProps) {
<Collapsible defaultOpen={shouldExpandByDefault}>
<CollapsibleTrigger className="flex items-center gap-2 w-full p-2 rounded-md hover:bg-muted/50 transition-colors">
{getStatusIcon()}
<span className="text-sm font-medium">{tool.toolName}</span>
<span className="text-sm font-medium">{displayName}</span>
{tool.duration && (
<span className="text-xs text-muted-foreground ml-auto">
{tool.duration}ms
@@ -118,6 +126,8 @@ export function CompactToolDisplay({ tool }: ToolDisplaySlotProps) {
* Minimal tool display (just status indicator)
*/
export function MinimalToolDisplay({ tool }: ToolDisplaySlotProps) {
const { t } = useTranslation();
const displayName = translatedToolName(t, tool.toolName);
const getStatusColor = () => {
switch (tool.state) {
case "pending":
@@ -134,7 +144,7 @@ export function MinimalToolDisplay({ tool }: ToolDisplaySlotProps) {
return (
<div className="inline-flex items-center gap-1.5 px-2 py-1 text-xs rounded-full bg-muted">
<div className={cn("w-2 h-2 rounded-full", getStatusColor())} />
<span>{tool.toolName}</span>
<span>{displayName}</span>
{tool.state === "executing" && (
<Loader2Icon className="size-3 animate-spin" />
)}

View File

@@ -0,0 +1,316 @@
import type React from "react";
import { useCallback, useEffect, useRef, useState } from "react";
/**
 * Stream chunk types for AI streaming responses
 *
 * One event in a streamed AI response; `type` discriminates which of the
 * optional payload fields is populated.
 */
export interface StreamChunk {
  /** Kind of streamed event. */
  type:
    | "text"
    | "tool_call"
    | "tool_result"
    | "thinking"
    | "planning"
    | "complete"
    | "error";
  /** Text payload for "text" / "thinking" / "planning" chunks. */
  content?: string;
  /** Tool name for "tool_call" / "tool_result" chunks. */
  name?: string;
  /** Tool-call arguments (shape is tool-specific). */
  args?: unknown;
  /** Serialized tool result for "tool_result" chunks. */
  result?: string;
  /** Error description for "error" chunks. */
  error?: string;
  /** Creation time in ms since epoch (Date.now()). */
  timestamp: number;
  /** ID of the message this chunk belongs to. */
  messageId: string;
}
/** Aggregated state of one in-flight (or finished) streaming response. */
export interface StreamingState {
  /** True while chunks are still arriving. */
  isStreaming: boolean;
  /** Text accumulated from "text" chunks, in order. */
  content: string;
  /** All "tool_call" chunks received so far. */
  toolCalls: StreamChunk[];
  /** The tool call currently executing, or null when none is in flight. */
  currentToolCall: {
    name: string;
    args: unknown;
    startTime: number;
  } | null;
  /** Every chunk in arrival order (text, tool, thinking, planning, …). */
  steps: StreamChunk[];
  /** Last error message, or null if none occurred. */
  error: string | null;
  /** Stream start time (ms since epoch); null before streaming starts. */
  startTime: number | null;
  /** Stream end time (ms since epoch); null while still streaming. */
  endTime: number | null;
}
/** Props for the streaming state manager. */
export interface StreamingStateManagerProps {
  /** ID of the message whose stream is being tracked. */
  messageId: string;
  /** Called whenever the streaming state changes. */
  onStateChange?: (state: StreamingState) => void;
  /** Called with the final state once streaming completes. */
  onComplete?: (finalState: StreamingState) => void;
  /** Called with a description when streaming fails. */
  onError?: (error: string) => void;
  /** Optional message listener for environments without chrome.runtime */
  onMessage?: (callback: (message: unknown) => void) => () => void;
}
// Pristine streaming state used to seed the hook before any chunks arrive.
const initialState: StreamingState = {
  isStreaming: false,
  content: "",
  toolCalls: [],
  currentToolCall: null,
  steps: [],
  error: null,
  startTime: null,
  endTime: null,
};
/**
 * React hook that folds AI streaming chunks into a single
 * {@link StreamingState} for one message.
 *
 * Chunks arrive either through the returned `addChunk`/helper callbacks or
 * through runtime messages (a custom `onMessage` subscription when provided,
 * otherwise `chrome.runtime.onMessage` when available). Only messages whose
 * `messageId` matches this hook's `messageId` are applied.
 *
 * Fix: removed `startTimeRef` and `toolCallStartTimeRef` — both were
 * write-only (assigned in `startStreaming` and the "tool_call" case but never
 * read), duplicating `state.startTime` / `currentToolCall.startTime`.
 *
 * @param messageId - Id of the message whose stream this hook tracks.
 * @param onMessage - Optional subscription function; receives a handler and
 *   returns an unsubscribe function for cleanup.
 */
export const useStreamingState = (
  messageId: string,
  onMessage?: (callback: (message: unknown) => void) => () => void,
) => {
  const [state, setState] = useState<StreamingState>(initialState);
  // Fold a new chunk into the accumulated state.
  const addChunk = useCallback((chunk: StreamChunk) => {
    setState((prev) => {
      const newState = { ...prev };
      switch (chunk.type) {
        case "text":
          // Streamed text is concatenated; every chunk also lands in steps.
          newState.content += chunk.content || "";
          newState.steps = [...prev.steps, chunk];
          break;
        case "tool_call":
          newState.currentToolCall = {
            name: chunk.name || "",
            args: chunk.args,
            startTime: Date.now(),
          };
          newState.toolCalls = [...prev.toolCalls, chunk];
          newState.steps = [...prev.steps, chunk];
          break;
        case "tool_result":
          // A result closes the in-flight tool call.
          newState.currentToolCall = null;
          newState.steps = [...prev.steps, chunk];
          break;
        case "thinking":
        case "planning":
          newState.steps = [...prev.steps, chunk];
          break;
        case "complete":
          newState.isStreaming = false;
          newState.endTime = Date.now();
          newState.steps = [...prev.steps, chunk];
          break;
        case "error":
          newState.error = chunk.error || "Unknown error";
          newState.isStreaming = false;
          newState.endTime = Date.now();
          newState.steps = [...prev.steps, chunk];
          break;
      }
      return newState;
    });
  }, []);
  // Mark the stream as started; clears any previous error.
  const startStreaming = useCallback(() => {
    setState((prev) => ({
      ...prev,
      isStreaming: true,
      startTime: Date.now(),
      error: null,
    }));
  }, []);
  // Mark the stream as stopped; an explicit error overrides the stored one.
  const stopStreaming = useCallback((error?: string) => {
    setState((prev) => ({
      ...prev,
      isStreaming: false,
      endTime: Date.now(),
      error: error || prev.error,
    }));
  }, []);
  // Record a tool result (non-string results are JSON-stringified).
  const addToolResult = useCallback(
    (name: string, result: unknown, error?: string) => {
      const chunk: StreamChunk = {
        type: "tool_result",
        name,
        result: typeof result === "string" ? result : JSON.stringify(result),
        error,
        timestamp: Date.now(),
        messageId,
      };
      addChunk(chunk);
    },
    [addChunk, messageId],
  );
  // Record a "thinking" reasoning step.
  const addThinking = useCallback(
    (content: string) => {
      const chunk: StreamChunk = {
        type: "thinking",
        content,
        timestamp: Date.now(),
        messageId,
      };
      addChunk(chunk);
    },
    [addChunk, messageId],
  );
  // Record a "planning" step.
  const addPlanning = useCallback(
    (content: string) => {
      const chunk: StreamChunk = {
        type: "planning",
        content,
        timestamp: Date.now(),
        messageId,
      };
      addChunk(chunk);
    },
    [addChunk, messageId],
  );
  // Reset to the pristine initial state.
  const reset = useCallback(() => {
    setState(initialState);
  }, []);
  // Subscribe to streaming runtime messages for this messageId.
  useEffect(() => {
    const handleStreamMessage = (message: unknown) => {
      const msg = message as {
        messageId?: string;
        request?: string;
        chunk?: string;
        step?: {
          type?: string;
          name?: string;
          args?: unknown;
          result?: unknown;
          error?: string;
          content?: string;
        };
        error?: string;
      };
      // Ignore messages addressed to other streams.
      if (msg.messageId !== messageId) return;
      switch (msg.request) {
        case "ai-chat-stream":
          addChunk({
            type: "text",
            content: msg.chunk,
            timestamp: Date.now(),
            messageId,
          });
          break;
        case "ai-chat-tools-step":
          if (msg.step?.type === "call_tool") {
            addChunk({
              type: "tool_call",
              name: msg.step.name,
              args: msg.step.args,
              timestamp: Date.now(),
              messageId,
            });
          } else if (msg.step?.type === "tool_result") {
            addToolResult(msg.step.name || "", msg.step.result, msg.step.error);
          } else if (msg.step?.type === "think") {
            addThinking(msg.step.content || "");
          }
          break;
        case "ai-chat-planning-step":
          addPlanning(msg.step?.content || "");
          break;
        case "ai-chat-complete":
          addChunk({
            type: "complete",
            timestamp: Date.now(),
            messageId,
          });
          break;
        case "ai-chat-error":
          addChunk({
            type: "error",
            error: msg.error,
            timestamp: Date.now(),
            messageId,
          });
          break;
      }
    };
    // Use custom message listener if provided, otherwise try chrome.runtime
    if (onMessage) {
      return onMessage(handleStreamMessage);
    }
    if (typeof chrome !== "undefined" && chrome.runtime?.onMessage) {
      chrome.runtime.onMessage.addListener(handleStreamMessage);
      return () => {
        chrome.runtime.onMessage.removeListener(handleStreamMessage);
      };
    }
    return undefined;
  }, [messageId, addChunk, addToolResult, addThinking, addPlanning, onMessage]);
  return {
    state,
    addChunk,
    startStreaming,
    stopStreaming,
    addToolResult,
    addThinking,
    addPlanning,
    reset,
  };
};
/**
 * Headless component bridging {@link useStreamingState} to callback props.
 * Renders nothing; mount it alongside the message being streamed.
 */
const StreamingStateManager: React.FC<StreamingStateManagerProps> = ({
  messageId,
  onStateChange,
  onComplete,
  onError,
  onMessage,
}) => {
  const { state } = useStreamingState(messageId, onMessage);
  // Notify state changes
  useEffect(() => {
    onStateChange?.(state);
  }, [state, onStateChange]);
  // Notify completion (stream ended and no error recorded).
  // NOTE(review): the whole `state` object is in the dependency list, so
  // onComplete can fire again if state changes after completion — confirm
  // callers treat it as idempotent.
  useEffect(() => {
    if (!state.isStreaming && state.endTime && !state.error) {
      onComplete?.(state);
    }
  }, [state.isStreaming, state.endTime, state.error, state, onComplete]);
  // Notify errors
  useEffect(() => {
    if (state.error) {
      onError?.(state.error);
    }
  }, [state.error, onError]);
  return null; // This component doesn't render anything
};
export default StreamingStateManager;

View File

@@ -0,0 +1,285 @@
import { ArrowUpCircleIcon, SparklesIcon, XIcon } from "lucide-react";
import { useEffect, useState } from "react";
import { cn } from "../../../lib/utils";
import { Button } from "../../ui/button";
/** Result of a version check against the latest published release. */
export interface VersionCheckResult {
  /** True when a newer version than the installed one is available. */
  hasUpdate: boolean;
  /** Currently installed version. */
  currentVersion: string;
  /** Latest published version, or null when unknown. */
  latestVersion: string | null;
  /** URL of the changelog for the latest version. */
  changelogUrl: string;
  /** True when the extension was just updated (triggers the "What's New" banner). */
  isNewlyUpdated: boolean;
  /** Optional release notes text. */
  notes: string | null;
}
/**
 * Callbacks injected by the host environment into {@link UpdateBanner}.
 * All are optional; features degrade gracefully when a callback is absent.
 */
interface UpdateBannerProps {
  className?: string;
  /** Function to check version */
  onCheckVersion?: () => Promise<VersionCheckResult>;
  /** Function to check if update is dismissed for a version */
  onIsUpdateDismissed?: (version: string) => Promise<boolean>;
  /** Function to dismiss update for a version */
  onDismissUpdate?: (version: string) => Promise<void>;
  /** Function to request update from Chrome */
  onRequestUpdate?: () => Promise<{
    status: "update_available" | "no_update" | "throttled" | "error";
    version?: string;
  }>;
  /** Function to open changelog */
  onOpenChangelog?: (url: string) => void;
  /** Function to open update page (e.g. the web store listing). */
  onOpenUpdatePage?: () => void;
}
/**
 * Update Banner Component
 * Shows update notification when a new version is available
 * Also shows "What's New" for users who just updated
 */
// Built-in English fallback strings for the banner. "{{name}}" placeholders
// are substituted by the local t() helper inside UpdateBanner.
const defaultTranslations = {
  "update.whatsNewTitle": "What's New in v{{version}}",
  "update.whatsNewDescription": "See what's changed in this version",
  "update.viewChanges": "View Changes",
  "update.newVersionAvailable": "Update Available: v{{version}}",
  "update.currentVersion": "Current: v{{version}}",
  "update.restartRequired": "Restart to complete update",
  "update.updating": "Checking...",
  "update.openStore": "Open Store",
  "update.updateNow": "Update Now",
};
/**
 * Banner shown at the top of the UI when either (a) the extension was just
 * updated ("What's New" variant) or (b) a newer version is available
 * (update variant). Renders null when there is nothing to announce.
 */
export function UpdateBanner({
  className,
  onCheckVersion,
  onIsUpdateDismissed,
  onDismissUpdate,
  onRequestUpdate,
  onOpenChangelog,
  onOpenUpdatePage,
}: UpdateBannerProps) {
  // Simple translation function with variable substitution.
  // String.replace substitutes only the first occurrence of each
  // placeholder; every template above uses each variable at most once.
  const t = (key: string, vars?: Record<string, unknown>): string => {
    let text =
      defaultTranslations[key as keyof typeof defaultTranslations] || key;
    if (vars) {
      Object.entries(vars).forEach(([k, v]) => {
        text = text.replace(`{{${k}}}`, String(v));
      });
    }
    return text;
  };
  const [versionInfo, setVersionInfo] = useState<VersionCheckResult | null>(
    null,
  );
  // Whether any banner variant is currently shown.
  const [isVisible, setIsVisible] = useState(false);
  const [showWhatsNew, setShowWhatsNew] = useState(false);
  const [updateStatus, setUpdateStatus] = useState<
    "idle" | "checking" | "ready" | "failed"
  >("idle");
  // On mount (or when callbacks change): check the version and decide
  // which banner, if any, to show.
  useEffect(() => {
    const checkVersionAndShow = async () => {
      if (!onCheckVersion) return;
      try {
        const result = await onCheckVersion();
        setVersionInfo(result);
        // Show "What's New" for newly updated users
        if (result.isNewlyUpdated) {
          setShowWhatsNew(true);
          setIsVisible(true);
          return;
        }
        // Check if update is available and not dismissed
        if (result.hasUpdate && result.latestVersion) {
          const dismissed = onIsUpdateDismissed
            ? await onIsUpdateDismissed(result.latestVersion)
            : false;
          if (!dismissed) {
            setIsVisible(true);
          }
        }
      } catch (error) {
        console.error("[UpdateBanner] Failed to check version:", error);
      }
    };
    checkVersionAndShow();
  }, [onCheckVersion, onIsUpdateDismissed]);
  // Hide the banner and persist the dismissal for this version.
  const handleDismiss = async () => {
    if (versionInfo?.latestVersion && onDismissUpdate) {
      await onDismissUpdate(versionInfo.latestVersion);
    }
    setIsVisible(false);
    setShowWhatsNew(false);
  };
  // Ask Chrome to fetch the update; on failure/throttle fall back to
  // opening the store page after a short delay.
  const handleUpdate = async () => {
    if (!onRequestUpdate) {
      onOpenUpdatePage?.();
      return;
    }
    setUpdateStatus("checking");
    try {
      const result = await onRequestUpdate();
      if (result.status === "update_available") {
        // Update is available and will be installed
        setUpdateStatus("ready");
        console.log(
          "[UpdateBanner] Update available, version:",
          result.version,
        );
      } else {
        // Failed or throttled, fallback to opening store
        // NOTE(review): once status is "failed" the action button below is
        // disabled while labelled "Open Store" — confirm that is intended.
        console.log("[UpdateBanner] Update check result:", result.status);
        setUpdateStatus("failed");
        // Open store page as fallback
        setTimeout(() => {
          onOpenUpdatePage?.();
        }, 1000);
      }
    } catch (error) {
      console.error("[UpdateBanner] Update request failed:", error);
      setUpdateStatus("failed");
      // Open store page as fallback
      setTimeout(() => {
        onOpenUpdatePage?.();
      }, 1000);
    }
  };
  // Open the changelog, then hide the "What's New" banner.
  const handleWhatsNew = () => {
    if (versionInfo?.changelogUrl && onOpenChangelog) {
      onOpenChangelog(versionInfo.changelogUrl);
    }
    // After viewing What's New, hide the banner
    setShowWhatsNew(false);
    setIsVisible(false);
  };
  if (!isVisible) {
    return null;
  }
  // "What's New" banner for newly updated users
  if (showWhatsNew) {
    return (
      <div
        className={cn(
          "relative flex items-center justify-between gap-3 px-4 py-3",
          "bg-gradient-to-r from-purple-500/10 via-pink-500/10 to-purple-500/10",
          "border-b border-purple-200 dark:border-purple-800",
          "animate-in slide-in-from-top duration-300",
          className,
        )}
      >
        <div className="flex items-center gap-3 min-w-0 flex-1">
          <div className="flex-shrink-0 p-1.5 rounded-full bg-purple-500/20">
            <SparklesIcon className="size-4 text-purple-600 dark:text-purple-400" />
          </div>
          <div className="min-w-0 flex-1">
            <p className="text-sm font-medium text-purple-900 dark:text-purple-100 truncate">
              {t("update.whatsNewTitle", {
                version: versionInfo?.currentVersion,
              })}
            </p>
            <p className="text-xs text-purple-700 dark:text-purple-300 truncate">
              {t("update.whatsNewDescription")}
            </p>
          </div>
        </div>
        <div className="flex items-center gap-2 flex-shrink-0">
          <Button
            variant="ghost"
            size="sm"
            className="h-8 px-3 text-purple-700 hover:text-purple-900 hover:bg-purple-500/20 dark:text-purple-300 dark:hover:text-purple-100"
            onClick={handleWhatsNew}
          >
            {t("update.viewChanges")}
          </Button>
          <Button
            variant="ghost"
            size="icon"
            className="h-8 w-8 text-purple-700 hover:text-purple-900 hover:bg-purple-500/20 dark:text-purple-300 dark:hover:text-purple-100"
            onClick={handleDismiss}
          >
            <XIcon className="size-4" />
          </Button>
        </div>
      </div>
    );
  }
  // Update available banner
  if (versionInfo?.hasUpdate) {
    return (
      <div
        className={cn(
          "relative flex items-center justify-between gap-3 px-4 py-3",
          "bg-gradient-to-r from-blue-500/10 via-cyan-500/10 to-blue-500/10",
          "border-b border-blue-200 dark:border-blue-800",
          "animate-in slide-in-from-top duration-300",
          className,
        )}
      >
        <div className="flex items-center gap-3 min-w-0 flex-1">
          <div className="flex-shrink-0 p-1.5 rounded-full bg-blue-500/20">
            <ArrowUpCircleIcon className="size-4 text-blue-600 dark:text-blue-400" />
          </div>
          <div className="min-w-0 flex-1">
            <p className="text-sm font-medium text-blue-900 dark:text-blue-100 truncate">
              {t("update.newVersionAvailable", {
                version: versionInfo.latestVersion,
              })}
            </p>
            <p className="text-xs text-blue-700 dark:text-blue-300 truncate">
              {t("update.currentVersion", {
                version: versionInfo.currentVersion,
              })}
            </p>
          </div>
        </div>
        <div className="flex items-center gap-2 flex-shrink-0">
          {updateStatus === "ready" ? (
            <div className="text-xs text-blue-700 dark:text-blue-300 font-medium">
              {t("update.restartRequired")}
            </div>
          ) : (
            <Button
              variant="default"
              size="sm"
              className="h-8 px-3 bg-blue-600 hover:bg-blue-700 text-white"
              onClick={handleUpdate}
              disabled={
                updateStatus === "checking" || updateStatus === "failed"
              }
            >
              {updateStatus === "checking"
                ? t("update.updating")
                : updateStatus === "failed"
                  ? t("update.openStore")
                  : t("update.updateNow")}
            </Button>
          )}
          <Button
            variant="ghost"
            size="icon"
            className="h-8 w-8 text-blue-700 hover:text-blue-900 hover:bg-blue-500/20 dark:text-blue-300 dark:hover:text-blue-100"
            onClick={handleDismiss}
          >
            <XIcon className="size-4" />
          </Button>
        </div>
      </div>
    );
  }
  return null;
}

View File

@@ -0,0 +1,246 @@
import { ScanSearchIcon } from "lucide-react";
import { useCallback, useEffect, useState } from "react";
import { useTranslation } from "../../../i18n/context";
import { Button } from "../../ui/button";
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from "../../ui/dialog";
import { Input } from "../../ui/input";
import { Label } from "../../ui/label";
import { Textarea } from "../../ui/textarea";
/** Platform on which the audited product runs. */
type UxAuditPlatform = "desktop" | "mobile" | "web";
/** Values collected by the UX-audit goal dialog and passed to onSubmit. */
export interface UxAuditFormData {
  /** URL of the page/product to audit (http/https only). */
  targetLink: string;
  platform: UxAuditPlatform;
  /** "Job to be done" — what the user is trying to accomplish. */
  jtbd: string;
  /** Optional free-text description of the target audience. */
  targetUsers: string;
}
/** Per-field validation error messages; an absent key means the field is valid. */
interface UxAuditFormErrors {
  targetLink?: string;
  jtbd?: string;
}
/**
 * Validate an audit target URL.
 *
 * Accepts only absolute http/https URLs and enforces the 2048-character
 * limit the original doc comment promised (mirrors the Input maxLength in
 * the dialog). Previously the limit was documented here but only checked
 * by the caller; the form-level error message is unchanged either way.
 *
 * @param value - Raw user input (callers are expected to trim it first).
 * @returns true when `value` parses as an http(s) URL within the limit.
 */
function isValidUrl(value: string): boolean {
  // Enforce the length cap up front; parsing very long strings is wasted work.
  if (value.length > 2048) {
    return false;
  }
  try {
    const url = new URL(value);
    // Reject non-web schemes (ftp:, javascript:, chrome-extension:, ...).
    return url.protocol === "http:" || url.protocol === "https:";
  } catch {
    // new URL() throws on anything that is not an absolute URL.
    return false;
  }
}
/** Props for {@link UxAuditGoalDialog}. */
interface UxAuditGoalDialogProps {
  /** Controlled open state of the dialog. */
  open: boolean;
  /** Called when the dialog requests an open/close state change. */
  onOpenChange: (open: boolean) => void;
  /** Called with validated form data when the user starts the audit. */
  onSubmit: (data: UxAuditFormData) => void;
}
/**
 * Modal dialog that collects the goal of a UX audit: target URL, platform,
 * "job to be done" and (optionally) the target users. Validates the input
 * and hands the result to `onSubmit` before closing itself.
 */
export function UxAuditGoalDialog({
  open,
  onOpenChange,
  onSubmit,
}: UxAuditGoalDialogProps) {
  const { t } = useTranslation();
  const [formData, setFormData] = useState<UxAuditFormData>({
    targetLink: "",
    platform: "desktop",
    jtbd: "",
    targetUsers: "",
  });
  const [errors, setErrors] = useState<UxAuditFormErrors>({});
  // Reset form when dialog opens
  useEffect(() => {
    if (open) {
      setFormData({
        targetLink: "",
        platform: "desktop",
        jtbd: "",
        targetUsers: "",
      });
      setErrors({});
    }
  }, [open]);
  // Full validation run on submit; populates `errors` per field.
  const validateForm = useCallback((): boolean => {
    const newErrors: UxAuditFormErrors = {};
    // Validate target link (required, valid URL)
    const trimmedUrl = formData.targetLink.trim();
    if (!trimmedUrl) {
      newErrors.targetLink = t("uxAuditGoal.validation.required");
    } else if (!isValidUrl(trimmedUrl)) {
      newErrors.targetLink = t("uxAuditGoal.validation.invalidUrl");
    } else if (trimmedUrl.length > 2048) {
      newErrors.targetLink = t("uxAuditGoal.validation.invalidUrl");
    }
    // Validate JTBD (required, max 2000 chars)
    const trimmedJtbd = formData.jtbd.trim();
    if (!trimmedJtbd) {
      newErrors.jtbd = t("uxAuditGoal.validation.required");
    } else if (trimmedJtbd.length > 2000) {
      // NOTE(review): reuses the "required" message for over-length input;
      // unreachable in practice because the Textarea has maxLength={2000},
      // but a dedicated "too long" key would be clearer — confirm intent.
      newErrors.jtbd = t("uxAuditGoal.validation.required");
    }
    setErrors(newErrors);
    return Object.keys(newErrors).length === 0;
  }, [formData, t]);
  // Lightweight non-empty check used only to enable/disable the submit
  // button; full URL/length validation happens in validateForm on submit.
  const isFormValid =
    formData.targetLink.trim().length > 0 && formData.jtbd.trim().length > 0;
  const handleSubmit = useCallback(() => {
    if (validateForm()) {
      onSubmit(formData);
      onOpenChange(false);
    }
  }, [formData, onOpenChange, onSubmit, validateForm]);
  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="sm:max-w-[480px]">
        <DialogHeader>
          <DialogTitle className="flex items-center gap-2">
            <ScanSearchIcon className="w-5 h-5 text-cyan-600" />
            {t("uxAuditGoal.dialog.title")}
          </DialogTitle>
          <DialogDescription>
            {t("uxAuditGoal.dialog.description")}
          </DialogDescription>
        </DialogHeader>
        <div className="grid gap-4 py-4">
          {/* Target Link */}
          <div className="grid gap-2">
            <Label htmlFor="targetLink" className="flex items-center gap-1">
              {t("uxAuditGoal.fields.targetLink")}
              <span className="text-red-500">*</span>
            </Label>
            <Input
              id="targetLink"
              type="url"
              placeholder={t("uxAuditGoal.fields.targetLinkPlaceholder")}
              value={formData.targetLink}
              onChange={(e) => {
                setFormData((prev) => ({
                  ...prev,
                  targetLink: e.target.value,
                }));
                {/* Clear the stale error as soon as the user edits the field. */}
                if (errors.targetLink) {
                  setErrors((prev) => ({ ...prev, targetLink: undefined }));
                }
              }}
              className={errors.targetLink ? "border-red-500" : ""}
              maxLength={2048}
            />
            {errors.targetLink && (
              <p className="text-xs text-red-500">{errors.targetLink}</p>
            )}
          </div>
          {/* Platform */}
          <div className="grid gap-2">
            <Label className="flex items-center gap-1">
              {t("uxAuditGoal.fields.platform")}
              <span className="text-red-500">*</span>
            </Label>
            <p className="text-xs text-muted-foreground">
              {t("uxAuditGoal.fields.platformHint")}
            </p>
            <div className="flex gap-2">
              {(["desktop", "mobile", "web"] as const).map((platform) => (
                <Button
                  key={platform}
                  type="button"
                  variant={
                    formData.platform === platform ? "default" : "outline"
                  }
                  size="sm"
                  onClick={() => setFormData((prev) => ({ ...prev, platform }))}
                >
                  {t(`uxAuditGoal.platform.${platform}`)}
                </Button>
              ))}
            </div>
          </div>
          {/* JTBD */}
          <div className="grid gap-2">
            <Label htmlFor="jtbd" className="flex items-center gap-1">
              {t("uxAuditGoal.fields.jtbd")}
              <span className="text-red-500">*</span>
            </Label>
            <p className="text-xs text-muted-foreground">
              {t("uxAuditGoal.fields.jtbdHint")}
            </p>
            <Textarea
              id="jtbd"
              placeholder={t("uxAuditGoal.fields.jtbdPlaceholder")}
              value={formData.jtbd}
              onChange={(e) => {
                setFormData((prev) => ({ ...prev, jtbd: e.target.value }));
                if (errors.jtbd) {
                  setErrors((prev) => ({ ...prev, jtbd: undefined }));
                }
              }}
              className={errors.jtbd ? "border-red-500" : ""}
              rows={3}
              maxLength={2000}
            />
            {errors.jtbd && (
              <p className="text-xs text-red-500">{errors.jtbd}</p>
            )}
          </div>
          {/* Target Users (optional) */}
          <div className="grid gap-2">
            <Label htmlFor="targetUsers">
              {t("uxAuditGoal.fields.targetUsers")}
            </Label>
            <p className="text-xs text-muted-foreground">
              {t("uxAuditGoal.fields.targetUsersHint")}
            </p>
            <Input
              id="targetUsers"
              placeholder={t("uxAuditGoal.fields.targetUsersPlaceholder")}
              value={formData.targetUsers}
              onChange={(e) =>
                setFormData((prev) => ({
                  ...prev,
                  targetUsers: e.target.value,
                }))
              }
              maxLength={500}
            />
          </div>
        </div>
        <DialogFooter>
          <Button
            type="button"
            variant="outline"
            onClick={() => onOpenChange(false)}
          >
            {t("uxAuditGoal.actions.cancel")}
          </Button>
          <Button type="button" onClick={handleSubmit} disabled={!isFormValid}>
            {t("uxAuditGoal.actions.start")}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}

View File

@@ -1,53 +1,75 @@
import {
DollarSignIcon,
CameraIcon,
FileTextIcon,
LayersIcon,
ScanSearchIcon,
SearchIcon,
} from "lucide-react";
import { useMemo } from "react";
import { useTranslation } from "../../../i18n/context";
import { cn } from "../../../lib/utils";
import type { WelcomeScreenProps, WelcomeSuggestion } from "../../../types";
import { Suggestion, Suggestions } from "../../ai-elements/suggestion";
import { useComponentsContext } from "../context";
/**
* Default suggestions for the welcome screen
* Build i18n-driven default suggestions matching legacy AIPex layout.
*/
const DEFAULT_SUGGESTIONS: WelcomeSuggestion[] = [
{
icon: LayersIcon,
text: "Help me organize my browser tabs by topic",
iconColor: "text-blue-600",
bgColor: "bg-blue-100",
},
{
icon: FileTextIcon,
text: "Summarize this page for me",
iconColor: "text-green-600",
bgColor: "bg-green-100",
},
{
icon: SearchIcon,
text: "Research a topic across multiple tabs",
iconColor: "text-purple-600",
bgColor: "bg-purple-100",
},
{
icon: DollarSignIcon,
text: "Compare prices across shopping tabs",
iconColor: "text-orange-600",
bgColor: "bg-orange-100",
},
];
function useDefaultSuggestions(): WelcomeSuggestion[] {
const { t } = useTranslation();
return useMemo(
() => [
{
icon: FileTextIcon,
text: t("welcome.analyzePage"),
iconColor: "text-green-600",
bgColor: "bg-green-100",
},
{
icon: LayersIcon,
text: t("welcome.organizeTabs"),
iconColor: "text-blue-600",
bgColor: "bg-blue-100",
},
{
icon: SearchIcon,
text: t("welcome.research"),
iconColor: "text-purple-600",
bgColor: "bg-purple-100",
},
{
icon: CameraIcon,
text: t("welcome.screenRecording"),
iconColor: "text-orange-600",
bgColor: "bg-orange-100",
},
{
icon: ScanSearchIcon,
text: t("welcome.uxAuditGoal"),
iconColor: "text-cyan-600",
bgColor: "bg-cyan-100",
isUxAudit: true,
},
],
[t],
);
}
/**
* Default WelcomeScreen component
*/
export function DefaultWelcomeScreen({
onSuggestionClick,
suggestions = DEFAULT_SUGGESTIONS,
onUxAuditClick,
suggestions,
className,
...props
}: WelcomeScreenProps) {
const { t } = useTranslation();
const defaultSuggestions = useDefaultSuggestions();
const effectiveSuggestions = suggestions ?? defaultSuggestions;
return (
<div
className={cn(
@@ -58,22 +80,27 @@ export function DefaultWelcomeScreen({
>
<div className="text-center mb-6 sm:mb-8">
<h3 className="text-xl font-semibold text-gray-900 dark:text-gray-100 mb-2">
Welcome to AIPex
{t("welcome.title")}
</h3>
<p className="text-sm text-gray-600 dark:text-gray-400">
Your AI-powered browser assistant
{t("welcome.subtitle")}
</p>
</div>
<div className="w-full max-w-2xl">
<Suggestions className="grid gap-3 sm:gap-4 sm:grid-cols-2 w-full">
{suggestions.map((suggestion) => {
{effectiveSuggestions.map((suggestion) => {
const Icon = suggestion.icon;
// For UX audit suggestion, use the special handler if available
const handleClick =
suggestion.isUxAudit && onUxAuditClick
? () => onUxAuditClick()
: onSuggestionClick;
return (
<Suggestion
key={suggestion.text}
suggestion={suggestion.text}
onClick={onSuggestionClick}
onClick={handleClick}
variant="outline"
size="lg"
className={cn(

View File

@@ -39,6 +39,8 @@ export type {
} from "../../types";
// Individual component exports
export {
type AutomationModeValue,
BuyTokenPrompt,
ConfigurationGuide,
type ConfigurationGuideProps,
DefaultHeader,
@@ -49,10 +51,16 @@ export {
type ExtendedInputAreaProps,
Header,
InputArea,
LoginPrompt,
MessageItem,
MessageList,
ModeIndicator,
ModelChangePrompt,
type ModelInfo,
TokenUsageIndicator,
type TokenUsageIndicatorProps,
UpdateBanner,
type VersionCheckResult,
WelcomeScreen,
} from "./components";
// Default export for backward compatibility

View File

@@ -19,6 +19,7 @@ import {
Mail,
MessageCircle,
MessageSquare,
Mic,
Package,
Palette,
Plus,
@@ -31,6 +32,7 @@ import {
} from "lucide-react";
import { useCallback, useEffect, useMemo, useState } from "react";
import { useTranslation } from "../../i18n/context";
import { buildWebsiteUrl } from "../../lib/config/website.js";
import { cn } from "../../lib/utils";
import { useTheme } from "../../theme/context";
import { DEFAULT_MODELS } from "../chatbot/constants";
@@ -219,7 +221,12 @@ export function SettingsPage({
onSave,
onTestConnection,
skillsContent,
sttConfig,
initialTab,
initialSkill: _initialSkill,
}: SettingsPageProps) {
// initialSkill is reserved for future use (pre-select a skill when initialTab="skills")
void _initialSkill;
const { t, language, changeLanguage } = useTranslation();
const { theme, changeTheme, effectiveTheme } = useTheme();
@@ -245,10 +252,18 @@ export function SettingsPage({
message: "",
});
const [showToken, setShowToken] = useState(false);
const [activeTab, setActiveTab] = useState<SettingsTab>("general");
const [activeTab, setActiveTab] = useState<SettingsTab>(
initialTab ?? "general",
);
const [searchTerm, setSearchTerm] = useState("");
const [dataSharingEnabled, setDataSharingEnabled] = useState(true);
// ElevenLabs STT state (independent of main settings blob)
const [sttApiKey, setSttApiKey] = useState("");
const [sttModelId, setSttModelId] = useState("");
const [showSttKey, setShowSttKey] = useState(false);
const [isSavingStt, setIsSavingStt] = useState(false);
useEffect(() => {
const loadSettings = async () => {
try {
@@ -315,6 +330,15 @@ export function SettingsPage({
loadSettings();
}, [storageAdapter, storageKey]);
// Load ElevenLabs STT config when adapter is provided
useEffect(() => {
if (!sttConfig) return;
sttConfig.load().then(({ apiKey, modelId }) => {
setSttApiKey(apiKey);
setSttModelId(modelId);
});
}, [sttConfig]);
const updateSettingsFromModel = useCallback((model: CustomModelConfig) => {
const providerKey = resolveProviderKey(model);
setSettings((prev: AppSettings) => ({
@@ -676,6 +700,21 @@ export function SettingsPage({
[storageAdapter, storageKey, settings],
);
const handleSaveStt = useCallback(async () => {
if (!sttConfig) return;
setIsSavingStt(true);
try {
await sttConfig.save({ apiKey: sttApiKey, modelId: sttModelId });
setSaveStatus({ type: "success", message: t("settings.saveSuccess") });
setTimeout(() => setSaveStatus({ type: "", message: "" }), 3000);
} catch (error) {
console.error("Error saving STT settings:", error);
setSaveStatus({ type: "error", message: t("settings.saveError") });
} finally {
setIsSavingStt(false);
}
}, [sttConfig, sttApiKey, sttModelId, t]);
const filteredModels = useMemo(() => {
const term = searchTerm.toLowerCase();
if (!term) return customModels;
@@ -930,6 +969,144 @@ export function SettingsPage({
</CardContent>
</Card>
{/* ElevenLabs STT Configuration (shown when adapter provided) */}
{sttConfig && (
<Card>
<CardHeader>
<CardTitle className="flex items-center gap-2">
<Mic className="h-5 w-5" />
{language === "zh"
? "ElevenLabs 语音转文本"
: "ElevenLabs Speech-to-Text"}
</CardTitle>
<CardDescription>
{language === "zh"
? "配置 ElevenLabs API 密钥以启用语音注释功能"
: "Configure ElevenLabs API key to enable voice annotation feature"}
</CardDescription>
</CardHeader>
<CardContent className="space-y-4">
<div className="space-y-2">
<Label htmlFor="sttApiKey">
{language === "zh" ? "API 密钥" : "API Key"}
<span className="text-destructive ml-1">*</span>
</Label>
<div className="relative">
<Input
id="sttApiKey"
type={showSttKey ? "text" : "password"}
value={sttApiKey}
onChange={(e) => setSttApiKey(e.target.value)}
placeholder="xi-..."
className="pr-10"
/>
<Button
type="button"
variant="ghost"
size="icon"
onClick={() => setShowSttKey(!showSttKey)}
className="absolute right-0 top-0 h-full px-3 py-2 hover:bg-transparent"
>
{showSttKey ? (
<EyeOff className="h-4 w-4" />
) : (
<Eye className="h-4 w-4" />
)}
</Button>
</div>
<p className="text-xs text-muted-foreground">
{language === "zh"
? "在 ElevenLabs 获取 API 密钥:"
: "Get your API key from ElevenLabs:"}{" "}
<a
href="https://elevenlabs.io/app/developers/api-keys"
target="_blank"
rel="noopener noreferrer"
className="text-primary hover:underline inline-flex items-center gap-1"
>
elevenlabs.io
<ExternalLink className="h-3 w-3" />
</a>
</p>
</div>
<div className="space-y-2">
<Label htmlFor="sttModelId">
{language === "zh"
? "模型 ID可选"
: "Model ID (Optional)"}
</Label>
<Input
id="sttModelId"
type="text"
value={sttModelId}
onChange={(e) => setSttModelId(e.target.value)}
placeholder={
language === "zh"
? "留空使用默认模型"
: "Leave blank to use default model"
}
/>
<p className="text-xs text-muted-foreground">
{language === "zh"
? "默认使用通用多语言模型。如需指定特定模型,请输入模型 ID。"
: "Default uses the general multilingual model. Specify a model ID if needed."}
</p>
</div>
<div className="flex gap-2">
<Button
onClick={handleSaveStt}
disabled={isSavingStt}
size="sm"
>
{isSavingStt ? (
<>
<div className="animate-spin rounded-full h-4 w-4 border-2 border-white border-t-transparent mr-2" />
{language === "zh" ? "保存中..." : "Saving..."}
</>
) : language === "zh" ? (
"保存配置"
) : (
"Save Configuration"
)}
</Button>
{sttApiKey && (
<Button
variant="outline"
size="sm"
onClick={() => {
setSttApiKey("");
setSaveStatus({
type: "info",
message:
language === "zh"
? "已清空,点击保存以生效"
: "Cleared. Click Save to apply.",
});
}}
>
{language === "zh" ? "清空" : "Clear"}
</Button>
)}
</div>
{sttApiKey && (
<Alert>
<AlertDescription className="flex items-center gap-2">
<CheckCircle className="h-4 w-4 text-green-600" />
<span className="text-sm">
{language === "zh"
? "API 密钥已配置"
: "API key is configured"}
</span>
</AlertDescription>
</Alert>
)}
</CardContent>
</Card>
)}
{/* About Us Section */}
<Card>
<CardHeader>
@@ -978,7 +1155,7 @@ export function SettingsPage({
<TooltipTrigger asChild>
<Button asChild size="icon" variant="outline">
<a
href="https://www.claudechrome.com/contact"
href={buildWebsiteUrl("/contact")}
target="_blank"
rel="noopener noreferrer"
>
@@ -1022,7 +1199,7 @@ export function SettingsPage({
<TooltipTrigger asChild>
<Button asChild size="icon" variant="outline">
<a
href="https://www.claudechrome.com/feedback"
href={buildWebsiteUrl("/feedback")}
target="_blank"
rel="noopener noreferrer"
>
@@ -1387,18 +1564,56 @@ export function SettingsPage({
{t("settings.aiModel")}
<span className="text-destructive ml-1">*</span>
</Label>
<Input
id="aiModel"
type="text"
value={selectedModel.aiModel || ""}
onChange={(e) =>
handleModelFieldChange(
"aiModel",
e.target.value,
)
}
placeholder={t("settings.modelPlaceholder")}
/>
{selectedProviderMeta.models.length > 0 ? (
<Select
value={selectedModel.aiModel || ""}
onValueChange={(value: string) =>
handleModelFieldChange("aiModel", value)
}
>
<SelectTrigger>
<SelectValue
placeholder={t("settings.modelPlaceholder")}
/>
</SelectTrigger>
<SelectContent>
{selectedProviderMeta.models.map(
(model: string) => (
<SelectItem key={model} value={model}>
{model}
</SelectItem>
),
)}
{/* Allow keeping a custom value that is not in the preset list */}
{selectedModel.aiModel &&
!selectedProviderMeta.models.includes(
selectedModel.aiModel as never,
) && (
<SelectItem value={selectedModel.aiModel}>
{selectedModel.aiModel}
</SelectItem>
)}
</SelectContent>
</Select>
) : (
<Input
id="aiModel"
type="text"
value={selectedModel.aiModel || ""}
onChange={(e) =>
handleModelFieldChange(
"aiModel",
e.target.value,
)
}
placeholder={t("settings.modelPlaceholder")}
/>
)}
<p className="text-xs text-muted-foreground">
{language === "zh"
? "提示: 选择适合你需求的模型。"
: "Tip: Choose a model that fits your needs."}
</p>
</div>
</>
) : (
@@ -1471,4 +1686,4 @@ export function SettingsPage({
);
}
export type { SettingsPageProps } from "./types";
export type { SettingsPageProps, STTConfigAdapter } from "./types";

View File

@@ -5,6 +5,16 @@ import type {
} from "@aipexstudio/aipex-core";
import type { ReactNode } from "react";
/**
* Callbacks for loading / saving ElevenLabs Speech-to-Text configuration.
* Keys are stored separately from the main settings blob so that VoiceInput
* can read them without loading the full settings.
*/
export interface STTConfigAdapter {
load: () => Promise<{ apiKey: string; modelId: string }>;
save: (values: { apiKey: string; modelId: string }) => Promise<void>;
}
export interface SettingsPageProps {
storageAdapter: KeyValueStorage<unknown>;
storageKey?: string;
@@ -12,6 +22,12 @@ export interface SettingsPageProps {
onSave?: (settings: AppSettings) => void;
onTestConnection?: (settings: AppSettings) => Promise<boolean>;
skillsContent?: ReactNode;
/** Optional ElevenLabs STT config adapter; when provided the STT card is shown. */
sttConfig?: STTConfigAdapter;
/** Pre-select a tab on mount (e.g. from URL params). */
initialTab?: SettingsTab;
/** Pre-select a skill to open details for (used with initialTab="skills"). */
initialSkill?: string;
}
export interface ProviderConfig {

View File

@@ -0,0 +1,62 @@
/**
* Breathing Border Overlay Component
* 全屏梦幻呼吸灯边框效果在AI对话进行时显示
*/
import type React from "react";
/** Props for {@link BreathingBorderOverlay}. */
interface BreathingBorderOverlayProps {
  /** When false the overlay is not rendered at all. */
  isVisible: boolean;
}
/**
 * Full-screen breathing-border overlay, shown while an AI conversation is
 * in progress. Renders a fixed, click-transparent layer whose inset
 * box-shadow pulses from the screen edges inward.
 *
 * Fix: the former `opacity: isVisible ? 1 : 0` and its 0.5s opacity
 * transition were dead code — the component returns null when hidden, so
 * the node is removed before any fade could run. Replaced with a constant
 * opacity. (Comments translated from Chinese.)
 */
export const BreathingBorderOverlay: React.FC<BreathingBorderOverlayProps> = ({
  isVisible,
}) => {
  // Nothing is mounted while hidden; visibility is binary.
  if (!isVisible) return null;
  return (
    <>
      {/* Full-screen glow container — breathing effect from the edges inward */}
      <div
        style={{
          position: "fixed",
          top: 0,
          left: 0,
          width: "100vw",
          height: "100vh",
          // Sit above normal page content but below the topmost overlays.
          zIndex: 999998,
          // Never intercept clicks or scroll — purely decorative.
          pointerEvents: "none",
          opacity: 1,
          animation: "breatheInwardGlow 2.5s ease-in-out infinite",
          // Static baseline shadow shown before/between keyframe updates.
          boxShadow: `
            inset 0 0 15px 3px rgba(37, 99, 235, 0.5),
            inset 0 0 25px 5px rgba(59, 130, 246, 0.4),
            inset 0 0 35px 7px rgba(96, 165, 250, 0.3),
            inset 0 0 45px 9px rgba(147, 197, 253, 0.2)
          `,
        }}
      />
      {/* Keyframes driving the breathing glow animation */}
      <style>{`
        @keyframes breatheInwardGlow {
          0%, 100% {
            box-shadow:
              inset 0 0 12px 3px rgba(37, 99, 235, 0.35),
              inset 0 0 20px 5px rgba(59, 130, 246, 0.28),
              inset 0 0 28px 6px rgba(96, 165, 250, 0.22),
              inset 0 0 35px 8px rgba(147, 197, 253, 0.15);
          }
          50% {
            box-shadow:
              inset 0 0 20px 5px rgba(37, 99, 235, 0.7),
              inset 0 0 30px 7px rgba(59, 130, 246, 0.6),
              inset 0 0 40px 9px rgba(96, 165, 250, 0.5),
              inset 0 0 50px 11px rgba(147, 197, 253, 0.35);
          }
        }
      `}</style>
    </>
  );
};

View File

@@ -0,0 +1,105 @@
import type React from "react";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
import { oneLight } from "react-syntax-highlighter/dist/esm/styles/prism";
/** Props for {@link CodeBlock}; unknown extras pass through to the highlighter. */
interface CodeBlockProps {
  /** Raw code content (children of the markdown <code> element). */
  children: React.ReactNode;
  /** Markdown-style class, e.g. "language-ts", used to detect the language. */
  className?: string;
  [key: string]: unknown;
}
// Map common language aliases to proper language names.
// Keys are what authors write after ``` in markdown fences; values are the
// grammar names handed to the Prism highlighter.
// NOTE(review): jsx/tsx map to plain javascript/typescript — Prism also
// ships dedicated "jsx"/"tsx" grammars; confirm whether JSX-aware
// highlighting was intentionally skipped.
const languageMap: Record<string, string> = {
  js: "javascript",
  ts: "typescript",
  jsx: "javascript",
  tsx: "typescript",
  py: "python",
  rb: "ruby",
  sh: "bash",
  zsh: "bash",
  bash: "bash",
  shell: "bash",
  json: "json",
  html: "html",
  css: "css",
  scss: "scss",
  sass: "sass",
  md: "markdown",
  markdown: "markdown",
  sql: "sql",
  php: "php",
  java: "java",
  cpp: "cpp",
  c: "c",
  cs: "csharp",
  csharp: "csharp",
  go: "go",
  rust: "rust",
  rs: "rust",
  swift: "swift",
  kotlin: "kotlin",
  scala: "scala",
  r: "r",
  dart: "dart",
  elixir: "elixir",
  clojure: "clojure",
  haskell: "haskell",
  lua: "lua",
  perl: "perl",
  groovy: "groovy",
  yaml: "yaml",
  yml: "yaml",
  toml: "toml",
  ini: "ini",
  xml: "xml",
  svg: "xml",
  diff: "diff",
  git: "diff",
};
// Code block component for syntax highlighting.
// Renders a Prism-highlighted block with a small language badge in the
// top-right corner; unknown languages fall back to plain "text".
export const CodeBlock: React.FC<CodeBlockProps> = ({
  children,
  className,
  ...props
}) => {
  // Pull the language out of a markdown-style "language-xxx" class name.
  const match = /language-(\w+)/.exec(className || "");
  const language = match?.[1] ?? "text";
  // Normalize aliases (e.g. "ts" -> "typescript"); pass through unknowns.
  const mappedLanguage = languageMap[language] ?? language;
  return (
    <div className="relative group mb-6">
      {/* Language label */}
      <div className="absolute top-0 right-0 px-3 py-1 text-xs font-mono text-gray-600 bg-white rounded-bl-xl border-l border-b border-gray-200 z-10 shadow-sm font-medium">
        {mappedLanguage}
      </div>
      <SyntaxHighlighter
        style={oneLight}
        language={mappedLanguage}
        PreTag="div"
        className="rounded-xl border border-gray-200 text-sm overflow-hidden shadow-sm"
        customStyle={{
          margin: 0,
          padding: "1.25rem",
          paddingTop: "1.75rem", // Extra padding for language label
          backgroundColor: "#f8fafc",
          fontSize: "0.875rem",
          lineHeight: "1.7",
          fontFamily:
            'ui-monospace, SFMono-Regular, "SF Mono", Consolas, "Liberation Mono", Menlo, monospace',
          borderRadius: "0.75rem",
        }}
        showLineNumbers={false}
        wrapLines={false}
        {...props}
      >
        {/* Markdown code children usually carry a trailing newline; strip it
            so the block doesn't render an empty last line. */}
        {String(children).replace(/\n$/, "")}
      </SyntaxHighlighter>
    </div>
  );
};
export default CodeBlock;

View File

@@ -6,6 +6,7 @@
import type React from "react";
import { useCallback, useEffect, useRef, useState } from "react";
import { useTranslation } from "../../i18n/hooks";
import { buildWebsiteUrl } from "../../lib/config/website.js";
import { cn } from "../../lib/utils";
import { isByokUserSimple } from "../../lib/voice/ai-config";
import { AudioRecorder } from "../../lib/voice/audio-recorder";
@@ -271,7 +272,7 @@ export const VoiceInput: React.FC<VoiceInputProps> = ({
);
setIsPermissionError(true);
// Open voice guide page
window.open("https://www.claudechrome.com/voice/guide", "_blank");
window.open(buildWebsiteUrl("/voice/guide"), "_blank");
} else {
setIsPermissionError(false);
}
@@ -442,10 +443,7 @@ export const VoiceInput: React.FC<VoiceInputProps> = ({
size="sm"
className="text-muted-foreground"
onClick={() => {
window.open(
"https://www.claudechrome.com/voice/guide",
"_blank",
);
window.open(buildWebsiteUrl("/voice/guide"), "_blank");
}}
>
View Guide

13
packages/aipex-react/src/env.d.ts vendored Normal file
View File

@@ -0,0 +1,13 @@
/**
* Environment type definitions for aipex-react
* These are build-time environment variables when bundled with Vite.
*/
// Ambient declaration merged into the global ImportMetaEnv so that
// `import.meta.env.VITE_WEBSITE_URL` type-checks in this package.
interface ImportMetaEnv {
  /** Base URL for the website (e.g., "https://www.claudechrome.com") */
  readonly VITE_WEBSITE_URL?: string;
}
// Ambient augmentation: gives `import.meta` a typed `env` property.
interface ImportMeta {
  readonly env: ImportMetaEnv;
}

View File

@@ -91,6 +91,9 @@ export function useChat(
const [sessionId, setSessionId] = useState<string | null>(null);
const [metrics, setMetrics] = useState<AgentMetrics | null>(null);
// Cumulative session-level metrics (sum across all runs)
const cumulativeMetricsRef = useRef<AgentMetrics | null>(null);
// Refs for stable callbacks
const handlersRef = useRef(handlers);
handlersRef.current = handlers;
@@ -153,13 +156,24 @@ export function useChat(
handlersRef.current?.onError?.(event.error);
}
// Handle metrics update
// Handle metrics update accumulate across the session
if (event.type === "metrics_update") {
setMetrics(event.metrics);
handlersRef.current?.onMetricsUpdate?.(
event.metrics,
event.sessionId,
);
const prev = cumulativeMetricsRef.current;
const cumulative: AgentMetrics = {
tokensUsed: (prev?.tokensUsed ?? 0) + event.metrics.tokensUsed,
promptTokens:
(prev?.promptTokens ?? 0) + event.metrics.promptTokens,
completionTokens:
(prev?.completionTokens ?? 0) + event.metrics.completionTokens,
// Non-cumulative fields: use latest values
itemCount: event.metrics.itemCount,
maxTurns: event.metrics.maxTurns,
duration: (prev?.duration ?? 0) + event.metrics.duration,
startTime: prev?.startTime ?? event.metrics.startTime,
};
cumulativeMetricsRef.current = cumulative;
setMetrics(cumulative);
handlersRef.current?.onMetricsUpdate?.(cumulative, event.sessionId);
}
// Process the event through adapter
@@ -263,6 +277,7 @@ export function useChat(
activeGeneratorRef.current = null;
setSessionId(null);
setMetrics(null);
cumulativeMetricsRef.current = null;
adapter.reset(configRef.current?.initialMessages ?? []);
}, [adapter, agent, sessionId]);

View File

@@ -10,7 +10,9 @@
"send": "Send",
"stop": "Stop",
"processing": "Processing...",
"noActions": "No actions"
"noActions": "No actions",
"showThinkingDetails": "Show thinking details",
"clickToExpand": "Click to expand"
},
"settings": {
"title": "Settings",
@@ -22,7 +24,7 @@
"aiHost": "AI Host",
"aiToken": "API Key",
"aiModel": "AI Model",
"hostPlaceholder": "https://api.deepseek.com/chat/completions",
"hostPlaceholder": "https://api.deepseek.com/v1",
"tokenPlaceholder": "Enter your API token",
"modelPlaceholder": "deepseek-chat",
"saveSuccess": "Settings saved successfully!",
@@ -66,7 +68,9 @@
"newChat": "New Chat",
"settings": "Settings",
"close": "Close",
"stopResponse": "Stop AI response"
"stopResponse": "Stop AI response",
"switchToVoice": "Switch to voice mode",
"switchToText": "Switch to text mode"
},
"mode": {
"focus": "Focus Mode",
@@ -101,10 +105,43 @@
"welcome": {
"title": "Welcome to AIpex",
"subtitle": "Choose a quick action or ask anything to get started",
"analyzePage": "Summarize this page",
"organizeTabs": "Please organize my open tabs by topic and purpose",
"analyzePage": "Summarize this page and save key points to clipboard",
"research": "Please use Google to research topic 'MCP'",
"comparePrice": "Compare the price of iPhone 17"
"screenRecording": "Start User Manual Guide creation",
"uxAuditGoal": "Deep audit your UX goals"
},
"uxAuditGoal": {
"dialog": {
"title": "Deep Audit Your UX Goals",
"description": "Provide details about the interface you want to audit"
},
"fields": {
"targetLink": "Target Link",
"targetLinkPlaceholder": "Enter the URL you want to audit",
"platform": "Platform",
"platformHint": "Select the platform for this audit",
"jtbd": "Task Description (JTBD)",
"jtbdPlaceholder": "e.g., A Zoom user wants to schedule a meeting for tomorrow morning...",
"jtbdHint": "Briefly describe the flow you want to test",
"targetUsers": "Target Users",
"targetUsersPlaceholder": "e.g., First-time users, power users, elderly...",
"targetUsersHint": "Optional: Describe your target audience"
},
"platform": {
"desktop": "Desktop",
"mobile": "Mobile",
"web": "Web"
},
"actions": {
"start": "Start Audit",
"cancel": "Cancel"
},
"validation": {
"required": "This field is required",
"invalidUrl": "Please enter a valid URL (http:// or https://)"
},
"messageTemplate": "Please perform a UX audit walkthrough on the following target:\n\n**Target URL:** {{url}}\n**Platform:** {{platform}}\n**Task (JTBD):** {{jtbd}}{{targetUsersLine}}\n\nPlease use the ux-audit-walkthrough skill to conduct a thorough UX evaluation."
},
"config": {
"title": "Setup Required",

View File

@@ -10,7 +10,9 @@
"send": "发送",
"stop": "停止",
"processing": "处理中...",
"noActions": "无可用操作"
"noActions": "无可用操作",
"showThinkingDetails": "显示思考过程",
"clickToExpand": "点击展开"
},
"settings": {
"title": "设置",
@@ -22,7 +24,7 @@
"aiHost": "AI 服务地址",
"aiToken": "API 密钥",
"aiModel": "AI 模型",
"hostPlaceholder": "https://api.deepseek.com/chat/completions",
"hostPlaceholder": "https://api.deepseek.com/v1",
"tokenPlaceholder": "请输入您的 API 令牌",
"modelPlaceholder": "deepseek-chat",
"saveSuccess": "设置保存成功!",
@@ -66,7 +68,9 @@
"newChat": "新对话",
"settings": "设置",
"close": "关闭",
"stopResponse": "停止 AI 回应"
"stopResponse": "停止 AI 回应",
"switchToVoice": "切换到语音模式",
"switchToText": "切换到文字模式"
},
"mode": {
"focus": "聚焦模式",
@@ -101,10 +105,43 @@
"welcome": {
"title": "欢迎使用 AIpex",
"subtitle": "选择一个快捷操作或询问任何问题来开始",
"analyzePage": "总结此页面",
"organizeTabs": "请按主题和用途整理我的打开标签页",
"analyzePage": "总结此页面并将关键点保存到剪贴板",
"research": "请使用 Google 研究主题 'MCP'",
"comparePrice": "比较 iPhone 17 的价格"
"screenRecording": "开始创建用户操作手册",
"uxAuditGoal": "深度审计您的 UX 目标"
},
"uxAuditGoal": {
"dialog": {
      "title": "深度审计您的 UX 目标",
"description": "请提供您想要审查的界面详情"
},
"fields": {
"targetLink": "目标链接",
"targetLinkPlaceholder": "输入想要测试的目标链接",
"platform": "运行平台",
"platformHint": "选择此次审查的平台",
"jtbd": "任务描述(JTBD)",
"jtbdPlaceholder": "举例: 用户想要预定明天早上的会议...",
"jtbdHint": "简单概括你想要测试的flow",
"targetUsers": "目标用户",
"targetUsersPlaceholder": "例如:首次用户、高级用户、老年人...",
"targetUsersHint": "可选:描述您的目标受众"
},
"platform": {
"desktop": "桌面端",
"mobile": "移动端",
"web": "网页端"
},
"actions": {
"start": "开始测试",
"cancel": "取消"
},
"validation": {
"required": "此字段为必填项",
      "invalidUrl": "请输入有效的URL（http:// 或 https://）"
},
    "messageTemplate": "请对以下目标进行UX审查走查：\n\n**目标URL:** {{url}}\n**平台:** {{platform}}\n**任务(JTBD):** {{jtbd}}{{targetUsersLine}}\n\n请使用 ux-audit-walkthrough 技能进行全面的UX评估。"
},
"config": {
"title": "需要配置",

View File

@@ -13,6 +13,8 @@ export interface TranslationResources {
stop: string;
processing: string;
noActions: string;
showThinkingDetails: string;
clickToExpand: string;
};
settings: {
title: string;
@@ -67,6 +69,8 @@ export interface TranslationResources {
settings: string;
close: string;
stopResponse: string;
switchToVoice: string;
switchToText: string;
};
mode: {
focus: string;
@@ -95,10 +99,43 @@ export interface TranslationResources {
welcome: {
title: string;
subtitle: string;
organizeTabs: string;
analyzePage: string;
organizeTabs: string;
research: string;
comparePrice: string;
screenRecording: string;
uxAuditGoal: string;
};
uxAuditGoal: {
dialog: {
title: string;
description: string;
};
fields: {
targetLink: string;
targetLinkPlaceholder: string;
platform: string;
platformHint: string;
jtbd: string;
jtbdPlaceholder: string;
jtbdHint: string;
targetUsers: string;
targetUsersPlaceholder: string;
targetUsersHint: string;
};
platform: {
desktop: string;
mobile: string;
web: string;
};
actions: {
start: string;
cancel: string;
};
validation: {
required: string;
invalidUrl: string;
};
messageTemplate: string;
};
config: {
title: string;
@@ -194,6 +231,8 @@ export type BaseTranslationKey =
| "common.stop"
| "common.processing"
| "common.noActions"
| "common.showThinkingDetails"
| "common.clickToExpand"
| "settings.title"
| "settings.subtitle"
| "settings.language"
@@ -244,6 +283,8 @@ export type BaseTranslationKey =
| "tooltip.settings"
| "tooltip.close"
| "tooltip.stopResponse"
| "tooltip.switchToVoice"
| "tooltip.switchToText"
| "mode.focus"
| "mode.background"
| "mode.selectMode"
@@ -262,10 +303,31 @@ export type BaseTranslationKey =
| "input.placeholder3"
| "welcome.title"
| "welcome.subtitle"
| "welcome.organizeTabs"
| "welcome.analyzePage"
| "welcome.organizeTabs"
| "welcome.research"
| "welcome.comparePrice"
| "welcome.screenRecording"
| "welcome.uxAuditGoal"
| "uxAuditGoal.dialog.title"
| "uxAuditGoal.dialog.description"
| "uxAuditGoal.fields.targetLink"
| "uxAuditGoal.fields.targetLinkPlaceholder"
| "uxAuditGoal.fields.platform"
| "uxAuditGoal.fields.platformHint"
| "uxAuditGoal.fields.jtbd"
| "uxAuditGoal.fields.jtbdPlaceholder"
| "uxAuditGoal.fields.jtbdHint"
| "uxAuditGoal.fields.targetUsers"
| "uxAuditGoal.fields.targetUsersPlaceholder"
| "uxAuditGoal.fields.targetUsersHint"
| "uxAuditGoal.platform.desktop"
| "uxAuditGoal.platform.mobile"
| "uxAuditGoal.platform.web"
| "uxAuditGoal.actions.start"
| "uxAuditGoal.actions.cancel"
| "uxAuditGoal.validation.required"
| "uxAuditGoal.validation.invalidUrl"
| "uxAuditGoal.messageTemplate"
| "config.title"
| "config.description"
| "config.apiTokenRequired"

View File

@@ -0,0 +1,54 @@
/**
* Website URL configuration for aipex-react
* Provides centralized, env-configurable website base URL.
*/
// Fallback used when no (valid) VITE_WEBSITE_URL is configured at build time.
const DEFAULT_WEBSITE_URL = "https://www.claudechrome.com";
/**
 * Resolve the website URL from environment or use default.
 * Validates the URL and normalizes to origin (removes trailing paths).
 *
 * Accepts only http(s) URLs; anything else (invalid string, unsupported
 * scheme such as javascript: or file:) falls back to DEFAULT_WEBSITE_URL.
 */
function resolveWebsiteUrl(): string {
  // Read from import.meta.env (Vite) if available. Typed structurally instead
  // of `as any` so the compiler still checks the access chain.
  const meta = import.meta as { env?: { VITE_WEBSITE_URL?: unknown } };
  const envUrl = meta?.env?.VITE_WEBSITE_URL;
  if (!envUrl || typeof envUrl !== "string" || envUrl.trim() === "") {
    return DEFAULT_WEBSITE_URL;
  }
  try {
    const parsed = new URL(envUrl.trim());
    // Only http(s) origins are accepted; other schemes fall back to default.
    // (http is allowed so local dev servers work — see manifest's
    // externally_connectable localhost entry.)
    if (parsed.protocol !== "https:" && parsed.protocol !== "http:") {
      console.warn(
        "[website config] Invalid protocol, falling back to default",
      );
      return DEFAULT_WEBSITE_URL;
    }
    // Return origin (scheme + host + port, no path)
    return parsed.origin;
  } catch {
    console.warn(
      "[website config] Invalid VITE_WEBSITE_URL, falling back to default",
    );
    return DEFAULT_WEBSITE_URL;
  }
}
/**
 * The base website URL (origin only, no trailing slash).
 * Example: "https://www.claudechrome.com"
 */
export const WEBSITE_URL: string = resolveWebsiteUrl();
/**
 * Build a full URL from a path relative to the website.
 * @param path - Path starting with "/" (e.g., "/pricing", "/api/speech-to-text")
 * @returns Full URL string
 */
export function buildWebsiteUrl(path: string): string {
  // Ensure path starts with /
  const normalizedPath = path.startsWith("/") ? path : `/${path}`;
  return `${WEBSITE_URL}${normalizedPath}`;
}

View File

@@ -0,0 +1,180 @@
// API response types (must match server contract)
interface ApiModelPricing {
  /** Cost per 1M input tokens (USD — rendered as "$X.XX/1M tokens"). */
  input: number;
  /** Cost per 1M output tokens (USD). */
  output: number;
}
interface ApiModel {
  /** Provider-scoped model id, e.g. "anthropic/claude-3-haiku". */
  id: string;
  name: string;
  provider: string;
  description: string;
  pricing: ApiModelPricing;
}
// Envelope returned by GET /api/models.
interface ApiResponse {
  success: boolean;
  data: {
    models: ApiModel[];
    count: number;
    cache: {
      lastUpdate: number;
      modelCount: number;
    };
  };
}
// Internal model info used by the chatbot UI
export interface ModelInfo {
  id: string;
  name: string;
  provider: string;
  description: string;
  /** Whether tool/function calling is supported (always true for API models). */
  supportsTools: boolean;
  contextLength?: number;
  /** Human-readable pricing strings, e.g. "$0.30/1M tokens". */
  pricing?: {
    input: string;
    output: string;
  };
  /** Coarse cost bucket derived from input+output pricing. */
  priceLevel: "cheap" | "normal" | "expensive";
}
// Fallback models in case API fails — a small, known-good subset so the
// selector is never empty even when the models endpoint is unreachable.
const FALLBACK_MODELS: ModelInfo[] = [
  {
    id: "anthropic/claude-3-haiku",
    name: "Claude 3 Haiku",
    provider: "Anthropic",
    description: "Cost-effective choice for basic tasks",
    supportsTools: true,
    contextLength: 200_000,
    pricing: {
      input: "$0.30/1M tokens",
      output: "$1.50/1M tokens",
    },
    priceLevel: "cheap",
  },
  {
    id: "anthropic/claude-sonnet-4.5",
    name: "Claude Sonnet 4.5",
    provider: "Anthropic",
    description: "AI model for various tasks",
    supportsTools: true,
    contextLength: 200_000,
    pricing: {
      input: "$3.60/1M tokens",
      output: "$18.00/1M tokens",
    },
    priceLevel: "expensive",
  },
];
// NOTE(review): hard-coded host — consider buildWebsiteUrl from the website
// config module for consistency; confirm this file can import it.
const MODELS_API_URL = "https://www.claudechrome.com/api/models";
/**
 * Bucket a model's combined input+output cost (per 1M tokens) into a coarse
 * price tier: < 2 → "cheap", < 10 → "normal", otherwise "expensive".
 */
function getPriceLevel(
  pricing: ApiModelPricing,
): "cheap" | "normal" | "expensive" {
  const combined = pricing.input + pricing.output;
  if (combined < 2) return "cheap";
  return combined < 10 ? "normal" : "expensive";
}
/**
 * Convert a server ApiModel into the UI's ModelInfo shape, formatting the
 * numeric per-1M-token prices as display strings.
 */
function convertApiModel(apiModel: ApiModel): ModelInfo {
  const { input, output } = apiModel.pricing;
  return {
    id: apiModel.id,
    name: apiModel.name,
    provider: apiModel.provider,
    description: apiModel.description,
    // All models served by the API support tool calling.
    supportsTools: true,
    pricing: {
      input: `$${input.toFixed(2)}/1M tokens`,
      output: `$${output.toFixed(2)}/1M tokens`,
    },
    priceLevel: getPriceLevel(apiModel.pricing),
  };
}
/**
 * Structural type guard for the models API envelope. Verifies the top-level
 * shape and spot-checks the first model entry (cheap sanity check rather
 * than full schema validation).
 */
function isValidApiResponse(data: unknown): data is ApiResponse {
  if (typeof data !== "object" || data === null) return false;
  const envelope = data as Record<string, unknown>;
  if (typeof envelope.success !== "boolean") return false;
  const payload = envelope.data;
  if (typeof payload !== "object" || payload === null) return false;
  const models = (payload as Record<string, unknown>).models;
  if (!Array.isArray(models)) return false;
  // Validate first model shape if present
  if (models.length > 0) {
    const first = models[0] as Record<string, unknown>;
    if (typeof first.id !== "string" || typeof first.name !== "string") {
      return false;
    }
  }
  return true;
}
// Cache for models (module-level, shared by all callers)
let cachedModels: ModelInfo[] | null = null;
let lastFetchTime = 0;
const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
const MAX_MODELS = 200; // Safety cap on number of models
/**
 * Fetch models from the server API with caching and fallback.
 * Returns cached result if still valid (5 min TTL).
 * Falls back to FALLBACK_MODELS on any error.
 *
 * Fix: removed leftover debug `console.log` calls that dumped the raw
 * response and parsed payload to the console — contradicting the "do not
 * log sensitive details" policy stated in the catch branch below.
 */
export async function fetchModels(): Promise<ModelInfo[]> {
  // Return cached models if still valid
  if (cachedModels && Date.now() - lastFetchTime < CACHE_DURATION) {
    return cachedModels;
  }
  try {
    const response = await fetch(MODELS_API_URL);
    if (!response.ok) {
      throw new Error(`API request failed: ${response.status}`);
    }
    const data: unknown = await response.json();
    if (!isValidApiResponse(data)) {
      throw new Error("Invalid API response structure");
    }
    if (data.success && data.data.models.length > 0) {
      // Apply safety cap
      const models = data.data.models.slice(0, MAX_MODELS).map(convertApiModel);
      cachedModels = models;
      lastFetchTime = Date.now();
      return cachedModels;
    }
    throw new Error("Empty model list from API");
  } catch (_error) {
    // Return fallback - do not log sensitive details
    return FALLBACK_MODELS;
  }
}
/**
 * Fetch models and convert to the {name, value} format used by the model selector.
 */
export async function fetchModelsForSelector(): Promise<
  Array<{ name: string; value: string }>
> {
  const available = await fetchModels();
  return available.map(({ name, id }) => ({ name, value: id }));
}
/**
 * Fetch models as ModelInfo[] for ModelChangePrompt compatibility.
 */
export async function fetchModelsForPrompt(): Promise<ModelInfo[]> {
  return fetchModels();
}

View File

@@ -0,0 +1,177 @@
/**
* Screenshot storage using IndexedDB.
* Stores screenshots with a uid for efficient reference and retrieval.
* Applies an LRU eviction policy (max 50 screenshots).
*/
export interface ScreenshotData {
  /** Primary key in the object store (also embedded in markdown references). */
  uid: string;
  /** Complete data URL: data:image/png;base64,... */
  base64Data: string;
  /** Epoch milliseconds at save time; used for LRU ordering. */
  timestamp: number;
  /** Browser tab the screenshot was captured from, when known. */
  tabId?: number;
  /** Capture dimensions; zeros are used for fields the caller omitted. */
  metadata?: {
    width: number;
    height: number;
    viewportWidth: number;
    viewportHeight: number;
  };
}
const DB_NAME = "aipex-screenshots-db";
const DB_VERSION = 1;
const STORE_NAME = "screenshots";
// LRU cap: newest 50 screenshots are kept, older ones evicted.
const MAX_SCREENSHOTS = 50;
// Singleton connection shared by all operations in this module.
let db: IDBDatabase | null = null;
// In-flight open() promise so concurrent initializers share one request.
let initPromise: Promise<void> | null = null;
/**
 * Open (and if needed create/upgrade) the screenshots database, memoizing
 * both the connection and the in-flight open request so concurrent callers
 * share a single open().
 */
function initialize(): Promise<void> {
  // Reuse an in-flight open request.
  if (initPromise) return initPromise;
  // Already connected — nothing to do.
  if (db) return Promise.resolve();
  initPromise = new Promise<void>((resolve, reject) => {
    const request = indexedDB.open(DB_NAME, DB_VERSION);
    request.onerror = () => {
      // Clear the guard so a later call can retry the open.
      initPromise = null;
      reject(request.error);
    };
    request.onsuccess = () => {
      db = request.result;
      initPromise = null;
      resolve();
    };
    request.onupgradeneeded = (event) => {
      // First open (or version bump): create the store keyed by uid plus a
      // timestamp index.
      // NOTE(review): the index is currently unused — applyLRU sorts in memory.
      const database = (event.target as IDBOpenDBRequest).result;
      if (!database.objectStoreNames.contains(STORE_NAME)) {
        const store = database.createObjectStore(STORE_NAME, {
          keyPath: "uid",
        });
        store.createIndex("timestamp", "timestamp", { unique: false });
      }
    };
  });
  return initPromise;
}
/** Build a unique, time-sortable id: screenshot_<epoch-ms>_<random base36>. */
function generateUid(): string {
  const randomSuffix = Math.random().toString(36).slice(2, 11);
  return `screenshot_${Date.now()}_${randomSuffix}`;
}
/**
 * Evict the oldest screenshots beyond MAX_SCREENSHOTS.
 * Best-effort: the read and the deletes run in separate transactions (a save
 * landing between them is simply picked up by the next eviction), and the
 * delete transaction's completion is not awaited.
 */
async function applyLRU(): Promise<void> {
  if (!db) return;
  const tx = db.transaction([STORE_NAME], "readonly");
  const store = tx.objectStore(STORE_NAME);
  const all: ScreenshotData[] = await new Promise((res, rej) => {
    const req = store.getAll();
    req.onsuccess = () => res(req.result as ScreenshotData[]);
    req.onerror = () => rej(req.error);
  });
  if (all.length <= MAX_SCREENSHOTS) return;
  // Newest first; everything past the cap gets deleted.
  all.sort((a, b) => b.timestamp - a.timestamp);
  const toDelete = all.slice(MAX_SCREENSHOTS);
  const delTx = db.transaction([STORE_NAME], "readwrite");
  const delStore = delTx.objectStore(STORE_NAME);
  for (const item of toDelete) {
    delStore.delete(item.uid);
  }
}
/** Resolve with an IDBRequest's result on success; reject with its error. */
function requestDone<T>(req: IDBRequest<T>): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    req.onsuccess = () => resolve(req.result);
    req.onerror = () => reject(req.error);
  });
}
export const ScreenshotStorage = {
  /**
   * Save a screenshot and return its uid.
   * The base64Data must be a valid data URL (validated before storing).
   */
  async saveScreenshot(
    base64Data: string,
    metadata?: {
      tabId?: number;
      width?: number;
      height?: number;
      viewportWidth?: number;
      viewportHeight?: number;
    },
  ): Promise<string> {
    // Reject anything that is not an image data URL.
    if (
      typeof base64Data !== "string" ||
      !base64Data.startsWith("data:image/")
    ) {
      throw new Error("Invalid screenshot data: expected data:image/ URL");
    }
    await initialize();
    if (!db) throw new Error("Database not initialized");
    const database = db;
    const uid = generateUid();
    const entry: ScreenshotData = {
      uid,
      base64Data,
      timestamp: Date.now(),
      tabId: metadata?.tabId,
      metadata: metadata
        ? {
            width: metadata.width ?? 0,
            height: metadata.height ?? 0,
            viewportWidth: metadata.viewportWidth ?? 0,
            viewportHeight: metadata.viewportHeight ?? 0,
          }
        : undefined,
    };
    const store = database
      .transaction([STORE_NAME], "readwrite")
      .objectStore(STORE_NAME);
    await requestDone(store.put(entry));
    // Async LRU eviction — fire-and-forget
    applyLRU().catch(() => {});
    return uid;
  },
  /**
   * Get screenshot base64 data by uid.
   */
  async getScreenshot(uid: string): Promise<string | null> {
    await initialize();
    if (!db) throw new Error("Database not initialized");
    const store = db
      .transaction([STORE_NAME], "readonly")
      .objectStore(STORE_NAME);
    const found = (await requestDone(store.get(uid))) as
      | ScreenshotData
      | undefined;
    return found?.base64Data ?? null;
  },
  /**
   * Clear all screenshots.
   */
  async clearAll(): Promise<void> {
    await initialize();
    if (!db) throw new Error("Database not initialized");
    const store = db
      .transaction([STORE_NAME], "readwrite")
      .objectStore(STORE_NAME);
    await requestDone(store.clear());
  },
};

View File

@@ -0,0 +1,183 @@
/**
* Utilities for detecting screenshot tools and extracting image data
* from tool results.
*/
/** Tool names that produce screenshot image data */
const SCREENSHOT_TOOL_NAMES = new Set([
  "capture_screenshot",
  "capture_screenshot_with_highlight",
  "capture_tab_screenshot",
]);
/** URL prefix used in markdown for screenshot references */
export const AIPEX_SCREENSHOT_URL_PREFIX = "https://aipex-screenshot.invalid/";
/** Regex matching [[screenshot:...]] placeholders */
const SCREENSHOT_PLACEHOLDER_REGEX = /\[\[screenshot:([^\]]+)\]\]/g;
/**
 * Validate that a uid looks like a screenshot uid
 * (the "screenshot_<ms>_<alnum>" format produced by ScreenshotStorage).
 */
export function isValidScreenshotUid(uid: string): boolean {
  return /^screenshot_\d+_[a-z0-9]{1,20}$/i.test(uid);
}
/**
 * Check if a tool is a screenshot/capture tool.
 */
export function isCaptureScreenshotTool(toolName: string): boolean {
  return SCREENSHOT_TOOL_NAMES.has(toolName);
}
export interface ScreenshotExtraction {
  /** Base64 data URL if available (may be null if already stripped) */
  imageData: string | null;
  /** Whether the screenshot was intended for LLM vision */
  sendToLLM: boolean;
  /** Unique identifier for loading from IndexedDB storage */
  screenshotUid: string | null;
}
/**
 * Extract screenshot info from a tool result.
 * Works with capture_screenshot and capture_tab_screenshot tools.
 *
 * Supports multiple result formats:
 * - Object: { success, imageData, sendToLLM, screenshotUid }
 * - Nested object: { success, data: { imageData, sendToLLM, screenshotUid } }
 * - SDK structured array: [{ type: "text", text: JSON }, { type: "image", image: dataUrl }]
 *
 * Returns screenshot details if found, null if this is not a screenshot result.
 */
export function extractScreenshotFromToolResult(
  toolName: string,
  result: unknown,
): ScreenshotExtraction | null {
  if (!isCaptureScreenshotTool(toolName)) return null;
  try {
    // String results are treated as JSON-encoded payloads.
    const content = typeof result === "string" ? JSON.parse(result) : result;
    if (content === null || content === undefined) return null;
    // SDK structured array format:
    // [{ type: "text", text: '{"success":true,...}' }, { type: "image", image: "data:..." }]
    if (Array.isArray(content)) {
      return extractFromStructuredArray(content);
    }
    if (typeof content !== "object") return null;
    const obj = content as Record<string, unknown>;
    // Handle nested structure: { success, data: { imageData, sendToLLM } }
    // (possibly double-wrapped as { data: { data: {...} } }) or direct:
    // { success, imageData, sendToLLM }
    const middleLayer = obj.data as Record<string, unknown> | undefined;
    const actualData =
      (middleLayer?.data as Record<string, unknown>) ?? middleLayer ?? obj;
    // success always lives on the outermost object.
    if (!obj.success) return null;
    // Extract screenshotUid (always present if tool saved to IndexedDB)
    const screenshotUid =
      typeof actualData.screenshotUid === "string"
        ? actualData.screenshotUid
        : null;
    // Extract imageData — only accept a real image data URL, not a placeholder.
    const rawImageData = actualData.imageData;
    const imageData =
      typeof rawImageData === "string" && rawImageData.startsWith("data:image/")
        ? rawImageData
        : null;
    const sendToLLM = actualData.sendToLLM === true;
    // Return if we have at least a uid or image data
    if (screenshotUid || imageData) {
      return { imageData, sendToLLM, screenshotUid };
    }
  } catch {
    // parse failed — this was not a screenshot payload; ignore
  }
  return null;
}
/**
 * Extract screenshot from SDK structured array format.
 * The image part supplies imageData; the text part may carry JSON metadata
 * (sendToLLM, screenshotUid). Returns null when no image part is present.
 */
function extractFromStructuredArray(
  arr: unknown[],
): ScreenshotExtraction | null {
  let imageData: string | null = null;
  let screenshotUid: string | null = null;
  let sendToLLM = false;
  for (const item of arr) {
    if (typeof item !== "object" || item === null) continue;
    const part = item as Record<string, unknown>;
    if (part.type === "image" && typeof part.image === "string") {
      if (part.image.startsWith("data:image/")) {
        imageData = part.image;
      }
    }
    if (part.type === "text" && typeof part.text === "string") {
      try {
        const parsed = JSON.parse(part.text) as Record<string, unknown>;
        if (parsed.sendToLLM === true) sendToLLM = true;
        if (typeof parsed.screenshotUid === "string") {
          screenshotUid = parsed.screenshotUid;
        }
      } catch {
        // Text part was not JSON metadata — ignore.
      }
    }
  }
  if (imageData) {
    // BUGFIX: previously `sendToLLM: sendToLLM || true`, which is always true
    // and silently discarded the parsed metadata flag above. Honor the parsed
    // value (defaults to false, matching the object-format path's `=== true`).
    return { imageData, sendToLLM, screenshotUid };
  }
  return null;
}
/**
 * Transform [[screenshot:...]] placeholders in text into markdown images
 * with the special aipex-screenshot.invalid URL prefix.
 *
 * Supported formats:
 * - [[screenshot:screenshot_123_abc]] → ![](https://aipex-screenshot.invalid/screenshot_123_abc)
 * - [[screenshot:1]] → 1-based index into screenshotUidList
 */
export function transformScreenshotPlaceholders(
  text: string,
  screenshotUidList: string[],
): string {
  return text.replace(
    SCREENSHOT_PLACEHOLDER_REGEX,
    (match: string, content: string) => {
      const trimmed = content.trim();
      // Case 1: Direct uid
      if (isValidScreenshotUid(trimmed)) {
        return `![](${AIPEX_SCREENSHOT_URL_PREFIX}${trimmed})`;
      }
      // Case 2: Numeric 1-based index
      const index = parseInt(trimmed, 10);
      if (
        !Number.isNaN(index) &&
        index >= 1 &&
        index <= screenshotUidList.length
      ) {
        const uid = screenshotUidList[index - 1];
        if (uid && isValidScreenshotUid(uid)) {
          return `![](${AIPEX_SCREENSHOT_URL_PREFIX}${uid})`;
        }
      }
      // Invalid — leave the placeholder untouched
      return match;
    },
  );
}

View File

@@ -1,8 +1,9 @@
/**
* Server-side Speech-to-Text Integration
* Uses claudechrome.com server API for speech-to-text transcription
* Uses configured server API for speech-to-text transcription
*/
import { buildWebsiteUrl, WEBSITE_URL } from "../config/website.js";
import type { TranscriptionResult } from "./elevenlabs-stt";
export type ServerSTTConfig = Record<string, never>;
@@ -33,7 +34,7 @@ export async function transcribeAudioWithServer(
let cookieHeader = "";
try {
const cookies = await chrome.cookies.getAll({
url: "https://www.claudechrome.com",
url: WEBSITE_URL,
});
const relevantCookies = cookies.filter(
@@ -66,14 +67,11 @@ export async function transcribeAudioWithServer(
headers["Cookie"] = cookieHeader;
}
const response = await fetch(
"https://www.claudechrome.com/api/speech-to-text",
{
method: "POST",
headers,
body: formData,
},
);
const response = await fetch(buildWebsiteUrl("/api/speech-to-text"), {
method: "POST",
headers,
body: formData,
});
if (!response.ok) {
// Do not log detailed error response for security

View File

@@ -57,6 +57,8 @@ export interface InputAreaProps
export interface WelcomeScreenProps extends HTMLAttributes<HTMLDivElement> {
onSuggestionClick: (text: string) => void;
/** Handler for the UX audit suggestion; when provided, clicking that suggestion opens the audit dialog instead of sending the text directly. */
onUxAuditClick?: () => void;
suggestions?: WelcomeSuggestion[];
}
@@ -120,8 +122,12 @@ export interface ChatbotSlots {
emptyState?: (props: WelcomeScreenProps) => ReactNode;
/** Custom loading indicator */
loadingIndicator?: () => ReactNode;
/** Content to render before all messages (e.g., update banners, announcements) */
beforeMessages?: () => ReactNode;
/** Content to render after all messages (for platform-specific features like interventions) */
afterMessages?: () => ReactNode;
/** Extra content rendered inside PromptInput (e.g., context/skill loaders) */
promptExtras?: () => ReactNode;
}
// ============ Components Configuration ============

View File

@@ -32,6 +32,7 @@ export type {
UIContextPart,
UIFilePart,
UIMessage,
UIMessageMetadata,
UIPart,
UIReasoningPart,
UIRole,

View File

@@ -47,6 +47,10 @@ export interface UIToolPart {
state: UIToolState;
errorText?: string;
duration?: number;
/** Base64 data URL of the screenshot (inline) */
screenshot?: string;
/** UID referencing a screenshot in ScreenshotStorage (IndexedDB) */
screenshotUid?: string;
}
export interface UIContextPart {
@@ -65,11 +69,22 @@ export type UIPart =
| UIToolPart
| UIContextPart;
export interface UIMessageMetadata {
needLogin?: boolean;
needBuyToken?: boolean;
needChangeModel?: boolean;
supportedModels?: string[];
currentCredits?: number;
requiredCredits?: number;
errorCode?: string;
}
export interface UIMessage {
id: string;
role: UIRole;
parts: UIPart[];
timestamp?: number;
metadata?: UIMessageMetadata;
}
// ============ Context Item Types ============
@@ -96,4 +111,6 @@ export interface WelcomeSuggestion {
text: string;
iconColor?: string;
bgColor?: string;
/** When true, clicking this suggestion triggers the UX audit dialog instead of sending the text directly. */
isUxAudit?: boolean;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

View File

@@ -1,18 +1,18 @@
{
"manifest_version": 3,
"name": "AIPex",
"version": "0.0.2",
"description": "Automate your browser with natural language commands - The open source browser-use solution",
"version": "2.2.39",
"description": "Make your browser an AI browser - no migration, no browser installation, no code writing",
"icons": {
"16": "assets/icon16.svg",
"48": "assets/icon48.svg",
"128": "assets/icon128.svg"
"16": "assets/icon16.png",
"48": "assets/icon48.png",
"128": "assets/icon128.png"
},
"action": {
"default_icon": {
"16": "assets/icon16.svg",
"48": "assets/icon48.svg",
"128": "assets/icon128.svg"
"16": "assets/icon16.png",
"48": "assets/icon48.png",
"128": "assets/icon128.png"
}
},
"background": {
@@ -44,6 +44,14 @@
"matches": ["<all_urls>"]
}
],
"externally_connectable": {
"matches": [
"https://www.claudechrome.com/*",
"https://claudechrome.com/*",
"https://aipex.ing/*",
"http://localhost:*/*"
]
},
"host_permissions": ["https://*/*", "http://*/*", "<all_urls>"],
"commands": {
"open-aipex": {
@@ -72,6 +80,7 @@
"downloads",
"debugger",
"cookies",
"webNavigation"
"webNavigation",
"audioCapture"
]
}

View File

@@ -11,7 +11,13 @@ import React, {
useEffect,
useState,
} from "react";
import { AUTH_COOKIE_NAMES, WEBSITE_URL } from "../services/web-auth";
import {
buildWebsiteUrl,
isWebsiteDomain,
WEBSITE_ORIGIN,
WEBSITE_URL,
} from "../config/website";
import { AUTH_COOKIE_NAMES } from "../services/web-auth";
/**
* User data structure
@@ -105,7 +111,7 @@ export const AuthProvider: React.FC<AuthProviderProps> = ({ children }) => {
try {
console.log("[AuthProvider] Checking authentication via API...");
// Get all claudechrome.com cookies
// Get all website cookies
const cookies = await chrome.cookies.getAll({
url: WEBSITE_URL,
});
@@ -125,7 +131,7 @@ export const AuthProvider: React.FC<AuthProviderProps> = ({ children }) => {
// Call website's auth verify API
try {
const response = await fetch(`${WEBSITE_URL}/api/auth/verify`, {
const response = await fetch(buildWebsiteUrl("/api/auth/verify"), {
method: "GET",
credentials: "include",
});
@@ -208,7 +214,7 @@ export const AuthProvider: React.FC<AuthProviderProps> = ({ children }) => {
// Listen for message from auth success page
useEffect(() => {
const handleMessage = (event: MessageEvent) => {
if (event.origin !== WEBSITE_URL) return;
if (event.origin !== WEBSITE_ORIGIN) return;
if (event.data.type === "AUTH_SUCCESS") {
const { user: newUser } = event.data;
@@ -294,8 +300,8 @@ export const AuthProvider: React.FC<AuthProviderProps> = ({ children }) => {
const handleCookieChange = async (
changeInfo: chrome.cookies.CookieChangeInfo,
) => {
// Only care about claudechrome.com domain auth cookies
if (!changeInfo.cookie.domain.includes("claudechrome.com")) return;
// Only care about website domain auth cookies
if (!isWebsiteDomain(changeInfo.cookie.domain)) return;
if (!AUTH_COOKIE_NAMES.includes(changeInfo.cookie.name)) return;
console.log("[AuthProvider] Auth cookie changed:", {
@@ -376,7 +382,7 @@ export const AuthProvider: React.FC<AuthProviderProps> = ({ children }) => {
const login = useCallback(async () => {
console.log("[AuthProvider] Login function called");
try {
const authUrl = `${WEBSITE_URL}/auth/login?source=extension`;
const authUrl = buildWebsiteUrl("/auth/login?source=extension");
console.log("[AuthProvider] Opening auth URL");
let tabCreated = false;
@@ -429,7 +435,7 @@ export const AuthProvider: React.FC<AuthProviderProps> = ({ children }) => {
// 3. Notify website to sign out
try {
await fetch(`${WEBSITE_URL}/api/auth/signout`, {
await fetch(buildWebsiteUrl("/api/auth/signout"), {
method: "POST",
headers: {
"Content-Type": "application/json",

View File

@@ -5,7 +5,7 @@
import type React from "react";
import { useState } from "react";
import { WEBSITE_URL } from "../services/web-auth";
import { buildWebsiteUrl } from "../config/website";
import { useAuth } from "./AuthProvider";
export const UserProfile: React.FC = () => {
@@ -21,7 +21,7 @@ export const UserProfile: React.FC = () => {
const handleAccountClick = () => {
setShowDropdown(false);
chrome.tabs.create({ url: `${WEBSITE_URL}/settings/credits` });
chrome.tabs.create({ url: buildWebsiteUrl("/settings/credits") });
};
return (

View File

@@ -39,7 +39,37 @@ chrome.runtime.onInstalled.addListener((details) => {
}
});
// Handle messages for element capture relay
// =============================================================================
// Sidepanel port lifecycle
// =============================================================================
// Track whether a recording is active so we can clean up on disconnect
let isRecording = false;
// Tear down active capture when the sidepanel disconnects: if a recording was
// in progress, broadcast a best-effort "stop-capture" to every tab.
chrome.runtime.onConnect.addListener((port) => {
  if (port.name !== "sidepanel") return;
  port.onDisconnect.addListener(() => {
    if (!isRecording) return;
    isRecording = false;
    chrome.tabs.query({}).then((tabs) => {
      for (const tab of tabs) {
        if (!tab.id) continue;
        chrome.tabs
          .sendMessage(tab.id, { request: "stop-capture" })
          .catch(() => {
            /* tab may not have content script */
          });
      }
    });
  });
});
// =============================================================================
// Internal message router
// =============================================================================
chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => {
// Echo capture events to all extension contexts
if (message.request === "capture-click-event") {
@@ -70,7 +100,439 @@ chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => {
return true;
}
// Relay a message to the active tab's content script
if (message.request === "relay-to-active-tab") {
chrome.tabs.query({ active: true, currentWindow: true }, (tabs) => {
const tabId = tabs[0]?.id;
if (tabId && message.message) {
chrome.tabs
.sendMessage(tabId, message.message)
.then(() => sendResponse({ success: true }))
.catch((err) => {
sendResponse({
success: false,
error: err instanceof Error ? err.message : String(err),
});
});
} else {
sendResponse({ success: false, error: "No active tab" });
}
});
return true;
}
// Recording lifecycle markers
if (message.request === "start-recording") {
isRecording = true;
sendResponse({ success: true });
return true;
}
if (message.request === "stop-recording") {
isRecording = false;
sendResponse({ success: true });
return true;
}
// Open sidepanel on demand (e.g. from content script)
if (message.request === "open-sidepanel") {
(async () => {
try {
const tabId = _sender.tab?.id;
if (tabId) {
await chrome.sidePanel.open({ tabId });
} else {
const window = await chrome.windows.getCurrent();
if (window.id) {
await chrome.sidePanel.open({ windowId: window.id });
}
}
sendResponse({ success: true });
} catch (error) {
sendResponse({
success: false,
error: error instanceof Error ? error.message : String(error),
});
}
})();
return true;
}
// Collect screenshots from sidepanel and trigger downloads
if (message.request === "get-current-chat-images-for-download") {
(async () => {
try {
const { folderPrefix, imageNames, filenamingStrategy, displayResults } =
message as {
folderPrefix?: string;
imageNames?: string[];
filenamingStrategy?: string;
displayResults?: boolean;
};
// Try to get images from sidepanel
try {
const sidepanelResponse = await chrome.runtime.sendMessage({
request: "provide-current-chat-images",
folderPrefix,
imageNames,
filenamingStrategy,
displayResults,
});
if (
sidepanelResponse?.images &&
sidepanelResponse.images.length > 0
) {
const result = await downloadChatImagesInBackground(
sidepanelResponse.images,
folderPrefix,
imageNames,
);
sendResponse({
success: result.success,
downloadedCount: result.downloadedCount,
downloadIds: result.downloadIds,
folderPath: folderPrefix,
filesList: result.filesList,
error: result.errors?.join(", "),
});
} else {
sendResponse({
success: false,
error: "No images found in current chat",
});
}
} catch {
// Fallback: try active tab content script
try {
const [activeTab] = await chrome.tabs.query({
active: true,
currentWindow: true,
});
if (activeTab?.id) {
const tabResponse = await chrome.tabs.sendMessage(activeTab.id, {
request: "provide-current-chat-images",
folderPrefix,
imageNames,
filenamingStrategy,
displayResults,
});
if (tabResponse?.images && tabResponse.images.length > 0) {
const result = await downloadChatImagesInBackground(
tabResponse.images,
folderPrefix,
imageNames,
);
sendResponse({
success: result.success,
downloadedCount: result.downloadedCount,
downloadIds: result.downloadIds,
folderPath: folderPrefix,
filesList: result.filesList,
error: result.errors?.join(", "),
});
} else {
sendResponse({
success: false,
error: "No images found in current chat",
});
}
} else {
sendResponse({
success: false,
error: "Unable to access current chat",
});
}
} catch (_tabError) {
sendResponse({
success: false,
error: "Unable to access current chat images",
});
}
}
} catch (error) {
sendResponse({
success: false,
error: error instanceof Error ? error.message : String(error),
});
}
})();
return true;
}
return false;
});
// =============================================================================
// Download helpers for chat image export
// =============================================================================
/**
* Validate a path segment to prevent directory traversal and unsafe characters.
*/
/**
 * Validate a single path segment used when building download file paths.
 *
 * Rejects directory-traversal sequences (raw and URL-encoded), backslashes,
 * leading/trailing slashes, and empty inner segments.
 *
 * @param segment - Candidate segment; undefined/empty means "not provided"
 *   and is accepted.
 * @param fieldName - Field name used in the returned error message.
 * @returns A human-readable error string, or null when the segment is safe.
 */
function validatePathSegment(
  segment: string | undefined,
  fieldName: string,
): string | null {
  if (segment === undefined || segment === "") return null;
  // Raw and percent-encoded ".." variants; matching is case-insensitive.
  const traversalPatterns = [
    "..",
    "%2e%2e",
    "%2E%2E",
    "..%2f",
    "..%5c",
    "%2f..",
    "%5c..",
  ];
  const lowered = segment.toLowerCase();
  for (const pattern of traversalPatterns) {
    if (lowered.includes(pattern.toLowerCase())) {
      return `${fieldName} contains forbidden traversal pattern: ${pattern}`;
    }
  }
  if (segment.includes("\\"))
    return `${fieldName} must not contain backslashes`;
  if (segment.startsWith("/") || segment.endsWith("/"))
    return `${fieldName} must not have leading or trailing slashes`;
  if (segment.includes("//"))
    return `${fieldName} contains empty path segments`;
  return null;
}

/**
 * Download every "image" part found in the given chat messages via
 * chrome.downloads.
 *
 * @param messages - Chat messages; only parts with type "image" and a
 *   data:image/... URL in imageData are downloaded.
 * @param folderPrefix - Optional sub-folder (validated against traversal).
 * @param imageNames - Optional custom names, matched to images by their
 *   overall order across all messages.
 * @returns Aggregate result: success flag, download count, chrome download
 *   ids, final file names, and any per-image error messages.
 */
async function downloadChatImagesInBackground(
  messages: Array<{
    id: string;
    parts?: Array<{
      type: string;
      imageData?: string;
      imageTitle?: string;
    }>;
  }>,
  folderPrefix?: string,
  imageNames?: string[],
): Promise<{
  success: boolean;
  downloadedCount?: number;
  downloadIds?: number[];
  errors?: string[];
  filesList?: string[];
}> {
  try {
    if (!chrome.downloads) {
      return {
        success: false,
        errors: ["Downloads permission not available."],
      };
    }
    // Validate all user-controlled path inputs before any download starts.
    const folderPrefixError = validatePathSegment(folderPrefix, "folderPrefix");
    if (folderPrefixError)
      return { success: false, errors: [folderPrefixError] };
    if (imageNames) {
      for (let i = 0; i < imageNames.length; i++) {
        const nameError = validatePathSegment(
          imageNames[i],
          `imageNames[${i}]`,
        );
        if (nameError) return { success: false, errors: [nameError] };
      }
    }
    const downloadIds: number[] = [];
    const errors: string[] = [];
    const filesList: string[] = [];
    let downloadedCount = 0;
    // Index into imageNames; advanced once per image part — including on
    // failure — so custom names stay aligned with image order.
    let imageIndex = 0;
    for (const message of messages) {
      if (!message.parts) continue;
      for (const part of message.parts) {
        if (part.type !== "image" || !part.imageData) continue;
        try {
          // Only data:image/... URLs are accepted.
          if (!part.imageData.startsWith("data:image/")) {
            errors.push("Invalid image data format");
            imageIndex++;
            continue;
          }
          let filename: string;
          const imageName = imageNames?.[imageIndex];
          if (imageName) {
            // Strip everything outside letters, digits, CJK, spaces, hyphens.
            filename = imageName
              .replace(/[^a-zA-Z0-9\u4e00-\u9fa5\s-]/g, "")
              .trim();
          } else {
            const timestamp = new Date()
              .toISOString()
              .replace(/[:.]/g, "-")
              .slice(0, -5);
            const titleSlug = part.imageTitle
              ? part.imageTitle
                  .toLowerCase()
                  .replace(/[^a-z0-9]+/g, "-")
                  .replace(/^-+|-+$/g, "")
              : "image";
            filename = `${titleSlug}-${timestamp}`;
          }
          // BUG FIX: previously built `${folderPrefix}/$(unknown)`, which
          // produced a literal "$(unknown)" file name instead of the
          // computed filename.
          const fullFilename = folderPrefix
            ? `${folderPrefix}/${filename}`
            : filename;
          const mimeMatch = part.imageData.match(/data:image\/([^;]+)/);
          const extension =
            mimeMatch?.[1] === "jpeg" ? "jpg" : (mimeMatch?.[1] ?? "png");
          const imageFilename = fullFilename.includes(".")
            ? fullFilename
            : `${fullFilename}.${extension}`;
          const downloadId = await chrome.downloads.download({
            url: part.imageData,
            filename: imageFilename,
            saveAs: true,
          });
          downloadIds.push(downloadId);
          filesList.push(imageFilename);
          downloadedCount++;
          imageIndex++;
        } catch (error) {
          errors.push(
            `Error downloading image: ${error instanceof Error ? error.message : String(error)}`,
          );
          // BUG FIX: keep imageNames aligned with later images on failure.
          imageIndex++;
        }
      }
    }
    return {
      success: downloadedCount > 0 || errors.length === 0,
      downloadedCount,
      downloadIds,
      filesList,
      errors: errors.length > 0 ? errors : undefined,
    };
  } catch (error) {
    return {
      success: false,
      errors: [error instanceof Error ? error.message : String(error)],
    };
  }
}
// Global function callable from QuickJS skill runtime
/**
 * Export the current chat's images, callable from the QuickJS skill runtime.
 *
 * Requests the images from the sidepanel, then hands them to the background
 * download helper. Registered on globalThis so the skill sandbox can invoke
 * it by name.
 */
(
  globalThis as Record<string, unknown>
).downloadCurrentChatImagesFromBackground = async (
  folderPrefix: string,
  imageNames?: string[],
  filenamingStrategy: string = "descriptive",
  displayResults: boolean = true,
) => {
  try {
    const reply = await chrome.runtime.sendMessage({
      request: "provide-current-chat-images",
      folderPrefix,
      imageNames,
      filenamingStrategy,
      displayResults,
    });
    const images = reply?.images;
    if (!images || images.length === 0) {
      return { success: false, error: "No images found in current chat" };
    }
    const outcome = await downloadChatImagesInBackground(
      images,
      folderPrefix,
      imageNames,
    );
    return {
      success: outcome.success,
      downloadedCount: outcome.downloadedCount,
      downloadIds: outcome.downloadIds,
      folderPath: folderPrefix,
      filesList: outcome.filesList ?? [],
      error: outcome.errors?.join(", "),
    };
  } catch (error) {
    return {
      success: false,
      error: error instanceof Error ? error.message : String(error),
    };
  }
};
// =============================================================================
// External Message Listener - Website Integration
// =============================================================================
// Origin verification is handled by manifest.json's externally_connectable
// External messages from allow-listed websites. Origin verification is done
// declaratively by manifest.json's "externally_connectable" matches.
chrome.runtime.onMessageExternal.addListener(
  (message, sender, sendResponse) => {
    // Handle "openWithPrompt" action from website
    if (message.action === "openWithPrompt") {
      const prompt = message.prompt;
      if (!prompt || typeof prompt !== "string") {
        sendResponse({ success: false, error: "Invalid prompt" });
        return true;
      }
      // Save prompt to chrome.storage.local with timestamp so the sidepanel
      // can pick it up on open (and distinguish stale prompts by age).
      chrome.storage.local.set(
        {
          "aipex-pending-prompt": prompt,
          "aipex-pending-prompt-timestamp": Date.now(),
        },
        () => {
          if (chrome.runtime.lastError) {
            sendResponse({
              success: false,
              error: chrome.runtime.lastError.message,
            });
            return;
          }
          // Open sidepanel — prefer the sender tab's window; otherwise fall
          // back to the current window.
          const windowId = sender.tab?.windowId;
          if (!windowId) {
            chrome.windows
              .getCurrent()
              .then((window) => {
                if (window.id) {
                  return chrome.sidePanel.open({ windowId: window.id });
                }
                throw new Error("No window ID available");
              })
              .then(() => {
                sendResponse({ success: true });
              })
              .catch((error) => {
                sendResponse({ success: false, error: error.message });
              });
          } else {
            chrome.sidePanel
              .open({ windowId })
              .then(() => {
                sendResponse({ success: true });
              })
              .catch((error) => {
                sendResponse({ success: false, error: error.message });
              });
          }
        },
      );
      return true; // Keep message channel open for async response
    }
    // Unrecognized action: reply immediately so the sender is not left hanging.
    sendResponse({ success: false, error: "Unknown action" });
    return true;
  },
);
console.log("AIPex background service worker started");

View File

@@ -1,4 +1,9 @@
import {
Alert,
AlertDescription,
} from "@aipexstudio/aipex-react/components/ui/alert";
import { Badge } from "@aipexstudio/aipex-react/components/ui/badge";
import { Button } from "@aipexstudio/aipex-react/components/ui/button";
import {
Dialog,
DialogContent,
@@ -11,16 +16,42 @@ import {
TabsList,
TabsTrigger,
} from "@aipexstudio/aipex-react/components/ui/tabs";
import { Code, Download, Eye, FileText } from "lucide-react";
import { Textarea } from "@aipexstudio/aipex-react/components/ui/textarea";
import {
AlertCircle,
Code,
Download,
Edit,
Eye,
FileText,
FolderOpen,
Loader2,
Save,
X,
} from "lucide-react";
import type React from "react";
import { useCallback, useEffect, useState } from "react";
import type { SkillClient, SkillMetadata } from "./types";
/** Script data with path and loaded content */
interface ScriptData {
path: string;
content: string;
}
/** Reference data with path and loaded content */
interface ReferenceData {
path: string;
content: string;
}
interface SkillDetailsProps {
skill: SkillMetadata | null;
skillClient: SkillClient;
open: boolean;
onOpenChange: (open: boolean) => void;
onEditInFileManager?: (filePath: string) => void;
onSkillUpdated?: () => void;
}
export const SkillDetails: React.FC<SkillDetailsProps> = ({
@@ -28,36 +59,92 @@ export const SkillDetails: React.FC<SkillDetailsProps> = ({
skillClient,
open,
onOpenChange,
onEditInFileManager,
onSkillUpdated,
}) => {
const [skillContent, setSkillContent] = useState<string>("");
const [scripts, setScripts] = useState<string[]>([]);
const [references, setReferences] = useState<string[]>([]);
const [scriptsData, setScriptsData] = useState<ScriptData[]>([]);
const [referencesData, setReferencesData] = useState<ReferenceData[]>([]);
const [assets, setAssets] = useState<string[]>([]);
const [loading, setLoading] = useState(false);
// Edit states
const [isEditingContent, setIsEditingContent] = useState(false);
const [editedContent, setEditedContent] = useState("");
const [editingScriptIndex, setEditingScriptIndex] = useState<number | null>(
null,
);
const [editedScriptContent, setEditedScriptContent] = useState("");
const [editingRefIndex, setEditingRefIndex] = useState<number | null>(null);
const [editedRefContent, setEditedRefContent] = useState("");
// Save states
const [saving, setSaving] = useState(false);
const [saveError, setSaveError] = useState<string | null>(null);
const resetEditStates = useCallback(() => {
setIsEditingContent(false);
setEditedContent("");
setEditingScriptIndex(null);
setEditedScriptContent("");
setEditingRefIndex(null);
setEditedRefContent("");
setSaveError(null);
}, []);
const loadSkillDetails = useCallback(async () => {
if (!skill) return;
setLoading(true);
resetEditStates();
try {
// Load skill details via adapter
// Load skill content
const content = await skillClient.getSkillContent(skill.name);
setSkillContent(content);
// Get skill data
const skillData = await skillClient.getSkill(skill.name);
if (skillData) {
setSkillContent(skillData.skillMdContent);
// Load scripts with their paths and content
const scriptDataList: ScriptData[] = [];
for (const scriptPath of skillData.scripts) {
try {
const scriptContent = await skillClient.getSkillScript(
skill.name,
scriptPath,
);
scriptDataList.push({ path: scriptPath, content: scriptContent });
} catch (error) {
console.error(`Failed to load script ${scriptPath}:`, error);
scriptDataList.push({
path: scriptPath,
content: "// Error loading script",
});
}
}
setScriptsData(scriptDataList);
// For now, just set the file paths - in a full implementation,
// we'd load the actual content
setScripts(
skillData.scripts.map(
(s) => `// Script: ${s}\n// (content not loaded)`,
),
);
setReferences(
skillData.references.map(
(r) => `# Reference: ${r}\n(content not loaded)`,
),
);
// Load references with their paths and content
const refDataList: ReferenceData[] = [];
for (const refPath of skillData.references) {
try {
const refContent = await skillClient.getSkillReference(
skill.name,
refPath,
);
refDataList.push({ path: refPath, content: refContent });
} catch (error) {
console.error(`Failed to load reference ${refPath}:`, error);
refDataList.push({
path: refPath,
content: "// Error loading reference",
});
}
}
setReferencesData(refDataList);
// Set assets
setAssets(skillData.assets);
}
} catch (error) {
@@ -65,7 +152,7 @@ export const SkillDetails: React.FC<SkillDetailsProps> = ({
} finally {
setLoading(false);
}
}, [skill, skillClient]);
}, [skill, skillClient, resetEditStates]);
useEffect(() => {
if (skill && open) {
@@ -73,6 +160,13 @@ export const SkillDetails: React.FC<SkillDetailsProps> = ({
}
}, [skill, open, loadSkillDetails]);
// Reset edit states when dialog closes
useEffect(() => {
if (!open) {
resetEditStates();
}
}, [open, resetEditStates]);
const formatDate = (timestamp: number) => {
return new Date(timestamp).toLocaleDateString("en-US", {
year: "numeric",
@@ -83,6 +177,171 @@ export const SkillDetails: React.FC<SkillDetailsProps> = ({
});
};
const handleEditInFileManager = () => {
if (skill && onEditInFileManager) {
const skillMdPath = `/skills/${skill.id}/SKILL.md`;
onOpenChange(false); // Close the details dialog
onEditInFileManager(skillMdPath);
}
};
// --- SKILL.md Edit Handlers ---
const handleStartEditContent = () => {
setEditedContent(skillContent);
setIsEditingContent(true);
setSaveError(null);
};
const handleCancelEditContent = () => {
setIsEditingContent(false);
setEditedContent("");
setSaveError(null);
};
const handleSaveContent = async () => {
if (!skill) return;
// Validate name hasn't changed
const nameMatch = editedContent.match(
/^---\n[\s\S]*?name:\s*(.+?)[\s]*\n[\s\S]*?---/m,
);
const parsedName = nameMatch?.[1]?.trim();
if (parsedName && parsedName !== skill.id) {
setSaveError(
`Cannot rename skill. The name "${parsedName}" must match "${skill.id}". Skill renaming is not supported.`,
);
return;
}
try {
setSaving(true);
setSaveError(null);
const filePath = `/skills/${skill.id}/SKILL.md`;
await skillClient.writeFile(filePath, editedContent);
// Refresh metadata
try {
await skillClient.refreshSkillMetadata(skill.id);
} catch (metadataErr) {
console.error("Failed to sync skill metadata:", metadataErr);
}
// Update local state
setSkillContent(editedContent);
setIsEditingContent(false);
setEditedContent("");
// Notify parent
onSkillUpdated?.();
} catch (err) {
const errorMsg =
err instanceof Error ? err.message : "Failed to save file";
setSaveError(errorMsg);
} finally {
setSaving(false);
}
};
// --- Script Edit Handlers ---
const handleStartEditScript = (index: number) => {
const script = scriptsData[index];
if (!script) return;
setEditedScriptContent(script.content);
setEditingScriptIndex(index);
setSaveError(null);
};
const handleCancelEditScript = () => {
setEditingScriptIndex(null);
setEditedScriptContent("");
setSaveError(null);
};
const handleSaveScript = async () => {
if (!skill || editingScriptIndex === null) return;
const scriptData = scriptsData[editingScriptIndex];
if (!scriptData) return;
try {
setSaving(true);
setSaveError(null);
const filePath = `/skills/${skill.id}/${scriptData.path}`;
await skillClient.writeFile(filePath, editedScriptContent);
// Update local state
const updatedScripts = [...scriptsData];
updatedScripts[editingScriptIndex] = {
...scriptData,
content: editedScriptContent,
};
setScriptsData(updatedScripts);
setEditingScriptIndex(null);
setEditedScriptContent("");
onSkillUpdated?.();
} catch (err) {
const errorMsg =
err instanceof Error ? err.message : "Failed to save script";
setSaveError(errorMsg);
} finally {
setSaving(false);
}
};
// --- Reference Edit Handlers ---
const handleStartEditRef = (index: number) => {
const ref = referencesData[index];
if (!ref) return;
setEditedRefContent(ref.content);
setEditingRefIndex(index);
setSaveError(null);
};
const handleCancelEditRef = () => {
setEditingRefIndex(null);
setEditedRefContent("");
setSaveError(null);
};
const handleSaveRef = async () => {
if (!skill || editingRefIndex === null) return;
const refData = referencesData[editingRefIndex];
if (!refData) return;
try {
setSaving(true);
setSaveError(null);
const filePath = `/skills/${skill.id}/${refData.path}`;
await skillClient.writeFile(filePath, editedRefContent);
// Update local state
const updatedRefs = [...referencesData];
updatedRefs[editingRefIndex] = {
...refData,
content: editedRefContent,
};
setReferencesData(updatedRefs);
setEditingRefIndex(null);
setEditedRefContent("");
onSkillUpdated?.();
} catch (err) {
const errorMsg =
err instanceof Error ? err.message : "Failed to save reference";
setSaveError(errorMsg);
} finally {
setSaving(false);
}
};
if (!skill) return null;
const isBuiltin = skill.id === "skill-creator";
@@ -135,6 +394,17 @@ export const SkillDetails: React.FC<SkillDetailsProps> = ({
{skill.enabled ? "Enabled" : "Disabled"}
</Badge>
</div>
{onEditInFileManager && (
<Button
variant="outline"
size="sm"
onClick={handleEditInFileManager}
className="mt-1"
>
<FolderOpen className="h-4 w-4 mr-1" />
Edit in File Manager
</Button>
)}
</div>
</div>
@@ -150,14 +420,14 @@ export const SkillDetails: React.FC<SkillDetailsProps> = ({
</TabsTrigger>
<TabsTrigger value="scripts" className="flex items-center gap-1">
<Code className="h-3 w-3" />
Scripts ({scripts.length})
Scripts ({scriptsData.length})
</TabsTrigger>
<TabsTrigger
value="references"
className="flex items-center gap-1"
>
<Eye className="h-3 w-3" />
References ({references.length})
References ({referencesData.length})
</TabsTrigger>
<TabsTrigger value="assets" className="flex items-center gap-1">
<Download className="h-3 w-3" />
@@ -165,66 +435,250 @@ export const SkillDetails: React.FC<SkillDetailsProps> = ({
</TabsTrigger>
</TabsList>
{/* ===== Content (SKILL.md) Tab ===== */}
<TabsContent
value="content"
className="mt-4 flex-1 overflow-hidden flex flex-col min-h-0"
>
<div className="space-y-2 flex-1 overflow-y-auto">
<div className="text-sm font-medium">SKILL.md</div>
<pre className="bg-muted p-4 rounded-lg overflow-auto text-sm">
{loading ? "Loading..." : skillContent}
</pre>
{isEditingContent ? (
<>
<div className="flex items-center justify-between">
<div className="text-sm font-medium">
SKILL.md (Editing)
</div>
<div className="flex items-center gap-2">
<Button
variant="outline"
size="sm"
onClick={handleCancelEditContent}
disabled={saving}
>
<X className="h-4 w-4 mr-1" />
Cancel
</Button>
<Button
size="sm"
onClick={handleSaveContent}
disabled={saving}
>
{saving ? (
<Loader2 className="h-4 w-4 mr-1 animate-spin" />
) : (
<Save className="h-4 w-4 mr-1" />
)}
Save
</Button>
</div>
</div>
{saveError && (
<Alert variant="destructive">
<AlertCircle className="h-4 w-4" />
<AlertDescription>{saveError}</AlertDescription>
</Alert>
)}
<Textarea
value={editedContent}
onChange={(e) => setEditedContent(e.target.value)}
className="font-mono text-sm min-h-[400px] resize-y flex-1"
placeholder="SKILL.md content..."
disabled={saving}
/>
</>
) : (
<>
<div className="flex items-center justify-between">
<div className="text-sm font-medium">SKILL.md</div>
<Button
variant="outline"
size="sm"
onClick={handleStartEditContent}
>
<Edit className="h-4 w-4 mr-1" />
Edit
</Button>
</div>
<pre className="bg-muted p-4 rounded-lg overflow-auto text-sm whitespace-pre-wrap">
{loading ? "Loading..." : skillContent}
</pre>
</>
)}
</div>
</TabsContent>
{/* ===== Scripts Tab ===== */}
<TabsContent
value="scripts"
className="mt-4 flex-1 overflow-hidden flex flex-col min-h-0"
>
<div className="space-y-4 overflow-y-auto flex-1">
{scripts.length === 0 ? (
{scriptsData.length === 0 ? (
<div className="text-center text-muted-foreground py-8">
No scripts found
</div>
) : (
scripts.map((script, index) => (
<div key={script} className="space-y-2">
<div className="text-sm font-medium">
Script {index + 1}
</div>
<pre className="bg-muted p-4 rounded-lg overflow-auto text-sm max-h-[60vh]">
{script}
</pre>
scriptsData.map((scriptData, index) => (
<div key={scriptData.path} className="space-y-2">
{editingScriptIndex === index ? (
<>
<div className="flex items-center justify-between">
<div className="text-sm font-medium">
{scriptData.path} (Editing)
</div>
<div className="flex items-center gap-2">
<Button
variant="outline"
size="sm"
onClick={handleCancelEditScript}
disabled={saving}
>
<X className="h-4 w-4 mr-1" />
Cancel
</Button>
<Button
size="sm"
onClick={handleSaveScript}
disabled={saving}
>
{saving ? (
<Loader2 className="h-4 w-4 mr-1 animate-spin" />
) : (
<Save className="h-4 w-4 mr-1" />
)}
Save
</Button>
</div>
</div>
{saveError && (
<Alert variant="destructive">
<AlertCircle className="h-4 w-4" />
<AlertDescription>{saveError}</AlertDescription>
</Alert>
)}
<Textarea
value={editedScriptContent}
onChange={(e) =>
setEditedScriptContent(e.target.value)
}
className="font-mono text-sm min-h-[300px] resize-y"
placeholder="Script content..."
disabled={saving}
/>
</>
) : (
<>
<div className="flex items-center justify-between">
<div className="text-sm font-medium">
{scriptData.path}
</div>
<Button
variant="outline"
size="sm"
onClick={() => handleStartEditScript(index)}
disabled={editingScriptIndex !== null}
>
<Edit className="h-4 w-4 mr-1" />
Edit
</Button>
</div>
<pre className="bg-muted p-4 rounded-lg overflow-auto text-sm max-h-[60vh] whitespace-pre-wrap">
{scriptData.content}
</pre>
</>
)}
</div>
))
)}
</div>
</TabsContent>
{/* ===== References Tab ===== */}
<TabsContent
value="references"
className="mt-4 flex-1 overflow-hidden flex flex-col min-h-0"
>
<div className="space-y-4 overflow-y-auto flex-1">
{references.length === 0 ? (
{referencesData.length === 0 ? (
<div className="text-center text-muted-foreground py-8">
No references found
</div>
) : (
references.map((reference, index) => (
<div key={reference} className="space-y-2">
<div className="text-sm font-medium">
Reference {index + 1}
</div>
<pre className="bg-muted p-4 rounded-lg overflow-auto text-sm max-h-[60vh]">
{reference}
</pre>
referencesData.map((refData, index) => (
<div key={refData.path} className="space-y-2">
{editingRefIndex === index ? (
<>
<div className="flex items-center justify-between">
<div className="text-sm font-medium">
{refData.path} (Editing)
</div>
<div className="flex items-center gap-2">
<Button
variant="outline"
size="sm"
onClick={handleCancelEditRef}
disabled={saving}
>
<X className="h-4 w-4 mr-1" />
Cancel
</Button>
<Button
size="sm"
onClick={handleSaveRef}
disabled={saving}
>
{saving ? (
<Loader2 className="h-4 w-4 mr-1 animate-spin" />
) : (
<Save className="h-4 w-4 mr-1" />
)}
Save
</Button>
</div>
</div>
{saveError && (
<Alert variant="destructive">
<AlertCircle className="h-4 w-4" />
<AlertDescription>{saveError}</AlertDescription>
</Alert>
)}
<Textarea
value={editedRefContent}
onChange={(e) =>
setEditedRefContent(e.target.value)
}
className="font-mono text-sm min-h-[300px] resize-y"
placeholder="Reference content..."
disabled={saving}
/>
</>
) : (
<>
<div className="flex items-center justify-between">
<div className="text-sm font-medium">
{refData.path}
</div>
<Button
variant="outline"
size="sm"
onClick={() => handleStartEditRef(index)}
disabled={editingRefIndex !== null}
>
<Edit className="h-4 w-4 mr-1" />
Edit
</Button>
</div>
<pre className="bg-muted p-4 rounded-lg overflow-auto text-sm max-h-[60vh] whitespace-pre-wrap">
{refData.content}
</pre>
</>
)}
</div>
))
)}
</div>
</TabsContent>
{/* ===== Assets Tab ===== */}
<TabsContent
value="assets"
className="mt-4 flex-1 overflow-hidden flex flex-col min-h-0"

View File

@@ -14,7 +14,7 @@ import {
import { Input } from "@aipexstudio/aipex-react/components/ui/input";
import { AlertCircle, Filter, RefreshCw, Search } from "lucide-react";
import type React from "react";
import { useCallback, useEffect, useState } from "react";
import { useCallback, useEffect, useRef, useState } from "react";
import { SkillCard } from "./SkillCard";
import { SkillDetails } from "./SkillDetails";
import type { SkillClient, SkillMetadata } from "./types";
@@ -22,11 +22,16 @@ import type { SkillClient, SkillMetadata } from "./types";
interface SkillListProps {
skillClient: SkillClient;
onSkillUpdate: () => void;
onNavigateToFile?: (filePath: string) => void;
/** Pre-open a specific skill's detail dialog by name (from URL deep-link). */
initialSkill?: string;
}
export const SkillList: React.FC<SkillListProps> = ({
skillClient,
onSkillUpdate,
onNavigateToFile,
initialSkill,
}) => {
const [skills, setSkills] = useState<SkillMetadata[]>([]);
const [filteredSkills, setFilteredSkills] = useState<SkillMetadata[]>([]);
@@ -65,6 +70,26 @@ export const SkillList: React.FC<SkillListProps> = ({
loadSkills();
}, [loadSkills]);
// Auto-open a skill's detail dialog when initialSkill is provided (from URL deep-link)
const initialSkillHandled = useRef(false);
useEffect(() => {
if (
initialSkill &&
!loading &&
skills.length > 0 &&
!initialSkillHandled.current
) {
const match = skills.find(
(s) => s.name === initialSkill || s.id === initialSkill,
);
if (match) {
setSelectedSkill(match);
setDetailsOpen(true);
initialSkillHandled.current = true;
}
}
}, [initialSkill, loading, skills]);
useEffect(() => {
// Filter skills based on search query and enabled filter
let filtered = skills;
@@ -257,6 +282,8 @@ export const SkillList: React.FC<SkillListProps> = ({
skillClient={skillClient}
open={detailsOpen}
onOpenChange={setDetailsOpen}
onEditInFileManager={onNavigateToFile}
onSkillUpdated={onSkillUpdate}
/>
</div>
);

View File

@@ -61,6 +61,31 @@ export interface SkillClient {
* Get detailed skill information
*/
getSkill(skillNameOrId: string): Promise<SkillDetail | null>;
/**
* Get skill SKILL.md content
*/
getSkillContent(skillName: string): Promise<string>;
/**
* Get a specific script's content
*/
getSkillScript(skillName: string, scriptPath: string): Promise<string>;
/**
* Get a specific reference's content
*/
getSkillReference(skillName: string, refPath: string): Promise<string>;
/**
* Write a file under /skills/ via ZenFS
*/
writeFile(filePath: string, content: string): Promise<void>;
/**
* Refresh skill metadata after file changes
*/
refreshSkillMetadata(skillId: string): Promise<void>;
}
/**

View File

@@ -0,0 +1,82 @@
/**
 * Website URL configuration
 * Provides centralized, env-configurable website base URL for the extension.
 */
const DEFAULT_WEBSITE_URL = "https://www.claudechrome.com";
/**
 * Resolve the website base URL.
 *
 * Reads VITE_WEBSITE_URL (injected at build time via .env). When the value
 * is absent, non-string, blank, unparseable, or uses a scheme other than
 * http:/https:, falls back to DEFAULT_WEBSITE_URL. The result is always
 * normalized to an origin (scheme + host + port, trailing path removed).
 */
function resolveWebsiteUrl(): string {
  const envUrl =
    typeof import.meta !== "undefined" && import.meta.env?.VITE_WEBSITE_URL;
  const candidate = typeof envUrl === "string" ? envUrl.trim() : "";
  if (candidate === "") {
    return DEFAULT_WEBSITE_URL;
  }
  try {
    const parsed = new URL(candidate);
    if (parsed.protocol === "https:" || parsed.protocol === "http:") {
      // Origin only: scheme + host + port, no path/query/hash.
      return parsed.origin;
    }
    console.warn(
      "[website config] Invalid protocol, falling back to default",
    );
    return DEFAULT_WEBSITE_URL;
  } catch {
    console.warn(
      "[website config] Invalid VITE_WEBSITE_URL, falling back to default",
    );
    return DEFAULT_WEBSITE_URL;
  }
}
/**
 * The base website URL (origin only, no trailing slash).
 * Resolved once at module load via resolveWebsiteUrl().
 * Example: "https://www.claudechrome.com"
 */
export const WEBSITE_URL: string = resolveWebsiteUrl();
/**
 * Alias for WEBSITE_URL, explicitly named as origin.
 */
export const WEBSITE_ORIGIN: string = WEBSITE_URL;
/**
 * The hostname of the website (without protocol or port).
 * Derived from WEBSITE_URL at module load.
 * Example: "www.claudechrome.com"
 */
export const WEBSITE_HOST: string = new URL(WEBSITE_URL).hostname;
/**
 * Build a full URL from a path relative to the website.
 * A missing leading "/" is added automatically.
 * @param path - Path such as "/pricing" or "api/auth/verify"
 * @returns Full URL string rooted at WEBSITE_URL
 */
export function buildWebsiteUrl(path: string): string {
  if (path.startsWith("/")) {
    return `${WEBSITE_URL}${path}`;
  }
  return `${WEBSITE_URL}/${path}`;
}
/**
 * Check whether a domain matches the website domain (for cookie validation).
 * A single leading dot is ignored, so ".claudechrome.com" matches
 * "www.claudechrome.com". Matching is case-insensitive and accepts either
 * the exact host or any parent domain of it.
 * @param domain - Domain string from a cookie or origin
 * @returns true if the domain refers to the website
 */
export function isWebsiteDomain(domain: string): boolean {
  if (!domain) return false;
  const host = WEBSITE_HOST.toLowerCase();
  const candidate = domain.toLowerCase().replace(/^\./, "");
  return candidate === host || host.endsWith(`.${candidate}`);
}

View File

@@ -10,10 +10,11 @@ import { getRuntime } from "@aipexstudio/aipex-react/lib/runtime";
import { cn } from "@aipexstudio/aipex-react/lib/utils";
import type { HeaderProps } from "@aipexstudio/aipex-react/types";
import { conversationStorage } from "@aipexstudio/browser-runtime";
import { PlusIcon, SettingsIcon } from "lucide-react";
import { KeyboardIcon, MicIcon, PlusIcon, SettingsIcon } from "lucide-react";
import { useCallback, useEffect, useRef, useState } from "react";
import { UserProfile, useAuth } from "../auth";
import { ConversationHistory } from "./conversation-history";
import { useInputMode } from "./input-mode-context";
import { fromStorageFormat, toStorageFormat } from "./message-adapter";
export function BrowserChatHeader({
@@ -126,6 +127,12 @@ export function BrowserChatHeader({
onNewChat?.();
}, [onNewChat]);
const { inputMode, setInputMode } = useInputMode();
const toggleInputMode = useCallback(() => {
setInputMode(inputMode === "voice" ? "text" : "voice");
}, [inputMode, setInputMode]);
return (
<div
className={cn(
@@ -134,26 +141,47 @@ export function BrowserChatHeader({
)}
{...props}
>
{/* Left side - Settings */}
<Button
variant="ghost"
size="sm"
onClick={handleOpenOptions}
className="gap-2"
>
<SettingsIcon className="size-4" />
{t("common.settings")}
</Button>
{/* Left side - Settings + Voice/Text toggle + History */}
<div className="flex items-center gap-1">
<Button
variant="ghost"
size="icon"
onClick={handleOpenOptions}
title={t("tooltip.settings")}
className="size-8"
>
<SettingsIcon className="size-4" />
</Button>
{/* Center - History */}
<ConversationHistory
currentConversationId={currentConversationId}
onConversationSelect={handleConversationSelect}
onNewConversation={handleNewChat}
/>
{/* Voice / Text toggle */}
<Button
variant="ghost"
size="icon"
onClick={toggleInputMode}
title={
inputMode === "voice"
? t("tooltip.switchToText")
: t("tooltip.switchToVoice")
}
className="size-8"
>
{inputMode === "voice" ? (
<KeyboardIcon className="size-4" />
) : (
<MicIcon className="size-4" />
)}
</Button>
{/* Conversation History */}
<ConversationHistory
currentConversationId={currentConversationId}
onConversationSelect={handleConversationSelect}
onNewConversation={handleNewChat}
/>
</div>
{/* Right side - New Chat and User Profile */}
<div className="flex items-center gap-2">
<div className="flex items-center gap-1">
<Button
variant="ghost"
size="sm"

View File

@@ -0,0 +1,50 @@
/**
* BrowserChatInputArea
* Renders VoiceInput when inputMode is "voice", otherwise the default text InputArea.
*/
import {
DefaultInputArea,
type ExtendedInputAreaProps,
} from "@aipexstudio/aipex-react/components/chatbot/components";
import { VoiceInput } from "@aipexstudio/aipex-react/components/voice";
import type { InputAreaProps } from "@aipexstudio/aipex-react/types";
import { useCallback } from "react";
import { useInputMode } from "./input-mode-context";
/**
 * Input area that switches between voice and text modes.
 *
 * In "voice" mode renders VoiceInput (paused while the agent is streaming);
 * otherwise forwards all props unchanged to the default text input area.
 */
export function BrowserChatInputArea(props: InputAreaProps) {
  const { inputMode, setInputMode } = useInputMode();
  const { onSubmit, status } = props;
  // Send the transcribed text as a message. Depending only on `onSubmit`
  // (rather than the whole props object, as before) keeps this callback
  // referentially stable across unrelated prop changes.
  const handleTranscript = useCallback(
    (text: string) => {
      onSubmit(text);
    },
    [onSubmit],
  );
  const handleSwitchToText = useCallback(() => {
    setInputMode("text");
  }, [setInputMode]);
  if (inputMode === "voice") {
    // Pause the microphone while the agent is producing a response.
    const isStreaming = status === "streaming" || status === "submitted";
    return (
      <div className="flex-1 overflow-hidden">
        <VoiceInput
          onTranscript={handleTranscript}
          isPaused={isStreaming}
          onSwitchToText={handleSwitchToText}
        />
      </div>
    );
  }
  // Text mode: render the default input area, forwarding all props
  return <DefaultInputArea {...(props as ExtendedInputAreaProps)} />;
}

View File

@@ -0,0 +1,90 @@
/**
* BrowserContextLoader
* Rendered inside PromptInput (via the promptExtras slot) to populate
* available contexts (tabs, bookmarks, current page) and available skills.
*
* This component renders nothing visible; it only syncs data from
* browser-runtime providers into the PromptInput context hooks.
*/
import {
type SkillItem,
usePromptInputContexts,
usePromptInputSkills,
} from "@aipexstudio/aipex-react/components/ai-elements/prompt-input";
import type { SkillMetadata } from "@aipexstudio/browser-runtime";
import { skillManager, skillStorage } from "@aipexstudio/browser-runtime";
import { useEffect } from "react";
import { useTabsSync } from "../hooks/use-tabs-sync";
/**
 * Invisible sync component: pushes browser contexts (tabs/bookmarks/page)
 * and enabled skills into the PromptInput context hooks. Renders nothing.
 */
export function BrowserContextLoader() {
  const contexts = usePromptInputContexts();
  const skills = usePromptInputSkills();
  // Keep available contexts in sync with the tab/bookmark/page providers.
  useTabsSync({
    onContextsUpdate: (availableContexts) => {
      contexts.setAvailableContexts(availableContexts);
    },
    onContextRemove: (contextId) => {
      contexts.remove(contextId);
    },
    getSelectedContexts: () => contexts.items,
    debounceDelay: 300,
  });
  // Load enabled skills once, then reload on every skill lifecycle event.
  useEffect(() => {
    const loadSkills = async () => {
      try {
        const allSkills: SkillMetadata[] = await skillStorage.listSkills();
        const skillItems: SkillItem[] = allSkills
          .filter((skill: SkillMetadata) => skill.enabled)
          .map((skill: SkillMetadata) => ({
            id: skill.id,
            name: skill.name,
            description: skill.description,
          }));
        skills.setAvailableSkills(skillItems);
      } catch (error) {
        console.error("[BrowserContextLoader] Failed to load skills:", error);
      }
    };
    // Initial load
    void loadSkills();
    // One subscription per lifecycle event; all trigger a reload.
    const events = [
      "skill_loaded",
      "skill_unloaded",
      "skill_enabled",
      "skill_disabled",
    ] as const;
    const unsubscribers = events.map((event) =>
      skillManager.subscribe(event, () => void loadSkills()),
    );
    return () => {
      for (const unsubscribe of unsubscribers) {
        unsubscribe();
      }
    };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [skills]);
  return null;
}

View File

@@ -0,0 +1,22 @@
/**
* BrowserMessageList
* Wraps the default MessageList and hides it when inputMode is "voice",
* matching aipex's behaviour where messages are hidden in voice mode.
*/
import { DefaultMessageList } from "@aipexstudio/aipex-react/components/chatbot/components";
import type { MessageListProps } from "@aipexstudio/aipex-react/types";
import { useInputMode } from "./input-mode-context";
/**
 * Message list wrapper that hides all messages while voice input is active,
 * matching aipex's full-screen voice behaviour; otherwise delegates to the
 * default message list unchanged.
 */
export function BrowserMessageList(
  props: MessageListProps & { onSuggestionClick?: (text: string) => void },
) {
  const { inputMode } = useInputMode();
  return inputMode === "voice" ? null : <DefaultMessageList {...props} />;
}

View File

@@ -0,0 +1,143 @@
/**
* ChatImagesListener
*
* Invisible component that listens for "provide-current-chat-images" messages
* from the background service worker. When received, it extracts screenshot
* data from the current chat messages (via ChatContext) and responds with the
* image payloads so that the background can trigger downloads (e.g. for the
* download_current_chat_report_zip skill tool).
*
* Must be rendered inside a ChatbotProvider so useChatContext() is available.
*/
import { useChatContext } from "@aipexstudio/aipex-react/components/chatbot/context";
import { useEffect } from "react";
interface ImagePayload {
  id: string;
  parts: Array<{ type: string; imageData: string; imageTitle?: string }>;
}
/** True when `value` is an inline base64 `data:image/...` URL. */
function isImageDataUrl(value: unknown): value is string {
  return typeof value === "string" && value.startsWith("data:image/");
}
/**
 * Extract screenshot data from a "tool" message part.
 * Prefers the inline `screenshot` field (set by ChatAdapter); falls back to
 * `output.imageData`. Returns null when the part carries no usable image.
 */
function imageFromToolPart(part: {
  output?: unknown;
  screenshot?: string;
  toolName?: string;
}): { imageData: string; imageTitle?: string } | null {
  if (isImageDataUrl(part.screenshot)) {
    return {
      imageData: part.screenshot,
      imageTitle: part.toolName || "Screenshot",
    };
  }
  const output = part.output;
  if (output && typeof output === "object" && "imageData" in output) {
    const imageData = (output as { imageData?: string }).imageData;
    if (isImageDataUrl(imageData)) {
      return {
        imageData,
        imageTitle: part.toolName || "Screenshot",
      };
    }
  }
  return null;
}
/**
 * Extract image data from a "file" message part. Only inline data URLs with
 * an image media type are returned; remote URLs are ignored.
 */
function imageFromFilePart(part: {
  mediaType?: string;
  url?: string;
  filename?: string;
}): { imageData: string; imageTitle?: string } | null {
  if (
    part.url?.startsWith("data:image/") &&
    part.mediaType?.startsWith("image/")
  ) {
    return {
      imageData: part.url,
      imageTitle: part.filename || "Image",
    };
  }
  return null;
}
export function ChatImagesListener() {
  const { messages } = useChatContext();
  useEffect(() => {
    const handleRequest = (
      message: unknown,
      _sender: chrome.runtime.MessageSender,
      sendResponse: (response: unknown) => void,
    ): boolean => {
      if (
        !message ||
        typeof message !== "object" ||
        (message as { request?: string }).request !==
          "provide-current-chat-images"
      ) {
        return false;
      }
      try {
        const images: ImagePayload[] = [];
        for (const msg of messages) {
          for (const part of msg.parts) {
            // Each image-bearing part becomes its own payload entry,
            // tagged with its parent message id.
            const extracted =
              part.type === "tool"
                ? imageFromToolPart(
                    part as {
                      output?: unknown;
                      screenshot?: string;
                      toolName?: string;
                    },
                  )
                : part.type === "file"
                  ? imageFromFilePart(
                      part as {
                        mediaType?: string;
                        url?: string;
                        filename?: string;
                      },
                    )
                  : null;
            if (extracted) {
              images.push({
                id: msg.id,
                parts: [{ type: "image", ...extracted }],
              });
            }
          }
        }
        sendResponse({ images });
      } catch (error) {
        console.error("[ChatImagesListener] Error extracting images:", error);
        sendResponse({ images: [], error: String(error) });
      }
      // sendResponse has already been invoked synchronously above; returning
      // true merely keeps the message channel open and is harmless here.
      return true;
    };
    chrome.runtime.onMessage.addListener(handleRequest);
    return () => {
      chrome.runtime.onMessage.removeListener(handleRequest);
    };
  }, [messages]);
  // Render nothing — this is a listener-only component.
  return null;
}

View File

@@ -0,0 +1,78 @@
/**
* InputModeContext
* Shared context for voice/text input mode toggle, persisted in chrome.storage.local.
*/
import type React from "react";
import {
createContext,
useCallback,
useContext,
useEffect,
useState,
} from "react";
// Input mode for the chat composer: spoken ("voice") or typed ("text").
export type InputMode = "voice" | "text";
interface InputModeContextValue {
  // Currently active input mode.
  inputMode: InputMode;
  // Switches the mode and persists the choice (see InputModeProvider).
  setInputMode: (mode: InputMode) => void;
}
// Default used when no provider is mounted: text mode with a no-op setter.
const InputModeContext = createContext<InputModeContextValue>({
  inputMode: "text",
  setInputMode: () => {},
});
// chrome.storage.local key under which the chosen mode is persisted.
const STORAGE_KEY = "aipex-input-mode";
/**
 * Provides the shared voice/text input mode, persisted in
 * chrome.storage.local and kept in sync across extension surfaces.
 */
export function InputModeProvider({ children }: { children: React.ReactNode }) {
  const [inputMode, setInputModeState] = useState<InputMode>("text");
  // Restore the persisted mode once on mount.
  useEffect(() => {
    chrome.storage.local
      .get(STORAGE_KEY)
      .then((result) => {
        const saved = result[STORAGE_KEY];
        if (saved === "voice" || saved === "text") {
          setInputModeState(saved);
        }
      })
      .catch(() => {
        // storage may not be available yet
      });
  }, []);
  // Mirror changes made elsewhere (e.g. another extension instance).
  useEffect(() => {
    const onStorageChanged = (
      changes: Record<string, chrome.storage.StorageChange>,
      areaName: string,
    ) => {
      if (areaName !== "local") return;
      const change = changes[STORAGE_KEY];
      if (!change) return;
      const next = change.newValue;
      if (next === "voice" || next === "text") {
        setInputModeState(next);
      }
    };
    chrome.storage.onChanged.addListener(onStorageChanged);
    return () => chrome.storage.onChanged.removeListener(onStorageChanged);
  }, []);
  // Update local state immediately; persistence is best-effort.
  const setInputMode = useCallback((mode: InputMode) => {
    setInputModeState(mode);
    chrome.storage.local.set({ [STORAGE_KEY]: mode }).catch(() => {});
  }, []);
  return (
    <InputModeContext.Provider value={{ inputMode, setInputMode }}>
      {children}
    </InputModeContext.Provider>
  );
}
/**
 * Hook returning the shared { inputMode, setInputMode } pair.
 * Outside an InputModeProvider it yields the no-op default context value.
 */
export function useInputMode() {
  return useContext(InputModeContext);
}

View File

@@ -0,0 +1,257 @@
import { describe, expect, it } from "vitest";
import { fromStorageFormat, toStorageFormat } from "./message-adapter";
// Shared fixtures: a tiny valid JPEG data URL, a representative screenshot
// uid, and the exact placeholder string that toStorageFormat writes in place
// of stripped base64 image data.
const TEST_IMAGE_DATA = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQ==";
const TEST_SCREENSHOT_UID = "screenshot_1234567890_abcdefghi";
const PLACEHOLDER = "[Image data removed - see following user message]";
describe("message-adapter", () => {
  // Storage direction: base64 imageData must be stripped from screenshot
  // tool results (keeping screenshotUid) and left intact for other tools.
  describe("toStorageFormat screenshot stripping", () => {
    it("should strip base64 imageData from screenshot tool results", () => {
      const output = {
        success: true,
        imageData: TEST_IMAGE_DATA,
        sendToLLM: true,
        screenshotUid: TEST_SCREENSHOT_UID,
        tabId: 1,
        url: "https://example.com",
        title: "Example",
      };
      const messages = [
        {
          id: "msg-1",
          role: "assistant" as const,
          parts: [
            {
              type: "tool" as const,
              toolCallId: "call-1",
              toolName: "capture_screenshot",
              input: { sendToLLM: true },
              output,
              state: "completed" as const,
              screenshot: TEST_IMAGE_DATA,
              screenshotUid: TEST_SCREENSHOT_UID,
            },
          ],
          timestamp: Date.now(),
        },
      ];
      const stored = toStorageFormat(messages as any);
      expect(stored.length).toBe(1);
      // Find the tool_result part
      const toolResultPart = stored[0]!.parts.find(
        (p: any) => p.type === "tool_result",
      ) as any;
      expect(toolResultPart).toBeTruthy();
      // Parse the content and verify imageData is stripped
      const parsedContent = JSON.parse(toolResultPart.content);
      expect(parsedContent.imageData).toBe(PLACEHOLDER);
      expect(parsedContent.screenshotUid).toBe(TEST_SCREENSHOT_UID);
      expect(parsedContent.success).toBe(true);
    });
    it("should not strip non-screenshot tool results", () => {
      const output = {
        tabs: [{ id: 1, title: "Tab" }],
        imageData: TEST_IMAGE_DATA, // Even if it has imageData
      };
      const messages = [
        {
          id: "msg-1",
          role: "assistant" as const,
          parts: [
            {
              type: "tool" as const,
              toolCallId: "call-1",
              toolName: "get_tabs",
              input: {},
              output,
              state: "completed" as const,
            },
          ],
          timestamp: Date.now(),
        },
      ];
      const stored = toStorageFormat(messages as any);
      const toolResultPart = stored[0]!.parts.find(
        (p: any) => p.type === "tool_result",
      ) as any;
      const parsedContent = JSON.parse(toolResultPart.content);
      // get_tabs is not a screenshot tool, so its imageData must survive.
      expect(parsedContent.imageData).toBe(TEST_IMAGE_DATA);
    });
  });
  // Restore direction: screenshotUid comes back on the tool part; the inline
  // screenshot field is only set when real base64 data is present.
  describe("fromStorageFormat screenshotUid restoration", () => {
    it("should restore screenshotUid from stored tool result", () => {
      const storedOutput = {
        success: true,
        imageData: PLACEHOLDER,
        sendToLLM: true,
        screenshotUid: TEST_SCREENSHOT_UID,
        tabId: 1,
      };
      const storedMessages = [
        {
          id: "msg-1",
          role: "assistant" as const,
          parts: [
            {
              type: "tool_use" as const,
              id: "call-1",
              name: "capture_screenshot",
              input: { sendToLLM: true },
            },
            {
              type: "tool_result" as const,
              tool_use_id: "call-1",
              content: JSON.stringify(storedOutput),
              is_error: false,
            },
          ],
          timestamp: Date.now(),
        },
      ];
      const restored = fromStorageFormat(storedMessages as any);
      expect(restored.length).toBe(1);
      // Find the tool part (merged from tool_use + tool_result)
      const toolPart = restored[0]!.parts.find(
        (p: any) => p.type === "tool",
      ) as any;
      expect(toolPart).toBeTruthy();
      expect(toolPart.screenshotUid).toBe(TEST_SCREENSHOT_UID);
      // imageData is the placeholder, not a real data URL, so screenshot should NOT be set
      expect(toolPart.screenshot).toBeUndefined();
      expect(toolPart.state).toBe("completed");
    });
    it("should restore both screenshotUid and screenshot when real imageData is present", () => {
      const storedOutput = {
        success: true,
        imageData: TEST_IMAGE_DATA,
        sendToLLM: true,
        screenshotUid: TEST_SCREENSHOT_UID,
        tabId: 1,
      };
      const storedMessages = [
        {
          id: "msg-1",
          role: "assistant" as const,
          parts: [
            {
              type: "tool_use" as const,
              id: "call-1",
              name: "capture_screenshot",
              input: { sendToLLM: true },
            },
            {
              type: "tool_result" as const,
              tool_use_id: "call-1",
              content: JSON.stringify(storedOutput),
              is_error: false,
            },
          ],
          timestamp: Date.now(),
        },
      ];
      const restored = fromStorageFormat(storedMessages as any);
      const toolPart = restored[0]!.parts.find(
        (p: any) => p.type === "tool",
      ) as any;
      expect(toolPart.screenshotUid).toBe(TEST_SCREENSHOT_UID);
      expect(toolPart.screenshot).toBe(TEST_IMAGE_DATA);
    });
  });
  // Full round-trip: uid survives; stripped base64 does not come back.
  describe("round-trip: toStorageFormat -> fromStorageFormat", () => {
    it("should preserve screenshotUid through round-trip", () => {
      const original = [
        {
          id: "msg-1",
          role: "assistant" as const,
          parts: [
            {
              type: "tool" as const,
              toolCallId: "call-1",
              toolName: "capture_screenshot",
              input: { sendToLLM: true },
              output: {
                success: true,
                imageData: TEST_IMAGE_DATA,
                sendToLLM: true,
                screenshotUid: TEST_SCREENSHOT_UID,
                tabId: 1,
              },
              state: "completed" as const,
              screenshot: TEST_IMAGE_DATA,
              screenshotUid: TEST_SCREENSHOT_UID,
            },
          ],
          timestamp: Date.now(),
        },
      ];
      // Store -> Restore
      const stored = toStorageFormat(original as any);
      const restored = fromStorageFormat(stored);
      const toolPart = restored[0]!.parts.find(
        (p: any) => p.type === "tool",
      ) as any;
      // screenshotUid should survive the round-trip
      expect(toolPart.screenshotUid).toBe(TEST_SCREENSHOT_UID);
      // imageData was stripped during storage, so inline screenshot is gone
      expect(toolPart.screenshot).toBeUndefined();
      expect(toolPart.state).toBe("completed");
      expect(toolPart.toolName).toBe("capture_screenshot");
    });
    it("should handle capture_tab_screenshot round-trip", () => {
      const original = [
        {
          id: "msg-1",
          role: "assistant" as const,
          parts: [
            {
              type: "tool" as const,
              toolCallId: "call-1",
              toolName: "capture_tab_screenshot",
              input: { tabId: 42, sendToLLM: true },
              output: {
                success: true,
                imageData: TEST_IMAGE_DATA,
                sendToLLM: true,
                screenshotUid: TEST_SCREENSHOT_UID,
                tabId: 42,
              },
              state: "completed" as const,
              screenshot: TEST_IMAGE_DATA,
              screenshotUid: TEST_SCREENSHOT_UID,
            },
          ],
          timestamp: Date.now(),
        },
      ];
      const stored = toStorageFormat(original as any);
      const restored = fromStorageFormat(stored);
      const toolPart = restored[0]!.parts.find(
        (p: any) => p.type === "tool",
      ) as any;
      expect(toolPart.screenshotUid).toBe(TEST_SCREENSHOT_UID);
      expect(toolPart.toolName).toBe("capture_tab_screenshot");
    });
  });
});

View File

@@ -6,6 +6,88 @@
import type { UIMessage as ReactUIMessage } from "@aipexstudio/aipex-react/types";
import type { UIMessage as RuntimeUIMessage } from "@aipexstudio/browser-runtime";
/** Tool names whose results may include screenshot image data */
const SCREENSHOT_TOOL_NAMES = new Set([
  "capture_screenshot",
  "capture_screenshot_with_highlight",
  "capture_tab_screenshot",
]);
/** Placeholder that replaces base64 imageData in stored tool results */
const IMAGE_DATA_PLACEHOLDER =
  "[Image data removed - see following user message]";
interface ScreenshotToolInfo {
  /** The base64 data URL if present (may be null if already stripped) */
  imageData: string | null;
  /** The screenshot uid if present */
  screenshotUid: string | null;
}
/**
 * Walk into a parsed tool result to find the layer holding the real fields.
 * Supports { data: { data: {...} } }, { data: {...} }, and flat shapes, in
 * that order of preference.
 */
function getScreenshotActualData(
  parsedOutput: unknown,
): Record<string, unknown> | null {
  if (typeof parsedOutput !== "object" || parsedOutput === null) return null;
  const top = parsedOutput as Record<string, unknown>;
  const middle = top.data as Record<string, unknown> | undefined;
  const inner = middle?.data as Record<string, unknown> | undefined;
  return inner ?? middle ?? top;
}
/**
 * Pull screenshot info (base64 imageData + screenshotUid) out of a parsed
 * screenshot-tool result. Returns null for non-screenshot tools and for
 * results carrying neither field.
 */
function extractScreenshotInfo(
  toolName: string,
  parsedOutput: unknown,
): ScreenshotToolInfo | null {
  if (!SCREENSHOT_TOOL_NAMES.has(toolName)) return null;
  const actual = getScreenshotActualData(parsedOutput);
  if (actual === null) return null;
  const rawImage = actual.imageData;
  const rawUid = actual.screenshotUid;
  const imageData =
    typeof rawImage === "string" && rawImage.startsWith("data:image/")
      ? rawImage
      : null;
  const screenshotUid = typeof rawUid === "string" ? rawUid : null;
  if (imageData === null && screenshotUid === null) return null;
  return { imageData, screenshotUid };
}
/**
 * Replace inline base64 imageData in a serialized screenshot tool result
 * with IMAGE_DATA_PLACEHOLDER. Non-screenshot tools, unparseable content,
 * and results without real image data are returned untouched.
 */
function stripImageDataFromToolOutput(
  toolName: string,
  content: string,
): string {
  if (!SCREENSHOT_TOOL_NAMES.has(toolName)) return content;
  const parsed = safeJsonParse<Record<string, unknown>>(content);
  if (!parsed) return content;
  const actual = getScreenshotActualData(parsed);
  if (!actual) return content;
  const image = actual.imageData;
  if (typeof image !== "string" || !image.startsWith("data:image/")) {
    return content;
  }
  // Mutate the located layer in place, then re-serialize the whole result.
  actual.imageData = IMAGE_DATA_PLACEHOLDER;
  return JSON.stringify(parsed);
}
/**
* Convert aipex-react UIMessage to runtime UIMessage for storage
*/
@@ -15,47 +97,71 @@ export function toStorageFormat(
return messages.map((msg) => ({
id: msg.id,
role: msg.role === "tool" ? "assistant" : msg.role, // Map "tool" to "assistant"
parts: msg.parts.map((part) => {
switch (part.type) {
case "text":
return { type: "text", text: part.text };
case "file":
// Map file to image (store URL as imageData)
return {
type: "image",
imageData: part.url,
imageTitle: part.filename,
};
case "tool":
// Map tool to tool_use or tool_result based on state
if (part.output !== undefined) {
// Avoid double-stringifying if output is already a string
const content =
typeof part.output === "string"
? part.output
: JSON.stringify(part.output);
return {
type: "tool_result",
tool_use_id: part.toolCallId,
content,
is_error: part.state === "error",
};
}
return {
type: "tool_use",
id: part.toolCallId,
name: part.toolName,
input: part.input as Record<string, unknown>,
};
default:
// For context, source-url, reasoning - store as text
if ("text" in part) {
parts: msg.parts.flatMap(
(
part,
):
| RuntimeUIMessage["parts"][number]
| RuntimeUIMessage["parts"][number][] => {
switch (part.type) {
case "text":
return { type: "text", text: part.text };
}
// Fallback: store as text with type info
return { type: "text", text: `[${part.type}]` };
}
}),
case "file":
// Map file to image (store URL as imageData)
return {
type: "image",
imageData: part.url,
imageTitle: part.filename,
};
case "tool":
// Map tool to tool_use + tool_result pair (when completed)
// or just tool_use (when pending/executing).
// Emitting both ensures fromStorageFormat can correlate them
// to restore the proper toolName and input.
if (part.output !== undefined) {
// Avoid double-stringifying if output is already a string.
let content =
typeof part.output === "string"
? part.output
: JSON.stringify(part.output);
// Strip base64 imageData from screenshot tool results before
// persisting to keep stored conversations small and avoid
// storing large blobs. The screenshotUid is preserved in the
// output so images can be loaded from IndexedDB on restore.
content = stripImageDataFromToolOutput(part.toolName, content);
return [
{
type: "tool_use",
id: part.toolCallId,
name: part.toolName,
input: part.input as Record<string, unknown>,
},
{
type: "tool_result",
tool_use_id: part.toolCallId,
content,
is_error: part.state === "error",
},
];
}
return {
type: "tool_use",
id: part.toolCallId,
name: part.toolName,
input: part.input as Record<string, unknown>,
};
default:
// For context, source-url, reasoning - store as text
if ("text" in part) {
return { type: "text", text: part.text };
}
// Fallback: store as text with type info
return { type: "text", text: `[${part.type}]` };
}
},
),
timestamp: msg.timestamp,
})) as RuntimeUIMessage[];
}
@@ -210,7 +316,8 @@ export function fromStorageFormat(
};
}
// Normal successful completion
// Normal successful completion restore screenshot data
const screenshotInfo = extractScreenshotInfo(toolName, parsedOutput);
return {
type: "tool",
toolName,
@@ -218,6 +325,15 @@ export function fromStorageFormat(
input,
output: parsedOutput,
state: "completed" as const,
// Restore screenshotUid so UI can load from IndexedDB
...(screenshotInfo?.screenshotUid
? { screenshotUid: screenshotInfo.screenshotUid }
: {}),
// Restore inline screenshot only if actual base64 is present
// (not when it's been replaced with a placeholder)
...(screenshotInfo?.imageData
? { screenshot: screenshotInfo.imageData }
: {}),
};
}
default:

View File

@@ -5,7 +5,11 @@
* used by browser-ext UI components.
*/
import { SkillConflictError, skillManager } from "@aipexstudio/browser-runtime";
import {
SkillConflictError,
skillManager,
zenfs,
} from "@aipexstudio/browser-runtime";
import type {
SkillClient,
SkillDetail,
@@ -77,6 +81,34 @@ export class SkillClientAdapter implements SkillClient {
return null;
}
}
  /** Return the skill's SKILL.md markdown content (delegates to skillManager). */
  async getSkillContent(skillName: string): Promise<string> {
    return await skillManager.getSkillContent(skillName);
  }
  /** Return the content of one of the skill's scripts (delegates to skillManager). */
  async getSkillScript(skillName: string, scriptPath: string): Promise<string> {
    return await skillManager.getSkillScript(skillName, scriptPath);
  }
  /** Return the content of one of the skill's reference files (delegates to skillManager). */
  async getSkillReference(skillName: string, refPath: string): Promise<string> {
    return await skillManager.getSkillReference(skillName, refPath);
  }
async writeFile(filePath: string, content: string): Promise<void> {
// Validate path: must start with /skills/, no path traversal
if (!filePath.startsWith("/skills/")) {
throw new Error("File path must be under /skills/");
}
const decoded = decodeURIComponent(filePath);
if (decoded.includes("..")) {
throw new Error("Path traversal (..) is not allowed");
}
await zenfs.writeFile(filePath, content);
}
  /** Re-read skill metadata after on-disk changes (delegates to skillManager). */
  async refreshSkillMetadata(skillId: string): Promise<void> {
    await skillManager.refreshSkillMetadata(skillId);
  }
}
// Export singleton instance

View File

@@ -0,0 +1,45 @@
/**
* UpdateBannerWrapper
* Connects the platform-agnostic UpdateBanner component to Chrome extension
* version-checking services.
*/
import { UpdateBanner } from "@aipexstudio/aipex-react/components/chatbot";
import { useCallback } from "react";
import {
checkVersion,
dismissUpdate,
isUpdateDismissed,
openChangelog,
openUpdatePage,
requestUpdate,
} from "../services/version-checker";
/**
 * Bridges the platform-agnostic UpdateBanner component to the Chrome
 * extension's version-checking services. Each service call is wrapped in a
 * stable useCallback so UpdateBanner receives referentially stable props
 * across re-renders.
 */
export function UpdateBannerWrapper() {
  const onCheckVersion = useCallback(() => checkVersion(), []);
  const onIsUpdateDismissed = useCallback(
    (version: string) => isUpdateDismissed(version),
    [],
  );
  const onDismissUpdate = useCallback(
    (version: string) => dismissUpdate(version),
    [],
  );
  const onRequestUpdate = useCallback(() => requestUpdate(), []);
  const onOpenChangelog = useCallback((url: string) => openChangelog(url), []);
  const onOpenUpdatePage = useCallback(() => openUpdatePage(), []);
  return (
    <UpdateBanner
      onCheckVersion={onCheckVersion}
      onIsUpdateDismissed={onIsUpdateDismissed}
      onDismissUpdate={onDismissUpdate}
      onRequestUpdate={onRequestUpdate}
      onOpenChangelog={onOpenChangelog}
      onOpenUpdatePage={onOpenUpdatePage}
    />
  );
}

View File

@@ -11,7 +11,7 @@ import type { Language } from "@aipexstudio/aipex-react/i18n/types";
import { ThemeProvider } from "@aipexstudio/aipex-react/theme/context";
import type { Theme } from "@aipexstudio/aipex-react/theme/types";
import { ChromeStorageAdapter } from "@aipexstudio/browser-runtime";
import React, { useState } from "react";
import React, { useCallback, useEffect, useRef, useState } from "react";
import ReactDOM from "react-dom/client";
import { AuthProvider } from "../../auth";
import { chromeStorageAdapter } from "../../hooks";
@@ -24,12 +24,100 @@ import {
useBrowserTools,
} from "../../lib/browser-agent-config";
import { BrowserChatHeader } from "../../lib/browser-chat-header";
import { BrowserChatInputArea } from "../../lib/browser-chat-input-area";
import { BrowserContextLoader } from "../../lib/browser-context-loader";
import { BrowserMessageList } from "../../lib/browser-message-list";
import { ChatImagesListener } from "../../lib/chat-images-listener";
import { InputModeProvider } from "../../lib/input-mode-context";
import { InterventionModeProvider } from "../../lib/intervention-mode-context";
import { InterventionUI } from "../../lib/intervention-ui";
import { UpdateBannerWrapper } from "../../lib/update-banner-wrapper";
const i18nStorageAdapter = new ChromeStorageAdapter<Language>();
const themeStorageAdapter = new ChromeStorageAdapter<Theme>();
/**
 * Reads and consumes a pending prompt saved by the openWithPrompt external
 * message handler in the background service worker. Prompts older than 5 s
 * are treated as expired and silently discarded.
 * @returns The pending prompt text, or undefined when none is pending.
 */
function usePendingPrompt() {
  const [pendingInput, setPendingInput] = useState<string | undefined>(
    undefined,
  );
  useEffect(() => {
    const check = async () => {
      try {
        const result = await chrome.storage.local.get([
          "aipex-pending-prompt",
          "aipex-pending-prompt-timestamp",
        ]);
        const prompt = result["aipex-pending-prompt"];
        const timestamp = result["aipex-pending-prompt-timestamp"];
        if (prompt && typeof prompt === "string") {
          const now = Date.now();
          // Only use prompts that are less than 5 seconds old
          if (typeof timestamp === "number" && now - timestamp < 5000) {
            setPendingInput(prompt);
          }
        }
        // Always clear storage regardless of expiry. The removal is
        // fire-and-forget, but its promise must not be left dangling:
        // an un-awaited rejection would escape this try/catch as an
        // unhandled promise rejection, so attach a no-op catch.
        if (prompt) {
          chrome.storage.local
            .remove([
              "aipex-pending-prompt",
              "aipex-pending-prompt-timestamp",
            ])
            .catch(() => {});
        }
      } catch {
        // Silently ignore — storage may not be available yet.
      }
    };
    check();
  }, []);
  return pendingInput;
}
/**
 * Manages the "aipex-conversation-active" heartbeat in chrome.storage.local
 * so content scripts can show the breathing border overlay while the AI is
 * actively generating a response.
 * @returns { start, stop } — stable callbacks controlling the heartbeat.
 */
function useConversationHeartbeat() {
  const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
  const start = useCallback(() => {
    // A heartbeat is already running; never stack intervals.
    if (intervalRef.current) return;
    const beat = () => {
      chrome.storage.local
        .set({ "aipex-conversation-active": Date.now() })
        .catch(() => {});
    };
    beat(); // Write immediately, then every 2 s.
    intervalRef.current = setInterval(beat, 2000);
  }, []);
  const stop = useCallback(() => {
    const handle = intervalRef.current;
    if (handle) {
      clearInterval(handle);
      intervalRef.current = null;
    }
    chrome.storage.local.remove("aipex-conversation-active").catch(() => {});
  }, []);
  // Stop the heartbeat (and clear the flag) when the component unmounts.
  useEffect(() => stop, [stop]);
  return { start, stop };
}
function ChatApp() {
const { settings, isLoading } = useChatConfig({
storageAdapter: chromeStorageAdapter,
@@ -51,9 +139,54 @@ function ChatApp() {
...BROWSER_AGENT_CONFIG,
});
const pendingInput = usePendingPrompt();
const heartbeat = useConversationHeartbeat();
const handleStatusChange = useCallback(
(status: string) => {
if (status === "streaming" || status === "submitted") {
heartbeat.start();
} else {
heartbeat.stop();
}
},
[heartbeat],
);
const [interventionMode, setInterventionMode] =
useState<InterventionMode>("passive");
// Sidepanel lifecycle: port connection + cleanup on hide/close
useEffect(() => {
// Long-lived port so the background can detect sidepanel disconnect
const port = chrome.runtime.connect({ name: "sidepanel" });
const handleVisibilityChange = () => {
if (document.hidden) {
// Stop any active recording
chrome.runtime.sendMessage({ request: "stop-recording" }).catch(() => {
/* background may be busy */
});
// Stop element capture on the active tab
chrome.runtime
.sendMessage({
request: "relay-to-active-tab",
message: { request: "stop-capture" },
})
.catch(() => {
/* tab may be closed */
});
}
};
document.addEventListener("visibilitychange", handleVisibilityChange);
return () => {
document.removeEventListener("visibilitychange", handleVisibilityChange);
port.disconnect();
};
}, []);
if (isLoading) {
return (
<div className="flex h-full items-center justify-center">
@@ -63,29 +196,42 @@ function ChatApp() {
}
return (
<InterventionModeProvider
mode={interventionMode}
setMode={setInterventionMode}
>
<ChatBot
agent={agent}
configError={error}
initialSettings={settings}
storageAdapter={chromeStorageAdapter}
components={{
Header: BrowserChatHeader,
}}
slots={{
afterMessages: () => (
<InterventionUI
mode={interventionMode}
onModeChange={setInterventionMode}
/>
),
inputToolbar: (props) => <AutomationModeInputToolbar {...props} />,
}}
/>
</InterventionModeProvider>
<InputModeProvider>
<InterventionModeProvider
mode={interventionMode}
setMode={setInterventionMode}
>
<ChatBot
agent={agent}
configError={error}
initialSettings={settings}
storageAdapter={chromeStorageAdapter}
initialInput={pendingInput}
handlers={{
onStatusChange: handleStatusChange,
}}
components={{
Header: BrowserChatHeader,
MessageList: BrowserMessageList,
InputArea: BrowserChatInputArea,
}}
slots={{
beforeMessages: () => <UpdateBannerWrapper />,
afterMessages: () => (
<>
<InterventionUI
mode={interventionMode}
onModeChange={setInterventionMode}
/>
<ChatImagesListener />
</>
),
inputToolbar: (props) => <AutomationModeInputToolbar {...props} />,
promptExtras: () => <BrowserContextLoader />,
}}
/>
</InterventionModeProvider>
</InputModeProvider>
);
}

View File

@@ -339,6 +339,102 @@ const ContentApp = () => {
);
};
// ============================================================================
// Breathing Border Overlay — mounted OUTSIDE shadow DOM so z-index works
// against page elements. Driven by the "aipex-conversation-active" storage key
// which the sidepanel writes as a heartbeat.
// ============================================================================
const HEARTBEAT_KEY = "aipex-conversation-active"; // chrome.storage.local key written every 2 s by the sidepanel
const HEARTBEAT_TTL_MS = 6_000; // Hide overlay if heartbeat is stale (>6 s)
/**
 * Full-viewport breathing border shown while the AI conversation is active.
 * Visibility follows the sidepanel heartbeat: a storage listener reacts
 * immediately to writes, and a 3 s poll hides the overlay if the heartbeat
 * goes stale (e.g. the sidepanel was closed without cleanup).
 */
function BorderOverlayApp() {
  const [visible, setVisible] = React.useState(false);
  // A heartbeat is "fresh" when it is a numeric timestamp inside the TTL.
  const handleConversationState = React.useCallback((timestamp: unknown) => {
    const fresh =
      typeof timestamp === "number" &&
      Date.now() - timestamp < HEARTBEAT_TTL_MS;
    setVisible(fresh);
  }, []);
  React.useEffect(() => {
    const readHeartbeat = () => {
      chrome.storage.local.get(HEARTBEAT_KEY, (result) => {
        handleConversationState(result[HEARTBEAT_KEY]);
      });
    };
    // Check on mount
    readHeartbeat();
    // React immediately to heartbeat writes from the sidepanel
    const onChange = (
      changes: Record<string, chrome.storage.StorageChange>,
      area: string,
    ) => {
      const change = area === "local" ? changes[HEARTBEAT_KEY] : undefined;
      if (change) {
        handleConversationState(change.newValue);
      }
    };
    chrome.storage.onChanged.addListener(onChange);
    // Poll every 3 s so a stale heartbeat eventually hides the overlay
    const interval = setInterval(readHeartbeat, 3000);
    return () => {
      chrome.storage.onChanged.removeListener(onChange);
      clearInterval(interval);
    };
  }, [handleConversationState]);
  if (!visible) return null;
  return (
    <>
      <div
        style={{
          position: "fixed",
          top: 0,
          left: 0,
          width: "100vw",
          height: "100vh",
          zIndex: 999998,
          pointerEvents: "none",
          animation: "aipexBreathe 2.5s ease-in-out infinite",
          boxShadow: `
            inset 0 0 15px 3px rgba(37, 99, 235, 0.5),
            inset 0 0 25px 5px rgba(59, 130, 246, 0.4),
            inset 0 0 35px 7px rgba(96, 165, 250, 0.3),
            inset 0 0 45px 9px rgba(147, 197, 253, 0.2)
          `,
        }}
      />
      <style>{`
        @keyframes aipexBreathe {
          0%, 100% {
            box-shadow:
              inset 0 0 12px 3px rgba(37,99,235,0.35),
              inset 0 0 20px 5px rgba(59,130,246,0.28),
              inset 0 0 28px 6px rgba(96,165,250,0.22),
              inset 0 0 35px 8px rgba(147,197,253,0.15);
          }
          50% {
            box-shadow:
              inset 0 0 20px 5px rgba(37,99,235,0.7),
              inset 0 0 30px 7px rgba(59,130,246,0.6),
              inset 0 0 40px 9px rgba(96,165,250,0.5),
              inset 0 0 50px 11px rgba(147,197,253,0.35);
          }
        }
      `}</style>
    </>
  );
}
// Wait for DOM to be ready
if (document.readyState === "loading") {
document.addEventListener("DOMContentLoaded", initContentScript);
@@ -347,17 +443,15 @@ if (document.readyState === "loading") {
}
function initContentScript() {
// Mount the content script
// Mount the content script (shadow DOM for isolation)
const container = document.createElement("div");
container.id = "aipex-content-root";
document.body.appendChild(container);
// Create shadow DOM to isolate styles
const shadowRoot = container.attachShadow({ mode: "open" });
const shadowContainer = document.createElement("div");
shadowRoot.appendChild(shadowContainer);
// Inject Tailwind CSS into shadow DOM
const style = document.createElement("style");
style.textContent = `
:host {
@@ -367,11 +461,22 @@ function initContentScript() {
`;
shadowRoot.appendChild(style);
// Render the app
const root = ReactDOM.createRoot(shadowContainer);
root.render(
<React.StrictMode>
<ContentApp />
</React.StrictMode>,
);
// Mount breathing border overlay OUTSIDE shadow DOM so z-index works
const borderContainer = document.createElement("div");
borderContainer.id = "aipex-border-overlay";
document.body.appendChild(borderContainer);
const borderRoot = ReactDOM.createRoot(borderContainer);
borderRoot.render(
<React.StrictMode>
<BorderOverlayApp />
</React.StrictMode>,
);
}

View File

@@ -3,6 +3,7 @@ import {
AlertDescription,
} from "@aipexstudio/aipex-react/components/ui/alert";
import { Badge } from "@aipexstudio/aipex-react/components/ui/badge";
import { Button } from "@aipexstudio/aipex-react/components/ui/button";
import {
Dialog,
DialogContent,
@@ -10,10 +11,20 @@ import {
DialogHeader,
DialogTitle,
} from "@aipexstudio/aipex-react/components/ui/dialog";
import { Textarea } from "@aipexstudio/aipex-react/components/ui/textarea";
import { useTheme } from "@aipexstudio/aipex-react/theme/context";
import type { FileInfo } from "@aipexstudio/browser-runtime";
import { zenfs } from "@aipexstudio/browser-runtime";
import { AlertCircle, Code, File as FileIcon, FileText } from "lucide-react";
import { skillManager, zenfs } from "@aipexstudio/browser-runtime";
import {
AlertCircle,
Code,
Edit,
File as FileIcon,
FileText,
Loader2,
Save,
X,
} from "lucide-react";
import type React from "react";
import { useCallback, useEffect, useState } from "react";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
@@ -24,20 +35,60 @@ import {
import { formatBytes, formatDate, getFileExtension } from "./utils";
// Max file size for editing (1MB)
const MAX_EDIT_SIZE = 1024 * 1024;
/**
 * Validate that a file path is safe to write to.
 * - Must start with /skills/
 * - Must not contain path traversal segments (..), even percent-encoded
 * - Must not contain malformed percent-encoding
 *
 * @param path - Absolute virtual-FS path the user is about to save.
 * @returns valid=true, or valid=false with a human-readable error.
 */
function validateFilePath(path: string): { valid: boolean; error?: string } {
  if (!path.startsWith("/skills/")) {
    return { valid: false, error: "File path must be under /skills/" };
  }
  // Decode so percent-encoded traversal (e.g. %2e%2e) is also caught.
  let decodedPath: string;
  try {
    decodedPath = decodeURIComponent(path);
  } catch {
    // decodeURIComponent throws URIError on malformed sequences like "%zz";
    // the original implementation let that exception escape to the caller.
    return { valid: false, error: "File path contains invalid encoding" };
  }
  if (decodedPath.includes("..")) {
    return { valid: false, error: "Path traversal (..) is not allowed" };
  }
  return { valid: true };
}
/**
 * Extract skill ID from a file path like /skills/&lt;skillId&gt;/...
 *
 * @returns The first path segment after /skills/, or null when the path
 *   does not live under /skills/.
 */
function extractSkillIdFromPath(path: string): string | null {
  const skillIdPattern = /^\/skills\/([^/]+)/;
  const match = skillIdPattern.exec(path);
  if (!match) {
    return null;
  }
  return match[1] ?? null;
}
/**
 * Check if path is a SKILL.md file sitting directly under a skill directory
 * (exactly /skills/&lt;skillId&gt;/SKILL.md — nothing deeper, nothing shallower).
 */
function isSkillMdPath(path: string): boolean {
  const skillMdPattern = /^\/skills\/[^/]+\/SKILL\.md$/;
  return skillMdPattern.test(path);
}
interface FilePreviewProps {
filePath: string | null;
open: boolean;
onOpenChange: (open: boolean) => void;
onFileSaved?: () => void;
}
export const FilePreview: React.FC<FilePreviewProps> = ({
filePath,
open,
onOpenChange,
onFileSaved,
}) => {
const [fileInfo, setFileInfo] = useState<FileInfo | null>(null);
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const [isEditing, setIsEditing] = useState(false);
const [editedContent, setEditedContent] = useState("");
const [saving, setSaving] = useState(false);
const [saveError, setSaveError] = useState<string | null>(null);
const { effectiveTheme } = useTheme();
const loadFile = useCallback(async () => {
@@ -65,6 +116,111 @@ export const FilePreview: React.FC<FilePreviewProps> = ({
}
}, [filePath, open, loadFile]);
// Reset edit state when dialog closes or file changes
useEffect(() => {
if (!open) {
setIsEditing(false);
setEditedContent("");
setSaveError(null);
}
}, [open]);
const handleStartEdit = () => {
if (fileInfo?.content) {
setEditedContent(fileInfo.content);
setIsEditing(true);
setSaveError(null);
}
};
const handleCancelEdit = () => {
setIsEditing(false);
setEditedContent("");
setSaveError(null);
};
const handleSave = async () => {
if (!filePath || !fileInfo) return;
// Validate path
const pathValidation = validateFilePath(filePath);
if (!pathValidation.valid) {
setSaveError(pathValidation.error || "Invalid file path");
return;
}
// Check size limit
const contentSize = new Blob([editedContent]).size;
if (contentSize > MAX_EDIT_SIZE) {
setSaveError(
`File content exceeds maximum size of ${formatBytes(MAX_EDIT_SIZE)}`,
);
return;
}
try {
setSaving(true);
setSaveError(null);
// If this is a SKILL.md file, validate the name field hasn't changed
if (isSkillMdPath(filePath)) {
const skillId = extractSkillIdFromPath(filePath);
if (skillId) {
const nameMatch = editedContent.match(
/^---\n[\s\S]*?name:\s*(.+?)[\s]*\n[\s\S]*?---/m,
);
const parsedName = nameMatch?.[1]?.trim();
if (parsedName && parsedName !== skillId) {
setSaveError(
`Cannot rename skill. The name "${parsedName}" in SKILL.md must match the skill ID "${skillId}". Skill renaming is not supported.`,
);
return;
}
}
}
// Write file to ZenFS
await zenfs.writeFile(filePath, editedContent);
// If this is a SKILL.md file, sync the metadata
if (isSkillMdPath(filePath)) {
const skillId = extractSkillIdFromPath(filePath);
if (skillId) {
try {
await skillManager.refreshSkillMetadata(skillId);
} catch (metadataErr) {
// Log but don't fail - file was saved successfully
console.error("Failed to sync skill metadata:", metadataErr);
}
}
}
// Reload file info to show updated content/mtime
await loadFile();
// Exit edit mode
setIsEditing(false);
setEditedContent("");
// Notify parent that file was saved
onFileSaved?.();
} catch (err) {
const errorMsg =
err instanceof Error ? err.message : "Failed to save file";
setSaveError(errorMsg);
} finally {
setSaving(false);
}
};
// Check if file can be edited
const canEdit =
fileInfo?.isText &&
fileInfo?.type === "file" &&
fileInfo?.content !== undefined &&
filePath?.startsWith("/skills/");
const renderContent = () => {
if (loading) {
return (
@@ -100,18 +256,77 @@ export const FilePreview: React.FC<FilePreviewProps> = ({
}
// Text file
if (fileInfo.isText && fileInfo.content) {
if (fileInfo.isText && fileInfo.content !== undefined) {
const ext = getFileExtension(fileInfo.name);
const language = getLanguageFromExtension(ext);
const isDark = effectiveTheme === "dark";
// Edit mode
if (isEditing) {
return (
<div className="space-y-3">
<div className="flex items-center justify-between">
<div className="flex items-center gap-2">
<Edit className="h-4 w-4 text-muted-foreground" />
<span className="text-sm text-muted-foreground">
Editing {language ? `${language} file` : "text file"}
</span>
</div>
<div className="flex items-center gap-2">
<Button
variant="outline"
size="sm"
onClick={handleCancelEdit}
disabled={saving}
>
<X className="h-4 w-4 mr-1" />
Cancel
</Button>
<Button size="sm" onClick={handleSave} disabled={saving}>
{saving ? (
<Loader2 className="h-4 w-4 mr-1 animate-spin" />
) : (
<Save className="h-4 w-4 mr-1" />
)}
Save
</Button>
</div>
</div>
{saveError && (
<Alert variant="destructive">
<AlertCircle className="h-4 w-4" />
<AlertDescription>{saveError}</AlertDescription>
</Alert>
)}
<Textarea
value={editedContent}
onChange={(e) => setEditedContent(e.target.value)}
className="font-mono text-sm min-h-[400px] resize-y"
placeholder="File content..."
disabled={saving}
/>
</div>
);
}
// View mode
return (
<div className="space-y-2">
<div className="flex items-center gap-2">
<Code className="h-4 w-4 text-muted-foreground" />
<span className="text-sm text-muted-foreground">
{language ? `${language} code` : "Text file"}
</span>
<div className="flex items-center justify-between">
<div className="flex items-center gap-2">
<Code className="h-4 w-4 text-muted-foreground" />
<span className="text-sm text-muted-foreground">
{language ? `${language} code` : "Text file"}
</span>
</div>
{canEdit && (
<Button variant="outline" size="sm" onClick={handleStartEdit}>
<Edit className="h-4 w-4 mr-1" />
Edit
</Button>
)}
</div>
<div className="relative border rounded-md overflow-hidden">
{/* Language label */}

View File

@@ -38,10 +38,16 @@ import { formatBytes } from "./file-components/utils.js";
interface FileExplorerProps {
basePath?: string;
/** When set, auto-opens preview for this file path (deep-link from SkillDetails). */
initialFilePath?: string | null;
/** Called after initialFilePath has been opened, so parent can clear the pending path. */
onInitialFileOpened?: () => void;
}
export const FileExplorerWrapper: React.FC<FileExplorerProps> = ({
basePath = "/skills",
initialFilePath,
onInitialFileOpened,
}) => {
const [fileTree, setFileTree] = useState<FileTreeNode[]>([]);
const [diskUsage, setDiskUsage] = useState<DiskUsage | null>(null);
@@ -81,6 +87,15 @@ export const FileExplorerWrapper: React.FC<FileExplorerProps> = ({
void loadFileSystem();
}, [loadFileSystem]);
// Handle deep-link: when initialFilePath is set, open that file in the preview
useEffect(() => {
if (initialFilePath && !loading) {
setSelectedFile(initialFilePath);
setPreviewOpen(true);
onInitialFileOpened?.();
}
}, [initialFilePath, loading, onInitialFileOpened]);
const handleRefresh = () => {
void loadFileSystem();
};
@@ -282,6 +297,7 @@ export const FileExplorerWrapper: React.FC<FileExplorerProps> = ({
filePath={selectedFile}
open={previewOpen}
onOpenChange={setPreviewOpen}
onFileSaved={handleRefresh}
/>
</div>
);

View File

@@ -1,4 +1,5 @@
import type { AppSettings } from "@aipexstudio/aipex-core";
import type { STTConfigAdapter } from "@aipexstudio/aipex-react";
import { SettingsPage } from "@aipexstudio/aipex-react";
import { I18nProvider } from "@aipexstudio/aipex-react/i18n/context";
import type { Language } from "@aipexstudio/aipex-react/i18n/types";
@@ -7,18 +8,54 @@ import type { Theme } from "@aipexstudio/aipex-react/theme/types";
import { ChromeStorageAdapter } from "@aipexstudio/browser-runtime";
import type { LanguageModel } from "ai";
import { generateText } from "ai";
import React, { useCallback } from "react";
import React, { useCallback, useMemo } from "react";
import ReactDOM from "react-dom/client";
import { chromeStorageAdapter } from "../../hooks";
import { createAIProvider } from "../../lib/ai-provider";
import { SkillsOptionsTab } from "./skills-tab";
/**
 * Parse and validate URL params for deep-linking.
 *
 * @param search - Query string to parse; defaults to the current page's
 *   window.location.search, so existing zero-argument callers (including
 *   useMemo(parseUrlParams, [])) are unaffected.
 * @returns tab restricted to the known settings tabs (undefined otherwise)
 *   and skill bounded to 200 characters.
 */
function parseUrlParams(search: string = window.location.search) {
  const params = new URLSearchParams(search);
  const tabAllowlist = new Set(["general", "ai", "skills"]);
  const rawTab = params.get("tab");
  const tab =
    rawTab && tabAllowlist.has(rawTab)
      ? (rawTab as "general" | "ai" | "skills")
      : undefined;
  const rawSkill = params.get("skill");
  // Bound skill name length to prevent abuse
  const skill = rawSkill ? rawSkill.slice(0, 200) : undefined;
  return { tab, skill };
}
import "../tailwind.css";
const i18nStorageAdapter = new ChromeStorageAdapter<Language>();
const themeStorageAdapter = new ChromeStorageAdapter<Theme>();
// Persists ElevenLabs speech-to-text settings in chrome.storage.local,
// returning empty strings when nothing has been configured yet.
const chromeSttAdapter: STTConfigAdapter = {
  load: async () => {
    const { elevenlabsApiKey, elevenlabsModelId } =
      await chrome.storage.local.get([
        "elevenlabsApiKey",
        "elevenlabsModelId",
      ]);
    return {
      apiKey: (elevenlabsApiKey as string) || "",
      modelId: (elevenlabsModelId as string) || "",
    };
  },
  save: async ({ apiKey, modelId }) => {
    await chrome.storage.local.set({
      elevenlabsApiKey: apiKey,
      elevenlabsModelId: modelId,
    });
  },
};
function OptionsPageContent() {
const { tab: initialTab, skill: initialSkill } = useMemo(parseUrlParams, []);
const handleTestConnection = useCallback(async (settings: AppSettings) => {
try {
const provider = createAIProvider(settings);
@@ -44,7 +81,10 @@ function OptionsPageContent() {
<SettingsPage
storageAdapter={chromeStorageAdapter}
onTestConnection={handleTestConnection}
skillsContent={<SkillsOptionsTab />}
skillsContent={<SkillsOptionsTab initialSkill={initialSkill} />}
sttConfig={chromeSttAdapter}
initialTab={initialTab}
initialSkill={initialSkill}
/>
</div>
);

View File

@@ -21,11 +21,17 @@ import {
import { skillClientAdapter } from "../../lib/skill-client-adapter";
import { FileExplorerWrapper } from "./file-explorer-wrapper";
export function SkillsOptionsTab() {
interface SkillsOptionsTabProps {
/** Pre-open a specific skill's detail dialog by name. */
initialSkill?: string;
}
export function SkillsOptionsTab({ initialSkill }: SkillsOptionsTabProps) {
const [refreshKey, setRefreshKey] = useState(0);
const [skillsSubTab, setSkillsSubTab] = useState<"skills" | "files">(
"skills",
);
const [pendingFilePath, setPendingFilePath] = useState<string | null>(null);
const handleUploadSuccess = useCallback((skill: SkillMetadata) => {
console.log("Skill uploaded successfully:", skill.name);
@@ -42,6 +48,17 @@ export function SkillsOptionsTab() {
setRefreshKey((prev) => prev + 1);
}, []);
const handleNavigateToFile = useCallback((filePath: string) => {
// Switch to the files sub-tab and set the file to open
setPendingFilePath(filePath);
setSkillsSubTab("files");
}, []);
const handleInitialFileOpened = useCallback(() => {
// Clear the pending file path after it's been opened
setPendingFilePath(null);
}, []);
return (
<div className="space-y-6">
{/* Sub-tabs for Skills */}
@@ -76,12 +93,17 @@ export function SkillsOptionsTab() {
key={refreshKey}
skillClient={skillClientAdapter}
onSkillUpdate={handleSkillUpdate}
onNavigateToFile={handleNavigateToFile}
initialSkill={initialSkill}
/>
</TabsContent>
{/* File System Sub-tab */}
<TabsContent value="files">
<FileExplorerWrapper />
<FileExplorerWrapper
initialFilePath={pendingFilePath}
onInitialFileOpened={handleInitialFileOpened}
/>
</TabsContent>
</Tabs>
</div>

View File

@@ -1,3 +1,18 @@
import { zenfs } from "@aipexstudio/browser-runtime";
import { quickjs } from "@aipexstudio/browser-runtime/lib/vm/quickjs-manager";
import { renderChatApp } from "../common/app-root";
// Pre-initialize QuickJS and ZenFS on sidepanel startup so that
// the first skill execution doesn't incur a cold-start WASM load.
const initializeVM = async (): Promise<void> => {
  try {
    // Warm both WASM runtimes concurrently; either failure is logged
    // rather than blocking the chat UI from rendering.
    await Promise.all([zenfs.initialize(), quickjs.initialize()]);
    console.log("[Sidepanel] QuickJS and ZenFS initialized");
  } catch (error) {
    console.error("[Sidepanel] Failed to initialize VM:", error);
  }
};
initializeVM();
renderChatApp();

View File

@@ -0,0 +1,59 @@
// Web authentication
// Website URL helpers
export {
buildWebsiteUrl,
isWebsiteDomain,
WEBSITE_HOST,
WEBSITE_ORIGIN,
} from "../config/website";
// Sound effects
export {
playSoundEffect,
type SoundEffectType,
soundEffects,
} from "./sound-effects";
// Tool management
export {
type AITool,
clearDynamicTools,
getAllTools,
getTool,
getToolCount,
getToolDescription,
getToolStats,
getToolsForOpenAI,
hasTool,
registerDynamicTool,
searchTools,
ToolCategory,
type ToolCategoryType,
type ToolEventType,
ToolManager,
type ToolMetadata,
toolManager,
unregisterDynamicTool,
} from "./tool-manager";
// Version checking
export {
checkVersion,
clearDismissedUpdate,
compareVersions,
dismissUpdate,
fetchLatestVersion,
getCurrentVersion,
getLastKnownVersion,
isUpdateDismissed,
openChangelog,
openUpdatePage,
requestUpdate,
saveCurrentVersionAsKnown,
type VersionCheckResult,
type VersionInfo,
} from "./version-checker";
export {
AUTH_COOKIE_NAMES,
getAuthCookieHeader,
hasAuthCookies,
WEBSITE_URL,
} from "./web-auth";

View File

@@ -0,0 +1,318 @@
/**
* Sound Effects System
*
* Provides audio feedback for mode transitions using Web Audio API.
* Generates synthetic sounds dynamically without requiring audio files.
*/
import { chromeStorageAdapter } from "@aipexstudio/browser-runtime";
/**
 * Sound effect types — one per mode transition / conversation lifecycle event.
 */
export type SoundEffectType =
  | "enter-immersive" // User enters focus mode (focus on Chrome + AIPex group)
  | "enter-background" // User enters background mode (switches away)
  | "conversation-start" // Conversation begins
  | "conversation-end"; // Conversation ends
/**
 * Sound effects configuration
 */
interface SoundConfig {
  enabled: boolean; // Master on/off switch for all effects
  volume: number; // 0.0 to 1.0
  playInBackgroundMode: boolean; // Whether to play sounds when in background mode
}
/**
 * Default sound configuration (used until storage is read, or on bad values)
 */
const DEFAULT_CONFIG: SoundConfig = {
  enabled: true,
  volume: 0.5,
  playInBackgroundMode: false,
};
// Storage keys under which each SoundConfig field persists
const SOUND_EFFECTS_ENABLED_KEY = "soundEffectsEnabled";
const SOUND_EFFECTS_VOLUME_KEY = "soundEffectsVolume";
const SOUND_EFFECTS_PLAY_IN_BACKGROUND_KEY = "soundEffectsPlayInBackground";
/**
 * Sound Effects Manager
 *
 * Generates short synthetic tones with the Web Audio API — no audio files.
 * Playback is gated by user configuration (enabled / volume / play-in-
 * background) and by the current immersive/background mode.
 */
class SoundEffectsManager {
  // Lazily created; browsers require a user gesture before audio can start.
  private audioContext: AudioContext | null = null;
  private config: SoundConfig = DEFAULT_CONFIG;
  private currentMode: "immersive" | "background" = "background";

  constructor() {
    // Fire-and-forget: defaults are in effect until storage resolves.
    this.loadConfig();
  }

  /**
   * Load configuration from storage.
   *
   * Stored values are loosely typed (booleans may round-trip as strings),
   * so every field is coerced defensively and falls back to DEFAULT_CONFIG.
   */
  private async loadConfig(): Promise<void> {
    try {
      const enabled = await chromeStorageAdapter.get(SOUND_EFFECTS_ENABLED_KEY);
      const volume = await chromeStorageAdapter.get(SOUND_EFFECTS_VOLUME_KEY);
      const playInBackground = await chromeStorageAdapter.get(
        SOUND_EFFECTS_PLAY_IN_BACKGROUND_KEY,
      );
      // Accept a number or numeric string. Fix: a stored volume of 0 (mute)
      // was previously falsy and silently reset to 0.5, and NaN from a
      // corrupt value leaked through. Clamp to the valid [0, 1] range.
      const parsedVolume =
        typeof volume === "number" ? volume : parseFloat(volume as string);
      this.config = {
        enabled: enabled !== "false" && enabled !== false, // Default to true
        volume: Number.isFinite(parsedVolume)
          ? Math.min(1, Math.max(0, parsedVolume))
          : DEFAULT_CONFIG.volume,
        playInBackgroundMode:
          playInBackground === "true" || playInBackground === true,
      };
    } catch (error) {
      console.warn(
        "⚠️ [SoundEffects] Failed to load config, using defaults:",
        error,
      );
    }
  }

  /**
   * Update configuration, persisting only the fields that were provided.
   */
  public async updateConfig(config: Partial<SoundConfig>): Promise<void> {
    this.config = { ...this.config, ...config };
    if (config.enabled !== undefined) {
      await chromeStorageAdapter.set(SOUND_EFFECTS_ENABLED_KEY, config.enabled);
    }
    if (config.volume !== undefined) {
      await chromeStorageAdapter.set(SOUND_EFFECTS_VOLUME_KEY, config.volume);
    }
    if (config.playInBackgroundMode !== undefined) {
      await chromeStorageAdapter.set(
        SOUND_EFFECTS_PLAY_IN_BACKGROUND_KEY,
        config.playInBackgroundMode,
      );
    }
  }

  /**
   * Get a copy of the current configuration.
   */
  public getConfig(): SoundConfig {
    return { ...this.config };
  }

  /**
   * Set current mode (affects whether sounds are suppressed — see
   * shouldPlaySound).
   */
  public setMode(mode: "immersive" | "background"): void {
    this.currentMode = mode;
  }

  /**
   * Initialize audio context (must be called after user interaction).
   *
   * @returns The shared AudioContext, or null if creation failed.
   */
  private getAudioContext(): AudioContext | null {
    if (!this.audioContext) {
      try {
        this.audioContext = new AudioContext();
      } catch (error) {
        console.warn("⚠️ [SoundEffects] Failed to create AudioContext:", error);
        return null;
      }
    }
    // Resume context if suspended (browser autoplay policy)
    if (this.audioContext.state === "suspended") {
      this.audioContext.resume().catch((err) => {
        console.warn("⚠️ [SoundEffects] Failed to resume AudioContext:", err);
      });
    }
    return this.audioContext;
  }

  /**
   * Check if sound should be played based on current mode and config.
   */
  private shouldPlaySound(): boolean {
    if (!this.config.enabled) {
      return false;
    }
    // If in background mode and not allowed to play in background, skip
    if (
      this.currentMode === "background" &&
      !this.config.playInBackgroundMode
    ) {
      return false;
    }
    return true;
  }

  /**
   * Play a sound effect. Silently does nothing when disabled, suppressed
   * by background mode, or when no AudioContext is available.
   */
  public play(type: SoundEffectType): void {
    if (!this.shouldPlaySound()) {
      return;
    }
    const audioContext = this.getAudioContext();
    if (!audioContext) {
      return;
    }
    switch (type) {
      case "enter-immersive":
        this.playEnterImmersive(audioContext);
        break;
      case "enter-background":
        this.playEnterBackground(audioContext);
        break;
      case "conversation-start":
        this.playConversationStart(audioContext);
        break;
      case "conversation-end":
        this.playConversationEnd(audioContext);
        break;
    }
  }

  /**
   * Enter Focus Mode Sound
   * Bright, upward sweep: C5 (523Hz) → E5 (659Hz)
   * Duration: 200ms
   */
  private playEnterImmersive(audioContext: AudioContext): void {
    const now = audioContext.currentTime;
    const duration = 0.2;
    // Oscillator for tone
    const oscillator = audioContext.createOscillator();
    oscillator.type = "sine";
    oscillator.frequency.setValueAtTime(523, now); // C5
    oscillator.frequency.exponentialRampToValueAtTime(659, now + duration); // E5
    // Gain envelope for smooth fade out
    const gainNode = audioContext.createGain();
    gainNode.gain.setValueAtTime(this.config.volume * 0.3, now);
    gainNode.gain.exponentialRampToValueAtTime(0.01, now + duration);
    // Connect nodes
    oscillator.connect(gainNode);
    gainNode.connect(audioContext.destination);
    // Play
    oscillator.start(now);
    oscillator.stop(now + duration);
  }

  /**
   * Enter Background Mode Sound
   * Soft, downward sweep: E5 (659Hz) → C4 (262Hz)
   * Duration: 150ms
   */
  private playEnterBackground(audioContext: AudioContext): void {
    const now = audioContext.currentTime;
    const duration = 0.15;
    // Oscillator for tone
    const oscillator = audioContext.createOscillator();
    oscillator.type = "sine";
    oscillator.frequency.setValueAtTime(659, now); // E5
    oscillator.frequency.exponentialRampToValueAtTime(262, now + duration); // C4
    // Gain envelope for smooth fade out
    const gainNode = audioContext.createGain();
    gainNode.gain.setValueAtTime(this.config.volume * 0.2, now);
    gainNode.gain.exponentialRampToValueAtTime(0.01, now + duration);
    // Connect nodes
    oscillator.connect(gainNode);
    gainNode.connect(audioContext.destination);
    // Play
    oscillator.start(now);
    oscillator.stop(now + duration);
  }

  /**
   * Conversation Start Sound
   * Warm notification: single tone at A4 (440Hz)
   * Duration: 100ms
   */
  private playConversationStart(audioContext: AudioContext): void {
    const now = audioContext.currentTime;
    const duration = 0.1;
    // Oscillator for tone
    const oscillator = audioContext.createOscillator();
    oscillator.type = "sine";
    oscillator.frequency.setValueAtTime(440, now); // A4
    // Gain envelope
    const gainNode = audioContext.createGain();
    gainNode.gain.setValueAtTime(this.config.volume * 0.25, now);
    gainNode.gain.exponentialRampToValueAtTime(0.01, now + duration);
    // Connect nodes
    oscillator.connect(gainNode);
    gainNode.connect(audioContext.destination);
    // Play
    oscillator.start(now);
    oscillator.stop(now + duration);
  }

  /**
   * Conversation End Sound
   * Gentle completion: single tone at G4 (392Hz)
   * Duration: 120ms
   */
  private playConversationEnd(audioContext: AudioContext): void {
    const now = audioContext.currentTime;
    const duration = 0.12;
    // Oscillator for tone
    const oscillator = audioContext.createOscillator();
    oscillator.type = "sine";
    oscillator.frequency.setValueAtTime(392, now); // G4
    // Gain envelope
    const gainNode = audioContext.createGain();
    gainNode.gain.setValueAtTime(this.config.volume * 0.2, now);
    gainNode.gain.exponentialRampToValueAtTime(0.01, now + duration);
    // Connect nodes
    oscillator.connect(gainNode);
    gainNode.connect(audioContext.destination);
    // Play
    oscillator.start(now);
    oscillator.stop(now + duration);
  }

  /**
   * Cleanup audio context. Safe to call repeatedly.
   */
  public dispose(): void {
    if (this.audioContext) {
      // close() returns a Promise; swallow rejection (e.g. already closed)
      // instead of surfacing an unhandled promise rejection.
      this.audioContext.close().catch(() => {});
      this.audioContext = null;
    }
  }
}
/**
 * Global sound effects manager instance (module-level singleton).
 */
export const soundEffects = new SoundEffectsManager();
/**
 * Play a sound effect (convenience function delegating to the singleton).
 *
 * @param type - Which mode-transition or conversation sound to play.
 */
export function playSoundEffect(type: SoundEffectType): void {
  soundEffects.play(type);
}

View File

@@ -0,0 +1,248 @@
/**
* Tool Manager Service
* Provides a unified interface for tool management in the browser extension
*
* This is a simplified adapter that wraps the browser-runtime tools
* for use in the browser extension context.
*/
import type { FunctionTool } from "@aipexstudio/aipex-core";
import { allBrowserTools } from "@aipexstudio/browser-runtime";
/** Descriptive metadata for a tool: identity, docs, schema, usage examples. */
export interface ToolMetadata {
  name: string;
  description: string;
  category: string;
  inputSchema: unknown; // JSON-schema-like description of the tool's input
  examples?: string[];
}
/** OpenAI function-calling wire format for a single tool. */
export interface AITool {
  type: "function";
  function: {
    name: string;
    description: string;
    parameters: unknown;
  };
}
/** Events emitted by ToolManager when dynamic tools come and go. */
export type ToolEventType = "tool_registered" | "tool_unregistered";
// Callback invoked with the event payload (e.g. { name }) on each emit.
type ToolSubscriber = (data: unknown) => void;
/**
 * Tool categories for organization
 */
export const ToolCategory = {
  BROWSER: "browser",
  UI: "ui",
  PAGE: "page",
  SCREENSHOT: "screenshot",
  DOWNLOAD: "download",
  INTERVENTION: "intervention",
  SKILL: "skill",
} as const;
export type ToolCategoryType = (typeof ToolCategory)[keyof typeof ToolCategory];
/**
 * Unified Tool Manager Service
 * Provides simplified tool management interface for the browser extension.
 *
 * Combines the static browser tool set with dynamically registered tools
 * (e.g. from the skills system) and notifies subscribers on changes.
 * Singleton: obtain via ToolManager.getInstance() or the exported toolManager.
 */
export class ToolManager {
  private static instance: ToolManager;
  private dynamicTools: Map<string, FunctionTool> = new Map();
  private subscribers: Map<ToolEventType, Set<ToolSubscriber>> = new Map();

  private constructor() {}

  /** Lazily create and return the process-wide singleton. */
  public static getInstance(): ToolManager {
    ToolManager.instance ??= new ToolManager();
    return ToolManager.instance;
  }

  /**
   * Get all available tools: the static browser set followed by any
   * dynamically registered ones. Returns a fresh array each call.
   */
  public getAllTools(): FunctionTool[] {
    return [...allBrowserTools, ...this.dynamicTools.values()];
  }

  /**
   * Get tool by name. Static tools take precedence over dynamic ones.
   */
  public getTool(name: string): FunctionTool | undefined {
    const fromStatic = allBrowserTools.find((t) => t.name === name);
    return fromStatic ?? this.dynamicTools.get(name);
  }

  /**
   * Check if a tool exists (static or dynamic).
   */
  public hasTool(name: string): boolean {
    return this.getTool(name) !== undefined;
  }

  /**
   * Get total tool count (static + dynamic).
   */
  public getToolCount(): number {
    return allBrowserTools.length + this.dynamicTools.size;
  }

  /**
   * Get a tool's description, or undefined when the tool is unknown.
   */
  public getToolDescription(name: string): string | undefined {
    return this.getTool(name)?.description;
  }

  /**
   * Search tools by query (case-insensitive name or description match).
   */
  public searchTools(query: string): FunctionTool[] {
    const needle = query.toLowerCase();
    const matches = (text: string) => text.toLowerCase().includes(needle);
    return this.getAllTools().filter(
      (tool) => matches(tool.name) || matches(tool.description),
    );
  }

  /**
   * Get tools formatted for OpenAI-compatible APIs.
   */
  public getToolsForOpenAI(): AITool[] {
    return this.getAllTools().map(({ name, description, parameters }) => ({
      type: "function" as const,
      function: { name, description, parameters },
    }));
  }

  /**
   * Get tool statistics broken down by origin.
   */
  public getToolStats(): {
    total: number;
    static: number;
    dynamic: number;
  } {
    const staticCount = allBrowserTools.length;
    const dynamicCount = this.dynamicTools.size;
    return {
      total: staticCount + dynamicCount,
      static: staticCount,
      dynamic: dynamicCount,
    };
  }

  /**
   * Register a dynamic tool (e.g. from skills system); replaces any
   * existing dynamic tool with the same name and notifies subscribers.
   */
  public registerDynamicTool(tool: FunctionTool): void {
    this.dynamicTools.set(tool.name, tool);
    console.log(`[ToolManager] Dynamic tool registered: ${tool.name}`);
    this._emit("tool_registered", { name: tool.name });
  }

  /**
   * Unregister a dynamic tool.
   *
   * @returns true when a tool was actually removed.
   */
  public unregisterDynamicTool(name: string): boolean {
    if (!this.dynamicTools.delete(name)) {
      return false;
    }
    console.log(`[ToolManager] Dynamic tool unregistered: ${name}`);
    this._emit("tool_unregistered", { name });
    return true;
  }

  /**
   * Clear all dynamic tools, emitting one unregister event per removed tool.
   */
  public clearDynamicTools(): void {
    const names = [...this.dynamicTools.keys()];
    this.dynamicTools.clear();
    for (const name of names) {
      this._emit("tool_unregistered", { name });
    }
    console.log(`[ToolManager] Cleared ${names.length} dynamic tools`);
  }

  // ===== Event Subscription Methods =====

  /**
   * Subscribe to tool events.
   *
   * @returns An unsubscribe function for this callback.
   */
  public subscribe(event: ToolEventType, callback: ToolSubscriber): () => void {
    let eventSubscribers = this.subscribers.get(event);
    if (!eventSubscribers) {
      eventSubscribers = new Set();
      this.subscribers.set(event, eventSubscribers);
    }
    eventSubscribers.add(callback);
    // Return an unsubscribe function
    return () => this.unsubscribe(event, callback);
  }

  /**
   * Unsubscribe from tool events. No-op if the callback was not subscribed.
   */
  public unsubscribe(event: ToolEventType, callback: ToolSubscriber): void {
    this.subscribers.get(event)?.delete(callback);
  }

  /**
   * Emit an event to every subscriber; a throwing subscriber is logged
   * and does not prevent later subscribers from running.
   */
  private _emit(event: ToolEventType, data: unknown): void {
    const eventSubscribers = this.subscribers.get(event);
    if (!eventSubscribers) {
      return;
    }
    for (const callback of eventSubscribers) {
      try {
        callback(data);
      } catch (e) {
        console.error(
          `[ToolManager] Error in subscriber for event "${event}":`,
          e,
        );
      }
    }
  }
}
// Export singleton instance
export const toolManager = ToolManager.getInstance();
// Export convenience functions — thin module-level wrappers over the singleton
// so callers can import plain functions instead of the class instance.
export const getAllTools = () => toolManager.getAllTools();
export const getTool = (name: string) => toolManager.getTool(name);
export const hasTool = (name: string) => toolManager.hasTool(name);
export const getToolCount = () => toolManager.getToolCount();
export const getToolDescription = (name: string) =>
  toolManager.getToolDescription(name);
export const searchTools = (query: string) => toolManager.searchTools(query);
export const getToolsForOpenAI = () => toolManager.getToolsForOpenAI();
export const getToolStats = () => toolManager.getToolStats();
export const registerDynamicTool = (tool: FunctionTool) =>
  toolManager.registerDynamicTool(tool);
export const unregisterDynamicTool = (name: string) =>
  toolManager.unregisterDynamicTool(name);
export const clearDynamicTools = () => toolManager.clearDynamicTools();

View File

@@ -0,0 +1,230 @@
/**
* Version Checker Service
* Checks the current extension version against the latest version from the server
*/
import { buildWebsiteUrl } from "../config/website";
export interface VersionInfo {
  /** Semantic version string reported by the server, e.g. "1.4.2". */
  version: string;
  /** Release notes, or null when the server provided none. */
  notes: string | null;
  /** Release timestamp as provided by the server. */
  releasedAt: string;
  /** URL of the changelog page for this release. */
  changelogUrl: string;
}
export interface VersionCheckResult {
  /** True when the server reports a version newer than the installed one. */
  hasUpdate: boolean;
  /** Version of the currently installed extension (from the manifest). */
  currentVersion: string;
  /** Latest version reported by the server, or null if the fetch failed. */
  latestVersion: string | null;
  /** Changelog URL (server-provided, or a fallback built for currentVersion). */
  changelogUrl: string;
  /** True when the extension was just updated (current > last known version). */
  isNewlyUpdated: boolean;
  /** Release notes for the latest version, if any. */
  notes: string | null;
}
// chrome.storage.local keys used to persist version-check state.
const VERSION_STORAGE_KEY = "aipex-last-known-version";
const UPDATE_DISMISSED_KEY = "aipex-update-dismissed-version";
/**
 * Get the currently installed extension version as declared in the manifest.
 */
export function getCurrentVersion(): string {
  const manifest = chrome.runtime.getManifest();
  return manifest.version;
}
/**
 * Fetch the latest version info from the server.
 *
 * Returns null on network failure, a non-2xx response, or a malformed
 * payload. The JSON body is validated at runtime instead of being blindly
 * cast to VersionInfo, so a bad server response cannot propagate a
 * non-string `version` into compareVersions.
 */
export async function fetchLatestVersion(): Promise<VersionInfo | null> {
  try {
    const response = await fetch(buildWebsiteUrl("/api/release/latest"), {
      method: "GET",
      headers: {
        "Content-Type": "application/json",
      },
    });
    if (!response.ok) {
      console.warn(
        "[VersionChecker] Failed to fetch latest version:",
        response.status,
      );
      return null;
    }
    const data: unknown = await response.json();
    // Minimal shape validation: `version` is the only field downstream code
    // relies on structurally; the rest are passed through as-is.
    if (
      typeof data !== "object" ||
      data === null ||
      typeof (data as { version?: unknown }).version !== "string"
    ) {
      console.warn("[VersionChecker] Malformed version payload:", data);
      return null;
    }
    return data as VersionInfo;
  } catch (error) {
    console.error("[VersionChecker] Error fetching latest version:", error);
    return null;
  }
}
/**
 * Compare two dotted version strings numerically, segment by segment.
 * Missing segments are treated as 0, so "1.2" equals "1.2.0".
 *
 * @returns 1 if v1 > v2, -1 if v1 < v2, 0 if equal.
 */
export function compareVersions(v1: string, v2: string): number {
  const left = v1.split(".").map(Number);
  const right = v2.split(".").map(Number);
  const segments = Math.max(left.length, right.length);
  for (let i = 0; i < segments; i++) {
    // `|| 0` covers both missing segments and NaN from non-numeric parts.
    const a = left[i] || 0;
    const b = right[i] || 0;
    if (a !== b) {
      return a > b ? 1 : -1;
    }
  }
  return 0;
}
/**
 * Read the last known version (the version the user was previously on)
 * from local storage. Returns null when unset or on storage failure.
 */
export async function getLastKnownVersion(): Promise<string | null> {
  try {
    const stored = await chrome.storage.local.get(VERSION_STORAGE_KEY);
    const value = stored[VERSION_STORAGE_KEY];
    return typeof value === "string" ? value : null;
  } catch {
    return null;
  }
}
/**
* Save the current version as the last known version
*/
export async function saveCurrentVersionAsKnown(): Promise<void> {
try {
const currentVersion = getCurrentVersion();
await chrome.storage.local.set({ [VERSION_STORAGE_KEY]: currentVersion });
} catch (error) {
console.error("[VersionChecker] Failed to save version:", error);
}
}
/**
 * Check whether the user has dismissed the update notification for the
 * given version. Storage failures are treated as "not dismissed".
 */
export async function isUpdateDismissed(version: string): Promise<boolean> {
  try {
    const stored = await chrome.storage.local.get(UPDATE_DISMISSED_KEY);
    const dismissed = stored[UPDATE_DISMISSED_KEY];
    return dismissed === version;
  } catch {
    return false;
  }
}
/**
 * Record that the user dismissed the update notification for a version.
 * Only one dismissed version is kept at a time.
 */
export async function dismissUpdate(version: string): Promise<void> {
  try {
    const entry = { [UPDATE_DISMISSED_KEY]: version };
    await chrome.storage.local.set(entry);
  } catch (error) {
    console.error("[VersionChecker] Failed to dismiss update:", error);
  }
}
/**
 * Clear any dismissed-update marker (called when a new version appears so
 * the user is notified again).
 */
export async function clearDismissedUpdate(): Promise<void> {
  try {
    await chrome.storage.local.remove([UPDATE_DISMISSED_KEY]);
  } catch (error) {
    console.error("[VersionChecker] Failed to clear dismissed update:", error);
  }
}
/**
 * Check for version updates and determine if user just updated.
 *
 * Flow: read the installed version and the last version the user was seen
 * on, fetch the server's latest release, then derive two independent
 * signals: `isNewlyUpdated` (installed > last known) and `hasUpdate`
 * (server latest > installed). Finally the installed version is persisted
 * as the new "last known" — this must happen AFTER the comparison or the
 * just-updated signal would never fire.
 */
export async function checkVersion(): Promise<VersionCheckResult> {
  const currentVersion = getCurrentVersion();
  const lastKnownVersion = await getLastKnownVersion();
  const latestVersionInfo = await fetchLatestVersion();
  // Pessimistic defaults; server fields fall back to locally derived values.
  const result: VersionCheckResult = {
    hasUpdate: false,
    currentVersion,
    latestVersion: latestVersionInfo?.version || null,
    changelogUrl:
      latestVersionInfo?.changelogUrl ||
      buildWebsiteUrl(`/release/${currentVersion}`),
    isNewlyUpdated: false,
    notes: latestVersionInfo?.notes || null,
  };
  // Check if user just updated (current version is newer than last known)
  if (
    lastKnownVersion &&
    compareVersions(currentVersion, lastKnownVersion) > 0
  ) {
    result.isNewlyUpdated = true;
    // Clear any dismissed update since we're on a new version
    await clearDismissedUpdate();
  }
  // Check if there's a newer version available
  if (latestVersionInfo?.version) {
    const comparison = compareVersions(
      latestVersionInfo.version,
      currentVersion,
    );
    if (comparison > 0) {
      result.hasUpdate = true;
    }
  }
  // Save current version as known for future comparisons
  await saveCurrentVersionAsKnown();
  console.log("[VersionChecker] Version check result:", result);
  return result;
}
/**
 * Ask Chrome to check for an extension update via
 * chrome.runtime.requestUpdateCheck, mapping its status onto a small
 * result object. Unknown statuses and thrown errors map to "error".
 */
export async function requestUpdate(): Promise<{
  status: "update_available" | "no_update" | "throttled" | "error";
  version?: string;
}> {
  try {
    const check = await chrome.runtime.requestUpdateCheck();
    switch (check.status) {
      case "update_available":
        return { status: "update_available", version: check.version };
      case "no_update":
        return { status: "no_update" };
      case "throttled":
        return { status: "throttled" };
      default:
        return { status: "error" };
    }
  } catch (error) {
    console.error("[VersionChecker] Update check failed:", error);
    return { status: "error" };
  }
}
/**
 * Open the changelog page in a new tab (defaults to the site changelog).
 */
export function openChangelog(
  url: string = buildWebsiteUrl("/changelog"),
): void {
  void chrome.tabs.create({ url });
}
/**
 * Open this extension's Chrome Web Store listing in a new tab so the user
 * can update manually.
 */
export function openUpdatePage(): void {
  const url = `https://chrome.google.com/webstore/detail/${chrome.runtime.id}`;
  void chrome.tabs.create({ url });
}

View File

@@ -2,7 +2,10 @@
* Public website configuration and authentication cookie utilities
*/
export const WEBSITE_URL = "https://www.claudechrome.com";
// Re-export WEBSITE_URL from centralized config for backward compatibility
export { WEBSITE_URL } from "../config/website";
import { WEBSITE_URL as _WEBSITE_URL } from "../config/website";
/**
* Aggregate claudechrome website authentication cookies and generate Cookie header content.
@@ -10,7 +13,7 @@ export const WEBSITE_URL = "https://www.claudechrome.com";
*/
export async function getAuthCookieHeader(): Promise<string | undefined> {
try {
const cookies = await chrome.cookies.getAll({ url: WEBSITE_URL });
const cookies = await chrome.cookies.getAll({ url: _WEBSITE_URL });
const relevantCookies = cookies.filter(
(cookie) =>
@@ -39,7 +42,7 @@ export async function getAuthCookieHeader(): Promise<string | undefined> {
*/
export async function hasAuthCookies(): Promise<boolean> {
try {
const cookies = await chrome.cookies.getAll({ url: WEBSITE_URL });
const cookies = await chrome.cookies.getAll({ url: _WEBSITE_URL });
return cookies.some(
(cookie) =>
cookie.name.includes("better-auth") || cookie.name.includes("session"),

View File

@@ -5,6 +5,8 @@ interface ImportMetaEnv {
readonly VITE_AI_TOKEN?: string;
readonly VITE_AI_MODEL?: string;
readonly VITE_DEV_MODE?: string;
/** Base URL for the website (e.g., "https://www.claudechrome.com") */
readonly VITE_WEBSITE_URL?: string;
}
interface ImportMeta {

View File

@@ -94,7 +94,8 @@ export default defineConfig({
build: {
rollupOptions: {
input: {
sidepanel: path.resolve(__dirname, "src/pages/sidepanel/index.html"),
// Note: sidepanel entry is handled by @crxjs/vite-plugin via manifest.json
// side_panel.default_path -> src/sidepanel.html -> pages/sidepanel/index.tsx
options: path.resolve(__dirname, "src/pages/options/index.html"),
},
},

View File

@@ -11,6 +11,8 @@ export * from "./conversation/index.js";
// export * from "./hooks/index.js";
// Intervention
export * from "./intervention/index.js";
// Screenshot Storage (IndexedDB)
export { RuntimeScreenshotStorage } from "./lib/screenshot-storage.js";
export type {
DiskUsage,
FileInfo,

View File

@@ -9,6 +9,7 @@
* - Screenshot functionality
*/
import { captureVisibleTabWithElementCrop } from "../tools/screenshot-helpers.js";
import type { ElementCaptureEvent, ElementCaptureOptions } from "./types.js";
type CaptureCallback = (event: ElementCaptureEvent) => void;
@@ -232,34 +233,43 @@ export class ElementCaptureService {
}
/**
* Capture screenshot functionality (with highlight)
* Capture screenshot functionality (with highlight / element crop).
*
* Delegates to the shared `captureVisibleTabWithElementCrop` helper so that
* the element-rect resolution, DPR scaling, crop, and restricted-page
* checks are consistent with `captureScreenshotWithHighlightTool`.
*
* Falls back to a full-page screenshot if the selector cannot be resolved.
*/
async captureScreenshot(
_selector: string,
_options?: {
selector: string,
options?: {
cropToElement?: boolean;
padding?: number;
},
): Promise<string | null> {
try {
// Use Chrome's captureVisibleTab API directly
if (!this.currentTabId) {
console.warn("⚠️ [ElementCaptureService] No current tab for screenshot");
return null;
}
// Get the tab to find its window ID
const tab = await chrome.tabs.get(this.currentTabId);
if (!tab.windowId) {
console.warn("⚠️ [ElementCaptureService] No window ID for tab");
return null;
}
const screenshot = await chrome.tabs.captureVisibleTab(tab.windowId, {
format: "png",
const result = await captureVisibleTabWithElementCrop({
tabId: this.currentTabId,
windowId: tab.windowId,
tabUrl: tab.url,
selector,
cropToElement: options?.cropToElement ?? true,
padding: options?.padding ?? 50,
});
return screenshot;
return result.dataUrl;
} catch (error) {
console.error("❌ [ElementCaptureService] Screenshot error:", error);
return null;

View File

@@ -0,0 +1,186 @@
/**
* Screenshot storage using IndexedDB.
* Stores screenshots with a uid for efficient reference and retrieval.
* Applies an LRU eviction policy (max 50 screenshots).
*
* Uses the same DB/store as the aipex ScreenshotStorage so both
* can share screenshots during the migration period.
*/
export interface ScreenshotData {
  // Unique key for the screenshot (IndexedDB keyPath).
  uid: string;
  /** Complete data URL: data:image/png;base64,... */
  base64Data: string;
  // Epoch millis when stored; used for LRU ordering during eviction.
  timestamp: number;
  // Chrome tab the screenshot was captured from, when known.
  tabId?: number;
  // Pixel dimensions of the stored image and of the originating viewport.
  metadata?: {
    width: number;
    height: number;
    viewportWidth: number;
    viewportHeight: number;
  };
}
// DB/store names are shared with the UI-level ScreenshotStorage (see module doc).
const DB_NAME = "aipex-screenshots-db";
const DB_VERSION = 1;
const STORE_NAME = "screenshots";
// LRU cap: entries beyond this count are evicted (oldest first) after a save.
const MAX_SCREENSHOTS = 50;
// Lazily opened connection and the in-flight open promise (dedupes
// concurrent initialize() calls).
let db: IDBDatabase | null = null;
let initPromise: Promise<void> | null = null;
// Open (or create) the IndexedDB database, memoizing the in-flight open so
// concurrent callers share one request. Resolves immediately once `db` is set.
function initialize(): Promise<void> {
  // An open is already in flight — share it.
  if (initPromise) return initPromise;
  // Already connected.
  if (db) return Promise.resolve();
  initPromise = new Promise<void>((resolve, reject) => {
    const request = indexedDB.open(DB_NAME, DB_VERSION);
    request.onerror = () => {
      // Reset so a later call can retry the open.
      initPromise = null;
      reject(request.error);
    };
    request.onsuccess = () => {
      db = request.result;
      initPromise = null;
      resolve();
    };
    request.onupgradeneeded = (event) => {
      // First open (or version bump): create the store and its indexes.
      const database = (event.target as IDBOpenDBRequest).result;
      if (!database.objectStoreNames.contains(STORE_NAME)) {
        const store = database.createObjectStore(STORE_NAME, {
          keyPath: "uid",
        });
        store.createIndex("timestamp", "timestamp", { unique: false });
        store.createIndex("tabId", "tabId", { unique: false });
      }
    };
  });
  return initPromise;
}
// Build a unique screenshot id from the current time plus a short random
// base-36 suffix (collision-resistant enough for a 50-entry store).
function generateUid(): string {
  const entropy = Math.random().toString(36).slice(2, 11);
  return `screenshot_${Date.now()}_${entropy}`;
}
// Evict the oldest screenshots beyond MAX_SCREENSHOTS. The returned promise
// now settles only after the delete transaction commits — the original
// resolved as soon as the deletes were *queued*, so an awaiting caller could
// observe the store still over the cap.
async function applyLRU(): Promise<void> {
  if (!db) return;
  const tx = db.transaction([STORE_NAME], "readonly");
  const store = tx.objectStore(STORE_NAME);
  const all: ScreenshotData[] = await new Promise((res, rej) => {
    const req = store.getAll();
    req.onsuccess = () => res(req.result as ScreenshotData[]);
    req.onerror = () => rej(req.error);
  });
  if (all.length <= MAX_SCREENSHOTS) return;
  // Newest first; everything past the cap gets evicted.
  all.sort((a, b) => b.timestamp - a.timestamp);
  const toDelete = all.slice(MAX_SCREENSHOTS);
  const delTx = db.transaction([STORE_NAME], "readwrite");
  const delStore = delTx.objectStore(STORE_NAME);
  for (const item of toDelete) {
    delStore.delete(item.uid);
  }
  // Wait for the transaction to commit (or fail) before resolving.
  await new Promise<void>((resolve, reject) => {
    delTx.oncomplete = () => resolve();
    delTx.onerror = () => reject(delTx.error);
    delTx.onabort = () => reject(delTx.error);
  });
}
/**
 * Runtime-level screenshot storage (for use inside browser-runtime tools).
 * Shares the same IndexedDB database as the UI-level ScreenshotStorage
 * in aipex-react so screenshots are accessible across packages.
 */
export const RuntimeScreenshotStorage = {
  /**
   * Save a screenshot and return its uid.
   * The base64Data must be a valid data URL (validated before storing).
   * Throws on invalid input or if the database cannot be opened; LRU
   * eviction runs in the background and never fails the save.
   */
  async saveScreenshot(
    base64Data: string,
    metadata?: {
      tabId?: number;
      width?: number;
      height?: number;
      viewportWidth?: number;
      viewportHeight?: number;
    },
  ): Promise<string> {
    // Validate that it's a data URL (not arbitrary content)
    if (
      typeof base64Data !== "string" ||
      !base64Data.startsWith("data:image/")
    ) {
      throw new Error("Invalid screenshot data: expected data:image/ URL");
    }
    await initialize();
    if (!db) throw new Error("Database not initialized");
    const uid = generateUid();
    const entry: ScreenshotData = {
      uid,
      base64Data,
      timestamp: Date.now(),
      tabId: metadata?.tabId,
      // Missing dimension fields default to 0 so the stored shape is stable.
      metadata: metadata
        ? {
            width: metadata.width ?? 0,
            height: metadata.height ?? 0,
            viewportWidth: metadata.viewportWidth ?? 0,
            viewportHeight: metadata.viewportHeight ?? 0,
          }
        : undefined,
    };
    // put() upserts by keyPath (uid), so a uid collision would overwrite.
    await new Promise<void>((resolve, reject) => {
      const tx = db!.transaction([STORE_NAME], "readwrite");
      const store = tx.objectStore(STORE_NAME);
      const req = store.put(entry);
      req.onsuccess = () => resolve();
      req.onerror = () => reject(req.error);
    });
    // Async LRU eviction — fire-and-forget
    applyLRU().catch(() => {});
    return uid;
  },
  /**
   * Get screenshot base64 data by uid.
   * Resolves to null when the uid is unknown (e.g. evicted by LRU).
   */
  async getScreenshot(uid: string): Promise<string | null> {
    await initialize();
    if (!db) throw new Error("Database not initialized");
    return new Promise((resolve, reject) => {
      const tx = db!.transaction([STORE_NAME], "readonly");
      const store = tx.objectStore(STORE_NAME);
      const req = store.get(uid);
      req.onsuccess = () => {
        const data = req.result as ScreenshotData | undefined;
        resolve(data?.base64Data ?? null);
      };
      req.onerror = () => reject(req.error);
    });
  },
  /**
   * Clear all screenshots.
   * Note: affects the shared store, so UI-level consumers lose entries too.
   */
  async clearAll(): Promise<void> {
    await initialize();
    if (!db) throw new Error("Database not initialized");
    await new Promise<void>((resolve, reject) => {
      const tx = db!.transaction([STORE_NAME], "readwrite");
      const store = tx.objectStore(STORE_NAME);
      const req = store.clear();
      req.onsuccess = () => resolve();
      req.onerror = () => reject(req.error);
    });
  },
};

View File

@@ -15,7 +15,18 @@ import {
highlightTextInlineTool,
scrollToElementTool,
} from "./page";
import { captureScreenshotTool, captureTabScreenshotTool } from "./screenshot";
import {
captureScreenshotTool,
captureScreenshotWithHighlightTool,
captureTabScreenshotTool,
} from "./screenshot";
// Clipboard image tools available but not registered in the default bundle.
// Enable explicitly if the product decides to ship clipboard access.
// import {
// captureScreenshotToClipboardTool,
// readClipboardImageTool,
// getClipboardImageInfoTool,
// } from "./screenshot";
import { skillTools } from "./skill";
import { searchElementsTool } from "./snapshot";
import {
@@ -30,13 +41,15 @@ import { downloadChatImagesTool, downloadImageTool } from "./tools/downloads";
/**
* All browser tools registered for AI use
* Total: 31 tools (27 core + 4 intervention tools)
* Total: 32 tools (28 core + 4 intervention tools)
*
* Disabled tools (per aipex):
* - switch_to_tab (causes context switching issues)
* - duplicate_tab (not in aipex)
* - wait (replaced by computer tool's wait action)
* - capture_screenshot_to_clipboard (not enabled in aipex)
* - capture_screenshot_to_clipboard (not enabled in aipex default bundle)
* - read_clipboard_image (P1 clipboard tool not enabled by default; requires security review)
* - get_clipboard_image_info (P1 clipboard tool not enabled by default; requires security review)
* - download_text_as_markdown (not enabled in aipex)
* - download_current_chat_images (architecture issue, not enabled in aipex)
* - organize_tabs (stub implementation, temporarily disabled until AI grouping is complete)
@@ -72,8 +85,9 @@ const browserFunctionTools: BrowserFunctionTool[] = [
highlightElementTool,
highlightTextInlineTool,
// Screenshot (2 tools)
// Screenshot (3 tools)
captureScreenshotTool,
captureScreenshotWithHighlightTool,
captureTabScreenshotTool,
// Download (2 tools)

View File

@@ -0,0 +1,210 @@
/**
* Shared screenshot helpers.
*
* This module is intentionally kept free of imports from `./index` or any
* module that participates in the tools ↔ screenshot circular-import chain.
* Both `captureScreenshotWithHighlightTool` (in screenshot.ts) and
* `ElementCaptureService` (in intervention/element-capture.ts) import from
* here without triggering a cycle.
*/
/** Maximum padding in pixels */
export const MAX_PADDING = 200;
// ===================== Image utilities =====================
/**
 * Crop image to a specific region using canvas.
 *
 * @param dataUrl Source image as a data URL.
 * @param region Crop rectangle in image (device) pixels.
 * @returns PNG data URL of the cropped region.
 *
 * NOTE(review): uses `Image`/`document`, so this must run in a context
 * with a DOM (page/side panel), not a plain service worker — confirm callers.
 */
export async function cropImage(
  dataUrl: string,
  region: { x: number; y: number; width: number; height: number },
): Promise<string> {
  return new Promise((resolve, reject) => {
    const img = new Image();
    img.onload = () => {
      const canvas = document.createElement("canvas");
      const ctx = canvas.getContext("2d");
      if (!ctx) {
        reject(new Error("Failed to get canvas context"));
        return;
      }
      canvas.width = region.width;
      canvas.height = region.height;
      // Copy the source rect onto the origin of the destination canvas.
      ctx.drawImage(
        img,
        region.x,
        region.y,
        region.width,
        region.height,
        0,
        0,
        region.width,
        region.height,
      );
      // The quality argument of toDataURL only applies to lossy encoders
      // (image/jpeg, image/webp) and is ignored for PNG — the original
      // passed a misleading 0.9 here, so it is omitted.
      resolve(canvas.toDataURL("image/png"));
    };
    img.onerror = () => reject(new Error("Failed to load image"));
    img.src = dataUrl;
  });
}
// ===================== Shared capture helper =====================
/**
 * Options for the shared capture + element-crop helper.
 */
export interface CaptureWithElementCropOptions {
  // Tab to resolve the selector in (content-script injection target).
  tabId: number;
  // Window to focus and capture via chrome.tabs.captureVisibleTab.
  windowId: number;
  // Used only to reject restricted browser-internal pages.
  tabUrl?: string;
  /** CSS selector of the element to focus on. Max length enforced by callers. */
  selector?: string;
  /** Whether to crop the screenshot to the element bounding box (plus padding). */
  cropToElement?: boolean;
  /** Padding around the element in CSS pixels when cropping (default 50, max 200). */
  padding?: number;
}
/**
 * Result returned by the shared capture helper.
 */
export interface CaptureWithElementCropResult {
  /** The captured (and optionally cropped) image as a data URL. */
  dataUrl: string;
  /** True if the image was actually cropped to the element. */
  cropped: boolean;
  /** True if the selector matched an element on the page. */
  elementFound: boolean;
}
/**
* Core logic for capturing the visible tab and optionally cropping to an
* element identified by CSS selector.
*
* This is shared by `captureScreenshotWithHighlightTool` (the agent-facing
* tool) and `ElementCaptureService.captureScreenshot` so that both use the
* same element-rect resolution, DPR scaling, and crop logic.
*
* Security notes:
* - Rejects browser-internal pages (chrome://, edge://, about:, extension://).
* - Selector length must be bounded by the caller (tool uses zod `.max()`).
* - Padding is clamped to [0, MAX_PADDING].
*/
export async function captureVisibleTabWithElementCrop(
  options: CaptureWithElementCropOptions,
): Promise<CaptureWithElementCropResult> {
  const {
    tabId,
    windowId,
    tabUrl,
    selector,
    cropToElement = false,
    padding = 50,
  } = options;
  // Reject restricted pages
  if (
    tabUrl &&
    (tabUrl.startsWith("chrome://") ||
      tabUrl.startsWith("chrome-extension://") ||
      tabUrl.startsWith("edge://") ||
      tabUrl.startsWith("about:"))
  ) {
    throw new Error("Cannot capture browser internal pages");
  }
  // Clamp padding to safe range
  const safePadding = Math.max(0, Math.min(padding, MAX_PADDING));
  // If a selector is provided, resolve the element rect via content script
  let elementRect: {
    x: number;
    y: number;
    width: number;
    height: number;
    devicePixelRatio: number;
  } | null = null;
  if (selector) {
    try {
      const result = await chrome.scripting.executeScript({
        target: { tabId },
        func: (sel: string) => {
          const element = document.querySelector(sel);
          if (!element) return null;
          const rect = element.getBoundingClientRect();
          const dpr = window.devicePixelRatio || 1;
          // Report the rect in device pixels so it maps directly onto the
          // bitmap returned by captureVisibleTab.
          return {
            x: rect.x * dpr,
            y: rect.y * dpr,
            width: rect.width * dpr,
            height: rect.height * dpr,
            devicePixelRatio: dpr,
          };
        },
        args: [selector],
      });
      if (result[0]?.result) {
        elementRect = result[0].result;
      }
    } catch (err) {
      console.warn("[Screenshot] Failed to get element rect:", err);
      // Continue with full-page screenshot if selector fails
    }
  }
  // Focus window and capture (short delay lets the window repaint first)
  await chrome.windows.update(windowId, { focused: true });
  await new Promise((resolve) => setTimeout(resolve, 100));
  let dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
    format: "png",
    quality: 90,
  });
  if (!dataUrl || !dataUrl.startsWith("data:image/")) {
    throw new Error("Invalid image data captured");
  }
  // Crop to element if requested and the element was found.
  // BUGFIX: `cropped` is set only after the crop is actually applied — the
  // original computed it up-front and reported cropped=true even when a
  // degenerate (zero-area) crop rect caused the crop to be skipped.
  let cropped = false;
  if (cropToElement && elementRect) {
    const dpr = elementRect.devicePixelRatio || 1;
    const scaledPadding = safePadding * dpr;
    // Load image to get actual dimensions for bounds checking
    const img = new Image();
    await new Promise<void>((resolve, reject) => {
      img.onload = () => resolve();
      img.onerror = () => reject(new Error("Failed to load image for crop"));
      img.src = dataUrl;
    });
    const x = Math.max(0, Math.round(elementRect.x - scaledPadding));
    const y = Math.max(0, Math.round(elementRect.y - scaledPadding));
    const maxWidth = img.width - x;
    const maxHeight = img.height - y;
    const width = Math.min(
      Math.round(elementRect.width + scaledPadding * 2),
      maxWidth,
    );
    const height = Math.min(
      Math.round(elementRect.height + scaledPadding * 2),
      maxHeight,
    );
    if (width > 0 && height > 0) {
      dataUrl = await cropImage(dataUrl, { x, y, width, height });
      cropped = true;
    }
  }
  return { dataUrl, cropped, elementFound: !!elementRect };
}

View File

@@ -1,8 +1,20 @@
import { tool } from "@aipexstudio/aipex-core";
import { z } from "zod";
import { cacheScreenshotMetadata } from "../automation/computer";
import { RuntimeScreenshotStorage } from "../lib/screenshot-storage";
import { getAutomationMode } from "../runtime/automation-mode";
import { getActiveTab } from "./index";
import {
captureVisibleTabWithElementCrop,
MAX_PADDING,
} from "./screenshot-helpers.js";
// Re-export the shared helper types/function so existing consumers aren't broken
export type {
CaptureWithElementCropOptions,
CaptureWithElementCropResult,
} from "./screenshot-helpers.js";
export { captureVisibleTabWithElementCrop } from "./screenshot-helpers.js";
async function compressImage(
dataUrl: string,
@@ -93,15 +105,25 @@ export const captureScreenshotTool = tool({
throw new Error("Invalid image data captured");
}
// Get viewport dimensions for metadata caching
const viewportDimensions = await chrome.scripting.executeScript({
target: { tabId: tab.id },
func: () => ({
width: window.innerWidth,
height: window.innerHeight,
}),
});
const viewport = viewportDimensions[0]?.result;
// Get viewport dimensions for metadata caching (graceful degradation)
let viewport: { width: number; height: number } | undefined;
try {
const viewportDimensions = await chrome.scripting.executeScript({
target: { tabId: tab.id },
func: () => ({
width: window.innerWidth,
height: window.innerHeight,
}),
});
viewport = viewportDimensions[0]?.result ?? undefined;
} catch (e) {
console.warn("[Screenshot] Failed to get viewport dimensions:", e);
// Continue without viewport metadata screenshot still works
}
// Get image dimensions for metadata
let imageWidth = 0;
let imageHeight = 0;
if (sendToLLM) {
// Compress for LLM
@@ -114,6 +136,8 @@ export const captureScreenshotTool = tool({
img.onerror = reject;
img.src = dataUrl;
});
imageWidth = img.width;
imageHeight = img.height;
// Cache screenshot metadata for computer tool
if (viewport) {
@@ -125,12 +149,50 @@ export const captureScreenshotTool = tool({
viewport.height,
);
}
} else {
// Get original image dimensions for non-LLM screenshots
const img = new Image();
await new Promise((resolve, reject) => {
img.onload = resolve;
img.onerror = reject;
img.src = dataUrl;
});
imageWidth = img.width;
imageHeight = img.height;
}
// Save screenshot to IndexedDB and get uid
let screenshotUid: string | undefined;
try {
screenshotUid = await RuntimeScreenshotStorage.saveScreenshot(dataUrl, {
tabId: tab.id,
width: imageWidth,
height: imageHeight,
viewportWidth: viewport?.width ?? 0,
viewportHeight: viewport?.height ?? 0,
});
} catch (err) {
console.error("[Screenshot] Failed to save to IndexedDB:", err);
// Continue even if storage fails
}
if (sendToLLM) {
return {
success: true,
imageData: dataUrl,
sendToLLM: true,
screenshotUid,
tabId: tab.id,
url: tab.url,
title: tab.title,
};
}
return {
success: true,
imageData: sendToLLM ? dataUrl : undefined,
captured: !sendToLLM,
captured: true,
sendToLLM: false,
screenshotUid,
tabId: tab.id,
url: tab.url,
title: tab.title,
@@ -177,15 +239,25 @@ export const captureTabScreenshotTool = tool({
quality: 90,
});
// Get viewport dimensions for metadata caching
const viewportDimensions = await chrome.scripting.executeScript({
target: { tabId },
func: () => ({
width: window.innerWidth,
height: window.innerHeight,
}),
});
const viewport = viewportDimensions[0]?.result;
// Get viewport dimensions for metadata caching (graceful degradation)
let viewport: { width: number; height: number } | undefined;
try {
const viewportDimensions = await chrome.scripting.executeScript({
target: { tabId },
func: () => ({
width: window.innerWidth,
height: window.innerHeight,
}),
});
viewport = viewportDimensions[0]?.result ?? undefined;
} catch (e) {
console.warn("[Screenshot] Failed to get viewport dimensions:", e);
// Continue without viewport metadata screenshot still works
}
// Get image dimensions for metadata
let imageWidth = 0;
let imageHeight = 0;
if (sendToLLM) {
// Compress for LLM
@@ -198,6 +270,8 @@ export const captureTabScreenshotTool = tool({
img.onerror = reject;
img.src = dataUrl;
});
imageWidth = img.width;
imageHeight = img.height;
// Cache screenshot metadata for computer tool
if (viewport) {
@@ -209,12 +283,50 @@ export const captureTabScreenshotTool = tool({
viewport.height,
);
}
} else {
// Get original image dimensions for non-LLM screenshots
const img = new Image();
await new Promise((resolve, reject) => {
img.onload = resolve;
img.onerror = reject;
img.src = dataUrl;
});
imageWidth = img.width;
imageHeight = img.height;
}
// Save screenshot to IndexedDB and get uid
let screenshotUid: string | undefined;
try {
screenshotUid = await RuntimeScreenshotStorage.saveScreenshot(dataUrl, {
tabId,
width: imageWidth,
height: imageHeight,
viewportWidth: viewport?.width ?? 0,
viewportHeight: viewport?.height ?? 0,
});
} catch (err) {
console.error("[Screenshot] Failed to save to IndexedDB:", err);
// Continue even if storage fails
}
if (sendToLLM) {
return {
success: true,
imageData: dataUrl,
sendToLLM: true,
screenshotUid,
tabId,
url: tab.url,
title: tab.title,
};
}
return {
success: true,
imageData: sendToLLM ? dataUrl : undefined,
captured: !sendToLLM,
captured: true,
sendToLLM: false,
screenshotUid,
tabId,
url: tab.url,
title: tab.title,
@@ -222,6 +334,163 @@ export const captureTabScreenshotTool = tool({
},
});
/** Maximum allowed CSS selector length to prevent injection of excessively long strings */
const MAX_SELECTOR_LENGTH = 500;
// ===================== Tool definition =====================
// Agent-facing tool: capture the visible tab, optionally crop to an element
// resolved by CSS selector, compress for the LLM, cache metadata for the
// computer tool, and persist the image to IndexedDB (returning its uid).
export const captureScreenshotWithHighlightTool = tool({
  name: "capture_screenshot_with_highlight",
  description:
    "Capture screenshot of the current visible tab, optionally highlighting and cropping to a specific element identified by CSS selector. The screenshot is always sent to the LLM for visual analysis. NOTE: This tool requires focus mode.",
  parameters: z.object({
    selector: z
      .string()
      .max(MAX_SELECTOR_LENGTH)
      .optional()
      .describe("CSS selector of element to highlight/focus on"),
    cropToElement: z
      .boolean()
      .optional()
      .default(false)
      .describe(
        "Whether to crop the screenshot to the element region (plus padding)",
      ),
    padding: z
      .number()
      .min(0)
      .max(MAX_PADDING)
      .optional()
      .default(50)
      .describe("Padding around element in pixels when cropping (default: 50)"),
    sendToLLM: z
      .boolean()
      .nullable()
      .optional()
      .default(true)
      .describe(
        "Whether to send the screenshot to LLM for visual analysis. Defaults to true.",
      ),
  }),
  execute: async ({
    selector,
    cropToElement = false,
    padding = 50,
    sendToLLM = true,
  }) => {
    // Visual capture needs a focused window, so it is blocked in background mode.
    const mode = await getAutomationMode();
    console.log("🔧 [captureScreenshotWithHighlight] Automation mode:", mode);
    if (mode === "background") {
      throw new Error(
        "Screenshot capture is disabled in background mode. Please switch to focus mode to use visual tools.",
      );
    }
    const tab = await getActiveTab();
    if (!tab.id || !tab.windowId) {
      throw new Error("No active tab found");
    }
    // Delegate to shared helper for capture + element crop
    const capture = await captureVisibleTabWithElementCrop({
      tabId: tab.id,
      windowId: tab.windowId,
      tabUrl: tab.url,
      selector,
      cropToElement,
      padding,
    });
    let { dataUrl } = capture;
    // Get viewport dimensions (graceful degradation)
    let viewport: { width: number; height: number } | undefined;
    try {
      const viewportDimensions = await chrome.scripting.executeScript({
        target: { tabId: tab.id },
        func: () => ({
          width: window.innerWidth,
          height: window.innerHeight,
        }),
      });
      viewport = viewportDimensions[0]?.result ?? undefined;
    } catch (e) {
      console.warn(
        "[ScreenshotHighlight] Failed to get viewport dimensions:",
        e,
      );
    }
    if (sendToLLM) {
      // Compress for LLM
      dataUrl = await compressImage(dataUrl, 0.6, 1024);
    }
    // Extract image dimensions (of the final, possibly compressed image)
    const finalImg = new Image();
    await new Promise<void>((resolve, reject) => {
      finalImg.onload = () => resolve();
      finalImg.onerror = () => reject(new Error("Failed to load image"));
      finalImg.src = dataUrl;
    });
    const imageWidth = finalImg.width;
    const imageHeight = finalImg.height;
    // Cache screenshot metadata for computer tool
    if (sendToLLM && viewport) {
      cacheScreenshotMetadata(
        tab.id,
        imageWidth,
        imageHeight,
        viewport.width,
        viewport.height,
      );
    }
    // Save screenshot to IndexedDB; the uid lets the UI re-load the image later.
    let screenshotUid: string | undefined;
    try {
      screenshotUid = await RuntimeScreenshotStorage.saveScreenshot(dataUrl, {
        tabId: tab.id,
        width: imageWidth,
        height: imageHeight,
        viewportWidth: viewport?.width ?? 0,
        viewportHeight: viewport?.height ?? 0,
      });
    } catch (err) {
      // Storage failure is non-fatal — the inline imageData still works.
      console.error("[ScreenshotHighlight] Failed to save to IndexedDB:", err);
    }
    if (sendToLLM) {
      return {
        success: true,
        imageData: dataUrl,
        sendToLLM: true,
        screenshotUid,
        tabId: tab.id,
        url: tab.url,
        title: tab.title,
        selector: selector ?? undefined,
        cropped: capture.cropped,
      };
    }
    return {
      success: true,
      captured: true,
      sendToLLM: false,
      screenshotUid,
      tabId: tab.id,
      url: tab.url,
      title: tab.title,
      selector: selector ?? undefined,
      cropped: capture.cropped,
    };
  },
});
export const captureScreenshotToClipboardTool = tool({
name: "capture_screenshot_to_clipboard",
description:
@@ -267,3 +536,81 @@ export const captureScreenshotToClipboardTool = tool({
};
},
});
// ===================== Clipboard image tools (P1) =====================
export const readClipboardImageTool = tool({
  name: "read_clipboard_image",
  description:
    "Read an image from the system clipboard and return it as a base64 data URL. " +
    "Useful for inspecting images the user has copied. Returns an error if no image is present.",
  parameters: z.object({}),
  execute: async () => {
    // Turn a clipboard Blob into a base64 data URL via FileReader.
    const blobToDataUrl = (blob: Blob): Promise<string> =>
      new Promise<string>((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = () => resolve(reader.result as string);
        reader.onerror = () =>
          reject(new Error("Failed to read image data"));
        reader.readAsDataURL(blob);
      });

    try {
      const clipboardItems = await navigator.clipboard.read();
      // Return the first image/* payload found across all clipboard items.
      for (const item of clipboardItems) {
        const imageType = item.types.find((t) => t.startsWith("image/"));
        if (imageType) {
          const blob = await item.getType(imageType);
          const dataUrl = await blobToDataUrl(blob);
          return {
            success: true,
            imageData: dataUrl,
          };
        }
      }
      return { success: false, error: "No image found in clipboard" };
    } catch (error: unknown) {
      // clipboard.read() can reject (e.g. permission denied, document unfocused).
      const message = error instanceof Error ? error.message : String(error);
      return {
        success: false,
        error: `Failed to read clipboard: ${message}`,
      };
    }
  },
});
export const getClipboardImageInfoTool = tool({
  name: "get_clipboard_image_info",
  description:
    "Check whether the system clipboard contains an image, and if so return " +
    "its MIME type. Does NOT read the full image data.",
  parameters: z.object({}),
  execute: async () => {
    try {
      const clipboardItems = await navigator.clipboard.read();
      // Scan every clipboard item; report the first image/* MIME type found.
      // Only types are inspected — the image bytes are never fetched.
      for (const item of clipboardItems) {
        const imageType = item.types.find((t) => t.startsWith("image/"));
        if (imageType) {
          return {
            success: true,
            hasImage: true,
            imageType,
          };
        }
      }
      return { success: true, hasImage: false };
    } catch (error: unknown) {
      // clipboard.read() can reject (e.g. permission denied, document unfocused).
      const message = error instanceof Error ? error.message : String(error);
      return {
        success: false,
        error: `Failed to read clipboard: ${message}`,
      };
    }
  },
});

View File

@@ -158,6 +158,143 @@ describe("AIPex", () => {
expect(events[0]?.type).toBe("content_delta");
});
// Stateless mode must still hand the runner a session object so that
// screenshot shaping keeps working; nothing is persisted.
it("should pass an EphemeralSession to run() in stateless mode", async () => {
  vi.mocked(run).mockResolvedValue(
    createMockRunResult({
      finalOutput: "Reply",
      streamEvents: [
        {
          type: "raw_model_stream_event",
          data: { type: "output_text_delta", delta: "Reply" },
        },
      ],
    }),
  );
  const agent = AIPex.create({
    instructions: "Test",
    model: mockModel,
    conversation: false,
  });
  for await (const _event of agent.chat("Hi")) {
    // consume events
  }
  // Verify run() was called with a session (EphemeralSession) even in stateless mode
  expect(run).toHaveBeenCalledTimes(1);
  const runCallArgs = vi.mocked(run).mock.calls[0]!;
  const runOptions = runCallArgs[2] as { session?: unknown };
  expect(runOptions.session).toBeDefined();
  // EphemeralSession has getSessionId, addItems, getItems, popItem, clearSession
  expect(typeof (runOptions.session as any).getSessionId).toBe("function");
  expect(typeof (runOptions.session as any).addItems).toBe("function");
});
// AIPex must always install a callModelInputFilter so screenshot results
// can be reshaped before each model call.
it("should pass callModelInputFilter to run() for screenshot shaping", async () => {
  vi.mocked(run).mockResolvedValue(
    createMockRunResult({
      finalOutput: "Reply",
      streamEvents: [
        {
          type: "raw_model_stream_event",
          data: { type: "output_text_delta", delta: "Reply" },
        },
      ],
    }),
  );
  const agent = AIPex.create({
    instructions: "Test",
    model: mockModel,
    conversation: false,
  });
  for await (const _event of agent.chat("Hi")) {
    // consume events
  }
  expect(run).toHaveBeenCalledTimes(1);
  const runCallArgs = vi.mocked(run).mock.calls[0]!;
  const runOptions = runCallArgs[2] as { callModelInputFilter?: unknown };
  expect(runOptions.callModelInputFilter).toBeDefined();
  expect(typeof runOptions.callModelInputFilter).toBe("function");
});
// End-to-end check of the installed filter: feed it a screenshot tool result
// and verify the base64 payload is stripped and re-injected as a user message.
it("callModelInputFilter should shape screenshot items before model call", async () => {
  vi.mocked(run).mockResolvedValue(
    createMockRunResult({
      finalOutput: "Reply",
      streamEvents: [
        {
          type: "raw_model_stream_event",
          data: { type: "output_text_delta", delta: "Reply" },
        },
      ],
    }),
  );
  const agent = AIPex.create({
    instructions: "Test",
    model: mockModel,
  });
  for await (const _event of agent.chat("Hi")) {
    // consume events
  }
  // Extract the callModelInputFilter and invoke it with a screenshot tool result
  const runCallArgs = vi.mocked(run).mock.calls[0]!;
  const runOptions = runCallArgs[2] as unknown as {
    callModelInputFilter: (args: {
      modelData: { input: unknown[]; instructions?: string };
      agent: unknown;
      context: unknown;
    }) => Promise<{ input: unknown[]; instructions?: string }>;
  };
  const screenshotToolResult = {
    type: "function_call_result",
    name: "capture_screenshot",
    callId: "call_test",
    output: JSON.stringify({
      success: true,
      imageData: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQ==",
      sendToLLM: true,
      screenshotUid: "screenshot_123_abc",
    }),
  };
  const result = await runOptions.callModelInputFilter({
    modelData: {
      input: [screenshotToolResult],
      instructions: "Test instructions",
    },
    agent: {},
    context: undefined,
  });
  // Should have 2 items: stripped tool result + transient user image message
  expect(result.input.length).toBe(2);
  // First item: stripped tool result with imageData replaced
  const stripped = result.input[0] as { type: string; output: string };
  expect(stripped.type).toBe("function_call_result");
  const parsed = JSON.parse(stripped.output);
  expect(parsed.success).toBe(true);
  expect(parsed.data.imageData).toBe(
    "[Image data removed - see following user message]",
  );
  // Second item: transient user image message
  const userMsg = result.input[1] as { type: string; role: string };
  expect(userMsg.type).toBe("message");
  expect(userMsg.role).toBe("user");
  // Instructions should pass through unchanged
  expect(result.instructions).toBe("Test instructions");
});
it("should work with custom conversationManager", async () => {
vi.mocked(run).mockResolvedValue(
createMockRunResult({

View File

@@ -7,6 +7,7 @@ import type { ContextManager } from "../context/manager.js";
import type { Context } from "../context/types.js";
import { formatContextsForPrompt, resolveContexts } from "../context/utils.js";
import { ConversationCompressor } from "../conversation/compressor.js";
import { EphemeralSession } from "../conversation/ephemeral-session.js";
import { ConversationManager } from "../conversation/manager.js";
import type { Session } from "../conversation/session.js";
import { SessionStorage } from "../conversation/storage.js";
@@ -26,6 +27,7 @@ import type {
} from "../types.js";
import { AgentError, ErrorCode } from "../utils/errors.js";
import { safeJsonParse } from "../utils/json.js";
import { shapeScreenshotItems } from "../utils/screenshot-shaping.js";
export class AIPex {
private agent: OpenAIAgent;
@@ -120,6 +122,13 @@ export class AIPex {
const startTime = Date.now();
const metrics = this.initMetrics(startTime, session);
// Always provide a session to the runner so that screenshot shaping
// (strip base64 imageData, inject transient user image message) runs
// even in stateless mode. The EphemeralSession is in-memory only and
// never persisted.
const runSession: Session | EphemeralSession =
session ?? new EphemeralSession();
// Track tool-call argument streaming during a single model response.
// This is best-effort and provider-dependent (e.g. OpenAI ChatCompletions tool_calls deltas).
const toolArgsStreamByIndex = new Map<
@@ -130,8 +139,15 @@ export class AIPex {
try {
const result = await run(this.agent, input, {
maxTurns: this.maxTurns,
session: session ?? undefined,
session: runSession,
stream: true,
// Shape screenshot tool results before every model call:
// strip base64 imageData from tool results and inject a transient
// user image message so the model can consume images via the vision path.
callModelInputFilter: async ({ modelData }) => ({
input: shapeScreenshotItems(modelData.input),
instructions: modelData.instructions,
}),
});
let streamedOutput = "";

View File

@@ -23,7 +23,7 @@ export const AI_PROVIDERS = {
openai: {
name: "OpenAI",
icon: "🤖",
host: "https://api.openai.com/v1/chat/completions",
host: "https://api.openai.com/v1",
models: ["gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo"] as const,
tokenPlaceholder: "sk-...",
docs: "https://platform.openai.com/api-keys",
@@ -32,7 +32,7 @@ export const AI_PROVIDERS = {
anthropic: {
name: "Anthropic",
icon: "🧠",
host: "https://api.anthropic.com/v1/messages",
host: "https://api.anthropic.com",
models: [
"claude-sonnet-4-20250514",
"claude-3-5-sonnet-20241022",
@@ -57,7 +57,7 @@ export const AI_PROVIDERS = {
openrouter: {
name: "OpenRouter",
icon: "🔀",
host: "https://openrouter.ai/api/v1/chat/completions",
host: "https://openrouter.ai/api/v1",
models: [
"anthropic/claude-3.5-sonnet",
"openai/gpt-4o",
@@ -72,7 +72,7 @@ export const AI_PROVIDERS = {
deepseek: {
name: "DeepSeek",
icon: "🔍",
host: "https://api.deepseek.com/v1/chat/completions",
host: "https://api.deepseek.com/v1",
models: ["deepseek-chat", "deepseek-coder"] as const,
tokenPlaceholder: "sk-...",
docs: "https://platform.deepseek.com/api_keys",
@@ -81,7 +81,7 @@ export const AI_PROVIDERS = {
groq: {
name: "Groq",
icon: "⚡",
host: "https://api.groq.com/openai/v1/chat/completions",
host: "https://api.groq.com/openai/v1",
models: [
"llama-3.3-70b-versatile",
"llama-3.1-8b-instant",
@@ -94,7 +94,7 @@ export const AI_PROVIDERS = {
together: {
name: "Together AI",
icon: "🤝",
host: "https://api.together.xyz/v1/chat/completions",
host: "https://api.together.xyz/v1",
models: [
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
"mistralai/Mixtral-8x7B-Instruct-v0.1",
@@ -107,7 +107,7 @@ export const AI_PROVIDERS = {
mistral: {
name: "Mistral AI",
icon: "🌬️",
host: "https://api.mistral.ai/v1/chat/completions",
host: "https://api.mistral.ai/v1",
models: [
"mistral-large-latest",
"mistral-medium-latest",
@@ -120,7 +120,7 @@ export const AI_PROVIDERS = {
cohere: {
name: "Cohere",
icon: "🔗",
host: "https://api.cohere.ai/v1/chat",
host: "https://api.cohere.ai/v1",
models: ["command-r-plus", "command-r", "command"] as const,
tokenPlaceholder: "...",
docs: "https://dashboard.cohere.com/api-keys",
@@ -129,7 +129,7 @@ export const AI_PROVIDERS = {
perplexity: {
name: "Perplexity",
icon: "🔎",
host: "https://api.perplexity.ai/chat/completions",
host: "https://api.perplexity.ai",
models: [
"llama-3.1-sonar-large-128k-online",
"llama-3.1-sonar-small-128k-online",
@@ -141,7 +141,7 @@ export const AI_PROVIDERS = {
fireworks: {
name: "Fireworks AI",
icon: "🎆",
host: "https://api.fireworks.ai/inference/v1/chat/completions",
host: "https://api.fireworks.ai/inference/v1",
models: [
"accounts/fireworks/models/llama-v3p1-70b-instruct",
"accounts/fireworks/models/mixtral-8x7b-instruct",
@@ -153,7 +153,7 @@ export const AI_PROVIDERS = {
replicate: {
name: "Replicate",
icon: "🔁",
host: "https://api.replicate.com/v1/models",
host: "https://api.replicate.com/v1",
models: [
"meta/llama-2-70b-chat",
"mistralai/mixtral-8x7b-instruct-v0.1",
@@ -165,7 +165,7 @@ export const AI_PROVIDERS = {
azure: {
name: "Azure OpenAI",
icon: "☁️",
host: "https://YOUR-RESOURCE.openai.azure.com/openai/deployments/YOUR-DEPLOYMENT/chat/completions?api-version=2024-02-15-preview",
host: "https://YOUR-RESOURCE.openai.azure.com/openai/deployments/YOUR-DEPLOYMENT",
models: ["gpt-4", "gpt-35-turbo"] as const,
tokenPlaceholder: "YOUR-API-KEY",
docs: "https://portal.azure.com",

View File

@@ -0,0 +1,79 @@
import type { AgentInputItem } from "@openai/agents";
import { describe, expect, it } from "vitest";
import { EphemeralSession } from "./ephemeral-session.js";
/** Build a minimal user-role chat message item for session tests. */
function createUserMessage(text: string): AgentInputItem {
  const message: AgentInputItem = { type: "message", role: "user", content: text };
  return message;
}
// EphemeralSession: in-memory Session used in stateless mode. These tests
// pin the Session-interface surface and confirm items are stored verbatim.
describe("EphemeralSession", () => {
  it("implements Session interface basics", async () => {
    const session = new EphemeralSession();
    expect(typeof session.id).toBe("string");
    expect(await session.getSessionId()).toBe(session.id);
    expect(await session.getItems()).toEqual([]);
  });
  it("stores and retrieves items", async () => {
    const session = new EphemeralSession();
    await session.addItems([createUserMessage("hello")]);
    const items = await session.getItems();
    expect(items.length).toBe(1);
    expect(session.getItemCount()).toBe(1);
  });
  it("supports getItems with limit", async () => {
    const session = new EphemeralSession();
    await session.addItems([
      createUserMessage("a"),
      createUserMessage("b"),
      createUserMessage("c"),
    ]);
    // limit caps the number of returned items
    const limited = await session.getItems(2);
    expect(limited.length).toBe(2);
  });
  it("supports popItem", async () => {
    const session = new EphemeralSession();
    await session.addItems([createUserMessage("a"), createUserMessage("b")]);
    const popped = await session.popItem();
    expect(popped).toBeDefined();
    expect(session.getItemCount()).toBe(1);
  });
  it("supports clearSession", async () => {
    const session = new EphemeralSession();
    await session.addItems([createUserMessage("a")]);
    await session.clearSession();
    expect(session.getItemCount()).toBe(0);
  });
  it("stores items as-is without shaping (shaping is done by callModelInputFilter)", async () => {
    const screenshotResult: AgentInputItem = {
      type: "function_call_result",
      name: "capture_screenshot",
      callId: "call_abc",
      output: JSON.stringify({
        success: true,
        imageData: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQ==",
        sendToLLM: true,
        screenshotUid: "screenshot_1234567890_abc",
        tabId: 1,
      }),
    } as AgentInputItem;
    const session = new EphemeralSession();
    await session.addItems([screenshotResult]);
    const items = await session.getItems();
    // Items should be stored as-is (1 item, no shaping)
    expect(items.length).toBe(1);
    expect(items[0]).toEqual(screenshotResult);
  });
  it("accepts custom id", async () => {
    const session = new EphemeralSession("custom-id");
    expect(session.id).toBe("custom-id");
    expect(await session.getSessionId()).toBe("custom-id");
  });
});

View File

@@ -0,0 +1,51 @@
/**
* Ephemeral (in-memory, non-persistent) session that implements the
* `@openai/agents` Session interface.
*
* Used in stateless mode (conversation: false) so that the OpenAI Agents
* runner still has a session to work with. Screenshot shaping (strip base64
* imageData, inject transient user image message) is handled by the
* `callModelInputFilter` hook in AIPex, not by the session itself.
*
* This class intentionally imports only from `../utils/` to avoid circular
* dependencies with the persistence layer (session.ts, manager.ts, storage.ts).
*/
import type { AgentInputItem, Session } from "@openai/agents";
import { generateId } from "../utils/id-generator.js";
export class EphemeralSession implements Session {
  /** Stable identifier for this in-memory session. */
  readonly id: string;

  /** Conversation items, oldest first. Held only in memory, never persisted. */
  private items: AgentInputItem[] = [];

  constructor(id?: string) {
    this.id = id ?? generateId();
  }

  /** Session interface requires the id behind a Promise. */
  async getSessionId(): Promise<string> {
    return this.id;
  }

  /**
   * Return a copy of the stored items, oldest first.
   *
   * @param limit - When provided, return at most the `limit` most recent
   *   items. A limit of 0 (or a negative value) yields an empty array.
   *   Previously `limit === 0` hit `slice(-0)`, which is `slice(0)` and
   *   returned the FULL list — contradicting the "at most limit" contract.
   */
  async getItems(limit?: number): Promise<AgentInputItem[]> {
    if (limit === undefined) {
      return [...this.items];
    }
    if (limit <= 0) {
      return [];
    }
    return this.items.slice(-limit);
  }

  /** Append items to the end of the session history. */
  async addItems(items: AgentInputItem[]): Promise<void> {
    this.items.push(...items);
  }

  /** Remove and return the most recent item, or undefined when empty. */
  async popItem(): Promise<AgentInputItem | undefined> {
    return this.items.pop();
  }

  /** Drop all stored items. */
  async clearSession(): Promise<void> {
    this.items = [];
  }

  /** Synchronous item count (convenience; not part of the Session interface). */
  getItemCount(): number {
    return this.items.length;
  }
}

View File

@@ -7,6 +7,7 @@ import type {
SessionTree,
} from "../types.js";
import { generateId } from "../utils/id-generator.js";
import { pruneTransientScreenshotItems } from "../utils/screenshot-shaping.js";
import type { ConversationCompressor } from "./compressor.js";
import { Session } from "./session.js";
@@ -87,7 +88,10 @@ export class ConversationManager {
}
private async doCompress(session: Session): Promise<{ summary: string }> {
const items = await session.getItems();
// Prune transient screenshot user-image messages before compression
// to avoid sending large base64 blobs to the compressor/LLM.
const rawItems = await session.getItems();
const items = pruneTransientScreenshotItems(rawItems);
const { summary, compressedItems } =
await this.compressor!.compressItems(items);

View File

@@ -8,6 +8,10 @@ import type {
SessionSummary,
} from "../types.js";
import { generateId } from "../utils/id-generator.js";
import {
isTransientScreenshotItem,
pruneTransientScreenshotItems,
} from "../utils/screenshot-shaping.js";
function createEmptySessionMetrics(): SessionMetrics {
return {
@@ -156,7 +160,12 @@ export class Session implements OpenAISession {
private updatePreview(): void {
const latestUserMessage = [...this.items]
.reverse()
.find((item) => item.type === "message" && item.role === "user");
.find(
(item) =>
item.type === "message" &&
item.role === "user" &&
!isTransientScreenshotItem(item),
);
const previewSource =
this.extractContent(latestUserMessage) ??
@@ -207,7 +216,9 @@ export class Session implements OpenAISession {
toJSON(): SerializedSession {
return {
id: this.id,
items: this.items,
// Prune transient screenshot user-image messages before persisting
// to avoid storing large base64 blobs in conversation history.
items: pruneTransientScreenshotItems(this.items),
metadata: this.metadata,
config: this.config,
metrics: this.sessionMetrics,

View File

@@ -3,3 +3,9 @@
*/
export { CancellationError, CancellationToken } from "./cancellation-token.js";
export {
isTransientScreenshotItem,
pruneTransientScreenshotItems,
shapeScreenshotItems,
TRANSIENT_SCREENSHOT_MARKER,
} from "./screenshot-shaping.js";

View File

@@ -0,0 +1,506 @@
import type { AgentInputItem } from "@openai/agents";
import { describe, expect, it } from "vitest";
import {
isTransientScreenshotItem,
pruneTransientScreenshotItems,
shapeScreenshotItems,
TRANSIENT_SCREENSHOT_MARKER,
} from "./screenshot-shaping.js";
// --- Helpers ---
// Tiny JPEG header as a data URL; stands in for real screenshot bytes.
const TEST_IMAGE_DATA = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQ==";
// Example uid in the `screenshot_<timestamp>_<suffix>` shape used by these tests.
const TEST_SCREENSHOT_UID = "screenshot_1234567890_abcdefghi";
// Text that shapeScreenshotItems substitutes for the stripped imageData field.
const PLACEHOLDER = "[Image data removed - see following user message]";
/**
 * Build a screenshot tool result whose `output` is a plain JSON string
 * (the legacy/test format, as opposed to the SDK's text-object wrapper).
 * Fields in `overrides` replace the defaults in the serialized payload.
 */
function createScreenshotToolResult(
  overrides: Record<string, unknown> = {},
): AgentInputItem {
  const payload: Record<string, unknown> = {
    success: true,
    imageData: TEST_IMAGE_DATA,
    sendToLLM: true,
    screenshotUid: TEST_SCREENSHOT_UID,
    tabId: 1,
    url: "https://example.com",
    title: "Example",
  };
  Object.assign(payload, overrides);
  const item = {
    type: "function_call_result",
    name: "capture_screenshot",
    callId: "call_abc123",
    output: JSON.stringify(payload),
  };
  return item as AgentInputItem;
}
/**
 * Build a screenshot tool result in the SDK's native output format:
 * `{ type: 'text', text: '<json string>' }` — the shape `@openai/agents`
 * produces via `getToolCallOutputItem()` when a tool returns a plain object.
 * Fields in `overrides` replace the defaults in the serialized payload.
 */
function createSdkScreenshotToolResult(
  overrides: Record<string, unknown> = {},
): AgentInputItem {
  const payload: Record<string, unknown> = {
    success: true,
    imageData: TEST_IMAGE_DATA,
    sendToLLM: true,
    screenshotUid: TEST_SCREENSHOT_UID,
    tabId: 1,
    url: "https://example.com",
    title: "Example",
  };
  Object.assign(payload, overrides);
  const item = {
    type: "function_call_result",
    name: "capture_screenshot",
    callId: "call_sdk_123",
    status: "completed",
    output: { type: "text", text: JSON.stringify(payload) },
  };
  return item as AgentInputItem;
}
/** Tool result from a non-screenshot tool; shaping is expected to pass it through. */
function createNonScreenshotToolResult(): AgentInputItem {
  const tabsPayload = { tabs: [{ id: 1, title: "Tab" }] };
  const item = {
    type: "function_call_result",
    name: "get_tabs",
    callId: "call_other",
    output: JSON.stringify(tabsPayload),
  };
  return item as AgentInputItem;
}
/** Plain user text message: a non-tool item that shaping must leave untouched. */
function createUserMessage(text: string): AgentInputItem {
  const message: AgentInputItem = {
    type: "message",
    role: "user",
    content: text,
  };
  return message;
}
// --- Tests ---
// shapeScreenshotItems: strips base64 imageData out of screenshot tool
// results and injects a marker-tagged "transient" user image message so the
// model receives the image via the vision path. These tests cover the legacy
// string-output format, nested data envelopes, and the SDK text-object format.
describe("shapeScreenshotItems", () => {
  it("should strip imageData and inject transient user image message for sendToLLM=true", () => {
    const items = [createScreenshotToolResult()];
    const shaped = shapeScreenshotItems(items);
    expect(shaped.length).toBe(2);
    // First item: stripped tool result — always wrapped in { success, data }
    const toolResult = shaped[0] as { type: string; output: string };
    expect(toolResult.type).toBe("function_call_result");
    const parsed = JSON.parse(toolResult.output);
    expect(parsed.success).toBe(true);
    expect(parsed.data).toBeDefined();
    expect(parsed.data.imageData).toBe(PLACEHOLDER);
    expect(parsed.data.screenshotUid).toBe(TEST_SCREENSHOT_UID);
    expect(parsed.data.sendToLLM).toBe(true);
    // Second item: transient user image message
    const userMsg = shaped[1] as {
      type: string;
      role: string;
      content: Array<{ type: string; text?: string; image?: string }>;
      providerData?: Record<string, unknown>;
    };
    expect(userMsg.type).toBe("message");
    expect(userMsg.role).toBe("user");
    expect(userMsg.providerData?.[TRANSIENT_SCREENSHOT_MARKER]).toBe(true);
    // Check content has text + image parts
    const textPart = userMsg.content.find((c) => c.type === "input_text");
    const imagePart = userMsg.content.find((c) => c.type === "input_image");
    expect(textPart).toBeTruthy();
    expect(imagePart).toBeTruthy();
    expect((imagePart as { image: string }).image).toBe(TEST_IMAGE_DATA);
  });
  it("should pass through items when sendToLLM=false", () => {
    const items = [
      createScreenshotToolResult({
        sendToLLM: false,
        imageData: undefined,
        captured: true,
      }),
    ];
    const shaped = shapeScreenshotItems(items);
    // Should not inject a user image message
    expect(shaped.length).toBe(1);
    expect(shaped[0]).toEqual(items[0]);
  });
  it("should pass through non-screenshot tools unchanged", () => {
    const items = [createNonScreenshotToolResult()];
    const shaped = shapeScreenshotItems(items);
    expect(shaped.length).toBe(1);
    expect(shaped[0]).toEqual(items[0]);
  });
  it("should pass through non-tool items unchanged", () => {
    const items = [createUserMessage("hello")];
    const shaped = shapeScreenshotItems(items);
    expect(shaped.length).toBe(1);
    expect(shaped[0]).toEqual(items[0]);
  });
  it("should handle capture_tab_screenshot the same way", () => {
    const toolResult = createScreenshotToolResult();
    (toolResult as { name: string }).name = "capture_tab_screenshot";
    const shaped = shapeScreenshotItems([toolResult]);
    expect(shaped.length).toBe(2);
    expect((shaped[0] as { type: string }).type).toBe("function_call_result");
    expect((shaped[1] as { type: string; role: string }).role).toBe("user");
  });
  it("should handle capture_screenshot_with_highlight the same way", () => {
    const output = {
      success: true,
      imageData: TEST_IMAGE_DATA,
      sendToLLM: true,
      screenshotUid: TEST_SCREENSHOT_UID,
      tabId: 1,
      url: "https://example.com",
      title: "Example",
      selector: ".my-element",
      cropped: true,
    };
    const item: AgentInputItem = {
      type: "function_call_result",
      name: "capture_screenshot_with_highlight",
      callId: "call_highlight",
      output: JSON.stringify(output),
    } as AgentInputItem;
    const shaped = shapeScreenshotItems([item]);
    expect(shaped.length).toBe(2);
    // First item: stripped tool result — always wrapped in { success, data }
    const toolResult = shaped[0] as { type: string; output: string };
    expect(toolResult.type).toBe("function_call_result");
    const parsed = JSON.parse(toolResult.output);
    expect(parsed.success).toBe(true);
    expect(parsed.data).toBeDefined();
    expect(parsed.data.imageData).toBe(PLACEHOLDER);
    expect(parsed.data.screenshotUid).toBe(TEST_SCREENSHOT_UID);
    expect(parsed.data.sendToLLM).toBe(true);
    // Second item: transient user image message
    const userMsg = shaped[1] as {
      type: string;
      role: string;
      content: Array<{ type: string; text?: string; image?: string }>;
      providerData?: Record<string, unknown>;
    };
    expect(userMsg.type).toBe("message");
    expect(userMsg.role).toBe("user");
    expect(userMsg.providerData?.[TRANSIENT_SCREENSHOT_MARKER]).toBe(true);
    const imagePart = userMsg.content.find((c) => c.type === "input_image");
    expect(imagePart).toBeTruthy();
    expect((imagePart as { image: string }).image).toBe(TEST_IMAGE_DATA);
  });
  it("should pass through capture_screenshot_with_highlight when sendToLLM=false", () => {
    const output = {
      success: true,
      captured: true,
      sendToLLM: false,
      screenshotUid: TEST_SCREENSHOT_UID,
      tabId: 1,
      selector: ".my-element",
      cropped: true,
    };
    const item: AgentInputItem = {
      type: "function_call_result",
      name: "capture_screenshot_with_highlight",
      callId: "call_highlight_no_llm",
      output: JSON.stringify(output),
    } as AgentInputItem;
    const shaped = shapeScreenshotItems([item]);
    // No imageData + sendToLLM=false → pass through unchanged
    expect(shaped.length).toBe(1);
    expect(shaped[0]).toEqual(item);
  });
  it("should handle mixed items correctly", () => {
    const items = [
      createUserMessage("Take a screenshot"),
      createNonScreenshotToolResult(),
      createScreenshotToolResult(),
      createUserMessage("What do you see?"),
    ];
    const shaped = shapeScreenshotItems(items);
    // Original 4 items + 1 injected user image = 5
    expect(shaped.length).toBe(5);
    // Verify order: user, non-screenshot tool, stripped screenshot, user image, user
    expect((shaped[0] as { role: string }).role).toBe("user");
    expect((shaped[1] as { name: string }).name).toBe("get_tabs");
    expect((shaped[2] as { type: string }).type).toBe("function_call_result");
    expect(
      (shaped[3] as { providerData?: Record<string, unknown> }).providerData?.[
        TRANSIENT_SCREENSHOT_MARKER
      ],
    ).toBe(true);
    expect((shaped[4] as { role: string }).role).toBe("user");
  });
  it("should handle nested data structure (one level)", () => {
    const output = {
      success: true,
      data: {
        success: true,
        imageData: TEST_IMAGE_DATA,
        sendToLLM: true,
        screenshotUid: TEST_SCREENSHOT_UID,
      },
    };
    const item: AgentInputItem = {
      type: "function_call_result",
      name: "capture_screenshot",
      callId: "call_nested",
      output: JSON.stringify(output),
    } as AgentInputItem;
    const shaped = shapeScreenshotItems([item]);
    expect(shaped.length).toBe(2);
    const parsedOutput = JSON.parse((shaped[0] as { output: string }).output);
    expect(parsedOutput.success).toBe(true);
    expect(parsedOutput.data.imageData).toBe(PLACEHOLDER);
    expect(parsedOutput.data.screenshotUid).toBe(TEST_SCREENSHOT_UID);
  });
  it("should handle data.data nesting (two levels)", () => {
    // This matches the old aipex pattern: { data: { data: { imageData, ... } } }
    const output = {
      success: true,
      data: {
        data: {
          imageData: TEST_IMAGE_DATA,
          sendToLLM: true,
          screenshotUid: TEST_SCREENSHOT_UID,
          tabId: 1,
          url: "https://example.com",
        },
      },
    };
    const item: AgentInputItem = {
      type: "function_call_result",
      name: "capture_screenshot",
      callId: "call_deep_nested",
      output: JSON.stringify(output),
    } as AgentInputItem;
    const shaped = shapeScreenshotItems([item]);
    expect(shaped.length).toBe(2);
    // Stripped tool result
    const parsedOutput = JSON.parse((shaped[0] as { output: string }).output);
    expect(parsedOutput.success).toBe(true);
    expect(parsedOutput.data).toBeDefined();
    expect(parsedOutput.data.imageData).toBe(PLACEHOLDER);
    expect(parsedOutput.data.screenshotUid).toBe(TEST_SCREENSHOT_UID);
    expect(parsedOutput.data.sendToLLM).toBe(true);
    // Transient user image message
    const userMsg = shaped[1] as {
      type: string;
      role: string;
      content: Array<{ type: string; image?: string }>;
    };
    expect(userMsg.type).toBe("message");
    expect(userMsg.role).toBe("user");
    const imagePart = userMsg.content.find((c) => c.type === "input_image");
    expect(imagePart).toBeTruthy();
    expect((imagePart as { image: string }).image).toBe(TEST_IMAGE_DATA);
  });
  it("should always produce { success: true, data: { ... } } envelope", () => {
    // Even for flat input (no data wrapper), stripped output is wrapped
    const items = [createScreenshotToolResult()];
    const shaped = shapeScreenshotItems(items);
    const parsed = JSON.parse((shaped[0] as { output: string }).output);
    expect(parsed).toHaveProperty("success", true);
    expect(parsed).toHaveProperty("data");
    expect(typeof parsed.data).toBe("object");
    expect(parsed.data.imageData).toBe(PLACEHOLDER);
  });
  // --- SDK output format tests ---
  // The @openai/agents SDK wraps tool outputs in { type: 'text', text: '...' }
  // via getToolCallOutputItem(). These tests verify shaping works with that format.
  it("should shape SDK-format output (type: text object wrapper)", () => {
    const items = [createSdkScreenshotToolResult()];
    const shaped = shapeScreenshotItems(items);
    expect(shaped.length).toBe(2);
    // First item: stripped tool result - output should preserve text object format
    const toolResult = shaped[0] as { type: string; output: unknown };
    expect(toolResult.type).toBe("function_call_result");
    // Output should be { type: 'text', text: '...' } (same format as input)
    const output = toolResult.output as { type: string; text: string };
    expect(output.type).toBe("text");
    expect(typeof output.text).toBe("string");
    const parsed = JSON.parse(output.text);
    expect(parsed.success).toBe(true);
    expect(parsed.data.imageData).toBe(PLACEHOLDER);
    expect(parsed.data.screenshotUid).toBe(TEST_SCREENSHOT_UID);
    expect(parsed.data.sendToLLM).toBe(true);
    // Second item: transient user image message
    const userMsg = shaped[1] as {
      type: string;
      role: string;
      content: Array<{ type: string; image?: string }>;
      providerData?: Record<string, unknown>;
    };
    expect(userMsg.type).toBe("message");
    expect(userMsg.role).toBe("user");
    expect(userMsg.providerData?.[TRANSIENT_SCREENSHOT_MARKER]).toBe(true);
    const imagePart = userMsg.content.find((c) => c.type === "input_image");
    expect((imagePart as { image: string }).image).toBe(TEST_IMAGE_DATA);
  });
  it("should pass through SDK-format output when sendToLLM=false", () => {
    const items = [
      createSdkScreenshotToolResult({
        sendToLLM: false,
        imageData: undefined,
        captured: true,
      }),
    ];
    const shaped = shapeScreenshotItems(items);
    expect(shaped.length).toBe(1);
    expect(shaped[0]).toEqual(items[0]);
  });
  it("should handle SDK-format output for capture_screenshot_with_highlight", () => {
    const output = {
      success: true,
      imageData: TEST_IMAGE_DATA,
      sendToLLM: true,
      screenshotUid: TEST_SCREENSHOT_UID,
      tabId: 1,
      url: "https://example.com",
      title: "Example",
      selector: ".my-element",
      cropped: true,
    };
    const item: AgentInputItem = {
      type: "function_call_result",
      name: "capture_screenshot_with_highlight",
      callId: "call_sdk_highlight",
      status: "completed",
      output: { type: "text", text: JSON.stringify(output) },
    } as AgentInputItem;
    const shaped = shapeScreenshotItems([item]);
    expect(shaped.length).toBe(2);
    // Output preserves text object format
    const toolOutput = (shaped[0] as { output: unknown }).output as {
      type: string;
      text: string;
    };
    expect(toolOutput.type).toBe("text");
    const parsed = JSON.parse(toolOutput.text);
    expect(parsed.success).toBe(true);
    expect(parsed.data.imageData).toBe(PLACEHOLDER);
  });
  it("should handle SDK array output format", () => {
    const outputJson = JSON.stringify({
      success: true,
      imageData: TEST_IMAGE_DATA,
      sendToLLM: true,
      screenshotUid: TEST_SCREENSHOT_UID,
    });
    // The SDK may produce array-wrapped outputs in some structured paths.
    // TypeScript types don't perfectly cover this at compile time, hence
    // the cast through unknown.
    const item = {
      type: "function_call_result",
      name: "capture_screenshot",
      callId: "call_sdk_array",
      status: "completed",
      output: [{ type: "text", text: outputJson }],
    } as unknown as AgentInputItem;
    const shaped = shapeScreenshotItems([item]);
    expect(shaped.length).toBe(2);
    // Output should be { type: 'text', text: '...' } (normalized from array)
    const toolOutput = (shaped[0] as { output: unknown }).output as {
      type: string;
      text: string;
    };
    expect(toolOutput.type).toBe("text");
    const parsed = JSON.parse(toolOutput.text);
    expect(parsed.success).toBe(true);
    expect(parsed.data.imageData).toBe(PLACEHOLDER);
  });
});
describe("pruneTransientScreenshotItems", () => {
  it("should remove transient screenshot items", () => {
    const transient: AgentInputItem = {
      type: "message",
      role: "user",
      content: [
        { type: "input_text", text: "screenshot" },
        { type: "input_image", image: TEST_IMAGE_DATA, detail: "auto" },
      ],
      providerData: { [TRANSIENT_SCREENSHOT_MARKER]: true },
    } as AgentInputItem;
    const normal = createUserMessage("hello");

    const pruned = pruneTransientScreenshotItems([normal, transient]);

    // Only the marked message disappears; the normal one survives intact.
    expect(pruned).toHaveLength(1);
    expect(pruned[0]).toEqual(normal);
  });

  it("should keep all items when no transients exist", () => {
    const pruned = pruneTransientScreenshotItems([
      createUserMessage("a"),
      createUserMessage("b"),
    ]);
    expect(pruned).toHaveLength(2);
  });
});
describe("isTransientScreenshotItem", () => {
  it("should return true for transient items", () => {
    const marked = {
      type: "message",
      role: "user",
      content: "test",
      providerData: { [TRANSIENT_SCREENSHOT_MARKER]: true },
    } as unknown as AgentInputItem;
    expect(isTransientScreenshotItem(marked)).toBe(true);
  });

  it("should return false for normal items", () => {
    const plain = createUserMessage("hello");
    expect(isTransientScreenshotItem(plain)).toBe(false);
  });
});

View File

@@ -0,0 +1,292 @@
/**
* Screenshot message shaping utilities.
*
* When a screenshot tool returns `sendToLLM=true`, the large base64 imageData
* must NOT be sent inside the function_call_result output (models may not
* support images there, and it bloats token counts).
*
* Instead, the imageData is:
* 1. Stripped from the tool result (replaced with a placeholder string).
* 2. Injected as a follow-up user message with `input_image` content.
*
* This matches the message flow used in the original aipex codebase.
*/
import type { AgentInputItem } from "@openai/agents";
import { safeJsonParse } from "./json.js";
/** Tool names whose results may include screenshot image data */
const SCREENSHOT_TOOL_NAMES = new Set([
  "capture_screenshot",
  "capture_screenshot_with_highlight",
  "capture_tab_screenshot",
]);
/**
 * Placeholder that replaces imageData in the tool result, pointing the model
 * at the follow-up user message that carries the actual image.
 */
const IMAGE_DATA_PLACEHOLDER =
  "[Image data removed - see following user message]";
/**
 * Marker key set on `providerData` of the transient user-image messages, so
 * they can be identified and pruned before persistence/compression.
 */
export const TRANSIENT_SCREENSHOT_MARKER = "__transient_screenshot__";
/**
 * Process a batch of AgentInputItems. For any `function_call_result` from
 * a screenshot tool that contains `imageData` with `sendToLLM=true`:
 * - Replace imageData with a placeholder in the tool result.
 * - Insert a transient user message with the real image right after.
 *
 * Items that are not screenshot tool results pass through unchanged.
 *
 * @param items - Conversation items about to be sent to the model.
 * @returns A new array; the input array and untouched items are not mutated.
 */
export function shapeScreenshotItems(
  items: AgentInputItem[],
): AgentInputItem[] {
  const result: AgentInputItem[] = [];
  for (const item of items) {
    if (item.type !== "function_call_result") {
      result.push(item);
      continue;
    }
    const funcResult = item as {
      type: "function_call_result";
      name: string;
      callId: string;
      output: unknown;
      [key: string]: unknown;
    };
    if (!SCREENSHOT_TOOL_NAMES.has(funcResult.name)) {
      result.push(item);
      continue;
    }
    // Normalize output: the SDK wraps tool return values in
    // { type: 'text', text: '...' }, but older paths may use plain strings.
    const { jsonString, outputFormat } = extractOutputJsonString(
      funcResult.output,
    );
    if (!jsonString) {
      result.push(item);
      continue;
    }
    // Try to parse the output and extract imageData
    const parsed = safeJsonParse<Record<string, unknown>>(jsonString);
    if (!parsed) {
      result.push(item);
      continue;
    }
    const extracted = extractImageData(parsed);
    if (!extracted) {
      // No sendToLLM image data — pass through
      result.push(item);
      continue;
    }
    // 1. Rewrite the tool result with imageData stripped, preserving the
    //    original output format (object wrapper or plain string).
    const strippedOutput = buildStrippedOutput(parsed, extracted.screenshotUid);
    const strippedJson = JSON.stringify(strippedOutput);
    const newOutput =
      outputFormat === "text_object"
        ? { type: "text", text: strippedJson }
        : strippedJson;
    const strippedItem: AgentInputItem = {
      ...item,
      output: newOutput,
    } as AgentInputItem;
    result.push(strippedItem);
    // 2. Insert a transient user message carrying the real image.
    // NOTE: a previous `toolName === "computer"` branch here was unreachable —
    // "computer" is not in SCREENSHOT_TOOL_NAMES, so the guard above filters
    // it out long before this point; the message text is always the same.
    const userImageMessage: AgentInputItem = {
      type: "message",
      role: "user",
      content: [
        { type: "input_text", text: "Here is the screenshot you requested:" },
        {
          type: "input_image",
          image: extracted.imageData,
          detail: "auto",
        },
      ],
      // Mark as transient so it can be pruned before persistence/compression
      providerData: { [TRANSIENT_SCREENSHOT_MARKER]: true },
    } as AgentInputItem;
    result.push(userImageMessage);
  }
  return result;
}
/**
* Remove transient screenshot user-image messages from items.
* Used before persistence or compression.
*/
export function pruneTransientScreenshotItems(
items: AgentInputItem[],
): AgentInputItem[] {
return items.filter((item) => {
const pd = (item as { providerData?: Record<string, unknown> })
.providerData;
return !pd?.[TRANSIENT_SCREENSHOT_MARKER];
});
}
/**
 * Check whether an item is a transient screenshot user-image message,
 * i.e. whether its providerData carries the transient marker.
 */
export function isTransientScreenshotItem(item: AgentInputItem): boolean {
  const providerData = (item as { providerData?: Record<string, unknown> })
    .providerData;
  return Boolean(providerData?.[TRANSIENT_SCREENSHOT_MARKER]);
}
// ===================== Internal helpers =====================
/**
 * Describes how the SDK stored the output value:
 * - "plain_string": output was a bare JSON string (legacy/test path)
 * - "text_object": output was wrapped as { type: 'text', text: '...' }
 */
type OutputFormat = "plain_string" | "text_object";
/**
 * Pull the JSON payload string out of a tool result `output` field.
 *
 * Handles three shapes produced by different code paths:
 * - a bare string (legacy / test path),
 * - the SDK wrapper `{ type: 'text', text: '<json>' }`,
 * - an array containing such a wrapper entry.
 *
 * @returns The JSON string (or null when none found) plus the format it was
 *   stored in, so the caller can write the stripped result back the same way.
 */
function extractOutputJsonString(output: unknown): {
  jsonString: string | null;
  outputFormat: OutputFormat;
} {
  // Bare string: nothing to unwrap.
  if (typeof output === "string") {
    return { jsonString: output, outputFormat: "plain_string" };
  }

  // Shared predicate for the SDK's { type: 'text', text } wrapper.
  const isTextEntry = (value: unknown): value is { text: string } => {
    if (value === null || typeof value !== "object") return false;
    const record = value as Record<string, unknown>;
    return record.type === "text" && typeof record.text === "string";
  };

  // Array wrapper: use the first text entry, if any.
  if (Array.isArray(output)) {
    const hit = output.find(isTextEntry);
    if (hit) {
      return { jsonString: hit.text, outputFormat: "text_object" };
    }
    return { jsonString: null, outputFormat: "plain_string" };
  }

  // Single object wrapper.
  if (isTextEntry(output)) {
    return { jsonString: output.text, outputFormat: "text_object" };
  }

  return { jsonString: null, outputFormat: "plain_string" };
}
/** Image payload pulled out of a parsed screenshot tool result. */
interface ExtractedImage {
  // Data URL for the screenshot (always starts with "data:image/")
  imageData: string;
  // Uid of the stored screenshot, when the tool result carried one
  screenshotUid?: string;
}
/**
 * Extract imageData from parsed tool output.
 * Handles nested structures matching the old aipex pattern:
 *   { success, imageData, sendToLLM, screenshotUid }              (flat)
 *   { success, data: { imageData, sendToLLM, screenshotUid } }    (one level)
 *   { data: { data: { imageData, sendToLLM, screenshotUid } } }   (two levels)
 *
 * @returns The image payload, or null when the result is unsuccessful,
 *   not flagged sendToLLM, or carries no data-URL image.
 */
function extractImageData(
  parsed: Record<string, unknown>,
): ExtractedImage | null {
  if (!parsed.success) return null;

  // Walk into the nested envelope (mirrors old aipex:
  // middleLayer?.data || middleLayer || parsedContent)
  const { sendToLLM, imageData, screenshotUid } = resolveActualData(parsed);

  // Only results explicitly flagged for the LLM qualify.
  if (sendToLLM !== true) return null;

  // The image must be an inline data URL.
  if (typeof imageData !== "string" || !imageData.startsWith("data:image/")) {
    return null;
  }

  return {
    imageData,
    screenshotUid:
      typeof screenshotUid === "string" ? screenshotUid : undefined,
  };
}
/**
 * Navigate into a parsed tool result to reach the "actual data" layer.
 * Handles:
 * - flat:      { success, imageData, ... }
 * - one level: { success, data: { imageData, ... } }
 * - two levels:{ data: { data: { imageData, ... } } }
 *
 * Mirrors the old aipex pattern:
 *   middleLayer?.data || middleLayer || parsedContent
 */
function resolveActualData(
  parsed: Record<string, unknown>,
): Record<string, unknown> {
  const level1 = parsed.data;
  // No usable middle layer — the payload is flat.
  if (!level1 || typeof level1 !== "object") {
    return parsed;
  }
  const middle = level1 as Record<string, unknown>;
  const level2 = middle.data;
  // Two-level nesting: prefer the innermost data object.
  if (level2 && typeof level2 === "object") {
    return level2 as Record<string, unknown>;
  }
  return middle;
}
/**
 * Build the stripped tool output object (imageData replaced with placeholder).
 *
 * Always produces the `{ success: true, data: { ...actualData } }` envelope
 * to match the message format expected by the old aipex codebase.
 *
 * @param parsed - The parsed tool result (any nesting level).
 * @param screenshotUid - Optional uid to carry into the stripped payload.
 */
function buildStrippedOutput(
  parsed: Record<string, unknown>,
  screenshotUid?: string,
): Record<string, unknown> {
  // Always wrap in { success: true, data: { ... } } to match aipex convention;
  // the uid is only added when a non-empty value was supplied.
  return {
    success: true,
    data: {
      ...resolveActualData(parsed),
      imageData: IMAGE_DATA_PLACEHOLDER,
      ...(screenshotUid ? { screenshotUid } : {}),
    },
  };
}