feat: integrate models.dev registry for model defaults (#425)

* feat: integrate models.dev registry for auto-populated model defaults

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: fall back to upstream provider for model registry lookup

When the browseros meta-provider is used, the registry lookup now
also tries the upstream provider (e.g., openrouter, anthropic) so
that BrowserOS-hosted models get correct context window and image
support defaults.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix: add Object.hasOwn guards to prevent prototype chain lookup

Addresses Greptile review: bracket notation on the registry object
could return prototype-chain properties for keys like __proto__ or
constructor, bypassing the 404 guard in the route handler.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Felarof
2026-03-05 16:27:14 -08:00
committed by GitHub
parent 64b25c1610
commit ddc376a026
13 changed files with 7423 additions and 3 deletions

View File

@@ -0,0 +1,19 @@
{
"name": "@browseros/models-dev",
"version": "0.0.1",
"type": "module",
"scripts": {
"generate": "bun scripts/generate.ts",
"typecheck": "tsc --noEmit"
},
"exports": {
"./registry": {
"types": "./src/registry.ts",
"default": "./src/registry.ts"
},
"./types": {
"types": "./src/types.ts",
"default": "./src/types.ts"
}
}
}

View File

@@ -0,0 +1,181 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Generates registry.json from a local models.dev clone.
*
* Usage:
* bun packages/models-dev/scripts/generate.ts /path/to/models.dev/providers
*/
import path from 'node:path'
import type {
ModelCost,
ModelInfo,
ModelRegistry,
ProviderInfo,
} from '../src/types'
// models.dev provider IDs → BrowserOS provider IDs
// Only the providers listed here are processed. Each key is the provider's
// directory name inside the models.dev providers folder; each value is the key
// that provider is stored under in the generated registry.
const PROVIDER_MAP: Record<string, string> = {
anthropic: 'anthropic',
openai: 'openai',
google: 'google',
openrouter: 'openrouter',
// The only entry whose models.dev ID differs from its BrowserOS ID.
'ollama-cloud': 'ollama',
}
// Source-side provider IDs, in the declaration order above; main() walks this list.
const SOURCE_PROVIDERS = Object.keys(PROVIDER_MAP)
/**
 * Shape assumed for a model TOML file loaded from models.dev, before
 * extractModel() trims it down to a ModelInfo. The file contents are cast to
 * this type without runtime validation.
 */
interface RawModel {
// Not read by extractModel(); the model ID is derived from the file path instead.
id?: string
name: string
family?: string
reasoning: boolean
tool_call: boolean
structured_output?: boolean
attachment: boolean
modalities: { input: string[]; output: string[] }
limit: { context: number; input?: number; output: number }
cost?: RawCost
knowledge?: string
// Kept as a plain string here; extractModel() asserts it to ModelInfo['status'].
status?: string
release_date: string
// Any other TOML keys are tolerated by the type and dropped by extractModel().
[key: string]: unknown
}
/**
 * Shape assumed for the `cost` table of a model TOML file. extractCost()
 * copies the five named fields and ignores everything else.
 */
interface RawCost {
input: number
output: number
reasoning?: number
cache_read?: number
cache_write?: number
// Additional cost keys are allowed by the type but never copied to the registry.
[key: string]: unknown
}
/**
 * Shape assumed for a provider's provider.toml. Only `name` is read by
 * generateProvider(); all other keys are ignored.
 */
interface RawProvider {
name: string
[key: string]: unknown
}
/**
 * Picks the known pricing fields out of a raw `cost` table.
 *
 * `input` and `output` are always copied. `reasoning`, `cache_read` and
 * `cache_write` are copied only when they are not `undefined`, so a value of
 * `0` is preserved. Any other key on `raw` is dropped.
 *
 * @param raw - The raw cost table, if the model has one.
 * @returns The trimmed cost object, or `undefined` when `raw` is falsy.
 */
function extractCost(raw?: RawCost): ModelCost | undefined {
  if (!raw) {
    return undefined
  }

  const { input, output, reasoning, cache_read, cache_write } = raw

  // Key order (input, output, reasoning, cache_read, cache_write) is kept
  // stable because the result is serialized into registry.json.
  return {
    input,
    output,
    ...(reasoning !== undefined ? { reasoning } : {}),
    ...(cache_read !== undefined ? { cache_read } : {}),
    ...(cache_write !== undefined ? { cache_write } : {}),
  }
}
/**
 * Reduces a raw model entry to the ModelInfo fields kept in the registry.
 *
 * Required fields are copied as-is. Optional fields follow two rules:
 * - `family`, `cost`, `knowledge` and `status` are included only when truthy
 *   (so an empty string is omitted);
 * - `structured_output` and `limit.input` are included whenever they are not
 *   `undefined` (so `false` / `0` are preserved).
 * Keys on `raw` that are not part of ModelInfo, including `raw.id`, are dropped.
 *
 * @param raw - The raw model entry.
 * @param modelId - The ID to store on the result; `raw.id` is not consulted.
 * @returns The trimmed model metadata.
 */
function extractModel(raw: RawModel, modelId: string): ModelInfo {
  const displayName = raw.name

  const familyPart = raw.family ? { family: raw.family } : {}
  const structuredOutputPart =
    raw.structured_output !== undefined
      ? { structured_output: raw.structured_output }
      : {}

  const limit = {
    context: raw.limit.context,
    ...(raw.limit.input !== undefined ? { input: raw.limit.input } : {}),
    output: raw.limit.output,
  }

  const costPart = raw.cost ? { cost: extractCost(raw.cost) } : {}
  const knowledgePart = raw.knowledge ? { knowledge: raw.knowledge } : {}
  // The status string is asserted, not validated, against the ModelInfo union.
  const statusPart = raw.status
    ? { status: raw.status as ModelInfo['status'] }
    : {}

  // Property order below is the order keys appear in the serialized registry.
  return {
    id: modelId,
    name: displayName,
    ...familyPart,
    reasoning: raw.reasoning,
    tool_call: raw.tool_call,
    ...structuredOutputPart,
    attachment: raw.attachment,
    modalities: raw.modalities,
    limit,
    ...costPart,
    ...knowledgePart,
    ...statusPart,
    release_date: raw.release_date,
  }
}
/**
 * Loads a TOML file by dynamically importing it with the `toml` import
 * attribute and returning the module's default export.
 *
 * This relies on the runtime's module loader understanding TOML imports (the
 * script is run with Bun). Import failures reject the returned promise.
 *
 * @param filePath - Path of the TOML file to import.
 * @returns The module's default export; its shape is not validated here.
 */
async function loadToml(filePath: string): Promise<Record<string, unknown>> {
  const tomlModule = await import(filePath, { with: { type: 'toml' } })
  return tomlModule.default
}
/** File extension of the per-model definition files. */
const MODEL_FILE_EXTENSION = '.toml'

/**
 * Derives a model ID from a model file path: the path relative to the
 * provider's models directory, without the extension, always using `/` as the
 * separator so nested IDs are identical on every platform.
 */
function toModelId(modelsDir: string, modelPath: string): string {
  return path
    .relative(modelsDir, modelPath)
    .slice(0, -MODEL_FILE_EXTENSION.length)
    .split(path.sep)
    .join('/')
}

/**
 * Builds the registry entry for one provider from a models.dev checkout.
 *
 * Reads `<providersDir>/<sourceId>/provider.toml` for the provider name, then
 * loads every `*.toml` file (recursively, following symlinks) under
 * `<providersDir>/<sourceId>/models`. A model file that fails to load or
 * extract is skipped with a warning rather than aborting the provider.
 *
 * @param providersDir - Path to the models.dev providers directory.
 * @param sourceId - Provider directory name on the models.dev side.
 * @param targetId - ID stored on the returned entry.
 * @returns The provider entry with its models keyed by model ID.
 * @throws If provider.toml cannot be loaded or the models directory cannot be
 *   scanned; these rejections propagate to the caller.
 */
async function generateProvider(
  providersDir: string,
  sourceId: string,
  targetId: string,
): Promise<ProviderInfo> {
  const providerDir = path.join(providersDir, sourceId)

  // Load provider metadata
  const providerToml = (await loadToml(
    path.join(providerDir, 'provider.toml'),
  )) as unknown as RawProvider
  const provider: ProviderInfo = {
    id: targetId,
    name: providerToml.name,
    models: {},
  }

  // Discover all model TOML files first so they can be ordered before loading.
  const modelsDir = path.join(providerDir, 'models')
  const modelFiles: Array<{ modelId: string; modelPath: string }> = []
  for await (const modelPath of new Bun.Glob('**/*.toml').scan({
    cwd: modelsDir,
    absolute: true,
    followSymlinks: true,
  })) {
    modelFiles.push({ modelId: toModelId(modelsDir, modelPath), modelPath })
  }

  // Scan order depends on the filesystem; sorting by ID keeps the key order of
  // the serialized registry stable between runs and machines.
  modelFiles.sort((a, b) =>
    a.modelId < b.modelId ? -1 : a.modelId > b.modelId ? 1 : 0,
  )

  for (const { modelId, modelPath } of modelFiles) {
    try {
      const raw = (await loadToml(modelPath)) as unknown as RawModel
      provider.models[modelId] = extractModel(raw, modelId)
    } catch (err) {
      // Best effort: one malformed model file must not drop the whole provider.
      console.warn(`Skipping ${sourceId}/${modelId}: ${err}`)
    }
  }
  return provider
}
/**
 * CLI entry point: builds the model registry from a models.dev providers
 * directory and writes it to `../src/data/registry.json` relative to this
 * script.
 *
 * The providers directory is taken from the first CLI argument. The process
 * exits with status 1 when the argument is missing or when any provider in
 * SOURCE_PROVIDERS fails to generate; in the latter case nothing is written.
 */
async function main(): Promise<void> {
  const providersDir = process.argv[2]
  if (!providersDir) {
    console.error(
      'Usage: bun scripts/generate.ts <path-to-models.dev-providers>',
    )
    process.exit(1)
  }
  const absoluteDir = path.resolve(providersDir)
  console.log(`Reading from: ${absoluteDir}`)

  const registry: ModelRegistry = {}
  for (const sourceId of SOURCE_PROVIDERS) {
    const targetId = PROVIDER_MAP[sourceId]
    try {
      const provider = await generateProvider(absoluteDir, sourceId, targetId)
      const modelCount = Object.keys(provider.models).length
      // Separate the two IDs so the line reads "ollama-cloud → ollama: N models"
      // instead of running them together.
      console.log(` ${sourceId} → ${targetId}: ${modelCount} models`)
      // Entries are keyed by the BrowserOS-side ID, not the models.dev one.
      registry[targetId] = provider
    } catch (err) {
      // A provider-level failure is fatal: a partial registry is never written.
      console.error(`Failed to process ${sourceId}: ${err}`)
      process.exit(1)
    }
  }

  // Write registry
  const outputPath = path.join(
    import.meta.dir,
    '..',
    'src',
    'data',
    'registry.json',
  )
  await Bun.write(outputPath, JSON.stringify(registry, null, 2))

  const totalModels = Object.values(registry).reduce(
    (sum, p) => sum + Object.keys(p.models).length,
    0,
  )
  console.log(`\nGenerated ${outputPath}`)
  console.log(
    ` ${Object.keys(registry).length} providers, ${totalModels} models`,
  )
}

// Run the generator when the script is executed.
main()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,32 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Model registry lookup functions.
* Data is sourced from models.dev and pre-built into registry.json.
*/
import data from './data/registry.json'
import type { ModelInfo, ModelRegistry, ProviderInfo } from './types'
// The pre-built JSON is treated as a ModelRegistry by type assertion only;
// its contents are not validated at runtime.
const registry: ModelRegistry = data as ModelRegistry
/**
 * Looks up the registry metadata for one model of one provider.
 *
 * Both lookups are own-property checks, so keys that exist only on the
 * prototype chain (`__proto__`, `constructor`, `toString`, ...) are treated as
 * missing.
 *
 * @param provider - Provider ID used as the registry key.
 * @param modelId - Model ID used as the key within that provider's models.
 * @returns The model's metadata, or `undefined` when the provider or the
 *   model is not in the registry.
 */
export function getModelDefaults(
  provider: string,
  modelId: string,
): ModelInfo | undefined {
  if (Object.hasOwn(registry, provider)) {
    const { models } = registry[provider]
    if (Object.hasOwn(models, modelId)) {
      return models[modelId]
    }
  }
  return undefined
}
/**
 * Returns the registry entry for a provider, including all of its models.
 *
 * The lookup is an own-property check, so prototype-chain keys such as
 * `__proto__` or `constructor` are reported as missing.
 *
 * @param provider - Provider ID used as the registry key.
 * @returns The provider entry, or `undefined` when it is not in the registry.
 */
export function getProviderModels(provider: string): ProviderInfo | undefined {
  return Object.hasOwn(registry, provider) ? registry[provider] : undefined
}
/**
 * Returns the whole registry, keyed by provider ID.
 *
 * The module's registry object itself is returned, not a copy, so mutations
 * made by a caller are visible to every other lookup.
 *
 * @returns Every provider entry in the registry.
 */
export function getAllProviders(): Record<string, ProviderInfo> {
  const providers: Record<string, ProviderInfo> = registry
  return providers
}

View File

@@ -0,0 +1,46 @@
/**
* @license
* Copyright 2025 BrowserOS
* SPDX-License-Identifier: AGPL-3.0-or-later
*
* Model metadata types sourced from models.dev.
*/
/**
 * Pricing figures for a model, copied from the `cost` table of its models.dev
 * entry.
 * NOTE(review): units are not stated anywhere in this package — presumably
 * they follow whatever models.dev uses; confirm before displaying them.
 */
export interface ModelCost {
input: number
output: number
// The remaining fields are present only when the source entry defines them.
reasoning?: number
cache_read?: number
cache_write?: number
}
/**
 * Metadata for a single model as stored in the registry.
 * Field names mirror the keys of the models.dev model files (hence the
 * snake_case); the generator copies them without renaming.
 */
export interface ModelInfo {
/** Model ID; also the key this entry is stored under in ProviderInfo.models. */
id: string
name: string
family?: string
// Capability flags copied verbatim from models.dev.
// NOTE(review): exact meaning of each flag is defined upstream — confirm there.
reasoning: boolean
tool_call: boolean
structured_output?: boolean
attachment: boolean
modalities: {
input: string[]
output: string[]
}
// Size limits; `context` is presumably the context window — TODO confirm units.
limit: {
context: number
input?: number
output: number
}
cost?: ModelCost
// presumably the knowledge cutoff; the string format is not enforced here.
knowledge?: string
/** Lifecycle marker; absent when the source entry has no status. */
status?: 'alpha' | 'beta' | 'deprecated'
// Date string as given by the source entry; the format is not enforced here.
release_date: string
}
/** A provider together with every model the registry lists for it. */
export interface ProviderInfo {
/** Provider ID; the generator stores the BrowserOS-side ID here, not the models.dev one. */
id: string
/** Provider name as read from the provider's provider.toml. */
name: string
/** Models keyed by model ID (the same value as each entry's `id`). */
models: Record<string, ModelInfo>
}
/** The full registry, keyed by provider ID; this is the shape of data/registry.json. */
export type ModelRegistry = Record<string, ProviderInfo>

View File

@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"rootDir": "src",
"resolveJsonModule": true
},
"include": ["src/**/*"]
}