zen: tps rate limit

This commit is contained in:
Frank
2026-05-11 18:32:37 -04:00
parent 812668ae2f
commit 0cf90109dc
6 changed files with 2871 additions and 4 deletions

View File

@@ -47,6 +47,7 @@ import { Resource } from "@opencode-ai/console-resource"
import { i18n, type Key } from "~/i18n"
import { localeFromRequest } from "~/lib/language"
import { createModelTpmLimiter } from "./modelTpmLimiter"
import { createModelTpsLimiter } from "./modelTpsLimiter"
type ZenData = Awaited<ReturnType<typeof ZenData.list>>
type RetryOptions = {
@@ -129,6 +130,8 @@ export async function handler(
logger.metric({ source: billingSource })
const modelTpmLimiter = createModelTpmLimiter(modelInfo.providers)
const modelTpmLimits = await modelTpmLimiter?.check()
const modelTpsLimiter = createModelTpsLimiter(modelInfo.providers)
const modelTpsLimits = await modelTpsLimiter?.check()
const retriableRequest = async (retry: RetryOptions = { excludeProviders: [], retryCount: 0 }) => {
const providerInfo = selectProvider(
@@ -142,6 +145,7 @@ export async function handler(
retry,
stickyProvider,
modelTpmLimits,
modelTpsLimits,
)
validateModelSettings(billingSource, authInfo)
updateProviderKey(authInfo, providerInfo)
@@ -294,14 +298,17 @@ export async function handler(
let buffer = ""
let responseLength = 0
let timestampFirstByte = 0
let timestampLastByte = 0
function pump(): Promise<void> {
return (
reader?.read().then(async ({ done, value: rawValue }) => {
if (done) {
const timestampLastByte = Date.now()
logger.metric({
response_length: responseLength,
"timestamp.last_byte": Date.now(),
"timestamp.last_byte": timestampLastByte,
})
dataDumper?.flush()
await rateLimiter?.track()
@@ -311,6 +318,13 @@ export async function handler(
const costInfo = calculateCost(modelInfo, usageInfo)
await trialLimiter?.track(usageInfo)
await modelTpmLimiter?.track(providerInfo.id, providerInfo.model, usageInfo)
await modelTpsLimiter?.track(
providerInfo.id,
providerInfo.model,
timestampFirstByte,
timestampLastByte,
usageInfo,
)
await trackUsage(sessionId, billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo)
await reload(billingSource, authInfo, costInfo)
const cost = calculateOccurredCost(billingSource, costInfo)
@@ -321,10 +335,10 @@ export async function handler(
}
if (responseLength === 0) {
const now = Date.now()
timestampFirstByte = Date.now()
logger.metric({
time_to_first_byte: now - startTimestamp,
"timestamp.first_byte": now,
time_to_first_byte: timestampFirstByte - startTimestamp,
"timestamp.first_byte": timestampFirstByte,
})
}
@@ -478,6 +492,7 @@ export async function handler(
retry: RetryOptions,
stickyProvider: string | undefined,
modelTpmLimits: Record<string, number> | undefined,
modelTpsLimits: Record<string, boolean> | undefined,
) {
const modelProvider = (() => {
// Byok is top priority b/c if user set their own API key, we should use it
@@ -509,6 +524,11 @@ export async function handler(
const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
return usage < provider.tpmLimit * 1_000_000
})
.filter((provider) => {
if (!provider.tpsGoal) return true
const isLowTps = modelTpsLimits?.[`${provider.id}/${provider.model}`] ?? false
return !isLowTps
})
.map((provider) => {
topPriority = Math.min(topPriority, provider.priority)
return provider

View File

@@ -0,0 +1,89 @@
import { and, Database, inArray, sql } from "@opencode-ai/console-core/drizzle/index.js"
import { ModelTpsRateLimitTable } from "@opencode-ai/console-core/schema/ip.sql.js"
import { UsageInfo } from "./provider/provider"
/**
 * Builds a limiter that records, per `"<provider id>/<model>"` key, how many
 * responses met or missed the provider's configured tokens-per-second goal,
 * and reports which keys are currently considered "low TPS".
 *
 * Samples are stored in `ModelTpsRateLimitTable`, bucketed by UTC minute
 * (encoded as the number `YYYYMMDDHHmm`).
 *
 * @param providers Candidate providers; only entries with a truthy `tpsGoal`
 *   are tracked.
 * @returns `undefined` when no provider has a `tpsGoal`; otherwise an object
 *   with `check` and `track`.
 */
export function createModelTpsLimiter(providers: { id: string; model: string; tpsGoal?: number }[]) {
  // A key is only judged once MORE than this many samples exist in the window.
  const MIN_SAMPLES = 10
  // Responses with this many output tokens or fewer are ignored by `track`.
  const MIN_OUTPUT_TOKENS = 10

  const tpsGoals = new Map<string, number>()
  for (const p of providers) {
    if (p.tpsGoal) tpsGoals.set(`${p.id}/${p.model}`, p.tpsGoal)
  }
  const ids = [...tpsGoals.keys()]
  if (ids.length === 0) return

  // Epoch millis -> UTC minute bucket, e.g. 2026-05-11T22:32:37Z -> 202605112232.
  const toInterval = (ms: number) => Number(new Date(ms).toISOString().replace(/\D/g, "").substring(0, 12))

  return {
    /**
     * Reads the current and previous minute buckets for all tracked keys.
     *
     * @returns Map of key -> `true` when the key has more than `MIN_SAMPLES`
     *   samples in the window and fewer qualifying than non-qualifying ones.
     *   Keys with no rows are absent from the result.
     */
    check: async () => {
      const now = Date.now()
      const intervals = [toInterval(now), toInterval(now - 60 * 1000)]
      const rows = await Database.use((tx) =>
        tx
          .select()
          .from(ModelTpsRateLimitTable)
          .where(
            and(inArray(ModelTpsRateLimitTable.id, ids), inArray(ModelTpsRateLimitTable.interval, intervals)),
          ),
      )

      // Sum the counters of both buckets per key.
      const totals: Record<string, { qualify: number; unqualify: number }> = {}
      for (const row of rows) {
        const seen = totals[row.id] ?? { qualify: 0, unqualify: 0 }
        totals[row.id] = {
          qualify: seen.qualify + row.qualify,
          unqualify: seen.unqualify + row.unqualify,
        }
      }

      const lowTps: Record<string, boolean> = {}
      for (const [id, { qualify, unqualify }] of Object.entries(totals)) {
        lowTps[id] = qualify + unqualify > MIN_SAMPLES && qualify < unqualify
      }
      return lowTps
    },

    /**
     * Records one response as meeting ("qualify") or missing ("unqualify")
     * the TPS goal of `provider`/`model`.
     *
     * The sample is skipped when the key is not tracked, when either
     * timestamp is not positive, when the elapsed time is not positive, or
     * when the response has `MIN_OUTPUT_TOKENS` output tokens or fewer.
     *
     * @param provider Provider id.
     * @param model Provider model name.
     * @param tsFirstByte Epoch millis at which the first byte was received.
     * @param tsLastByte Epoch millis at which the last byte was received.
     * @param usageInfo Usage of the response; only `outputTokens` is read.
     */
    track: async (provider: string, model: string, tsFirstByte: number, tsLastByte: number, usageInfo: UsageInfo) => {
      const id = `${provider}/${model}`
      const tpsGoal = tpsGoals.get(id)
      if (!tpsGoal) return
      if (tsFirstByte <= 0 || tsLastByte <= 0) return
      // A zero or negative duration would yield Infinity or a negative TPS and
      // be miscounted as a pass or a miss; such a sample carries no signal.
      const elapsedMs = tsLastByte - tsFirstByte
      if (!(elapsedMs > 0)) return
      const tokens = usageInfo.outputTokens
      if (tokens <= MIN_OUTPUT_TOKENS) return

      const tps = (tokens / elapsedMs) * 1000
      const qualify = tps >= tpsGoal ? 1 : 0
      const unqualify = tps < tpsGoal ? 1 : 0
      // Bucket by the time the sample is recorded, not by when the limiter was
      // created: `check` only reads the current and previous minute, so a long
      // (typically slow) stream filed under its start minute would never be read.
      const interval = toInterval(Date.now())
      await Database.use((tx) =>
        tx
          .insert(ModelTpsRateLimitTable)
          .values({
            id,
            interval,
            qualify,
            unqualify,
          })
          .onDuplicateKeyUpdate({
            set: {
              qualify: sql`${ModelTpsRateLimitTable.qualify} + ${qualify}`,
              unqualify: sql`${ModelTpsRateLimitTable.unqualify} + ${unqualify}`,
            },
          }),
      )
    },
  }
}

View File

@@ -0,0 +1,7 @@
-- Counters of tokens-per-second samples, one row per (`id`, `interval`) pair.
CREATE TABLE `model_tps_rate_limit` (
-- Key written by the TPS limiter as "<provider id>/<model>".
`id` varchar(255) NOT NULL,
-- UTC minute bucket stored as the number YYYYMMDDHHmm.
`interval` bigint NOT NULL,
-- Number of samples in this bucket that met the provider's TPS goal.
`qualify` int NOT NULL,
-- Number of samples in this bucket that fell below the provider's TPS goal.
`unqualify` int NOT NULL,
CONSTRAINT PRIMARY KEY(`id`,`interval`)
);

File diff suppressed because it is too large Load Diff

View File

@@ -36,6 +36,7 @@ export namespace ZenData {
model: z.string(),
priority: z.number().optional(),
tpmLimit: z.number().optional(),
tpsGoal: z.number().optional(),
weight: z.number().optional(),
disabled: z.boolean().optional(),
storeModel: z.string().optional(),

View File

@@ -40,3 +40,14 @@ export const ModelTpmRateLimitTable = mysqlTable(
},
(table) => [primaryKey({ columns: [table.id, table.interval] })],
)
/**
 * Drizzle definition of the `model_tps_rate_limit` table.
 *
 * Each row holds two counters (`qualify`, `unqualify`) for one `id` in one
 * `interval`; the pair (`id`, `interval`) is the composite primary key.
 */
export const ModelTpsRateLimitTable = mysqlTable(
  "model_tps_rate_limit",
  {
    id: varchar("id", { length: 255 }).notNull(),
    interval: bigint("interval", { mode: "number" }).notNull(),
    qualify: int("qualify").notNull(),
    unqualify: int("unqualify").notNull(),
  },
  // Composite primary key over (id, interval).
  ({ id, interval }) => [primaryKey({ columns: [id, interval] })],
)