mirror of
https://github.com/anomalyco/opencode.git
synced 2026-05-13 15:44:56 +00:00
zen: tps rate limit
This commit is contained in:
@@ -47,6 +47,7 @@ import { Resource } from "@opencode-ai/console-resource"
|
||||
import { i18n, type Key } from "~/i18n"
|
||||
import { localeFromRequest } from "~/lib/language"
|
||||
import { createModelTpmLimiter } from "./modelTpmLimiter"
|
||||
import { createModelTpsLimiter } from "./modelTpsLimiter"
|
||||
|
||||
type ZenData = Awaited<ReturnType<typeof ZenData.list>>
|
||||
type RetryOptions = {
|
||||
@@ -129,6 +130,8 @@ export async function handler(
|
||||
logger.metric({ source: billingSource })
|
||||
const modelTpmLimiter = createModelTpmLimiter(modelInfo.providers)
|
||||
const modelTpmLimits = await modelTpmLimiter?.check()
|
||||
const modelTpsLimiter = createModelTpsLimiter(modelInfo.providers)
|
||||
const modelTpsLimits = await modelTpsLimiter?.check()
|
||||
|
||||
const retriableRequest = async (retry: RetryOptions = { excludeProviders: [], retryCount: 0 }) => {
|
||||
const providerInfo = selectProvider(
|
||||
@@ -142,6 +145,7 @@ export async function handler(
|
||||
retry,
|
||||
stickyProvider,
|
||||
modelTpmLimits,
|
||||
modelTpsLimits,
|
||||
)
|
||||
validateModelSettings(billingSource, authInfo)
|
||||
updateProviderKey(authInfo, providerInfo)
|
||||
@@ -294,14 +298,17 @@ export async function handler(
|
||||
|
||||
let buffer = ""
|
||||
let responseLength = 0
|
||||
let timestampFirstByte = 0
|
||||
let timestampLastByte = 0
|
||||
|
||||
function pump(): Promise<void> {
|
||||
return (
|
||||
reader?.read().then(async ({ done, value: rawValue }) => {
|
||||
if (done) {
|
||||
const timestampLastByte = Date.now()
|
||||
logger.metric({
|
||||
response_length: responseLength,
|
||||
"timestamp.last_byte": Date.now(),
|
||||
"timestamp.last_byte": timestampLastByte,
|
||||
})
|
||||
dataDumper?.flush()
|
||||
await rateLimiter?.track()
|
||||
@@ -311,6 +318,13 @@ export async function handler(
|
||||
const costInfo = calculateCost(modelInfo, usageInfo)
|
||||
await trialLimiter?.track(usageInfo)
|
||||
await modelTpmLimiter?.track(providerInfo.id, providerInfo.model, usageInfo)
|
||||
await modelTpsLimiter?.track(
|
||||
providerInfo.id,
|
||||
providerInfo.model,
|
||||
timestampFirstByte,
|
||||
timestampLastByte,
|
||||
usageInfo,
|
||||
)
|
||||
await trackUsage(sessionId, billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo)
|
||||
await reload(billingSource, authInfo, costInfo)
|
||||
const cost = calculateOccurredCost(billingSource, costInfo)
|
||||
@@ -321,10 +335,10 @@ export async function handler(
|
||||
}
|
||||
|
||||
if (responseLength === 0) {
|
||||
const now = Date.now()
|
||||
timestampFirstByte = Date.now()
|
||||
logger.metric({
|
||||
time_to_first_byte: now - startTimestamp,
|
||||
"timestamp.first_byte": now,
|
||||
time_to_first_byte: timestampFirstByte - startTimestamp,
|
||||
"timestamp.first_byte": timestampFirstByte,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -478,6 +492,7 @@ export async function handler(
|
||||
retry: RetryOptions,
|
||||
stickyProvider: string | undefined,
|
||||
modelTpmLimits: Record<string, number> | undefined,
|
||||
modelTpsLimits: Record<string, boolean> | undefined,
|
||||
) {
|
||||
const modelProvider = (() => {
|
||||
// BYOK is top priority: if the user set their own API key, we should use it
|
||||
@@ -509,6 +524,11 @@ export async function handler(
|
||||
const usage = modelTpmLimits?.[`${provider.id}/${provider.model}`] ?? 0
|
||||
return usage < provider.tpmLimit * 1_000_000
|
||||
})
|
||||
.filter((provider) => {
|
||||
if (!provider.tpsGoal) return true
|
||||
const isLowTps = modelTpsLimits?.[`${provider.id}/${provider.model}`] ?? false
|
||||
return !isLowTps
|
||||
})
|
||||
.map((provider) => {
|
||||
topPriority = Math.min(topPriority, provider.priority)
|
||||
return provider
|
||||
|
||||
89
packages/console/app/src/routes/zen/util/modelTpsLimiter.ts
Normal file
89
packages/console/app/src/routes/zen/util/modelTpsLimiter.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import { and, Database, inArray, sql } from "@opencode-ai/console-core/drizzle/index.js"
|
||||
import { ModelTpsRateLimitTable } from "@opencode-ai/console-core/schema/ip.sql.js"
|
||||
import { UsageInfo } from "./provider/provider"
|
||||
|
||||
/**
 * Creates a tokens-per-second (TPS) limiter for the providers that declare a `tpsGoal`.
 *
 * Each tracked response is tallied as "qualify" (measured TPS met the goal) or
 * "unqualify" (it did not) in a per-minute bucket keyed by `${provider}/${model}`.
 * `check()` sums the current and previous minute buckets and flags a provider/model
 * as low-TPS when there are enough samples and the misses outnumber the hits.
 *
 * The minute buckets are computed once, when the limiter is created, so `check()`
 * and `track()` on the same limiter instance always refer to the same interval.
 *
 * @param providers Candidate providers; entries without a truthy `tpsGoal` are ignored.
 * @returns `undefined` when no provider has a `tpsGoal`, otherwise an object with
 *   `check` and `track`.
 */
export function createModelTpsLimiter(providers: { id: string; model: string; tpsGoal?: number }[]) {
  // A verdict is only given once strictly more than this many responses were tallied.
  const MIN_SAMPLES = 10
  // Responses with this many output tokens or fewer are too short to measure.
  const MIN_OUTPUT_TOKENS = 10

  // `${providerId}/${model}` -> TPS goal, only for providers that define one.
  const tpsGoals = new Map<string, number>()
  for (const p of providers) {
    if (p.tpsGoal) tpsGoals.set(`${p.id}/${p.model}`, p.tpsGoal)
  }
  const ids = Array.from(tpsGoals.keys())
  if (ids.length === 0) return

  // Encodes a date as the number YYYYMMDDHHmm (UTC, since toISOString is UTC).
  const toInterval = (date: Date) =>
    parseInt(
      date
        .toISOString()
        .replace(/[^0-9]/g, "")
        .substring(0, 12),
      10,
    )
  const now = Date.now()
  const currInterval = toInterval(new Date(now))
  const prevInterval = toInterval(new Date(now - 60 * 1000))

  return {
    /**
     * Reads the tallies for the current and previous minute.
     *
     * @returns Map of `${provider}/${model}` to `true` when that pair is currently
     *   considered low-TPS. Pairs with no recorded rows are absent from the map.
     */
    check: async (): Promise<Record<string, boolean>> => {
      const rows = await Database.use((tx) =>
        tx
          .select()
          .from(ModelTpsRateLimitTable)
          .where(
            and(
              inArray(ModelTpsRateLimitTable.id, ids),
              inArray(ModelTpsRateLimitTable.interval, [currInterval, prevInterval]),
            ),
          ),
      )

      // Sum the counters per id across the current and previous intervals.
      const totals: Record<string, { qualify: number; unqualify: number }> = {}
      for (const row of rows) {
        const existing = totals[row.id] ?? { qualify: 0, unqualify: 0 }
        totals[row.id] = {
          qualify: existing.qualify + row.qualify,
          unqualify: existing.unqualify + row.unqualify,
        }
      }

      return Object.fromEntries(
        Object.entries(totals).map(([id, { qualify, unqualify }]): [string, boolean] => {
          const isLowTps = qualify + unqualify > MIN_SAMPLES && qualify < unqualify
          return [id, isLowTps]
        }),
      )
    },

    /**
     * Records whether one finished response met the TPS goal of its provider/model.
     *
     * Nothing is recorded when the pair has no goal, when either timestamp is unset
     * (<= 0), when the measured duration is not positive, or when the response had
     * too few output tokens.
     *
     * @param provider Provider id of the response.
     * @param model Provider model of the response.
     * @param tsFirstByte Epoch milliseconds at which the first byte was received.
     * @param tsLastByte Epoch milliseconds at which the stream finished.
     * @param usageInfo Usage of the response; only `outputTokens` is read.
     */
    track: async (
      provider: string,
      model: string,
      tsFirstByte: number,
      tsLastByte: number,
      usageInfo: UsageInfo,
    ): Promise<void> => {
      const id = `${provider}/${model}`
      const tpsGoal = tpsGoals.get(id)
      if (!tpsGoal) return
      if (tsFirstByte <= 0 || tsLastByte <= 0) return

      // A zero duration would divide to Infinity (counted as a hit) and a negative
      // one to a negative rate (counted as a miss); neither is a real measurement.
      const elapsedMs = tsLastByte - tsFirstByte
      if (elapsedMs <= 0) return

      const tokens = usageInfo.outputTokens
      if (tokens <= MIN_OUTPUT_TOKENS) return

      const tps = (tokens / elapsedMs) * 1000
      const qualify = tps >= tpsGoal ? 1 : 0
      const unqualify = tps < tpsGoal ? 1 : 0

      // Upsert: create the bucket row or increment its counters atomically.
      await Database.use((tx) =>
        tx
          .insert(ModelTpsRateLimitTable)
          .values({
            id,
            interval: currInterval,
            qualify,
            unqualify,
          })
          .onDuplicateKeyUpdate({
            set: {
              qualify: sql`${ModelTpsRateLimitTable.qualify} + ${qualify}`,
              unqualify: sql`${ModelTpsRateLimitTable.unqualify} + ${unqualify}`,
            },
          }),
      )
    },
  }
}
|
||||
@@ -0,0 +1,7 @@
|
||||
-- Per-interval tally of responses that met ("qualify") or missed ("unqualify")
-- a provider/model's tokens-per-second goal.
-- `id` is written as "<provider id>/<model>" and `interval` as a numeric
-- YYYYMMDDHHmm (UTC) minute stamp by the TPS limiter in this change.
-- One row per (id, interval); the counters are incremented via upsert.
CREATE TABLE `model_tps_rate_limit` (
|
||||
`id` varchar(255) NOT NULL,
|
||||
`interval` bigint NOT NULL,
|
||||
`qualify` int NOT NULL,
|
||||
`unqualify` int NOT NULL,
|
||||
CONSTRAINT PRIMARY KEY(`id`,`interval`)
|
||||
);
|
||||
File diff suppressed because it is too large
Load Diff
@@ -36,6 +36,7 @@ export namespace ZenData {
|
||||
model: z.string(),
|
||||
priority: z.number().optional(),
|
||||
tpmLimit: z.number().optional(),
|
||||
tpsGoal: z.number().optional(),
|
||||
weight: z.number().optional(),
|
||||
disabled: z.boolean().optional(),
|
||||
storeModel: z.string().optional(),
|
||||
|
||||
@@ -40,3 +40,14 @@ export const ModelTpmRateLimitTable = mysqlTable(
|
||||
},
|
||||
(table) => [primaryKey({ columns: [table.id, table.interval] })],
|
||||
)
|
||||
|
||||
/**
 * Table `model_tps_rate_limit`: per-interval counters of responses that met or
 * missed a tokens-per-second goal. Rows are unique per (`id`, `interval`).
 */
export const ModelTpsRateLimitTable = mysqlTable(
  "model_tps_rate_limit",
  {
    // Key of the tracked subject; the TPS limiter writes "<provider id>/<model>".
    id: varchar("id", { length: 255 }).notNull(),
    // Time bucket, stored as a plain number (bigint read in "number" mode).
    interval: bigint("interval", { mode: "number" }).notNull(),
    // Number of responses in the bucket that met the goal.
    qualify: int("qualify").notNull(),
    // Number of responses in the bucket that missed the goal.
    unqualify: int("unqualify").notNull(),
  },
  // Composite primary key: one row per id and interval.
  (columns) => [primaryKey({ columns: [columns.id, columns.interval] })],
)
|
||||
|
||||
Reference in New Issue
Block a user