Compare commits

...

4 Commits

Author SHA1 Message Date
Dax Raad 2d89c22f52 Merge branch 'dev' into feature/tui-assistant-tokens-per-second 2026-02-21 14:13:16 -05:00
Dax Raad 9d7cc2c6db fix 2026-02-21 14:13:05 -05:00
Dax Raad daac4e5599 tui: show streaming tokens-per-second in real-time during model responses
Previously, users only saw tok/s metrics after a message completed. Now
the TUI displays live streaming speed that updates throughout the response,
giving immediate feedback on model performance.
2026-02-21 13:21:22 -05:00
Dax Raad dd6e8f093e feat(tui): show tokens per second in assistant message footer 2026-02-20 17:39:46 -05:00
4 changed files with 47 additions and 9 deletions

View File

@ -1101,6 +1101,7 @@ export function Session() {
</Match>
<Match when={message.role === "assistant"}>
<AssistantMessage
index={index()}
last={lastAssistant()?.id === message.id}
message={message as AssistantMessage}
parts={sync.data.part[message.id] ?? []}
@ -1269,7 +1270,7 @@ function UserMessage(props: {
)
}
function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; last: boolean }) {
function AssistantMessage(props: { index: number; message: AssistantMessage; parts: Part[]; last: boolean }) {
const local = useLocal()
const { theme } = useTheme()
const sync = useSync()
@ -1279,12 +1280,35 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
return props.message.finish && !["tool-calls", "unknown"].includes(props.message.finish)
})
const duration = createMemo(() => {
if (!final()) return 0
if (!props.message.time.completed) return 0
const user = messages().find((x) => x.role === "user" && x.id === props.message.parentID)
if (!user || !user.time) return 0
return props.message.time.completed - user.time.created
/**
 * Footer statistics for this assistant message.
 *
 * Walks the message list backwards from `props.index` until it reaches the
 * user message whose id equals `props.message.parentID`, collecting one
 * tokens-per-second sample from every assistant message passed on the way.
 *
 * Returns `{ duration, tps }` where `duration` is the milliseconds between the
 * parent user message's creation and this message's completion (or the
 * current clock while it has no completion time), and `tps` is the mean of
 * the collected samples (0 when there are none). Returns `null` when the
 * parent user message is not found.
 */
const stats = createMemo(() => {
  const list = messages()
  // Per-message throughput samples gathered on the way back to the parent.
  const samples: number[] = []
  for (let i = props.index; i >= 0; i--) {
    const msg = list[i]
    // list[i] is undefined when props.index lies outside the list; skip
    // instead of throwing on a property access.
    if (!msg) continue
    if (msg.role === "user" && msg.id === props.message.parentID) {
      const total = samples.reduce((sum, x) => sum + x, 0)
      return {
        // Not computed only for finished messages: the figure is meant to
        // update while the response is still streaming.
        duration: (props.message.time.completed ?? Date.now()) - msg.time.created,
        // Report 0 rather than NaN (0 / 0) when no sample was collected.
        tps: samples.length > 0 ? total / samples.length : 0,
      }
    }
    if (msg.role === "assistant" && msg.tokens.output && msg.time.started && msg.time.streamed) {
      const elapsed = msg.time.streamed - msg.time.started
      // A zero or negative span would yield Infinity or a negative rate.
      if (elapsed > 0) samples.push(msg.tokens.output / (elapsed / 1000))
    }
  }
  return null
})
return (
@ -1334,8 +1358,14 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las
</span>{" "}
<span style={{ fg: theme.text }}>{Locale.titlecase(props.message.mode)}</span>
<span style={{ fg: theme.textMuted }}> · {props.message.modelID}</span>
<Show when={duration()}>
<span style={{ fg: theme.textMuted }}> · {Locale.duration(duration())}</span>
<Show when={stats()}>
{(s) => (
<span style={{ fg: theme.textMuted }}>
{" "}
· {Locale.duration(s().duration)}
<Show when={s().tps > 0}> · {s().tps.toFixed(0)} tok/s</Show>
</span>
)}
</Show>
<Show when={props.message.error?.name === "MessageAbortedError"}>
<span style={{ fg: theme.textMuted }}> · interrupted</span>

View File

@ -392,6 +392,8 @@ export namespace MessageV2 {
role: z.literal("assistant"),
time: z.object({
created: z.number(),
started: z.number().optional(),
streamed: z.number().optional(),
completed: z.number().optional(),
}),
error: z

View File

@ -57,6 +57,8 @@ export namespace SessionProcessor {
switch (value.type) {
case "start":
SessionStatus.set(input.sessionID, { type: "busy" })
input.assistantMessage.time.started = Date.now()
await Session.updateMessage(input.assistantMessage)
break
case "reasoning-start":
@ -334,6 +336,8 @@ export namespace SessionProcessor {
await Session.updatePart(currentText)
}
currentText = undefined
input.assistantMessage.time.streamed = Date.now()
await Session.updateMessage(input.assistantMessage)
break
case "finish":

View File

@ -207,6 +207,8 @@ export type AssistantMessage = {
role: "assistant"
time: {
created: number
started?: number
streamed?: number
completed?: number
}
error?: