perf(opencode): batch snapshot diffFull blob reads (#20752)
Co-authored-by: Nate Williams <50088025+natewill@users.noreply.github.com>pull/20958/head
parent
59ca4543d8
commit
288eb044cb
|
|
@ -437,6 +437,146 @@ export namespace Snapshot {
|
|||
const diffFull = Effect.fnUntraced(function* (from: string, to: string) {
|
||||
return yield* locked(
|
||||
Effect.gen(function* () {
|
||||
/** Change kind recorded for a file in the diff. */
type RowStatus = "added" | "deleted" | "modified"

/** One parsed diff entry: a changed file plus its line counts. */
type Row = {
  /** Path of the changed file, as it appears in the third tab-separated column. */
  file: string
  /** Change kind looked up for the file; rows without an entry default to "modified". */
  status: RowStatus
  /** True when both count columns are "-", i.e. no line counts are available. */
  binary: boolean
  /** Added line count; 0 for binary rows or when the column is not a finite number. */
  additions: number
  /** Deleted line count; 0 for binary rows or when the column is not a finite number. */
  deletions: number
}
|
||||
|
||||
/** One blob to request from git for a given file. */
type Ref = {
  /** Path of the file the blob belongs to; used as the key when results are collected. */
  file: string
  /** Which side of the diff the blob fills: "before" is read at `from`, "after" at `to`. */
  side: "before" | "after"
  /** Object spec in `<rev>:<path>` form. */
  ref: string
}
|
||||
|
||||
// Per-file fallback: resolve the [before, after] text of one row with
// individual `git show <rev>:<path>` calls. Binary rows yield two empty
// strings; added/deleted rows only read the side that exists.
const show = Effect.fnUntraced(function* (row: Row) {
  // Text of this row's file at the given revision.
  const read = (rev: string) =>
    git([...cfg, ...args(["show", `${rev}:${row.file}`])]).pipe(Effect.map((item) => item.text))

  if (row.binary) return ["", ""]

  switch (row.status) {
    case "added":
      return ["", yield* read(to)]
    case "deleted":
      return [yield* read(from), ""]
    default:
      // Modified: both sides exist, so fetch them concurrently.
      return yield* Effect.all([read(from), read(to)], { concurrency: 2 })
  }
})
|
||||
|
||||
// Batch-read the blob text for `rows` through a single `git cat-file --batch`
// process instead of one `git show` per blob.
//
// Returns a map from file path to its { before, after } text. Binary rows are
// not requested, and a blob git reports as " missing" keeps the empty string.
// Returns `undefined` whenever the batch cannot be trusted (spawn/stream
// error, non-zero exit, malformed or truncated output); every such path logs
// the reason so the caller's per-file fallback is visible in the logs.
const load = Effect.fnUntraced(
  function* (rows: Row[]) {
    const refs = rows.flatMap((row) => {
      if (row.binary) return []
      if (row.status === "added")
        return [{ file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref]
      if (row.status === "deleted") {
        return [{ file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref]
      }
      return [
        { file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref,
        { file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref,
      ]
    })
    // Nothing to read (e.g. only binary rows): skip spawning git entirely.
    if (!refs.length) return new Map<string, { before: string; after: string }>()

    // Log why the batch result is being discarded and signal "fall back".
    const fail = (msg: string, extra?: Record<string, string>) => {
      log.info(msg, { ...extra, refs: refs.length })
      return undefined
    }

    // One object spec per line on stdin; git answers in the same order.
    const proc = ChildProcess.make("git", [...cfg, ...args(["cat-file", "--batch"])], {
      cwd: state.directory,
      extendEnv: true,
      stdin: Stream.make(new TextEncoder().encode(refs.map((item) => item.ref).join("\n") + "\n")),
    })
    const handle = yield* spawner.spawn(proc)
    // Drain stdout and stderr concurrently before waiting for the exit code.
    const [out, err] = yield* Effect.all(
      [Stream.mkUint8Array(handle.stdout), Stream.mkString(Stream.decodeText(handle.stderr))],
      { concurrency: 2 },
    )
    const code = yield* handle.exitCode
    if (code !== 0) {
      return fail("git cat-file --batch failed during snapshot diff, falling back to per-file git show", {
        stderr: err,
      })
    }

    const map = new Map<string, { before: string; after: string }>()
    const dec = new TextDecoder()
    // Header of a found blob: "<object id> blob <size in bytes>".
    const blobHeader = /^[0-9a-f]+ blob (\d+)$/
    let i = 0
    // Parse the default `git cat-file --batch` stream: one header line,
    // then exactly `size` bytes of blob content, then a trailing newline.
    for (const ref of refs) {
      const end = out.indexOf(10, i)
      if (end === -1) {
        return fail(
          "git cat-file --batch returned a truncated header during snapshot diff, falling back to per-file git show",
        )
      }

      // `subarray` decodes straight from the output buffer without copying it.
      const head = dec.decode(out.subarray(i, end))
      i = end + 1
      const hit = map.get(ref.file) ?? { before: "", after: "" }
      if (head.endsWith(" missing")) {
        // A missing object has no content section; that side stays "".
        map.set(ref.file, hit)
        continue
      }

      const match = blobHeader.exec(head)
      if (!match) {
        return fail(
          "git cat-file --batch returned an unexpected header during snapshot diff, falling back to per-file git show",
          { head },
        )
      }

      const size = Number(match[1])
      // The content must fit in the buffer and be followed by a newline byte.
      if (!Number.isInteger(size) || size < 0 || i + size >= out.length || out[i + size] !== 10) {
        return fail(
          "git cat-file --batch returned truncated content during snapshot diff, falling back to per-file git show",
          { head },
        )
      }

      const text = dec.decode(out.subarray(i, i + size))
      if (ref.side === "before") hit.before = text
      if (ref.side === "after") hit.after = text
      map.set(ref.file, hit)
      i += size + 1
    }

    // Leftover bytes mean the responses did not line up one-to-one with refs.
    if (i !== out.length) {
      return fail(
        "git cat-file --batch returned trailing data during snapshot diff, falling back to per-file git show",
      )
    }

    return map
  },
  Effect.scoped,
  Effect.catch((cause) => {
    // Spawn or stream errors are recoverable via the per-file path, but they
    // should not vanish without a trace.
    log.info("git cat-file --batch could not be run during snapshot diff, falling back to per-file git show", {
      error: String(cause),
    })
    return Effect.succeed<Map<string, { before: string; after: string }> | undefined>(undefined)
  }),
)
|
||||
|
||||
const result: Snapshot.FileDiff[] = []
|
||||
const status = new Map<string, "added" | "deleted" | "modified">()
|
||||
|
||||
|
|
@ -459,30 +599,45 @@ export namespace Snapshot {
|
|||
},
|
||||
)
|
||||
|
||||
for (const line of numstat.text.trim().split("\n")) {
|
||||
if (!line) continue
|
||||
const [adds, dels, file] = line.split("\t")
|
||||
if (!file) continue
|
||||
const binary = adds === "-" && dels === "-"
|
||||
const [before, after] = binary
|
||||
? ["", ""]
|
||||
: yield* Effect.all(
|
||||
[
|
||||
git([...cfg, ...args(["show", `${from}:${file}`])]).pipe(Effect.map((item) => item.text)),
|
||||
git([...cfg, ...args(["show", `${to}:${file}`])]).pipe(Effect.map((item) => item.text)),
|
||||
],
|
||||
{ concurrency: 2 },
|
||||
)
|
||||
const additions = binary ? 0 : parseInt(adds)
|
||||
const deletions = binary ? 0 : parseInt(dels)
|
||||
result.push({
|
||||
file,
|
||||
before,
|
||||
after,
|
||||
additions: Number.isFinite(additions) ? additions : 0,
|
||||
deletions: Number.isFinite(deletions) ? deletions : 0,
|
||||
status: status.get(file) ?? "modified",
|
||||
// Turn the tab-separated `adds<TAB>dels<TAB>file` lines of `numstat.text`
// into typed rows, preserving line order. Lines without a file column are
// dropped; "-"/"-" counts mark a binary entry.
const rows: Row[] = []
for (const line of numstat.text.trim().split("\n")) {
  if (!line) continue
  const [adds, dels, file] = line.split("\t")
  if (!file) continue

  const binary = adds === "-" && dels === "-"
  const additions = binary ? 0 : parseInt(adds)
  const deletions = binary ? 0 : parseInt(dels)
  rows.push({
    file,
    status: status.get(file) ?? "modified",
    binary,
    // Unparseable counts are reported as 0 rather than NaN.
    additions: Number.isFinite(additions) ? additions : 0,
    deletions: Number.isFinite(deletions) ? deletions : 0,
  })
}
|
||||
const batchSize = 100

// Keep batches bounded so a large diff does not buffer every blob at once.
for (let start = 0; start < rows.length; start += batchSize) {
  const batch = rows.slice(start, start + batchSize)
  // `undefined` means the batched read was rejected for this slice.
  const contents = yield* load(batch)

  for (const row of batch) {
    const cached = contents?.get(row.file)
    // Binary rows never carry text; otherwise prefer the batched result and
    // only shell out per file when the batch as a whole was unavailable.
    const [before, after] = row.binary
      ? ["", ""]
      : contents === undefined
        ? yield* show(row)
        : [cached?.before ?? "", cached?.after ?? ""]
    result.push({
      file: row.file,
      before,
      after,
      additions: row.additions,
      deletions: row.deletions,
      status: row.status,
    })
  }
}
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -982,6 +982,98 @@ test("diffFull with new file additions", async () => {
|
|||
})
|
||||
})
|
||||
|
||||
// Covers diffFull on 60 interleaved groups of modified, deleted, added and
// binary files with multi-byte text content.
test("diffFull with a large interleaved mixed diff", async () => {
  await using tmp = await bootstrap()
  await Instance.provide({
    directory: tmp.path,
    fn: async () => {
      const ids = Array.from({ length: 60 }, (_, n) => n.toString().padStart(3, "0"))
      // Absolute path inside the temp repo / path as reported by diffFull.
      const abs = (name: string) => fwd(tmp.path, "mix", name)
      const rel = (name: string) => fwd("mix", name)

      await $`mkdir -p ${tmp.path}/mix`.quiet()

      // Initial state: text files to modify and delete, plus binary files.
      await Promise.all([
        ...ids.map((id) => Filesystem.write(abs(`${id}-mod.txt`), `before-${id}-é\n🙂\nline`)),
        ...ids.map((id) => Filesystem.write(abs(`${id}-del.txt`), `gone-${id}\n你好`)),
        ...ids.map((id, n) => Filesystem.write(abs(`${id}-bin.bin`), new Uint8Array([0, n, 255, n % 251]))),
      ])

      const before = await Snapshot.track()
      expect(before).toBeTruthy()

      // Second state: rewrite, add, change binaries, and remove.
      await Promise.all([
        ...ids.map((id) => Filesystem.write(abs(`${id}-mod.txt`), `after-${id}-é\n🚀\nline`)),
        ...ids.map((id) => Filesystem.write(abs(`${id}-add.txt`), `new-${id}\nこんにちは`)),
        ...ids.map((id, n) => Filesystem.write(abs(`${id}-bin.bin`), new Uint8Array([9, n, 8, n % 251]))),
        ...ids.map((id) => fs.rm(abs(`${id}-del.txt`))),
      ])

      const after = await Snapshot.track()
      expect(after).toBeTruthy()

      const diffs = await Snapshot.diffFull(before!, after!)
      expect(diffs).toHaveLength(ids.length * 4)

      const byFile = new Map(diffs.map((item) => [item.file, item]))
      // Fetch the diff entry for a file and assert that it exists.
      const lookup = (name: string) => {
        const entry = byFile.get(rel(name))
        expect(entry).toBeDefined()
        return entry!
      }

      for (const id of ids) {
        const modified = lookup(`${id}-mod.txt`)
        expect(modified.before).toBe(`before-${id}-é\n🙂\nline`)
        expect(modified.after).toBe(`after-${id}-é\n🚀\nline`)
        expect(modified.status).toBe("modified")

        const deleted = lookup(`${id}-del.txt`)
        expect(deleted.before).toBe(`gone-${id}\n你好`)
        expect(deleted.after).toBe("")
        expect(deleted.status).toBe("deleted")

        const added = lookup(`${id}-add.txt`)
        expect(added.before).toBe("")
        expect(added.after).toBe(`new-${id}\nこんにちは`)
        expect(added.status).toBe("added")

        const binary = lookup(`${id}-bin.bin`)
        expect(binary.before).toBe("")
        expect(binary.after).toBe("")
        expect(binary.additions).toBe(0)
        expect(binary.deletions).toBe(0)
        expect(binary.status).toBe("modified")
      }
    },
  })
})
|
||||
|
||||
// 140 modified files: asserts the reported file order equals the
// zero-padded id order.
test("diffFull preserves git diff order across batch boundaries", async () => {
  await using tmp = await bootstrap()
  await Instance.provide({
    directory: tmp.path,
    fn: async () => {
      const ids = Array.from({ length: 140 }, (_, n) => n.toString().padStart(3, "0"))
      // Write `<label>-<id>` into every order/<id>.txt file.
      const writeAll = (label: string) =>
        Promise.all(ids.map((id) => Filesystem.write(`${tmp.path}/order/${id}.txt`, `${label}-${id}`)))

      await $`mkdir -p ${tmp.path}/order`.quiet()
      await writeAll("before")

      const before = await Snapshot.track()
      expect(before).toBeTruthy()

      await writeAll("after")

      const after = await Snapshot.track()
      expect(after).toBeTruthy()

      const expected = ids.map((id) => `order/${id}.txt`)

      const diffs = await Snapshot.diffFull(before!, after!)
      const files = diffs.map((item) => item.file)
      expect(files).toEqual(expected)
    },
  })
})
|
||||
|
||||
test("diffFull with file modifications", async () => {
|
||||
await using tmp = await bootstrap()
|
||||
await Instance.provide({
|
||||
|
|
|
|||
Loading…
Reference in New Issue