perf(opencode): batch snapshot diffFull blob reads (#20752)
Co-authored-by: Nate Williams <50088025+natewill@users.noreply.github.com>
pull/20958/head
parent
59ca4543d8
commit
288eb044cb
|
|
@ -437,6 +437,146 @@ export namespace Snapshot {
|
||||||
const diffFull = Effect.fnUntraced(function* (from: string, to: string) {
|
const diffFull = Effect.fnUntraced(function* (from: string, to: string) {
|
||||||
return yield* locked(
|
return yield* locked(
|
||||||
Effect.gen(function* () {
|
Effect.gen(function* () {
|
||||||
|
// One changed path parsed from a numstat line, before any blob content is read.
type Row = {
  // Path taken from the third tab-separated numstat column.
  file: string
  // Looked up in the `status` map; paths without an entry are treated as "modified".
  status: "added" | "deleted" | "modified"
  // True when numstat printed "-" for both the added and deleted counts.
  binary: boolean
  // Parsed line counts; 0 for binary rows or when parsing gives a non-finite number.
  additions: number
  deletions: number
}
|
||||||
|
|
||||||
|
// One blob to request from git for a given row.
type Ref = {
  // Path of the row this blob belongs to.
  file: string
  // Which half of the diff the blob fills in.
  side: "before" | "after"
  // Object name in `<rev>:<path>` form, written to the batch process's stdin.
  ref: string
}
|
||||||
|
|
||||||
|
// Reads the before/after text of a single row with individual `git show` calls.
// Yields a two-element array `[before, after]`; the side that does not exist for
// an added or deleted row, and both sides of a binary row, are empty strings.
const show = Effect.fnUntraced(function* (row: Row) {
  // `git show <rev>:<path>` for this row, reduced to its text output.
  const read = (rev: string) =>
    git([...cfg, ...args(["show", `${rev}:${row.file}`])]).pipe(Effect.map((res) => res.text))

  if (row.binary) return ["", ""]

  switch (row.status) {
    case "added": {
      const after = yield* read(to)
      return ["", after]
    }
    case "deleted": {
      const before = yield* read(from)
      return [before, ""]
    }
    default:
      // Modified: both revisions are needed, so fetch them concurrently.
      return yield* Effect.all([read(from), read(to)], { concurrency: 2 })
  }
})
|
||||||
|
|
||||||
|
// Reads every blob needed by `rows` through one `git cat-file --batch` process.
//
// Resolves to a map from file path to its decoded `{ before, after }` text. An empty
// map is returned when no row needs content. Resolves to `undefined` when the batch
// result cannot be used: non-zero exit code, output that does not parse, or any
// failure absorbed by the trailing `Effect.catch`.
const load = Effect.fnUntraced(
  function* (rows: Row[]) {
    // Expand rows into the blobs to request. Binary rows need none, added rows only
    // the `to` side, deleted rows only the `from` side, everything else both sides
    // (before first, then after).
    const wanted: Ref[] = []
    for (const row of rows) {
      if (row.binary) continue
      if (row.status !== "added") wanted.push({ file: row.file, side: "before", ref: `${from}:${row.file}` })
      if (row.status !== "deleted") wanted.push({ file: row.file, side: "after", ref: `${to}:${row.file}` })
    }
    if (wanted.length === 0) return new Map<string, { before: string; after: string }>()

    // One object name per line on stdin, newline-terminated.
    const input = wanted.map((item) => item.ref).join("\n") + "\n"
    const handle = yield* spawner.spawn(
      ChildProcess.make("git", [...cfg, ...args(["cat-file", "--batch"])], {
        cwd: state.directory,
        extendEnv: true,
        stdin: Stream.make(new TextEncoder().encode(input)),
      }),
    )

    // Collect stdout (raw bytes) and stderr (text) concurrently, then read the exit code.
    const [out, err] = yield* Effect.all(
      [Stream.mkUint8Array(handle.stdout), Stream.mkString(Stream.decodeText(handle.stderr))],
      { concurrency: 2 },
    )
    const code = yield* handle.exitCode
    if (code !== 0) {
      log.info("git cat-file --batch failed during snapshot diff, falling back to per-file git show", {
        stderr: err,
        refs: wanted.length,
      })
      return
    }

    // Log why the batch output is unusable and signal the fallback with `undefined`.
    const bail = (msg: string, extra?: Record<string, string>) => {
      log.info(msg, { ...extra, refs: wanted.length })
      return undefined
    }

    const NEWLINE = 10
    const blobHeader = /^[0-9a-f]+ blob (\d+)$/
    const decoder = new TextDecoder()
    const found = new Map<string, { before: string; after: string }>()
    let pos = 0

    // Parse the default `git cat-file --batch` stream: one header line,
    // then exactly `size` bytes of blob content, then a trailing newline.
    // Responses are consumed in the same order the refs were written.
    for (const item of wanted) {
      const eol = out.indexOf(NEWLINE, pos)
      if (eol === -1) {
        return bail(
          "git cat-file --batch returned a truncated header during snapshot diff, falling back to per-file git show",
        )
      }

      const head = decoder.decode(out.subarray(pos, eol))
      pos = eol + 1

      const entry = found.get(item.file) ?? { before: "", after: "" }
      found.set(item.file, entry)

      // A missing object has no body; that side simply stays empty.
      if (head.endsWith(" missing")) continue

      const parsed = blobHeader.exec(head)
      if (parsed === null) {
        return bail(
          "git cat-file --batch returned an unexpected header during snapshot diff, falling back to per-file git show",
          { head },
        )
      }

      const size = Number(parsed[1])
      const stop = pos + size
      // The body must fit inside the buffer and be followed by its newline terminator.
      const intact = Number.isInteger(size) && size >= 0 && stop < out.length && out[stop] === NEWLINE
      if (!intact) {
        return bail(
          "git cat-file --batch returned truncated content during snapshot diff, falling back to per-file git show",
          { head },
        )
      }

      const text = decoder.decode(out.subarray(pos, stop))
      if (item.side === "before") entry.before = text
      else entry.after = text
      pos = stop + 1
    }

    // Every byte must be accounted for; leftovers mean the stream was not what we expected.
    if (pos !== out.length) {
      return bail(
        "git cat-file --batch returned trailing data during snapshot diff, falling back to per-file git show",
      )
    }

    return found
  },
  // NOTE(review): presumably scopes the spawned process so it is released when this effect ends — confirm.
  Effect.scoped,
  // Any failure raised above is turned into `undefined` instead of propagating.
  Effect.catch(() =>
    Effect.succeed<Map<string, { before: string; after: string }> | undefined>(undefined),
  ),
)
|
||||||
|
|
||||||
const result: Snapshot.FileDiff[] = []
|
const result: Snapshot.FileDiff[] = []
|
||||||
const status = new Map<string, "added" | "deleted" | "modified">()
|
const status = new Map<string, "added" | "deleted" | "modified">()
|
||||||
|
|
||||||
|
|
@ -459,30 +599,45 @@ export namespace Snapshot {
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
for (const line of numstat.text.trim().split("\n")) {
|
const rows = numstat.text
|
||||||
if (!line) continue
|
.trim()
|
||||||
|
.split("\n")
|
||||||
|
.filter(Boolean)
|
||||||
|
.flatMap((line) => {
|
||||||
const [adds, dels, file] = line.split("\t")
|
const [adds, dels, file] = line.split("\t")
|
||||||
if (!file) continue
|
if (!file) return []
|
||||||
const binary = adds === "-" && dels === "-"
|
const binary = adds === "-" && dels === "-"
|
||||||
const [before, after] = binary
|
|
||||||
? ["", ""]
|
|
||||||
: yield* Effect.all(
|
|
||||||
[
|
|
||||||
git([...cfg, ...args(["show", `${from}:${file}`])]).pipe(Effect.map((item) => item.text)),
|
|
||||||
git([...cfg, ...args(["show", `${to}:${file}`])]).pipe(Effect.map((item) => item.text)),
|
|
||||||
],
|
|
||||||
{ concurrency: 2 },
|
|
||||||
)
|
|
||||||
const additions = binary ? 0 : parseInt(adds)
|
const additions = binary ? 0 : parseInt(adds)
|
||||||
const deletions = binary ? 0 : parseInt(dels)
|
const deletions = binary ? 0 : parseInt(dels)
|
||||||
result.push({
|
return [
|
||||||
|
{
|
||||||
file,
|
file,
|
||||||
before,
|
status: status.get(file) ?? "modified",
|
||||||
after,
|
binary,
|
||||||
additions: Number.isFinite(additions) ? additions : 0,
|
additions: Number.isFinite(additions) ? additions : 0,
|
||||||
deletions: Number.isFinite(deletions) ? deletions : 0,
|
deletions: Number.isFinite(deletions) ? deletions : 0,
|
||||||
status: status.get(file) ?? "modified",
|
} satisfies Row,
|
||||||
|
]
|
||||||
})
|
})
|
||||||
|
const step = 100
|
||||||
|
|
||||||
|
// Keep batches bounded so a large diff does not buffer every blob at once.
|
||||||
|
for (let i = 0; i < rows.length; i += step) {
|
||||||
|
const run = rows.slice(i, i + step)
|
||||||
|
const text = yield* load(run)
|
||||||
|
|
||||||
|
for (const row of run) {
|
||||||
|
const hit = text?.get(row.file) ?? { before: "", after: "" }
|
||||||
|
const [before, after] = row.binary ? ["", ""] : text ? [hit.before, hit.after] : yield* show(row)
|
||||||
|
result.push({
|
||||||
|
file: row.file,
|
||||||
|
before,
|
||||||
|
after,
|
||||||
|
additions: row.additions,
|
||||||
|
deletions: row.deletions,
|
||||||
|
status: row.status,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
|
||||||
|
|
@ -982,6 +982,98 @@ test("diffFull with new file additions", async () => {
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Runs diffFull over 60 ids × four kinds of change (modified, deleted, added, binary)
// with text containing multi-byte UTF-8, and checks content, status and counts per path.
test("diffFull with a large interleaved mixed diff", async () => {
  await using tmp = await bootstrap()
  await Instance.provide({
    directory: tmp.path,
    fn: async () => {
      const ids = Array.from({ length: 60 }, (_, n) => String(n).padStart(3, "0"))
      // Path of a fixture inside the temp directory, and the path diffFull reports for it.
      const abs = (id: string, suffix: string) => fwd(tmp.path, "mix", `${id}-${suffix}`)
      const rel = (id: string, suffix: string) => fwd("mix", `${id}-${suffix}`)

      await $`mkdir -p ${tmp.path}/mix`.quiet()

      // State captured by the first snapshot: files to modify, files to delete, binaries.
      await Promise.all(
        ids.flatMap((id, n) => [
          Filesystem.write(abs(id, "mod.txt"), `before-${id}-é\n🙂\nline`),
          Filesystem.write(abs(id, "del.txt"), `gone-${id}\n你好`),
          Filesystem.write(abs(id, "bin.bin"), new Uint8Array([0, n, 255, n % 251])),
        ]),
      )

      const before = await Snapshot.track()
      expect(before).toBeTruthy()

      // State captured by the second snapshot: rewrite, add, change binaries, delete.
      await Promise.all(
        ids.flatMap((id, n) => [
          Filesystem.write(abs(id, "mod.txt"), `after-${id}-é\n🚀\nline`),
          Filesystem.write(abs(id, "add.txt"), `new-${id}\nこんにちは`),
          Filesystem.write(abs(id, "bin.bin"), new Uint8Array([9, n, 8, n % 251])),
          fs.rm(abs(id, "del.txt")),
        ]),
      )

      const after = await Snapshot.track()
      expect(after).toBeTruthy()

      const diffs = await Snapshot.diffFull(before!, after!)
      expect(diffs).toHaveLength(ids.length * 4)

      const byFile = new Map(diffs.map((item) => [item.file, item]))
      for (const id of ids) {
        const modified = byFile.get(rel(id, "mod.txt"))
        expect(modified).toBeDefined()
        expect(modified?.before).toBe(`before-${id}-é\n🙂\nline`)
        expect(modified?.after).toBe(`after-${id}-é\n🚀\nline`)
        expect(modified?.status).toBe("modified")

        const deleted = byFile.get(rel(id, "del.txt"))
        expect(deleted).toBeDefined()
        expect(deleted?.before).toBe(`gone-${id}\n你好`)
        expect(deleted?.after).toBe("")
        expect(deleted?.status).toBe("deleted")

        const added = byFile.get(rel(id, "add.txt"))
        expect(added).toBeDefined()
        expect(added?.before).toBe("")
        expect(added?.after).toBe(`new-${id}\nこんにちは`)
        expect(added?.status).toBe("added")

        // Binary entries carry no text and report zero line counts.
        const binary = byFile.get(rel(id, "bin.bin"))
        expect(binary).toBeDefined()
        expect(binary?.before).toBe("")
        expect(binary?.after).toBe("")
        expect(binary?.additions).toBe(0)
        expect(binary?.deletions).toBe(0)
        expect(binary?.status).toBe("modified")
      }
    },
  })
})
|
||||||
|
|
||||||
|
// Modifies 140 zero-padded files and checks that diffFull reports them in ascending
// name order. 140 is more than one batch of 100 (the `step` used by diffFull in this change).
test("diffFull preserves git diff order across batch boundaries", async () => {
  await using tmp = await bootstrap()
  await Instance.provide({
    directory: tmp.path,
    fn: async () => {
      const ids = Array.from({ length: 140 }, (_, n) => String(n).padStart(3, "0"))
      // Writes `<label>-<id>` into every order/<id>.txt file.
      const writeAll = (label: string) =>
        Promise.all(ids.map((id) => Filesystem.write(`${tmp.path}/order/${id}.txt`, `${label}-${id}`)))

      await $`mkdir -p ${tmp.path}/order`.quiet()
      await writeAll("before")

      const before = await Snapshot.track()
      expect(before).toBeTruthy()

      await writeAll("after")

      const after = await Snapshot.track()
      expect(after).toBeTruthy()

      const diffs = await Snapshot.diffFull(before!, after!)
      const reported = diffs.map((item) => item.file)
      expect(reported).toEqual(ids.map((id) => `order/${id}.txt`))
    },
  })
})
|
||||||
|
|
||||||
test("diffFull with file modifications", async () => {
|
test("diffFull with file modifications", async () => {
|
||||||
await using tmp = await bootstrap()
|
await using tmp = await bootstrap()
|
||||||
await Instance.provide({
|
await Instance.provide({
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue