// Review notes for this patch (preamble placed ahead of the first diff header).
//
// Files touched: packages/opencode/src/snapshot/index.ts and
// packages/opencode/test/snapshot/snapshot.test.ts.
//
// Snapshot.diffFull, before: for every non-binary numstat line it ran two
// git show calls (from:file and to:file) concurrently.
//
// Snapshot.diffFull, after:
//   1. numstat lines are parsed into Row records (file, status, binary,
//      additions, deletions). A line whose add and delete columns are both
//      a dash is binary and reports 0 additions and 0 deletions.
//   2. Rows are processed in slices of step = 100. A row contributes zero refs
//      (binary), one ref (added or deleted) or two refs (modified).
//   3. load feeds the refs of one slice to a single git cat-file --batch
//      process on stdin and parses stdout as: header line, size bytes of
//      content, one newline. A header ending in the word missing leaves that
//      side as an empty string.
//   4. load yields undefined on a non-zero exit code, a truncated header, an
//      unexpected header, truncated content, trailing bytes after the last
//      ref, or any error caught by Effect.catch. For such a slice every
//      non-binary row is read through show, the per-file git show path.
//   5. Results are pushed in numstat row order, slice by slice.
//
// New tests:
//   - a mixed diff of 60 modified, 60 deleted, 60 added and 60 binary files
//     with multi-byte text content, expecting ids.length * 4 entries;
//   - an ordering test with 140 modified files, which is more than one slice
//     of 100 rows, expecting the file list in zero-padded id order.
//
// NOTE(review): generic type arguments look stripped in this copy (new Map()
// without parameters, a bare Record, and Effect.succeed followed by
// | undefined>) - presumably an extraction artifact; confirm against the
// upstream commit before applying.
// NOTE(review): line breaks of the original patch are collapsed here, so the
// hunks cannot be applied as-is; restore the one-line-per-hunk-line layout
// first.
diff --git a/packages/opencode/src/snapshot/index.ts b/packages/opencode/src/snapshot/index.ts index 7c952bc54d..2db67695ff 100644 --- a/packages/opencode/src/snapshot/index.ts +++ b/packages/opencode/src/snapshot/index.ts @@ -437,6 +437,146 @@ export namespace Snapshot { const diffFull = Effect.fnUntraced(function* (from: string, to: string) { return yield* locked( Effect.gen(function* () { + type Row = { + file: string + status: "added" | "deleted" | "modified" + binary: boolean + additions: number + deletions: number + } + + type Ref = { + file: string + side: "before" | "after" + ref: string + } + + /* Per-file fallback: reads each needed side of one row with git show; binary rows yield two empty strings. */ const show = Effect.fnUntraced(function* (row: Row) { + if (row.binary) return ["", ""] + if (row.status === "added") { + return [ + "", + yield* git([...cfg, ...args(["show", `${to}:${row.file}`])]).pipe( + Effect.map((item) => item.text), + ), + ] + } + if (row.status === "deleted") { + return [ + yield* git([...cfg, ...args(["show", `${from}:${row.file}`])]).pipe( + Effect.map((item) => item.text), + ), + "", + ] + } + return yield* Effect.all( + [ + git([...cfg, ...args(["show", `${from}:${row.file}`])]).pipe(Effect.map((item) => item.text)), + git([...cfg, ...args(["show", `${to}:${row.file}`])]).pipe(Effect.map((item) => item.text)), + ], + { concurrency: 2 }, + ) + }) + + /* Batched path: resolves every needed from/to blob of the given rows through one git cat-file --batch process; yields undefined when the caller should fall back to show. */ const load = Effect.fnUntraced( + function* (rows: Row[]) { + const refs = rows.flatMap((row) => { + if (row.binary) return [] + if (row.status === "added") + return [{ file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref] + if (row.status === "deleted") { + return [{ file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref] + } + return [ + { file: row.file, side: "before", ref: `${from}:${row.file}` } satisfies Ref, + { file: row.file, side: "after", ref: `${to}:${row.file}` } satisfies Ref, + ] + }) + if (!refs.length) return new Map() + + const proc = ChildProcess.make("git", [...cfg, ...args(["cat-file", "--batch"])], { + cwd: state.directory, 
extendEnv: true, + stdin: Stream.make(new TextEncoder().encode(refs.map((item) => item.ref).join("\n") + "\n")), + }) + const handle = yield* spawner.spawn(proc) + const [out, err] = yield* Effect.all( + [Stream.mkUint8Array(handle.stdout), Stream.mkString(Stream.decodeText(handle.stderr))], + { concurrency: 2 }, + ) + const code = yield* handle.exitCode + if (code !== 0) { + log.info("git cat-file --batch failed during snapshot diff, falling back to per-file git show", { + stderr: err, + refs: refs.length, + }) + return + } + + /* Logs the reason and yields undefined so the caller falls back to per-file git show. */ const fail = (msg: string, extra?: Record) => { + log.info(msg, { ...extra, refs: refs.length }) + return undefined + } + + const map = new Map() + const dec = new TextDecoder() + let i = 0 + // Parse the default `git cat-file --batch` stream: one header line, + // then exactly `size` bytes of blob content, then a trailing newline. + for (const ref of refs) { + let end = i + while (end < out.length && out[end] !== 10) end += 1 + if (end >= out.length) { + return fail( + "git cat-file --batch returned a truncated header during snapshot diff, falling back to per-file git show", + ) + } + + const head = dec.decode(out.slice(i, end)) + i = end + 1 + const hit = map.get(ref.file) ?? 
{ before: "", after: "" } + if (head.endsWith(" missing")) { + map.set(ref.file, hit) + continue + } + + const match = head.match(/^[0-9a-f]+ blob (\d+)$/) + if (!match) { + return fail( + "git cat-file --batch returned an unexpected header during snapshot diff, falling back to per-file git show", + { head }, + ) + } + + /* The byte at i + size must be the newline (10) that terminates the blob; anything else is treated as truncation. */ const size = Number(match[1]) + if (!Number.isInteger(size) || size < 0 || i + size >= out.length || out[i + size] !== 10) { + return fail( + "git cat-file --batch returned truncated content during snapshot diff, falling back to per-file git show", + { head }, + ) + } + + const text = dec.decode(out.slice(i, i + size)) + if (ref.side === "before") hit.before = text + if (ref.side === "after") hit.after = text + map.set(ref.file, hit) + i += size + 1 + } + + if (i !== out.length) { + return fail( + "git cat-file --batch returned trailing data during snapshot diff, falling back to per-file git show", + ) + } + + return map + }, + Effect.scoped, + Effect.catch(() => + Effect.succeed | undefined>(undefined), + ), + ) + const result: Snapshot.FileDiff[] = [] const status = new Map() @@ -459,30 +599,45 @@ export namespace Snapshot { }, ) - for (const line of numstat.text.trim().split("\n")) { - if (!line) continue - const [adds, dels, file] = line.split("\t") - if (!file) continue - const binary = adds === "-" && dels === "-" - const [before, after] = binary - ? ["", ""] - : yield* Effect.all( - [ - git([...cfg, ...args(["show", `${from}:${file}`])]).pipe(Effect.map((item) => item.text)), - git([...cfg, ...args(["show", `${to}:${file}`])]).pipe(Effect.map((item) => item.text)), - ], - { concurrency: 2 }, - ) - const additions = binary ? 0 : parseInt(adds) - const deletions = binary ? 0 : parseInt(dels) - result.push({ - file, - before, - after, - additions: Number.isFinite(additions) ? additions : 0, - deletions: Number.isFinite(deletions) ? deletions : 0, - status: status.get(file) ?? 
"modified", + const rows = numstat.text + .trim() + .split("\n") + .filter(Boolean) + .flatMap((line) => { + const [adds, dels, file] = line.split("\t") + if (!file) return [] + const binary = adds === "-" && dels === "-" + const additions = binary ? 0 : parseInt(adds) + const deletions = binary ? 0 : parseInt(dels) + return [ + { + file, + status: status.get(file) ?? "modified", + binary, + additions: Number.isFinite(additions) ? additions : 0, + deletions: Number.isFinite(deletions) ? deletions : 0, + } satisfies Row, + ] }) + const step = 100 + + // Keep batches bounded so a large diff does not buffer every blob at once. + for (let i = 0; i < rows.length; i += step) { + const run = rows.slice(i, i + step) + const text = yield* load(run) + + for (const row of run) { + const hit = text?.get(row.file) ?? { before: "", after: "" } + const [before, after] = row.binary ? ["", ""] : text ? [hit.before, hit.after] : yield* show(row) + result.push({ + file: row.file, + before, + after, + additions: row.additions, + deletions: row.deletions, + status: row.status, + }) + } } return result diff --git a/packages/opencode/test/snapshot/snapshot.test.ts b/packages/opencode/test/snapshot/snapshot.test.ts index 8dc80721de..0cd9366a53 100644 --- a/packages/opencode/test/snapshot/snapshot.test.ts +++ b/packages/opencode/test/snapshot/snapshot.test.ts @@ -982,6 +982,98 @@ test("diffFull with new file additions", async () => { }) }) +test("diffFull with a large interleaved mixed diff", async () => { + await using tmp = await bootstrap() + await Instance.provide({ + directory: tmp.path, + fn: async () => { + const ids = Array.from({ length: 60 }, (_, i) => i.toString().padStart(3, "0")) + const mod = ids.map((id) => fwd(tmp.path, "mix", `${id}-mod.txt`)) + const del = ids.map((id) => fwd(tmp.path, "mix", `${id}-del.txt`)) + const add = ids.map((id) => fwd(tmp.path, "mix", `${id}-add.txt`)) + const bin = ids.map((id) => fwd(tmp.path, "mix", `${id}-bin.bin`)) + + await $`mkdir -p 
${tmp.path}/mix`.quiet() + await Promise.all([ + ...mod.map((file, i) => Filesystem.write(file, `before-${ids[i]}-é\n🙂\nline`)), + ...del.map((file, i) => Filesystem.write(file, `gone-${ids[i]}\n你好`)), + ...bin.map((file, i) => Filesystem.write(file, new Uint8Array([0, i, 255, i % 251]))), + ]) + + const before = await Snapshot.track() + expect(before).toBeTruthy() + + await Promise.all([ + ...mod.map((file, i) => Filesystem.write(file, `after-${ids[i]}-é\n🚀\nline`)), + ...add.map((file, i) => Filesystem.write(file, `new-${ids[i]}\nこんにちは`)), + ...bin.map((file, i) => Filesystem.write(file, new Uint8Array([9, i, 8, i % 251]))), + ...del.map((file) => fs.rm(file)), + ]) + + const after = await Snapshot.track() + expect(after).toBeTruthy() + + const diffs = await Snapshot.diffFull(before!, after!) + expect(diffs).toHaveLength(ids.length * 4) + + const map = new Map(diffs.map((item) => [item.file, item])) + for (let i = 0; i < ids.length; i++) { + const m = map.get(fwd("mix", `${ids[i]}-mod.txt`)) + expect(m).toBeDefined() + expect(m!.before).toBe(`before-${ids[i]}-é\n🙂\nline`) + expect(m!.after).toBe(`after-${ids[i]}-é\n🚀\nline`) + expect(m!.status).toBe("modified") + + const d = map.get(fwd("mix", `${ids[i]}-del.txt`)) + expect(d).toBeDefined() + expect(d!.before).toBe(`gone-${ids[i]}\n你好`) + expect(d!.after).toBe("") + expect(d!.status).toBe("deleted") + + const a = map.get(fwd("mix", `${ids[i]}-add.txt`)) + expect(a).toBeDefined() + expect(a!.before).toBe("") + expect(a!.after).toBe(`new-${ids[i]}\nこんにちは`) + expect(a!.status).toBe("added") + + const b = map.get(fwd("mix", `${ids[i]}-bin.bin`)) + expect(b).toBeDefined() + expect(b!.before).toBe("") + expect(b!.after).toBe("") + expect(b!.additions).toBe(0) + expect(b!.deletions).toBe(0) + expect(b!.status).toBe("modified") + } + }, + }) +}) + +test("diffFull preserves git diff order across batch boundaries", async () => { + await using tmp = await bootstrap() + await Instance.provide({ + directory: tmp.path, + fn: 
async () => { + const ids = Array.from({ length: 140 }, (_, i) => i.toString().padStart(3, "0")) + + await $`mkdir -p ${tmp.path}/order`.quiet() + await Promise.all(ids.map((id) => Filesystem.write(`${tmp.path}/order/${id}.txt`, `before-${id}`))) + + const before = await Snapshot.track() + expect(before).toBeTruthy() + + await Promise.all(ids.map((id) => Filesystem.write(`${tmp.path}/order/${id}.txt`, `after-${id}`))) + + const after = await Snapshot.track() + expect(after).toBeTruthy() + + const expected = ids.map((id) => `order/${id}.txt`) + + const diffs = await Snapshot.diffFull(before!, after!) + expect(diffs.map((item) => item.file)).toEqual(expected) + }, + }) +}) + test("diffFull with file modifications", async () => { await using tmp = await bootstrap() await Instance.provide({