diff --git a/.github/workflows/unit-tests-internal.yml b/.github/workflows/unit-tests-internal.yml
index 6f2b32f620f..e2aae11b846 100644
--- a/.github/workflows/unit-tests-internal.yml
+++ b/.github/workflows/unit-tests-internal.yml
@@ -19,8 +19,8 @@ jobs:
       # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard
       fail-fast: false
       matrix:
-        shardIndex: [1, 2, 3, 4, 5, 6, 7, 8]
-        shardTotal: [8]
+        shardIndex: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+        shardTotal: [12]
     env:
       DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
       SHARD_INDEX: ${{ matrix.shardIndex }}
@@ -83,12 +83,26 @@ jobs:
       - name: 🐳 Pre-pull testcontainer images
         if: ${{ env.DOCKERHUB_USERNAME }}
         run: |
+          # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake.
+          # Usage: pull <image> - returns 0 on the first successful pull, 1 after 3 failed attempts.
+          pull() {
+            for attempt in 1 2 3; do
+              docker pull "$1" && return 0
+              # Only warn and back off when another attempt follows; the last failure goes to ::error::.
+              if [ "${attempt}" -lt 3 ]; then
+                echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s"
+                sleep 10
+              fi
+            done
+            echo "::error::docker pull $1 failed after 3 attempts"
+            return 1
+          }
           echo "Pre-pulling Docker images with authenticated session..."
-          docker pull postgres:14
-          docker pull clickhouse/clickhouse-server:25.4-alpine
-          docker pull redis:7.2
-          docker pull testcontainers/ryuk:0.14.0
-          docker pull electricsql/electric:1.2.4
+          pull postgres:14
+          pull clickhouse/clickhouse-server:25.4-alpine
+          pull redis:7.2
+          pull testcontainers/ryuk:0.14.0
+          pull electricsql/electric:1.2.4
           echo "Image pre-pull complete"
 
       - name: 📥 Download deps
diff --git a/.github/workflows/unit-tests-packages.yml b/.github/workflows/unit-tests-packages.yml
index 5251a993313..6642f2443c4 100644
--- a/.github/workflows/unit-tests-packages.yml
+++ b/.github/workflows/unit-tests-packages.yml
@@ -16,9 +16,11 @@ jobs:
     name: "🧪 Unit Tests: Packages"
     runs-on: ubuntu-latest
     strategy:
+      # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard
+      fail-fast: false
       matrix:
-        shardIndex: [1]
-        shardTotal: [1]
+        shardIndex: [1, 2, 3]
+        shardTotal: [3]
     env:
       DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
       SHARD_INDEX: ${{ matrix.shardIndex }}
@@ -81,12 +83,26 @@ jobs:
       - name: 🐳 Pre-pull testcontainer images
         if: ${{ env.DOCKERHUB_USERNAME }}
         run: |
+          # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake.
+          # Usage: pull <image> - returns 0 on the first successful pull, 1 after 3 failed attempts.
+          pull() {
+            for attempt in 1 2 3; do
+              docker pull "$1" && return 0
+              # Only warn and back off when another attempt follows; the last failure goes to ::error::.
+              if [ "${attempt}" -lt 3 ]; then
+                echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s"
+                sleep 10
+              fi
+            done
+            echo "::error::docker pull $1 failed after 3 attempts"
+            return 1
+          }
           echo "Pre-pulling Docker images with authenticated session..."
-          docker pull postgres:14
-          docker pull clickhouse/clickhouse-server:25.4-alpine
-          docker pull redis:7.2
-          docker pull testcontainers/ryuk:0.14.0
-          docker pull electricsql/electric:1.2.4
+          pull postgres:14
+          pull clickhouse/clickhouse-server:25.4-alpine
+          pull redis:7.2
+          pull testcontainers/ryuk:0.14.0
+          pull electricsql/electric:1.2.4
           echo "Image pre-pull complete"
 
       - name: 📥 Download deps
diff --git a/.github/workflows/unit-tests-webapp.yml b/.github/workflows/unit-tests-webapp.yml
index 3517afbba49..dc1cc978f35 100644
--- a/.github/workflows/unit-tests-webapp.yml
+++ b/.github/workflows/unit-tests-webapp.yml
@@ -19,8 +19,8 @@ jobs:
       # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard
       fail-fast: false
       matrix:
-        shardIndex: [1, 2, 3, 4, 5, 6, 7, 8]
-        shardTotal: [8]
+        shardIndex: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        shardTotal: [10]
     env:
       DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
       SHARD_INDEX: ${{ matrix.shardIndex }}
@@ -83,13 +83,27 @@ jobs:
       - name: 🐳 Pre-pull testcontainer images
         if: ${{ env.DOCKERHUB_USERNAME }}
         run: |
+          # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake.
+          # Usage: pull <image> - returns 0 on the first successful pull, 1 after 3 failed attempts.
+          pull() {
+            for attempt in 1 2 3; do
+              docker pull "$1" && return 0
+              # Only warn and back off when another attempt follows; the last failure goes to ::error::.
+              if [ "${attempt}" -lt 3 ]; then
+                echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s"
+                sleep 10
+              fi
+            done
+            echo "::error::docker pull $1 failed after 3 attempts"
+            return 1
+          }
           echo "Pre-pulling Docker images with authenticated session..."
-          docker pull postgres:14
-          docker pull clickhouse/clickhouse-server:25.4-alpine
-          docker pull redis:7.2
-          docker pull testcontainers/ryuk:0.14.0
-          docker pull electricsql/electric:1.2.4
-          docker pull minio/minio:latest
+          pull postgres:14
+          pull clickhouse/clickhouse-server:25.4-alpine
+          pull redis:7.2
+          pull testcontainers/ryuk:0.14.0
+          pull electricsql/electric:1.2.4
+          pull minio/minio:latest
           echo "Image pre-pull complete"
 
       - name: 📥 Download deps
diff --git a/.gitignore b/.gitignore
index d071d5ae4e3..d5f0c945ad1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -72,4 +72,6 @@ apps/**/public/build
 .mcp.log
 .mcp.json
 .cursor/debug.log
-ailogger-output.log
\ No newline at end of file
+ailogger-output.log
+# per-package vitest timing capture (transient; merged into root test-timings.json)
+.vitest-timing.json
diff --git a/apps/webapp/test/engine/streamBatchItems.test.ts b/apps/webapp/test/engine/streamBatchItems.test.ts
index f5348d71b98..f4c2f21f8de 100644
--- a/apps/webapp/test/engine/streamBatchItems.test.ts
+++ b/apps/webapp/test/engine/streamBatchItems.test.ts
@@ -16,7 +16,11 @@ vi.mock("~/services/platform.v3.server", async (importOriginal) => {
 
 import { RunEngine } from "@internal/run-engine";
 import { setupAuthenticatedEnvironment } from "@internal/run-engine/tests";
-import { containerTest } from "@internal/testcontainers";
+// Per-test redis (isolated): each test spins up its own RunEngine and runs batch work, which leaves
+// background activity on redis that outlives the test - sharing a worker redis across the 16 cases
+// here caused cross-test interference and 30s seal-timeout flakes. Same carve-out as the run-engine
+// batch tests.
+import { containerTestWithIsolatedRedis as containerTest } from "@internal/testcontainers";
 import { trace } from "@opentelemetry/api";
 import { PrismaClient } from "@trigger.dev/database";
 import { BatchId } from "@trigger.dev/core/v3/isomorphic";
@@ -1584,10 +1588,7 @@ describe("createNdjsonParserStream", () => {
     const parser = createNdjsonParserStream(1024);
     const results = await collectStream(stream.pipeThrough(parser));
 
-    expect(results).toEqual([
-      { payload: "line1\nline2\nline3" },
-      { payload: "no newlines" },
-    ]);
+    expect(results).toEqual([{ payload: "line1\nline2\nline3" }, { payload: "no newlines" }]);
   });
 
   it("should skip empty lines", async () => {
@@ -1888,7 +1889,9 @@ describe("extractIndexAndTask", () => {
   });
 
   it("should not match nested keys", () => {
-    const bytes = encoder.encode('{"nested":{"index":999,"task":"inner"},"index":5,"task":"outer"}');
+    const bytes = encoder.encode(
+      '{"nested":{"index":999,"task":"inner"},"index":5,"task":"outer"}'
+    );
     const result = extractIndexAndTask(bytes);
     expect(result.index).toBe(5);
     expect(result.task).toBe("outer");
diff --git a/apps/webapp/test/runsBackfiller.test.ts b/apps/webapp/test/runsBackfiller.test.ts
index 87bc3822d98..fbdb16a4a7b 100644
--- a/apps/webapp/test/runsBackfiller.test.ts
+++ b/apps/webapp/test/runsBackfiller.test.ts
@@ -7,7 +7,7 @@ vi.mock("~/db.server", () => ({
 }));
 
 import { ClickHouse } from "@internal/clickhouse";
-import { containerTest } from "@internal/testcontainers";
+import { replicationContainerTest } from "@internal/testcontainers";
 import { z } from "zod";
 import { RunsBackfillerService } from "~/services/runsBackfiller.server";
 import { RunsReplicationService } from "~/services/runsReplicationService.server";
@@ -17,7 +17,7 @@ import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickho
 vi.setConfig({ testTimeout: 60_000 });
 
 describe("RunsBackfillerService", () => {
-  containerTest(
+  replicationContainerTest(
     "should backfill completed runs to clickhouse",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       const clickhouse = new ClickHouse({
diff --git a/apps/webapp/test/runsReplicationBenchmark.test.ts b/apps/webapp/test/runsReplicationBenchmark.test.ts
index e17d6b41212..d1b80d06af0 100644
--- a/apps/webapp/test/runsReplicationBenchmark.test.ts
+++ b/apps/webapp/test/runsReplicationBenchmark.test.ts
@@ -1,5 +1,5 @@
 import { ClickHouse } from "@internal/clickhouse";
-import { containerTest } from "@internal/testcontainers";
+import { replicationContainerTest } from "@internal/testcontainers";
 import { fork, type ChildProcess } from "node:child_process";
 import { performance, PerformanceObserver } from "node:perf_hooks";
 import { setTimeout } from "node:timers/promises";
@@ -501,7 +501,7 @@ function compareBenchmarks(baseline: BenchmarkResult, comparison: BenchmarkResul
 }
 
 describe("RunsReplicationService Benchmark", () => {
-  containerTest.skipIf(process.env.BENCHMARKS_ENABLED !== "1")(
+  replicationContainerTest.skipIf(process.env.BENCHMARKS_ENABLED !== "1")(
     "should benchmark error fingerprinting performance impact",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       // Enable replica identity for TaskRun table
diff --git a/apps/webapp/test/runsReplicationService.part1.test.ts b/apps/webapp/test/runsReplicationService.part1.test.ts
index d2a3c1b7627..5a085944a61 100644
--- a/apps/webapp/test/runsReplicationService.part1.test.ts
+++ b/apps/webapp/test/runsReplicationService.part1.test.ts
@@ -1,5 +1,5 @@
 import { ClickHouse } from "@internal/clickhouse";
-import { containerTest } from "@internal/testcontainers";
+import { replicationContainerTest } from "@internal/testcontainers";
 import { setTimeout } from "node:timers/promises";
 import { z } from "zod";
 import { TaskRunStatus } from "~/database-types";
@@ -10,8 +10,8 @@ import superjson from "superjson";
 
 vi.setConfig({ testTimeout: 60_000 });
 
-describe("RunsReplicationService (part 1/2)", () => {
-  containerTest(
+describe("RunsReplicationService (part 1/7)", () => {
+  replicationContainerTest(
     "should replicate runs to clickhouse",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
@@ -135,7 +135,7 @@ describe("RunsReplicationService (part 1/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should replicate runs with super json payloads to clickhouse",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
@@ -276,7 +276,7 @@ describe("RunsReplicationService (part 1/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should not produce any flush spans when no TaskRun events are produced",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
@@ -348,7 +348,7 @@ describe("RunsReplicationService (part 1/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should replicate a new TaskRun to ClickHouse using batching insert strategy",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
@@ -452,7 +452,7 @@ describe("RunsReplicationService (part 1/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should insert the payload into ClickHouse when a TaskRun is created",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
@@ -553,7 +553,7 @@ describe("RunsReplicationService (part 1/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should insert the payload even if it's very large into ClickHouse when a TaskRun is created",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
@@ -658,701 +658,4 @@ describe("RunsReplicationService (part 1/2)", () => {
       await runsReplicationService.stop();
     }
   );
-
-  containerTest(
-    "should replicate updates to an existing TaskRun to ClickHouse",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-update",
-        logLevel: "warn",
-      });
-
-      const runsReplicationService = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-update",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 1,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationService.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-update",
-          slug: "test-update",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-update",
-          slug: "test-update",
-          organizationId: organization.id,
-          externalRef: "test-update",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-update",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-update",
-          pkApiKey: "test-update",
-          shortcode: "test-update",
-        },
-      });
-
-      const uniqueFriendlyId = `run_update_${Date.now()}`;
-      const taskRun = await prisma.taskRun.create({
-        data: {
-          friendlyId: uniqueFriendlyId,
-          taskIdentifier: "my-task-update",
-          payload: JSON.stringify({ foo: "update-test" }),
-          payloadType: "application/json",
-          traceId: "update-1234",
-          spanId: "update-1234",
-          queue: "test-update",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING",
-        },
-      });
-
-      await setTimeout(1000);
-
-      await prisma.taskRun.update({
-        where: { id: taskRun.id },
-        data: { status: TaskRunStatus.COMPLETED_SUCCESSFULLY },
-      });
-
-      await setTimeout(1000);
-
-      const queryRuns = clickhouse.reader.query({
-        name: "runs-replication-update",
-        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}",
-        schema: z.any(),
-        params: z.object({ run_id: z.string() }),
-      });
-
-      const [queryError, result] = await queryRuns({ run_id: taskRun.id });
-
-      expect(queryError).toBeNull();
-      expect(result?.length).toBe(1);
-      expect(result?.[0]).toEqual(
-        expect.objectContaining({
-          run_id: taskRun.id,
-          status: TaskRunStatus.COMPLETED_SUCCESSFULLY,
-        })
-      );
-
-      await runsReplicationService.stop();
-    }
-  );
-
-  containerTest(
-    "should replicate deletions of a TaskRun to ClickHouse and mark as deleted",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-delete",
-        logLevel: "warn",
-      });
-
-      const runsReplicationService = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-delete",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 1,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationService.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-delete",
-          slug: "test-delete",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-delete",
-          slug: "test-delete",
-          organizationId: organization.id,
-          externalRef: "test-delete",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-delete",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-delete",
-          pkApiKey: "test-delete",
-          shortcode: "test-delete",
-        },
-      });
-
-      const uniqueFriendlyId = `run_delete_${Date.now()}`;
-      const taskRun = await prisma.taskRun.create({
-        data: {
-          friendlyId: uniqueFriendlyId,
-          taskIdentifier: "my-task-delete",
-          payload: JSON.stringify({ foo: "delete-test" }),
-          payloadType: "application/json",
-          traceId: "delete-1234",
-          spanId: "delete-1234",
-          queue: "test-delete",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING",
-        },
-      });
-
-      await setTimeout(1000);
-
-      await prisma.taskRun.delete({
-        where: { id: taskRun.id },
-      });
-
-      await setTimeout(1000);
-
-      const queryRuns = clickhouse.reader.query({
-        name: "runs-replication-delete",
-        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}",
-        schema: z.any(),
-        params: z.object({ run_id: z.string() }),
-      });
-
-      const [queryError, result] = await queryRuns({ run_id: taskRun.id });
-
-      expect(queryError).toBeNull();
-      expect(result?.length).toBe(0);
-
-      await runsReplicationService.stop();
-    }
-  );
-
-  containerTest(
-    "should gracefully shutdown and allow a new service to pick up from the correct LSN (handover)",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-shutdown-handover",
-        logLevel: "warn",
-      });
-
-      // Service A
-      const runsReplicationServiceA = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-shutdown-handover",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 1,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationServiceA.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-shutdown-handover",
-          slug: "test-shutdown-handover",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-shutdown-handover",
-          slug: "test-shutdown-handover",
-          organizationId: organization.id,
-          externalRef: "test-shutdown-handover",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-shutdown-handover",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-shutdown-handover",
-          pkApiKey: "test-shutdown-handover",
-          shortcode: "test-shutdown-handover",
-        },
-      });
-
-      const run1Id = `run_shutdown_handover_1_${Date.now()}`;
-
-      runsReplicationServiceA.events.on("message", async ({ message, service }) => {
-        if (message.tag === "insert") {
-          await service.shutdown();
-        }
-      });
-
-      const taskRun1 = await prisma.taskRun.create({
-        data: {
-          friendlyId: run1Id,
-          taskIdentifier: "my-task-shutdown-handover-1",
-          payload: JSON.stringify({ foo: "handover-1" }),
-          payloadType: "application/json",
-          traceId: "handover-1-1234",
-          spanId: "handover-1-1234",
-          queue: "test-shutdown-handover",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING",
-        },
-      });
-
-      const run2Id = `run_shutdown_handover_2_${Date.now()}`;
-      const taskRun2 = await prisma.taskRun.create({
-        data: {
-          friendlyId: run2Id,
-          taskIdentifier: "my-task-shutdown-handover-2",
-          payload: JSON.stringify({ foo: "handover-2" }),
-          payloadType: "application/json",
-          traceId: "handover-2-1234",
-          spanId: "handover-2-1234",
-          queue: "test-shutdown-handover",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING",
-        },
-      });
-
-      await setTimeout(1000);
-
-      const queryRuns = clickhouse.reader.query({
-        name: "runs-replication-shutdown-handover",
-        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL ORDER BY created_at ASC",
-        schema: z.any(),
-      });
-      const [queryError, result] = await queryRuns({});
-      expect(queryError).toBeNull();
-      expect(result?.length).toBe(1);
-      expect(result?.[0]).toEqual(expect.objectContaining({ run_id: taskRun1.id }));
-
-      // Service B
-      const runsReplicationServiceB = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-shutdown-handover",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 1,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationServiceB.start();
-
-      await setTimeout(1000);
-
-      const [queryErrorB, resultB] = await queryRuns({});
-
-      expect(queryErrorB).toBeNull();
-      expect(resultB?.length).toBe(2);
-      expect(resultB).toEqual(
-        expect.arrayContaining([
-          expect.objectContaining({ run_id: taskRun1.id }),
-          expect.objectContaining({ run_id: taskRun2.id }),
-        ])
-      );
-
-      await runsReplicationServiceB.stop();
-    }
-  );
-
-  containerTest(
-    "should not re-process already handled data if shutdown is called after all transactions are processed",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-shutdown-after-processed",
-        logLevel: "warn",
-      });
-
-      // Service A
-      const runsReplicationServiceA = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-shutdown-after-processed",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 1,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationServiceA.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-shutdown-after-processed",
-          slug: "test-shutdown-after-processed",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-shutdown-after-processed",
-          slug: "test-shutdown-after-processed",
-          organizationId: organization.id,
-          externalRef: "test-shutdown-after-processed",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-shutdown-after-processed",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-shutdown-after-processed",
-          pkApiKey: "test-shutdown-after-processed",
-          shortcode: "test-shutdown-after-processed",
-        },
-      });
-
-      const run1Id = `run_shutdown_after_processed_${Date.now()}`;
-      const taskRun1 = await prisma.taskRun.create({
-        data: {
-          friendlyId: run1Id,
-          taskIdentifier: "my-task-shutdown-after-processed",
-          payload: JSON.stringify({ foo: "after-processed" }),
-          payloadType: "application/json",
-          traceId: "after-processed-1234",
-          spanId: "after-processed-1234",
-          queue: "test-shutdown-after-processed",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING",
-        },
-      });
-
-      await setTimeout(1000);
-
-      const queryRuns = clickhouse.reader.query({
-        name: "runs-replication-shutdown-after-processed",
-        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}",
-        schema: z.any(),
-        params: z.object({ run_id: z.string() }),
-      });
-
-      const [queryErrorA, resultA] = await queryRuns({ run_id: taskRun1.id });
-      expect(queryErrorA).toBeNull();
-      expect(resultA?.length).toBe(1);
-      expect(resultA?.[0]).toEqual(expect.objectContaining({ run_id: taskRun1.id }));
-
-      await runsReplicationServiceA.shutdown();
-
-      await setTimeout(500);
-
-      const taskRun2 = await prisma.taskRun.create({
-        data: {
-          friendlyId: `run_shutdown_after_processed_${Date.now()}`,
-          taskIdentifier: "my-task-shutdown-after-processed",
-          payload: JSON.stringify({ foo: "after-processed-2" }),
-          payloadType: "application/json",
-          traceId: "after-processed-2-1234",
-          spanId: "after-processed-2-1234",
-          queue: "test-shutdown-after-processed",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING",
-        },
-      });
-
-      // Service B
-      const runsReplicationServiceB = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-shutdown-after-processed",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 1,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationServiceB.start();
-
-      await setTimeout(1000);
-
-      const [queryErrorB, resultB] = await queryRuns({ run_id: taskRun2.id });
-      expect(queryErrorB).toBeNull();
-      expect(resultB?.length).toBe(1);
-      expect(resultB?.[0]).toEqual(expect.objectContaining({ run_id: taskRun2.id }));
-
-      await runsReplicationServiceB.stop();
-    }
-  );
-
-  containerTest(
-    "should record metrics with correct values when replicating runs",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-metrics",
-        logLevel: "warn",
-      });
-
-      const { tracer } = createInMemoryTracing();
-      const metricsHelper = createInMemoryMetrics();
-
-      const runsReplicationService = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-metrics",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 2,
-        flushIntervalMs: 100,
-        flushBatchSize: 5,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        tracer,
-        meter: metricsHelper.meter,
-        logLevel: "warn",
-      });
-
-      await runsReplicationService.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-metrics",
-          slug: "test-metrics",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-metrics",
-          slug: "test-metrics",
-          organizationId: organization.id,
-          externalRef: "test-metrics",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-metrics",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-metrics",
-          pkApiKey: "test-metrics",
-          shortcode: "test-metrics",
-        },
-      });
-
-      const now = Date.now();
-      const createdRuns: string[] = [];
-
-      for (let i = 0; i < 5; i++) {
-        const run = await prisma.taskRun.create({
-          data: {
-            friendlyId: `run_metrics_${now}_${i}`,
-            taskIdentifier: "my-task-metrics",
-            payload: JSON.stringify({ index: i }),
-            payloadType: "application/json",
-            traceId: `metrics-${now}-${i}`,
-            spanId: `metrics-${now}-${i}`,
-            queue: "test-metrics",
-            runtimeEnvironmentId: runtimeEnvironment.id,
-            projectId: project.id,
-            organizationId: organization.id,
-            environmentType: "DEVELOPMENT",
-            engine: "V2",
-            status: "PENDING",
-          },
-        });
-        createdRuns.push(run.id);
-      }
-
-      await setTimeout(1000);
-
-      for (let i = 0; i < 3; i++) {
-        await prisma.taskRun.update({
-          where: { id: createdRuns[i] },
-          data: { status: "EXECUTING" },
-        });
-      }
-
-      await setTimeout(1000);
-
-      for (let i = 0; i < 2; i++) {
-        await prisma.taskRun.update({
-          where: { id: createdRuns[i] },
-          data: {
-            status: "COMPLETED_SUCCESSFULLY",
-            completedAt: new Date(),
-            output: JSON.stringify({ result: "success" }),
-            outputType: "application/json",
-          },
-        });
-      }
-
-      await setTimeout(1000);
-
-      const metrics = await metricsHelper.getMetrics();
-
-      function getMetricData(name: string) {
-        for (const resourceMetrics of metrics) {
-          for (const scopeMetrics of resourceMetrics.scopeMetrics) {
-            for (const metric of scopeMetrics.metrics) {
-              if (metric.descriptor.name === name) {
-                return metric;
-              }
-            }
-          }
-        }
-        return null;
-      }
-
-      function sumCounterValues(metric: any): number {
-        if (!metric?.dataPoints) return 0;
-        return metric.dataPoints.reduce((sum: number, dp: any) => sum + (dp.value || 0), 0);
-      }
-
-      function histogramHasData(metric: any): boolean {
-        if (!metric?.dataPoints || metric.dataPoints.length === 0) return false;
-        return metric.dataPoints.some((dp: any) => {
-          return (
-            (typeof dp.count === "number" && dp.count > 0) ||
-            (typeof dp.value?.count === "number" && dp.value.count > 0) ||
-            (Array.isArray(dp.buckets?.counts) && dp.buckets.counts.some((c: number) => c > 0)) ||
-            (typeof dp.sum === "number" && dp.sum > 0) ||
-            typeof dp.min === "number" ||
-            typeof dp.max === "number"
-          );
-        });
-      }
-
-      function getCounterAttributeValues(metric: any, attributeName: string): unknown[] {
-        if (!metric?.dataPoints) return [];
-        return metric.dataPoints
-          .filter((dp: any) => dp.attributes?.[attributeName] !== undefined)
-          .map((dp: any) => dp.attributes[attributeName]);
-      }
-
-      const batchesFlushed = getMetricData("runs_replication.batches_flushed");
-      expect(batchesFlushed).not.toBeNull();
-      const totalBatchesFlushed = sumCounterValues(batchesFlushed);
-      expect(totalBatchesFlushed).toBeGreaterThanOrEqual(1);
-
-      const successAttributeValues = getCounterAttributeValues(batchesFlushed, "success");
-      expect(successAttributeValues.length).toBeGreaterThanOrEqual(1);
-
-      const taskRunsInserted = getMetricData("runs_replication.task_runs_inserted");
-      expect(taskRunsInserted).not.toBeNull();
-      const totalTaskRunsInserted = sumCounterValues(taskRunsInserted);
-      expect(totalTaskRunsInserted).toBeGreaterThanOrEqual(5);
-
-      const payloadsInserted = getMetricData("runs_replication.payloads_inserted");
-      expect(payloadsInserted).not.toBeNull();
-      const totalPayloadsInserted = sumCounterValues(payloadsInserted);
-      expect(totalPayloadsInserted).toBeGreaterThanOrEqual(1);
-
-      const eventsProcessed = getMetricData("runs_replication.events_processed");
-      expect(eventsProcessed).not.toBeNull();
-      const totalEventsProcessed = sumCounterValues(eventsProcessed);
-      expect(totalEventsProcessed).toBeGreaterThanOrEqual(1);
-
-      const eventTypes = getCounterAttributeValues(eventsProcessed, "event_type");
-      expect(eventTypes.length).toBeGreaterThanOrEqual(1);
-      expect(eventTypes).toContain("insert");
-
-      const batchSize = getMetricData("runs_replication.batch_size");
-      expect(batchSize).not.toBeNull();
-      expect(histogramHasData(batchSize)).toBe(true);
-
-      const replicationLag = getMetricData("runs_replication.replication_lag_ms");
-      expect(replicationLag).not.toBeNull();
-      expect(histogramHasData(replicationLag)).toBe(true);
-
-      const flushDuration = getMetricData("runs_replication.flush_duration_ms");
-      expect(flushDuration).not.toBeNull();
-      expect(histogramHasData(flushDuration)).toBe(true);
-
-      await runsReplicationService.stop();
-      await metricsHelper.shutdown();
-    }
-  );
 });
diff --git a/apps/webapp/test/runsReplicationService.part2.test.ts b/apps/webapp/test/runsReplicationService.part2.test.ts
index bd7348186b3..90be5b18322 100644
--- a/apps/webapp/test/runsReplicationService.part2.test.ts
+++ b/apps/webapp/test/runsReplicationService.part2.test.ts
@@ -1,5 +1,5 @@
 import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse";
-import { containerTest } from "@internal/testcontainers";
+import { replicationContainerTest } from "@internal/testcontainers";
 import { Logger } from "@trigger.dev/core/logger";
 import { readFile } from "node:fs/promises";
 import { setTimeout } from "node:timers/promises";
@@ -10,8 +10,8 @@ import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickho
 
 vi.setConfig({ testTimeout: 60_000 });
 
-describe("RunsReplicationService (part 2/2)", () => {
-  containerTest(
+describe("RunsReplicationService (part 2/7)", () => {
+  replicationContainerTest(
     "should handover leadership to a second service, and the second service should be able to extend the leader lock",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
@@ -141,7 +141,7 @@ describe("RunsReplicationService (part 2/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should replicate all 1,000 TaskRuns inserted in bulk to ClickHouse",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
@@ -256,7 +256,7 @@ describe("RunsReplicationService (part 2/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should replicate all 1,000 TaskRuns inserted in bulk to ClickHouse with updates",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
@@ -376,1062 +376,4 @@ describe("RunsReplicationService (part 2/2)", () => {
       await runsReplicationService.stop();
     }
   );
-
-  containerTest(
-    "should replicate all events in a single transaction (insert, update)",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-multi-event-tx",
-        logLevel: "warn",
-      });
-
-      const runsReplicationService = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-multi-event-tx",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 10,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationService.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-multi-event-tx",
-          slug: "test-multi-event-tx",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-multi-event-tx",
-          slug: "test-multi-event-tx",
-          organizationId: organization.id,
-          externalRef: "test-multi-event-tx",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-multi-event-tx",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-multi-event-tx",
-          pkApiKey: "test-multi-event-tx",
-          shortcode: "test-multi-event-tx",
-        },
-      });
-
-      // Start a transaction
-      const [run1, run2] = await prisma.$transaction(async (tx) => {
-        const run1 = await tx.taskRun.create({
-          data: {
-            friendlyId: `run_multi_event_1_${Date.now()}`,
-            taskIdentifier: "my-task-multi-event-1",
-            payload: JSON.stringify({ multi: 1 }),
-            payloadType: "application/json",
-            traceId: `multi-1-${Date.now()}`,
-            spanId: `multi-1-${Date.now()}`,
-            queue: "test-multi-event-tx",
-            runtimeEnvironmentId: runtimeEnvironment.id,
-            projectId: project.id,
-            organizationId: organization.id,
-            environmentType: "DEVELOPMENT",
-            engine: "V2",
-            status: "PENDING",
-            attemptNumber: 1,
-            createdAt: new Date(),
-            updatedAt: new Date(),
-          },
-        });
-        const run2 = await tx.taskRun.create({
-          data: {
-            friendlyId: `run_multi_event_2_${Date.now()}`,
-            taskIdentifier: "my-task-multi-event-2",
-            payload: JSON.stringify({ multi: 2 }),
-            payloadType: "application/json",
-            traceId: `multi-2-${Date.now()}`,
-            spanId: `multi-2-${Date.now()}`,
-            queue: "test-multi-event-tx",
-            runtimeEnvironmentId: runtimeEnvironment.id,
-            projectId: project.id,
-            organizationId: organization.id,
-            environmentType: "DEVELOPMENT",
-            engine: "V2",
-            status: "PENDING",
-            attemptNumber: 1,
-            createdAt: new Date(),
-            updatedAt: new Date(),
-          },
-        });
-        await tx.taskRun.update({
-          where: { id: run1.id },
-          data: { status: "COMPLETED_SUCCESSFULLY" },
-        });
-
-        return [run1, run2];
-      });
-
-      // Wait for replication
-      await setTimeout(1000);
-
-      // Query ClickHouse for both runs using FINAL
-      const queryRuns = clickhouse.reader.query({
-        name: "runs-replication-multi-event-tx",
-        query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id IN ({run_id_1:String}, {run_id_2:String})`,
-        schema: z.any(),
-        params: z.object({ run_id_1: z.string(), run_id_2: z.string() }),
-      });
-
-      const [queryError, result] = await queryRuns({ run_id_1: run1.id, run_id_2: run2.id });
-      expect(queryError).toBeNull();
-      expect(result?.length).toBe(2);
-      const run1Result = result?.find((r: any) => r.run_id === run1.id);
-      const run2Result = result?.find((r: any) => r.run_id === run2.id);
-      expect(run1Result).toBeDefined();
-      expect(run1Result).toEqual(
-        expect.objectContaining({ run_id: run1.id, status: "COMPLETED_SUCCESSFULLY" })
-      );
-      expect(run2Result).toBeDefined();
-      expect(run2Result).toEqual(expect.objectContaining({ run_id: run2.id }));
-
-      await runsReplicationService.stop();
-    }
-  );
-
-  containerTest(
-    "should be able to handle processing transactions for a long period of time",
-    { timeout: 60_000 * 5 },
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-long-tx",
-        logLevel: "warn",
-      });
-
-      const runsReplicationService = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-long-tx",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 10,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationService.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-long-tx",
-          slug: "test-long-tx",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-long-tx",
-          slug: "test-long-tx",
-          organizationId: organization.id,
-          externalRef: "test-long-tx",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-long-tx",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-long-tx",
-          pkApiKey: "test-long-tx",
-          shortcode: "test-long-tx",
-        },
-      });
-
-      // Start an interval that will create a new run every 500ms for 4 minutes
-      const interval = setInterval(async () => {
-        await prisma.taskRun.create({
-          data: {
-            friendlyId: `run_long_tx_${Date.now()}`,
-            taskIdentifier: "my-task-long-tx",
-            payload: JSON.stringify({ long: 1 }),
-            payloadType: "application/json",
-            traceId: `long-${Date.now()}`,
-            spanId: `long-${Date.now()}`,
-            queue: "test-long-tx",
-            runtimeEnvironmentId: runtimeEnvironment.id,
-            projectId: project.id,
-            organizationId: organization.id,
-            environmentType: "DEVELOPMENT",
-            engine: "V2",
-            status: "PENDING",
-            attemptNumber: 1,
-            createdAt: new Date(),
-            updatedAt: new Date(),
-          },
-        });
-      }, 500);
-
-      // Wait for 1 minute
-      await setTimeout(1 * 60 * 1000);
-
-      // Stop the interval
-      clearInterval(interval);
-
-      // Wait for replication
-      await setTimeout(1000);
-
-      // Query ClickHouse for all runs using FINAL
-      const queryRuns = clickhouse.reader.query({
-        name: "runs-replication-long-tx",
-        query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL`,
-        schema: z.any(),
-      });
-
-      const [queryError, result] = await queryRuns({});
-      expect(queryError).toBeNull();
-
-      expect(result?.length).toBeGreaterThanOrEqual(50);
-
-      await runsReplicationService.stop();
-    }
-  );
-
-  containerTest(
-    "should insert TaskRuns even if there are incomplete Unicode escape sequences in the JSON",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-stress-bulk-insert",
-        logLevel: "warn",
-      });
-
-      const runsReplicationService = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-stress-bulk-insert",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 10,
-        flushIntervalMs: 100,
-        flushBatchSize: 50,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationService.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-stress-bulk-insert",
-          slug: "test-stress-bulk-insert",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-stress-bulk-insert",
-          slug: "test-stress-bulk-insert",
-          organizationId: organization.id,
-          externalRef: "test-stress-bulk-insert",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-stress-bulk-insert",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-stress-bulk-insert",
-          pkApiKey: "test-stress-bulk-insert",
-          shortcode: "test-stress-bulk-insert",
-        },
-      });
-
-      // Prepare 9 unique TaskRuns
-      const now = Date.now();
-      const runsData = Array.from({ length: 9 }, (_, i) => ({
-        friendlyId: `run_bulk_${now}_${i}`,
-        taskIdentifier: `my-task-bulk`,
-        payload: `{"title": "hello"}`,
-        payloadType: "application/json",
-        traceId: `bulk-${i}`,
-        spanId: `bulk-${i}`,
-        queue: "test-stress-bulk-insert",
-        runtimeEnvironmentId: runtimeEnvironment.id,
-        projectId: project.id,
-        organizationId: organization.id,
-        environmentType: "DEVELOPMENT" as const,
-        engine: "V2" as const,
-        status: "PENDING" as const,
-        attemptNumber: 1,
-        createdAt: new Date(now + i),
-        updatedAt: new Date(now + i),
-      }));
-
-      //add a run with incomplete Unicode escape sequences
-      const badPayload = await readFile(`${__dirname}/bad-clickhouse-output.json`, "utf-8");
-      const hasProblems = detectBadJsonStrings(badPayload);
-      expect(hasProblems).toBe(true);
-
-      runsData.push({
-        friendlyId: `run_bulk_${now}_10`,
-        taskIdentifier: `my-task-bulk`,
-        payload: badPayload,
-        payloadType: "application/json",
-        traceId: `bulk-10`,
-        spanId: `bulk-10`,
-        queue: "test-stress-bulk-insert",
-        runtimeEnvironmentId: runtimeEnvironment.id,
-        projectId: project.id,
-        organizationId: organization.id,
-        environmentType: "DEVELOPMENT" as const,
-        engine: "V2" as const,
-        status: "PENDING" as const,
-        attemptNumber: 1,
-        createdAt: new Date(now + 10),
-        updatedAt: new Date(now + 10),
-      });
-
-      // Bulk insert
-      const created = await prisma.taskRun.createMany({ data: runsData });
-      expect(created.count).toBe(10);
-
-      // Update the runs (not the 10th one)
-      await prisma.taskRun.updateMany({
-        where: {
-          spanId: { not: "bulk-10" },
-        },
-        data: {
-          status: "COMPLETED_SUCCESSFULLY",
-          output: `{"foo":"bar"}`,
-          outputType: "application/json",
-        },
-      });
-
-      // Give the 10th one a bad payload
-      await prisma.taskRun.updateMany({
-        where: {
-          spanId: "bulk-10",
-        },
-        data: {
-          status: "COMPLETED_SUCCESSFULLY",
-          output: badPayload,
-          outputType: "application/json",
-        },
-      });
-
-      // Wait for replication
-      await setTimeout(5000);
-
-      // Query ClickHouse for all runs using FINAL
-      const queryRuns = clickhouse.reader.query({
-        name: "runs-replication-stress-bulk-insert",
-        query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL`,
-        schema: z.any(),
-      });
-
-      const [queryError, result] = await queryRuns({});
-      expect(queryError).toBeNull();
-      expect(result?.length).toBe(10);
-
-      // Check a few random runs for correctness
-      for (let i = 0; i < 9; i++) {
-        const expected = runsData[i];
-        const found = result?.find((r: any) => r.friendly_id === expected.friendlyId);
-        expect(found).toBeDefined();
-        expect(found).toEqual(
-          expect.objectContaining({
-            friendly_id: expected.friendlyId,
-            trace_id: expected.traceId,
-            task_identifier: expected.taskIdentifier,
-            status: "COMPLETED_SUCCESSFULLY",
-          })
-        );
-        expect(found?.output).toBeDefined();
-      }
-
-      // Check the run with the bad JSON
-      const foundBad = result?.find((r: any) => r.span_id === "bulk-10");
-      expect(foundBad).toBeDefined();
-      expect(foundBad?.output).toStrictEqual({});
-
-      await runsReplicationService.stop();
-    }
-  );
-
-  containerTest(
-    "should merge duplicate event+run.id combinations keeping the latest version",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public.\"TaskRun\" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-merge-batch",
-        logLevel: "warn",
-      });
-
-      const runsReplicationService = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-merge-batch",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 10, // Higher batch size to test merging
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      // Listen to batchFlushed events to verify merging
-      const batchFlushedEvents: Array<{
-        flushId: string;
-        taskRunInserts: any[];
-        payloadInserts: any[];
-      }> = [];
-
-      runsReplicationService.events.on("batchFlushed", (event) => {
-        batchFlushedEvents.push(event);
-      });
-
-      await runsReplicationService.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-merge-batch",
-          slug: "test-merge-batch",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-merge-batch",
-          slug: "test-merge-batch",
-          organizationId: organization.id,
-          externalRef: "test-merge-batch",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-merge-batch",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-merge-batch",
-          pkApiKey: "test-merge-batch",
-          shortcode: "test-merge-batch",
-        },
-      });
-
-      // Create a run and rapidly update it multiple times in a transaction
-      // This should create multiple events for the same run that get merged
-      const run = await prisma.taskRun.create({
-        data: {
-          friendlyId: `run_merge_${Date.now()}`,
-          taskIdentifier: "my-task-merge",
-          payload: JSON.stringify({ version: 1 }),
-          payloadType: "application/json",
-          traceId: `merge-${Date.now()}`,
-          spanId: `merge-${Date.now()}`,
-          queue: "test-merge-batch",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING_VERSION",
-        },
-      });
-      await prisma.taskRun.update({
-        where: { id: run.id },
-        data: { status: "DEQUEUED" },
-      });
-      await prisma.taskRun.update({
-        where: { id: run.id },
-        data: { status: "EXECUTING" },
-      });
-      await prisma.taskRun.update({
-        where: { id: run.id },
-        data: { status: "PAUSED" },
-      });
-      await prisma.taskRun.update({
-        where: { id: run.id },
-        data: { status: "EXECUTING" },
-      });
-      await prisma.taskRun.update({
-        where: { id: run.id },
-        data: { status: "COMPLETED_SUCCESSFULLY" },
-      });
-
-      await setTimeout(1000);
-
-      expect(batchFlushedEvents?.[0].taskRunInserts).toHaveLength(2);
-      // Use getTaskRunField for type-safe array access
-      expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[0], "run_id")).toEqual(run.id);
-      expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[0], "status")).toEqual(
-        "PENDING_VERSION"
-      );
-      expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[1], "run_id")).toEqual(run.id);
-      expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[1], "status")).toEqual(
-        "COMPLETED_SUCCESSFULLY"
-      );
-
-      await runsReplicationService.stop();
-    }
-  );
-
-  containerTest(
-    "should sort batch inserts according to table schema ordering for optimal performance",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public.\"TaskRun\" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-sorting",
-        logLevel: "warn",
-      });
-
-      const runsReplicationService = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-sorting",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 10,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      // Listen to batchFlushed events to verify sorting
-      const batchFlushedEvents: Array<{
-        flushId: string;
-        taskRunInserts: any[];
-        payloadInserts: any[];
-      }> = [];
-
-      runsReplicationService.events.on("batchFlushed", (event) => {
-        batchFlushedEvents.push(event);
-      });
-
-      await runsReplicationService.start();
-
-      // Create two organizations to test sorting by organization_id
-      const org1 = await prisma.organization.create({
-        data: { title: "org-z", slug: "org-z" },
-      });
-
-      const org2 = await prisma.organization.create({
-        data: { title: "org-a", slug: "org-a" },
-      });
-
-      const project1 = await prisma.project.create({
-        data: {
-          name: "test-sorting-z",
-          slug: "test-sorting-z",
-          organizationId: org1.id,
-          externalRef: "test-sorting-z",
-        },
-      });
-
-      const project2 = await prisma.project.create({
-        data: {
-          name: "test-sorting-a",
-          slug: "test-sorting-a",
-          organizationId: org2.id,
-          externalRef: "test-sorting-a",
-        },
-      });
-
-      const env1 = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-sorting-z",
-          type: "DEVELOPMENT",
-          projectId: project1.id,
-          organizationId: org1.id,
-          apiKey: "test-sorting-z",
-          pkApiKey: "test-sorting-z",
-          shortcode: "test-sorting-z",
-        },
-      });
-
-      const env2 = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-sorting-a",
-          type: "DEVELOPMENT",
-          projectId: project2.id,
-          organizationId: org2.id,
-          apiKey: "test-sorting-a",
-          pkApiKey: "test-sorting-a",
-          shortcode: "test-sorting-a",
-        },
-      });
-
-      const now = Date.now();
-
-      const run1 = await prisma.taskRun.create({
-        data: {
-          friendlyId: `run_sort_org_z_${now}`,
-          taskIdentifier: "my-task-sort",
-          payload: JSON.stringify({ org: "z" }),
-          payloadType: "application/json",
-          traceId: `sort-z-${now}`,
-          spanId: `sort-z-${now}`,
-          queue: "test-sorting",
-          runtimeEnvironmentId: env1.id,
-          projectId: project1.id,
-          organizationId: org1.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING",
-          createdAt: new Date(now + 2000),
-        },
-      });
-      await prisma.taskRun.update({
-        where: { id: run1.id },
-        data: { status: "DEQUEUED" },
-      });
-
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: `run_sort_org_a_${now}`,
-          taskIdentifier: "my-task-sort",
-          payload: JSON.stringify({ org: "a" }),
-          payloadType: "application/json",
-          traceId: `sort-a-${now}`,
-          spanId: `sort-a-${now}`,
-          queue: "test-sorting",
-          runtimeEnvironmentId: env2.id,
-          projectId: project2.id,
-          organizationId: org2.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING",
-          createdAt: new Date(now + 1000),
-        },
-      });
-
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: `run_sort_org_a_${now}_2`,
-          taskIdentifier: "my-task-sort",
-          payload: JSON.stringify({ org: "a" }),
-          payloadType: "application/json",
-          traceId: `sort-a-${now}`,
-          spanId: `sort-a-${now}`,
-          queue: "test-sorting",
-          runtimeEnvironmentId: env2.id,
-          projectId: project2.id,
-          organizationId: org2.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-          status: "PENDING",
-          createdAt: new Date(now),
-        },
-      });
-
-      await setTimeout(1000);
-
-      expect(batchFlushedEvents[0]?.taskRunInserts.length).toBeGreaterThan(1);
-      expect(batchFlushedEvents[0]?.payloadInserts.length).toBeGreaterThan(1);
-
-      // Verify sorting order: organization_id, project_id, environment_id, created_at, run_id
-      for (let i = 1; i < batchFlushedEvents[0]?.taskRunInserts.length; i++) {
-        const prev = batchFlushedEvents[0]!.taskRunInserts[i - 1];
-        const curr = batchFlushedEvents[0]!.taskRunInserts[i];
-
-        const prevKey = [
-          getTaskRunField(prev, "organization_id"),
-          getTaskRunField(prev, "project_id"),
-          getTaskRunField(prev, "environment_id"),
-          getTaskRunField(prev, "created_at"),
-          getTaskRunField(prev, "run_id"),
-        ];
-        const currKey = [
-          getTaskRunField(curr, "organization_id"),
-          getTaskRunField(curr, "project_id"),
-          getTaskRunField(curr, "environment_id"),
-          getTaskRunField(curr, "created_at"),
-          getTaskRunField(curr, "run_id"),
-        ];
-
-        const keysAreEqual = prevKey.every((val, idx) => val === currKey[idx]);
-        if (keysAreEqual) {
-          // Also valid order
-          continue;
-        }
-
-        // Compare tuples lexicographically
-        let isCorrectOrder = false;
-        for (let j = 0; j < prevKey.length; j++) {
-          if (prevKey[j] < currKey[j]) {
-            isCorrectOrder = true;
-            break;
-          }
-          if (prevKey[j] > currKey[j]) {
-            isCorrectOrder = false;
-            break;
-          }
-          // If equal, continue to next field
-        }
-
-        expect(isCorrectOrder).toBeTruthy();
-      }
-
-      // Verify payloadInserts are also sorted by run_id
-      for (let i = 1; i < batchFlushedEvents[0]?.payloadInserts.length; i++) {
-        const prev = batchFlushedEvents[0]!.payloadInserts[i - 1];
-        const curr = batchFlushedEvents[0]!.payloadInserts[i];
-        expect(getPayloadField(prev, "run_id") <= getPayloadField(curr, "run_id")).toBeTruthy();
-      }
-
-      await runsReplicationService.stop();
-    }
-  );
-
-  containerTest(
-    "should exhaustively replicate all TaskRun columns to ClickHouse",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
-
-      const clickhouse = new ClickHouse({
-        url: clickhouseContainer.getConnectionUrl(),
-        name: "runs-replication-exhaustive",
-        logLevel: "warn",
-      });
-
-      const runsReplicationService = new RunsReplicationService({
-        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
-        pgConnectionUrl: postgresContainer.getConnectionUri(),
-        serviceName: "runs-replication-exhaustive",
-        slotName: "task_runs_to_clickhouse_v1",
-        publicationName: "task_runs_to_clickhouse_v1_publication",
-        redisOptions,
-        maxFlushConcurrency: 1,
-        flushIntervalMs: 100,
-        flushBatchSize: 1,
-        leaderLockTimeoutMs: 5000,
-        leaderLockExtendIntervalMs: 1000,
-        ackIntervalSeconds: 5,
-        logLevel: "warn",
-      });
-
-      await runsReplicationService.start();
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test-exhaustive",
-          slug: "test-exhaustive",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test-exhaustive",
-          slug: "test-exhaustive",
-          organizationId: organization.id,
-          externalRef: "test-exhaustive",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test-exhaustive",
-          type: "PRODUCTION",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test-exhaustive",
-          pkApiKey: "test-exhaustive",
-          shortcode: "test-exhaustive",
-        },
-      });
-
-      // Create a batch for the batchId field
-      const batch = await prisma.batchTaskRun.create({
-        data: {
-          friendlyId: "batch_exhaustive",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          status: "PENDING",
-        },
-      });
-
-      // Create a root run for the rootTaskRunId field
-      const rootRun = await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_root_exhaustive",
-          taskIdentifier: "root-task",
-          payload: JSON.stringify({ root: true }),
-          traceId: "root-trace-id",
-          spanId: "root-span-id",
-          queue: "root-queue",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "PRODUCTION",
-          engine: "V2",
-        },
-      });
-
-      // Create a parent run for the parentTaskRunId field
-      const parentRun = await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_parent_exhaustive",
-          taskIdentifier: "parent-task",
-          payload: JSON.stringify({ parent: true }),
-          traceId: "parent-trace-id",
-          spanId: "parent-span-id",
-          queue: "parent-queue",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "PRODUCTION",
-          engine: "V2",
-          rootTaskRunId: rootRun.id,
-          depth: 1,
-        },
-      });
-
-      // Set up all the dates we'll use
-      const now = new Date();
-      const createdAt = new Date(now.getTime() - 10000);
-      const updatedAt = new Date(now.getTime() - 5000);
-      const startedAt = new Date(now.getTime() - 8000);
-      const executedAt = new Date(now.getTime() - 7500);
-      const completedAt = new Date(now.getTime() - 6000);
-      const delayUntil = new Date(now.getTime() - 9000);
-      const queuedAt = new Date(now.getTime() - 9500);
-      const expiredAt = null; // Not expired
-
-      // Create the main task run with ALL fields populated
-      const taskRun = await prisma.taskRun.create({
-        data: {
-          // Core identifiers
-          friendlyId: "run_exhaustive_test",
-          taskIdentifier: "exhaustive-task",
-
-          // Environment/project/org
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "PRODUCTION",
-
-          // Engine and execution
-          engine: "V2",
-          status: "COMPLETED_SUCCESSFULLY",
-          attemptNumber: 3,
-          queue: "exhaustive-queue",
-          workerQueue: "exhaustive-worker-queue",
-
-          // Relationships
-          // Note: scheduleId is not set to test empty string handling
-          batchId: batch.id,
-          rootTaskRunId: rootRun.id,
-          parentTaskRunId: parentRun.id,
-          depth: 2,
-
-          // Timestamps
-          createdAt,
-          updatedAt,
-          startedAt,
-          executedAt,
-          completedAt,
-          delayUntil,
-          queuedAt,
-          expiredAt,
-
-          // Payload and output
-          payload: JSON.stringify({ input: "test-payload" }),
-          payloadType: "application/json",
-          output: JSON.stringify({ result: "test-output" }),
-          outputType: "application/json",
-          error: { message: "test error", name: "TestError" },
-
-          // Tracing
-          traceId: "exhaustive-trace-id-12345",
-          spanId: "exhaustive-span-id-67890",
-
-          // Versioning
-          taskVersion: "1.2.3",
-          sdkVersion: "3.0.0",
-          cliVersion: "2.5.1",
-
-          // Execution settings
-          machinePreset: "large-1x",
-          idempotencyKey: "exhaustive-idempotency-key-hashed",
-          idempotencyKeyOptions: {
-            key: "exhaustive-idempotency-key",
-            scope: "run",
-          },
-          ttl: "1h",
-          isTest: true,
-          concurrencyKey: "exhaustive-concurrency-key",
-          maxDurationInSeconds: 3600,
-
-          // Tags and bulk actions
-          runTags: ["tag1", "tag2", "exhaustive-tag"],
-          bulkActionGroupIds: ["bulk-group-1", "bulk-group-2"],
-
-          // Usage metrics
-          usageDurationMs: 12345,
-          costInCents: 50,
-          baseCostInCents: 25,
-        },
-      });
-
-      // Wait for replication
-      await setTimeout(1500);
-
-      // Query ClickHouse directly to get all columns
-      const queryRuns = clickhouse.reader.query({
-        name: "exhaustive-replication-test",
-        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}",
-        schema: z.any(),
-        params: z.object({ run_id: z.string() }),
-      });
-
-      const [queryError, result] = await queryRuns({ run_id: taskRun.id });
-
-      expect(queryError).toBeNull();
-      expect(result).toHaveLength(1);
-
-      const clickhouseRun = result![0];
-
-      // Exhaustively verify each column
-      // Core identifiers
-      expect(clickhouseRun.run_id).toBe(taskRun.id);
-      expect(clickhouseRun.friendly_id).toBe("run_exhaustive_test");
-      expect(clickhouseRun.task_identifier).toBe("exhaustive-task");
-
-      // Environment/project/org
-      expect(clickhouseRun.environment_id).toBe(runtimeEnvironment.id);
-      expect(clickhouseRun.project_id).toBe(project.id);
-      expect(clickhouseRun.organization_id).toBe(organization.id);
-      expect(clickhouseRun.environment_type).toBe("PRODUCTION");
-
-      // Engine and execution
-      expect(clickhouseRun.engine).toBe("V2");
-      expect(clickhouseRun.status).toBe("COMPLETED_SUCCESSFULLY");
-      expect(clickhouseRun.attempt).toBe(3);
-      expect(clickhouseRun.queue).toBe("exhaustive-queue");
-      expect(clickhouseRun.worker_queue).toBe("exhaustive-worker-queue");
-
-      // Relationships
-      expect(clickhouseRun.schedule_id).toBe(""); // Empty when not set
-      expect(clickhouseRun.batch_id).toBe(batch.id);
-      expect(clickhouseRun.root_run_id).toBe(rootRun.id);
-      expect(clickhouseRun.parent_run_id).toBe(parentRun.id);
-      expect(clickhouseRun.depth).toBe(2);
-
-      // Timestamps (ClickHouse returns DateTime64 as strings in UTC without 'Z' suffix)
-      // Helper to parse ClickHouse timestamp strings to milliseconds
-      function parseClickhouseTimestamp(ts: string | null): number | null {
-        if (ts === null || ts === "1970-01-01 00:00:00.000") return null;
-        return new Date(ts + "Z").getTime();
-      }
-
-      expect(parseClickhouseTimestamp(clickhouseRun.created_at)).toBe(createdAt.getTime());
-      expect(parseClickhouseTimestamp(clickhouseRun.updated_at)).toBe(updatedAt.getTime());
-      expect(parseClickhouseTimestamp(clickhouseRun.started_at)).toBe(startedAt.getTime());
-      expect(parseClickhouseTimestamp(clickhouseRun.executed_at)).toBe(executedAt.getTime());
-      expect(parseClickhouseTimestamp(clickhouseRun.completed_at)).toBe(completedAt.getTime());
-      expect(parseClickhouseTimestamp(clickhouseRun.delay_until)).toBe(delayUntil.getTime());
-      expect(parseClickhouseTimestamp(clickhouseRun.queued_at)).toBe(queuedAt.getTime());
-      expect(parseClickhouseTimestamp(clickhouseRun.expired_at)).toBeNull();
-
-      // Output (parsed JSON)
-      expect(clickhouseRun.output).toEqual({ data: { result: "test-output" } });
-
-      // Error
-      expect(clickhouseRun.error).toEqual({
-        data: { message: "test error", name: "TestError" },
-      });
-
-      // Tracing
-      expect(clickhouseRun.trace_id).toBe("exhaustive-trace-id-12345");
-      expect(clickhouseRun.span_id).toBe("exhaustive-span-id-67890");
-
-      // Versioning
-      expect(clickhouseRun.task_version).toBe("1.2.3");
-      expect(clickhouseRun.sdk_version).toBe("3.0.0");
-      expect(clickhouseRun.cli_version).toBe("2.5.1");
-
-      // Execution settings
-      expect(clickhouseRun.machine_preset).toBe("large-1x");
-      expect(clickhouseRun.idempotency_key).toBe("exhaustive-idempotency-key-hashed");
-      expect(clickhouseRun.idempotency_key_user).toBe("exhaustive-idempotency-key");
-      expect(clickhouseRun.idempotency_key_scope).toBe("run");
-      expect(clickhouseRun.expiration_ttl).toBe("1h");
-      expect(clickhouseRun.is_test).toBe(1); // ClickHouse returns booleans as integers
-      expect(clickhouseRun.concurrency_key).toBe("exhaustive-concurrency-key");
-      expect(clickhouseRun.max_duration_in_seconds).toBe(3600);
-
-      // Tags and bulk actions
-      expect(clickhouseRun.tags).toEqual(["tag1", "tag2", "exhaustive-tag"]);
-      expect(clickhouseRun.bulk_action_group_ids).toEqual(["bulk-group-1", "bulk-group-2"]);
-
-      // Usage metrics
-      expect(clickhouseRun.usage_duration_ms).toBe(12345);
-      expect(clickhouseRun.cost_in_cents).toBe(50);
-      expect(clickhouseRun.base_cost_in_cents).toBe(25);
-
-      // Internal ClickHouse columns
-      expect(clickhouseRun._is_deleted).toBe(0);
-      expect(clickhouseRun._version).toBeDefined();
-      expect(typeof clickhouseRun._version).toBe("number"); // ClickHouse returns UInt64 as number
-
-      // Also verify the payload was inserted into the payloads table
-      const queryPayloads = clickhouse.reader.query({
-        name: "exhaustive-payload-test",
-        query: "SELECT * FROM trigger_dev.raw_task_runs_payload_v1 WHERE run_id = {run_id:String}",
-        schema: z.any(),
-        params: z.object({ run_id: z.string() }),
-      });
-
-      const [payloadError, payloadResult] = await queryPayloads({ run_id: taskRun.id });
-
-      expect(payloadError).toBeNull();
-      expect(payloadResult).toHaveLength(1);
-      expect(payloadResult![0].run_id).toBe(taskRun.id);
-      expect(parseClickhouseTimestamp(payloadResult![0].created_at)).toBe(createdAt.getTime());
-      expect(payloadResult![0].payload).toEqual({ data: { input: "test-payload" } });
-
-      await runsReplicationService.stop();
-    }
-  );
 });
diff --git a/apps/webapp/test/runsReplicationService.part3.test.ts b/apps/webapp/test/runsReplicationService.part3.test.ts
new file mode 100644
index 00000000000..1261be3b513
--- /dev/null
+++ b/apps/webapp/test/runsReplicationService.part3.test.ts
@@ -0,0 +1,307 @@
+import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse";
+import { replicationContainerTest } from "@internal/testcontainers";
+import { Logger } from "@trigger.dev/core/logger";
+import { readFile } from "node:fs/promises";
+import { setTimeout } from "node:timers/promises";
+import { z } from "zod";
+import { RunsReplicationService } from "~/services/runsReplicationService.server";
+import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings";
+import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+describe("RunsReplicationService (part 3/7)", () => {
+  replicationContainerTest(
+    "should insert TaskRuns even if there are incomplete Unicode escape sequences in the JSON",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
+
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-stress-bulk-insert",
+        logLevel: "warn",
+      });
+
+      const runsReplicationService = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-stress-bulk-insert",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 10,
+        flushIntervalMs: 100,
+        flushBatchSize: 50,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationService.start();
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test-stress-bulk-insert",
+          slug: "test-stress-bulk-insert",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test-stress-bulk-insert",
+          slug: "test-stress-bulk-insert",
+          organizationId: organization.id,
+          externalRef: "test-stress-bulk-insert",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-stress-bulk-insert",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test-stress-bulk-insert",
+          pkApiKey: "test-stress-bulk-insert",
+          shortcode: "test-stress-bulk-insert",
+        },
+      });
+
+      // Prepare 9 unique TaskRuns with well-formed JSON payloads (indices 0-8)
+      const now = Date.now();
+      const runsData = Array.from({ length: 9 }, (_, i) => ({
+        friendlyId: `run_bulk_${now}_${i}`,
+        taskIdentifier: `my-task-bulk`,
+        payload: `{"title": "hello"}`,
+        payloadType: "application/json",
+        traceId: `bulk-${i}`,
+        spanId: `bulk-${i}`,
+        queue: "test-stress-bulk-insert",
+        runtimeEnvironmentId: runtimeEnvironment.id,
+        projectId: project.id,
+        organizationId: organization.id,
+        environmentType: "DEVELOPMENT" as const,
+        engine: "V2" as const,
+        status: "PENDING" as const,
+        attemptNumber: 1,
+        createdAt: new Date(now + i),
+        updatedAt: new Date(now + i),
+      }));
+
+      // Add a 10th run whose payload is the bad-JSON fixture (incomplete Unicode escape sequences)
+      const badPayload = await readFile(`${__dirname}/bad-clickhouse-output.json`, "utf-8");
+      const hasProblems = detectBadJsonStrings(badPayload);
+      expect(hasProblems).toBe(true); // sanity check: the fixture must be flagged as bad JSON
+
+      runsData.push({
+        friendlyId: `run_bulk_${now}_10`,
+        taskIdentifier: `my-task-bulk`,
+        payload: badPayload,
+        payloadType: "application/json",
+        traceId: `bulk-10`,
+        spanId: `bulk-10`,
+        queue: "test-stress-bulk-insert",
+        runtimeEnvironmentId: runtimeEnvironment.id,
+        projectId: project.id,
+        organizationId: organization.id,
+        environmentType: "DEVELOPMENT" as const,
+        engine: "V2" as const,
+        status: "PENDING" as const,
+        attemptNumber: 1,
+        createdAt: new Date(now + 10),
+        updatedAt: new Date(now + 10),
+      });
+
+      // Bulk insert all 10 runs in a single createMany call
+      const created = await prisma.taskRun.createMany({ data: runsData });
+      expect(created.count).toBe(10);
+
+      // Complete the nine well-formed runs (every run except span "bulk-10") with valid JSON output
+      await prisma.taskRun.updateMany({
+        where: {
+          spanId: { not: "bulk-10" },
+        },
+        data: {
+          status: "COMPLETED_SUCCESSFULLY",
+          output: `{"foo":"bar"}`,
+          outputType: "application/json",
+        },
+      });
+
+      // Complete the 10th run with the bad JSON as its output
+      await prisma.taskRun.updateMany({
+        where: {
+          spanId: "bulk-10",
+        },
+        data: {
+          status: "COMPLETED_SUCCESSFULLY",
+          output: badPayload,
+          outputType: "application/json",
+        },
+      });
+
+      // Wait for replication
+      await setTimeout(5000);
+
+      // Query ClickHouse for all runs using FINAL
+      const queryRuns = clickhouse.reader.query({
+        name: "runs-replication-stress-bulk-insert",
+        query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL`,
+        schema: z.any(),
+      });
+
+      const [queryError, result] = await queryRuns({});
+      expect(queryError).toBeNull();
+      expect(result?.length).toBe(10);
+
+      // Check each of the nine well-formed runs for correctness
+      for (let i = 0; i < 9; i++) {
+        const expected = runsData[i];
+        const found = result?.find((r: any) => r.friendly_id === expected.friendlyId);
+        expect(found).toBeDefined();
+        expect(found).toEqual(
+          expect.objectContaining({
+            friendly_id: expected.friendlyId,
+            trace_id: expected.traceId,
+            task_identifier: expected.taskIdentifier,
+            status: "COMPLETED_SUCCESSFULLY",
+          })
+        );
+        expect(found?.output).toBeDefined();
+      }
+
+      // Check the run with the bad JSON: the row must exist and its output must be an empty object
+      const foundBad = result?.find((r: any) => r.span_id === "bulk-10");
+      expect(foundBad).toBeDefined();
+      expect(foundBad?.output).toStrictEqual({});
+
+      await runsReplicationService.stop();
+    }
+  );
+
+  replicationContainerTest(
+    "should merge duplicate event+run.id combinations keeping the latest version",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public.\"TaskRun\" REPLICA IDENTITY FULL;`);
+
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-merge-batch",
+        logLevel: "warn",
+      });
+
+      const runsReplicationService = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-merge-batch",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 10, // Higher batch size to test merging
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      // Record every batchFlushed event so the contents of the first flush can be inspected
+      const batchFlushedEvents: Array<{
+        flushId: string;
+        taskRunInserts: any[];
+        payloadInserts: any[];
+      }> = [];
+
+      runsReplicationService.events.on("batchFlushed", (event) => {
+        batchFlushedEvents.push(event);
+      });
+
+      await runsReplicationService.start();
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test-merge-batch",
+          slug: "test-merge-batch",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test-merge-batch",
+          slug: "test-merge-batch",
+          organizationId: organization.id,
+          externalRef: "test-merge-batch",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-merge-batch",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test-merge-batch",
+          pkApiKey: "test-merge-batch",
+          shortcode: "test-merge-batch",
+        },
+      });
+
+      // Create a run, then update it five times back-to-back (separate statements, no explicit transaction)
+      // This should create multiple events for the same run that get merged
+      const run = await prisma.taskRun.create({
+        data: {
+          friendlyId: `run_merge_${Date.now()}`,
+          taskIdentifier: "my-task-merge",
+          payload: JSON.stringify({ version: 1 }),
+          payloadType: "application/json",
+          traceId: `merge-${Date.now()}`,
+          spanId: `merge-${Date.now()}`,
+          queue: "test-merge-batch",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING_VERSION",
+        },
+      });
+      await prisma.taskRun.update({
+        where: { id: run.id },
+        data: { status: "DEQUEUED" },
+      });
+      await prisma.taskRun.update({
+        where: { id: run.id },
+        data: { status: "EXECUTING" },
+      });
+      await prisma.taskRun.update({
+        where: { id: run.id },
+        data: { status: "PAUSED" },
+      });
+      await prisma.taskRun.update({
+        where: { id: run.id },
+        data: { status: "EXECUTING" },
+      });
+      await prisma.taskRun.update({
+        where: { id: run.id },
+        data: { status: "COMPLETED_SUCCESSFULLY" },
+      });
+
+      await setTimeout(1000);
+
+      expect(batchFlushedEvents?.[0].taskRunInserts).toHaveLength(2);
+      // Expect the initial row plus only the latest update; getTaskRunField gives type-safe array access
+      expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[0], "run_id")).toEqual(run.id);
+      expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[0], "status")).toEqual(
+        "PENDING_VERSION"
+      );
+      expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[1], "run_id")).toEqual(run.id);
+      expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[1], "status")).toEqual(
+        "COMPLETED_SUCCESSFULLY"
+      );
+
+      await runsReplicationService.stop();
+    }
+  );
+});
diff --git a/apps/webapp/test/runsReplicationService.part4.test.ts b/apps/webapp/test/runsReplicationService.part4.test.ts
new file mode 100644
index 00000000000..835192ad0fb
--- /dev/null
+++ b/apps/webapp/test/runsReplicationService.part4.test.ts
@@ -0,0 +1,710 @@
+import { ClickHouse } from "@internal/clickhouse";
+import { replicationContainerTest } from "@internal/testcontainers";
+import { setTimeout } from "node:timers/promises";
+import { z } from "zod";
+import { TaskRunStatus } from "~/database-types";
+import { RunsReplicationService } from "~/services/runsReplicationService.server";
+import { createInMemoryTracing, createInMemoryMetrics } from "./utils/tracing";
+import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory";
+import superjson from "superjson";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+describe("RunsReplicationService (part 4/7)", () => {
+  replicationContainerTest(
+    "should replicate updates to an existing TaskRun to ClickHouse",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
+
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-update",
+        logLevel: "warn",
+      });
+
+      const runsReplicationService = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-update",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 1,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationService.start();
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test-update",
+          slug: "test-update",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test-update",
+          slug: "test-update",
+          organizationId: organization.id,
+          externalRef: "test-update",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-update",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test-update",
+          pkApiKey: "test-update",
+          shortcode: "test-update",
+        },
+      });
+
+      const uniqueFriendlyId = `run_update_${Date.now()}`;
+      const taskRun = await prisma.taskRun.create({
+        data: {
+          friendlyId: uniqueFriendlyId,
+          taskIdentifier: "my-task-update",
+          payload: JSON.stringify({ foo: "update-test" }),
+          payloadType: "application/json",
+          traceId: "update-1234",
+          spanId: "update-1234",
+          queue: "test-update",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING",
+        },
+      });
+
+      await setTimeout(1000); // fixed pause between the insert and the update (presumably so the insert is flushed first)
+
+      await prisma.taskRun.update({
+        where: { id: taskRun.id },
+        data: { status: TaskRunStatus.COMPLETED_SUCCESSFULLY },
+      });
+
+      await setTimeout(1000); // fixed wait for the update to be replicated before querying ClickHouse
+
+      const queryRuns = clickhouse.reader.query({
+        name: "runs-replication-update",
+        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}",
+        schema: z.any(),
+        params: z.object({ run_id: z.string() }),
+      });
+
+      const [queryError, result] = await queryRuns({ run_id: taskRun.id });
+
+      expect(queryError).toBeNull();
+      expect(result?.length).toBe(1); // the FINAL query must return a single row carrying the updated status
+      expect(result?.[0]).toEqual(
+        expect.objectContaining({
+          run_id: taskRun.id,
+          status: TaskRunStatus.COMPLETED_SUCCESSFULLY,
+        })
+      );
+
+      await runsReplicationService.stop();
+    }
+  );
+
+  replicationContainerTest(
+    "should replicate deletions of a TaskRun to ClickHouse and mark as deleted",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
+
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-delete",
+        logLevel: "warn",
+      });
+
+      const runsReplicationService = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-delete",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 1,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationService.start();
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test-delete",
+          slug: "test-delete",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test-delete",
+          slug: "test-delete",
+          organizationId: organization.id,
+          externalRef: "test-delete",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-delete",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test-delete",
+          pkApiKey: "test-delete",
+          shortcode: "test-delete",
+        },
+      });
+
+      const uniqueFriendlyId = `run_delete_${Date.now()}`;
+      const taskRun = await prisma.taskRun.create({
+        data: {
+          friendlyId: uniqueFriendlyId,
+          taskIdentifier: "my-task-delete",
+          payload: JSON.stringify({ foo: "delete-test" }),
+          payloadType: "application/json",
+          traceId: "delete-1234",
+          spanId: "delete-1234",
+          queue: "test-delete",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING",
+        },
+      });
+
+      await setTimeout(1000); // fixed pause between the insert and the delete (presumably so the insert is flushed first)
+
+      await prisma.taskRun.delete({
+        where: { id: taskRun.id },
+      });
+
+      await setTimeout(1000); // fixed wait for the delete to be replicated before querying ClickHouse
+
+      const queryRuns = clickhouse.reader.query({
+        name: "runs-replication-delete",
+        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}",
+        schema: z.any(),
+        params: z.object({ run_id: z.string() }),
+      });
+
+      const [queryError, result] = await queryRuns({ run_id: taskRun.id });
+
+      expect(queryError).toBeNull();
+      expect(result?.length).toBe(0); // after the delete, the FINAL query must return no row for this run
+
+      await runsReplicationService.stop();
+    }
+  );
+  // Handover: A is shut down from its "message" listener; B (same slot) must then deliver run 2.
+  replicationContainerTest(
+    "should gracefully shutdown and allow a new service to pick up from the correct LSN (handover)",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
+      // ClickHouse client shared by both services and by the verification query below.
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-shutdown-handover",
+        logLevel: "warn",
+      });
+
+      // Service A: shut down mid-stream by the "message" listener registered further down
+      const runsReplicationServiceA = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-shutdown-handover",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 1,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationServiceA.start();
+      // Parent rows (organization, project, environment) referenced by the TaskRun rows below.
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test-shutdown-handover",
+          slug: "test-shutdown-handover",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test-shutdown-handover",
+          slug: "test-shutdown-handover",
+          organizationId: organization.id,
+          externalRef: "test-shutdown-handover",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-shutdown-handover",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test-shutdown-handover",
+          pkApiKey: "test-shutdown-handover",
+          shortcode: "test-shutdown-handover",
+        },
+      });
+
+      const run1Id = `run_shutdown_handover_1_${Date.now()}`;
+      // Trigger shutdown() from inside the event stream whenever an insert message is reported.
+      runsReplicationServiceA.events.on("message", async ({ message, service }) => {
+        if (message.tag === "insert") {
+          await service.shutdown();
+        }
+      });
+      // Two runs created back-to-back; the first check below expects only run 1 in ClickHouse.
+      const taskRun1 = await prisma.taskRun.create({
+        data: {
+          friendlyId: run1Id,
+          taskIdentifier: "my-task-shutdown-handover-1",
+          payload: JSON.stringify({ foo: "handover-1" }),
+          payloadType: "application/json",
+          traceId: "handover-1-1234",
+          spanId: "handover-1-1234",
+          queue: "test-shutdown-handover",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING",
+        },
+      });
+
+      const run2Id = `run_shutdown_handover_2_${Date.now()}`;
+      const taskRun2 = await prisma.taskRun.create({
+        data: {
+          friendlyId: run2Id,
+          taskIdentifier: "my-task-shutdown-handover-2",
+          payload: JSON.stringify({ foo: "handover-2" }),
+          payloadType: "application/json",
+          traceId: "handover-2-1234",
+          spanId: "handover-2-1234",
+          queue: "test-shutdown-handover",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING",
+        },
+      });
+      // Fixed wait for replication; no explicit completion signal is awaited here.
+      await setTimeout(1000);
+      // No WHERE clause: assumes the table holds only this test's rows — NOTE(review): confirm.
+      const queryRuns = clickhouse.reader.query({
+        name: "runs-replication-shutdown-handover",
+        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL ORDER BY created_at ASC",
+        schema: z.any(),
+      });
+      const [queryError, result] = await queryRuns({});
+      expect(queryError).toBeNull();
+      expect(result?.length).toBe(1);
+      expect(result?.[0]).toEqual(expect.objectContaining({ run_id: taskRun1.id }));
+
+      // Service B: same options, slot and publication as A; expected to replicate what A left behind
+      const runsReplicationServiceB = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-shutdown-handover",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 1,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationServiceB.start();
+
+      await setTimeout(1000);
+
+      const [queryErrorB, resultB] = await queryRuns({});
+      // After the handover both runs must be present; arrayContaining ignores row order.
+      expect(queryErrorB).toBeNull();
+      expect(resultB?.length).toBe(2);
+      expect(resultB).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({ run_id: taskRun1.id }),
+          expect.objectContaining({ run_id: taskRun2.id }),
+        ])
+      );
+
+      await runsReplicationServiceB.stop();
+    }
+  );
+  // Service A shuts down after its run is confirmed in ClickHouse; service B handles a later run.
+  replicationContainerTest(
+    "should not re-process already handled data if shutdown is called after all transactions are processed",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
+      // ClickHouse client shared by both services and by the verification query.
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-shutdown-after-processed",
+        logLevel: "warn",
+      });
+
+      // Service A: replicates run 1, then is shut down explicitly once run 1 has been verified
+      const runsReplicationServiceA = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-shutdown-after-processed",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 1,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationServiceA.start();
+      // Parent rows referenced by the TaskRun records created below.
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test-shutdown-after-processed",
+          slug: "test-shutdown-after-processed",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test-shutdown-after-processed",
+          slug: "test-shutdown-after-processed",
+          organizationId: organization.id,
+          externalRef: "test-shutdown-after-processed",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-shutdown-after-processed",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test-shutdown-after-processed",
+          pkApiKey: "test-shutdown-after-processed",
+          shortcode: "test-shutdown-after-processed",
+        },
+      });
+
+      const run1Id = `run_shutdown_after_processed_${Date.now()}`;
+      const taskRun1 = await prisma.taskRun.create({
+        data: {
+          friendlyId: run1Id,
+          taskIdentifier: "my-task-shutdown-after-processed",
+          payload: JSON.stringify({ foo: "after-processed" }),
+          payloadType: "application/json",
+          traceId: "after-processed-1234",
+          spanId: "after-processed-1234",
+          queue: "test-shutdown-after-processed",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING",
+        },
+      });
+      // Fixed wait so service A can flush run 1 before it is checked.
+      await setTimeout(1000);
+      // Parameterised lookup by run_id; reused after the restart to look up run 2.
+      const queryRuns = clickhouse.reader.query({
+        name: "runs-replication-shutdown-after-processed",
+        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}",
+        schema: z.any(),
+        params: z.object({ run_id: z.string() }),
+      });
+
+      const [queryErrorA, resultA] = await queryRuns({ run_id: taskRun1.id });
+      expect(queryErrorA).toBeNull();
+      expect(resultA?.length).toBe(1);
+      expect(resultA?.[0]).toEqual(expect.objectContaining({ run_id: taskRun1.id }));
+      // Shut down only now, after run 1 has been observed in ClickHouse.
+      await runsReplicationServiceA.shutdown();
+
+      await setTimeout(500);
+      // Created after A's shutdown() resolved and before B starts; B has to pick it up on start.
+      const taskRun2 = await prisma.taskRun.create({
+        data: {
+          friendlyId: `run_shutdown_after_processed_${Date.now()}`,
+          taskIdentifier: "my-task-shutdown-after-processed",
+          payload: JSON.stringify({ foo: "after-processed-2" }),
+          payloadType: "application/json",
+          traceId: "after-processed-2-1234",
+          spanId: "after-processed-2-1234",
+          queue: "test-shutdown-after-processed",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING",
+        },
+      });
+
+      // Service B: same options, slot and publication as service A
+      const runsReplicationServiceB = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-shutdown-after-processed",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 1,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationServiceB.start();
+
+      await setTimeout(1000);
+      // NOTE(review): only run 2's arrival is asserted; nothing here detects run 1 re-processing.
+      const [queryErrorB, resultB] = await queryRuns({ run_id: taskRun2.id });
+      expect(queryErrorB).toBeNull();
+      expect(resultB?.length).toBe(1);
+      expect(resultB?.[0]).toEqual(expect.objectContaining({ run_id: taskRun2.id }));
+
+      await runsReplicationServiceB.stop();
+    }
+  );
+
+  replicationContainerTest(
+    "should record metrics with correct values when replicating runs",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
+
+      const clickhouseClient = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-metrics",
+        logLevel: "warn",
+      });
+      // In-memory tracer and meter; only the meter's collected data is asserted on below.
+      const { tracer } = createInMemoryTracing();
+      const inMemoryMetrics = createInMemoryMetrics();
+
+      const replication = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouseClient),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-metrics",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 2,
+        flushIntervalMs: 100,
+        flushBatchSize: 5,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        tracer,
+        meter: inMemoryMetrics.meter,
+        logLevel: "warn",
+      });
+
+      await replication.start();
+      // Parent rows referenced by every TaskRun created below.
+      const org = await prisma.organization.create({
+        data: {
+          title: "test-metrics",
+          slug: "test-metrics",
+        },
+      });
+
+      const proj = await prisma.project.create({
+        data: {
+          name: "test-metrics",
+          slug: "test-metrics",
+          organizationId: org.id,
+          externalRef: "test-metrics",
+        },
+      });
+
+      const environment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-metrics",
+          type: "DEVELOPMENT",
+          projectId: proj.id,
+          organizationId: org.id,
+          apiKey: "test-metrics",
+          pkApiKey: "test-metrics",
+          shortcode: "test-metrics",
+        },
+      });
+      // Five PENDING runs, created one at a time.
+      const stamp = Date.now();
+      const runIds: string[] = [];
+
+      for (let index = 0; index < 5; index += 1) {
+        const { id } = await prisma.taskRun.create({
+          data: {
+            friendlyId: `run_metrics_${stamp}_${index}`,
+            taskIdentifier: "my-task-metrics",
+            payload: JSON.stringify({ index }),
+            payloadType: "application/json",
+            traceId: `metrics-${stamp}-${index}`,
+            spanId: `metrics-${stamp}-${index}`,
+            queue: "test-metrics",
+            runtimeEnvironmentId: environment.id,
+            projectId: proj.id,
+            organizationId: org.id,
+            environmentType: "DEVELOPMENT",
+            engine: "V2",
+            status: "PENDING",
+          },
+        });
+        runIds.push(id);
+      }
+      // Fixed pauses between phases give the service time to flush each wave of changes.
+      await setTimeout(1000);
+      // Move the first three runs to EXECUTING.
+      for (const id of runIds.slice(0, 3)) {
+        await prisma.taskRun.update({
+          where: { id },
+          data: { status: "EXECUTING" },
+        });
+      }
+
+      await setTimeout(1000);
+      // Complete the first two runs.
+      for (const id of runIds.slice(0, 2)) {
+        await prisma.taskRun.update({
+          where: { id },
+          data: {
+            status: "COMPLETED_SUCCESSFULLY",
+            completedAt: new Date(),
+            output: JSON.stringify({ result: "success" }),
+            outputType: "application/json",
+          },
+        });
+      }
+
+      await setTimeout(1000);
+      // Snapshot everything the in-memory meter has collected so far.
+      const collected = await inMemoryMetrics.getMetrics();
+
+      // Finds the first collected metric with the given descriptor name, or null when absent.
+      function getMetricData(name: string) {
+        for (const { scopeMetrics } of collected) {
+          for (const { metrics: scoped } of scopeMetrics) {
+            for (const candidate of scoped) {
+              if (candidate.descriptor.name === name) return candidate;
+            }
+          }
+        }
+        return null;
+      }
+
+      // Adds up `value` across all data points; a missing metric or falsy value counts as 0.
+      function sumCounterValues(metric: any): number {
+        const points = metric?.dataPoints;
+        if (!points) return 0;
+        return points.reduce((total: number, point: any) => total + (point.value || 0), 0);
+      }
+
+      // True when any data point shows a recording under one of several tolerated shapes.
+      function histogramHasData(metric: any): boolean {
+        const points = metric?.dataPoints;
+        if (!points || points.length === 0) return false;
+        const isPositive = (n: unknown) => typeof n === "number" && n > 0;
+        return points.some(
+          (point: any) =>
+            isPositive(point.count) ||
+            isPositive(point.value?.count) ||
+            (Array.isArray(point.buckets?.counts) && point.buckets.counts.some((c: number) => c > 0)) ||
+            isPositive(point.sum) ||
+            typeof point.min === "number" ||
+            typeof point.max === "number"
+        );
+      }
+
+      // Collects the given attribute from every data point that defines it.
+      function getCounterAttributeValues(metric: any, attributeName: string): unknown[] {
+        const points = metric?.dataPoints;
+        if (!points) return [];
+        return points
+          .filter((point: any) => point.attributes?.[attributeName] !== undefined)
+          .map((point: any) => point.attributes[attributeName]);
+      }
+
+      // Counters: at least one flush was recorded and carries a `success` attribute.
+      const batchesFlushed = getMetricData("runs_replication.batches_flushed");
+      expect(batchesFlushed).not.toBeNull();
+      expect(sumCounterValues(batchesFlushed)).toBeGreaterThanOrEqual(1);
+      expect(getCounterAttributeValues(batchesFlushed, "success").length).toBeGreaterThanOrEqual(1);
+
+      // All five created runs must be counted as inserted (updates may push the total higher).
+      const taskRunsInserted = getMetricData("runs_replication.task_runs_inserted");
+      expect(taskRunsInserted).not.toBeNull();
+      expect(sumCounterValues(taskRunsInserted)).toBeGreaterThanOrEqual(5);
+
+      const payloadsInserted = getMetricData("runs_replication.payloads_inserted");
+      expect(payloadsInserted).not.toBeNull();
+      expect(sumCounterValues(payloadsInserted)).toBeGreaterThanOrEqual(1);
+
+      const eventsProcessed = getMetricData("runs_replication.events_processed");
+      expect(eventsProcessed).not.toBeNull();
+      expect(sumCounterValues(eventsProcessed)).toBeGreaterThanOrEqual(1);
+
+      const eventTypes = getCounterAttributeValues(eventsProcessed, "event_type");
+      expect(eventTypes.length).toBeGreaterThanOrEqual(1);
+      expect(eventTypes).toContain("insert");
+
+      // Histograms: each must exist and hold at least one recording.
+      for (const histogramName of [
+        "runs_replication.batch_size",
+        "runs_replication.replication_lag_ms",
+        "runs_replication.flush_duration_ms",
+      ]) {
+        const histogram = getMetricData(histogramName);
+        expect(histogram).not.toBeNull();
+        expect(histogramHasData(histogram)).toBe(true);
+      }
+
+      await replication.stop();
+      await inMemoryMetrics.shutdown();
+    }
+  );
+});
diff --git a/apps/webapp/test/runsReplicationService.part5.test.ts b/apps/webapp/test/runsReplicationService.part5.test.ts
new file mode 100644
index 00000000000..3263efae7b8
--- /dev/null
+++ b/apps/webapp/test/runsReplicationService.part5.test.ts
@@ -0,0 +1,147 @@
+import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse";
+import { replicationContainerTest } from "@internal/testcontainers";
+import { Logger } from "@trigger.dev/core/logger";
+import { readFile } from "node:fs/promises";
+import { setTimeout } from "node:timers/promises";
+import { z } from "zod";
+import { RunsReplicationService } from "~/services/runsReplicationService.server";
+import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings";
+import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory";
+
+vi.setConfig({ testTimeout: 60_000 });
+// Part 5 of 7 of this suite — presumably split across files so CI shards can run them in parallel.
+describe("RunsReplicationService (part 5/7)", () => {
+  replicationContainerTest(
+    "should replicate all events in a single transaction (insert, update)",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
+      // ClickHouse client used by the service under test and by the verification query.
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-multi-event-tx",
+        logLevel: "warn",
+      });
+
+      const runsReplicationService = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-multi-event-tx",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 10,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationService.start();
+      // Parent rows referenced by the TaskRun records created inside the transaction.
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test-multi-event-tx",
+          slug: "test-multi-event-tx",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test-multi-event-tx",
+          slug: "test-multi-event-tx",
+          organizationId: organization.id,
+          externalRef: "test-multi-event-tx",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-multi-event-tx",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test-multi-event-tx",
+          pkApiKey: "test-multi-event-tx",
+          shortcode: "test-multi-event-tx",
+        },
+      });
+
+      // One transaction, three writes: insert run 1, insert run 2, then update run 1's status
+      const [run1, run2] = await prisma.$transaction(async (tx) => {
+        const run1 = await tx.taskRun.create({
+          data: {
+            friendlyId: `run_multi_event_1_${Date.now()}`,
+            taskIdentifier: "my-task-multi-event-1",
+            payload: JSON.stringify({ multi: 1 }),
+            payloadType: "application/json",
+            traceId: `multi-1-${Date.now()}`,
+            spanId: `multi-1-${Date.now()}`,
+            queue: "test-multi-event-tx",
+            runtimeEnvironmentId: runtimeEnvironment.id,
+            projectId: project.id,
+            organizationId: organization.id,
+            environmentType: "DEVELOPMENT",
+            engine: "V2",
+            status: "PENDING",
+            attemptNumber: 1,
+            createdAt: new Date(),
+            updatedAt: new Date(),
+          },
+        });
+        const run2 = await tx.taskRun.create({
+          data: {
+            friendlyId: `run_multi_event_2_${Date.now()}`,
+            taskIdentifier: "my-task-multi-event-2",
+            payload: JSON.stringify({ multi: 2 }),
+            payloadType: "application/json",
+            traceId: `multi-2-${Date.now()}`,
+            spanId: `multi-2-${Date.now()}`,
+            queue: "test-multi-event-tx",
+            runtimeEnvironmentId: runtimeEnvironment.id,
+            projectId: project.id,
+            organizationId: organization.id,
+            environmentType: "DEVELOPMENT",
+            engine: "V2",
+            status: "PENDING",
+            attemptNumber: 1,
+            createdAt: new Date(),
+            updatedAt: new Date(),
+          },
+        });
+        await tx.taskRun.update({
+          where: { id: run1.id },
+          data: { status: "COMPLETED_SUCCESSFULLY" },
+        });
+
+        return [run1, run2];
+      });
+
+      // Wait for replication (fixed 1s sleep; no completion signal is awaited)
+      await setTimeout(1000);
+
+      // Query ClickHouse for both runs using FINAL (presumably to read merged row versions)
+      const queryRuns = clickhouse.reader.query({
+        name: "runs-replication-multi-event-tx",
+        query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id IN ({run_id_1:String}, {run_id_2:String})`,
+        schema: z.any(),
+        params: z.object({ run_id_1: z.string(), run_id_2: z.string() }),
+      });
+      // Run 1 must carry the status set by the in-transaction update; run 2 only needs to exist.
+      const [queryError, result] = await queryRuns({ run_id_1: run1.id, run_id_2: run2.id });
+      expect(queryError).toBeNull();
+      expect(result?.length).toBe(2);
+      const run1Result = result?.find((r: any) => r.run_id === run1.id);
+      const run2Result = result?.find((r: any) => r.run_id === run2.id);
+      expect(run1Result).toBeDefined();
+      expect(run1Result).toEqual(
+        expect.objectContaining({ run_id: run1.id, status: "COMPLETED_SUCCESSFULLY" })
+      );
+      expect(run2Result).toBeDefined();
+      expect(run2Result).toEqual(expect.objectContaining({ run_id: run2.id }));
+
+      await runsReplicationService.stop();
+    }
+  );
+});
diff --git a/apps/webapp/test/runsReplicationService.part6.test.ts b/apps/webapp/test/runsReplicationService.part6.test.ts
new file mode 100644
index 00000000000..276920f8491
--- /dev/null
+++ b/apps/webapp/test/runsReplicationService.part6.test.ts
@@ -0,0 +1,536 @@
+import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse";
+import { replicationContainerTest } from "@internal/testcontainers";
+import { Logger } from "@trigger.dev/core/logger";
+import { readFile } from "node:fs/promises";
+import { setTimeout } from "node:timers/promises";
+import { z } from "zod";
+import { RunsReplicationService } from "~/services/runsReplicationService.server";
+import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings";
+import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+describe("RunsReplicationService (part 6/7)", () => {
+  replicationContainerTest(
+    "should sort batch inserts according to table schema ordering for optimal performance",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public.\"TaskRun\" REPLICA IDENTITY FULL;`);
+      // ClickHouse client handed to the service under test.
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-sorting",
+        logLevel: "warn",
+      });
+
+      const runsReplicationService = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-sorting",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 10,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      // Listen to batchFlushed events to verify sorting
+      const batchFlushedEvents: Array<{
+        flushId: string;
+        taskRunInserts: any[];
+        payloadInserts: any[];
+      }> = [];
+
+      runsReplicationService.events.on("batchFlushed", (event) => {
+        batchFlushedEvents.push(event);
+      });
+      // The listener is attached before start(), presumably so that no flush goes unrecorded.
+      await runsReplicationService.start();
+
+      // Create two organizations to test sorting by organization_id (generated ids, not the slugs)
+      const org1 = await prisma.organization.create({
+        data: { title: "org-z", slug: "org-z" },
+      });
+
+      const org2 = await prisma.organization.create({
+        data: { title: "org-a", slug: "org-a" },
+      });
+
+      const project1 = await prisma.project.create({
+        data: {
+          name: "test-sorting-z",
+          slug: "test-sorting-z",
+          organizationId: org1.id,
+          externalRef: "test-sorting-z",
+        },
+      });
+
+      const project2 = await prisma.project.create({
+        data: {
+          name: "test-sorting-a",
+          slug: "test-sorting-a",
+          organizationId: org2.id,
+          externalRef: "test-sorting-a",
+        },
+      });
+
+      const env1 = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-sorting-z",
+          type: "DEVELOPMENT",
+          projectId: project1.id,
+          organizationId: org1.id,
+          apiKey: "test-sorting-z",
+          pkApiKey: "test-sorting-z",
+          shortcode: "test-sorting-z",
+        },
+      });
+
+      const env2 = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-sorting-a",
+          type: "DEVELOPMENT",
+          projectId: project2.id,
+          organizationId: org2.id,
+          apiKey: "test-sorting-a",
+          pkApiKey: "test-sorting-a",
+          shortcode: "test-sorting-a",
+        },
+      });
+
+      const now = Date.now();
+      // createdAt runs opposite to creation order (+2s, +1s, +0s), so arrival order is not sorted.
+      const run1 = await prisma.taskRun.create({
+        data: {
+          friendlyId: `run_sort_org_z_${now}`,
+          taskIdentifier: "my-task-sort",
+          payload: JSON.stringify({ org: "z" }),
+          payloadType: "application/json",
+          traceId: `sort-z-${now}`,
+          spanId: `sort-z-${now}`,
+          queue: "test-sorting",
+          runtimeEnvironmentId: env1.id,
+          projectId: project1.id,
+          organizationId: org1.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING",
+          createdAt: new Date(now + 2000),
+        },
+      });
+      await prisma.taskRun.update({
+        where: { id: run1.id },
+        data: { status: "DEQUEUED" },
+      });
+      // Two runs in the second organization: same project and environment, different createdAt.
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: `run_sort_org_a_${now}`,
+          taskIdentifier: "my-task-sort",
+          payload: JSON.stringify({ org: "a" }),
+          payloadType: "application/json",
+          traceId: `sort-a-${now}`,
+          spanId: `sort-a-${now}`,
+          queue: "test-sorting",
+          runtimeEnvironmentId: env2.id,
+          projectId: project2.id,
+          organizationId: org2.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING",
+          createdAt: new Date(now + 1000),
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: `run_sort_org_a_${now}_2`,
+          taskIdentifier: "my-task-sort",
+          payload: JSON.stringify({ org: "a" }),
+          payloadType: "application/json",
+          traceId: `sort-a-${now}`,
+          spanId: `sort-a-${now}`,
+          queue: "test-sorting",
+          runtimeEnvironmentId: env2.id,
+          projectId: project2.id,
+          organizationId: org2.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+          status: "PENDING",
+          createdAt: new Date(now),
+        },
+      });
+      // Fixed wait for the flush; only the first recorded batchFlushed event is inspected below.
+      await setTimeout(1000);
+      // Both insert lists need more than one row, otherwise the ordering loops would be vacuous.
+      expect(batchFlushedEvents[0]?.taskRunInserts.length).toBeGreaterThan(1);
+      expect(batchFlushedEvents[0]?.payloadInserts.length).toBeGreaterThan(1);
+
+      // Verify sorting order: organization_id, project_id, environment_id, created_at, run_id
+      for (let i = 1; i < batchFlushedEvents[0]?.taskRunInserts.length; i++) {
+        const prev = batchFlushedEvents[0]!.taskRunInserts[i - 1];
+        const curr = batchFlushedEvents[0]!.taskRunInserts[i];
+        // Composite sort key for each of the two neighbouring rows.
+        const prevKey = [
+          getTaskRunField(prev, "organization_id"),
+          getTaskRunField(prev, "project_id"),
+          getTaskRunField(prev, "environment_id"),
+          getTaskRunField(prev, "created_at"),
+          getTaskRunField(prev, "run_id"),
+        ];
+        const currKey = [
+          getTaskRunField(curr, "organization_id"),
+          getTaskRunField(curr, "project_id"),
+          getTaskRunField(curr, "environment_id"),
+          getTaskRunField(curr, "created_at"),
+          getTaskRunField(curr, "run_id"),
+        ];
+
+        const keysAreEqual = prevKey.every((val, idx) => val === currKey[idx]);
+        if (keysAreEqual) {
+          // Identical keys (strict equality on every field): either relative order is valid
+          continue;
+        }
+
+        // Compare tuples lexicographically: the first field that differs decides the order
+        let isCorrectOrder = false;
+        for (let j = 0; j < prevKey.length; j++) {
+          if (prevKey[j] < currKey[j]) {
+            isCorrectOrder = true;
+            break;
+          }
+          if (prevKey[j] > currKey[j]) {
+            isCorrectOrder = false;
+            break;
+          }
+          // If equal, continue to next field
+        }
+
+        expect(isCorrectOrder).toBeTruthy();
+      }
+
+      // Verify payloadInserts are also sorted by run_id (non-decreasing between neighbours)
+      for (let i = 1; i < batchFlushedEvents[0]?.payloadInserts.length; i++) {
+        const prev = batchFlushedEvents[0]!.payloadInserts[i - 1];
+        const curr = batchFlushedEvents[0]!.payloadInserts[i];
+        expect(getPayloadField(prev, "run_id") <= getPayloadField(curr, "run_id")).toBeTruthy();
+      }
+
+      await runsReplicationService.stop();
+    }
+  );
+
+  replicationContainerTest( // end-to-end: one Postgres TaskRun row -> every ClickHouse column asserted
+    "should exhaustively replicate all TaskRun columns to ClickHouse",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
+
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-exhaustive",
+        logLevel: "warn",
+      });
+
+      const runsReplicationService = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-exhaustive",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 1,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationService.start(); // started before any rows are inserted
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test-exhaustive",
+          slug: "test-exhaustive",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test-exhaustive",
+          slug: "test-exhaustive",
+          organizationId: organization.id,
+          externalRef: "test-exhaustive",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-exhaustive",
+          type: "PRODUCTION",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test-exhaustive",
+          pkApiKey: "test-exhaustive",
+          shortcode: "test-exhaustive",
+        },
+      });
+
+      // Batch row whose id is used as batchId on the main run below
+      const batch = await prisma.batchTaskRun.create({
+        data: {
+          friendlyId: "batch_exhaustive",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          status: "PENDING",
+        },
+      });
+
+      // Root run whose id is used as rootTaskRunId on the parent run and the main run
+      const rootRun = await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_root_exhaustive",
+          taskIdentifier: "root-task",
+          payload: JSON.stringify({ root: true }),
+          traceId: "root-trace-id",
+          spanId: "root-span-id",
+          queue: "root-queue",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "PRODUCTION",
+          engine: "V2",
+        },
+      });
+
+      // Parent run (depth 1, child of rootRun) whose id is used as parentTaskRunId on the main run
+      const parentRun = await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_parent_exhaustive",
+          taskIdentifier: "parent-task",
+          payload: JSON.stringify({ parent: true }),
+          traceId: "parent-trace-id",
+          spanId: "parent-span-id",
+          queue: "parent-queue",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "PRODUCTION",
+          engine: "V2",
+          rootTaskRunId: rootRun.id,
+          depth: 1,
+        },
+      });
+
+      // Each timestamp gets a distinct offset before `now` so a mixed-up column would fail its assertion
+      const now = new Date();
+      const createdAt = new Date(now.getTime() - 10000);
+      const updatedAt = new Date(now.getTime() - 5000);
+      const startedAt = new Date(now.getTime() - 8000);
+      const executedAt = new Date(now.getTime() - 7500);
+      const completedAt = new Date(now.getTime() - 6000);
+      const delayUntil = new Date(now.getTime() - 9000);
+      const queuedAt = new Date(now.getTime() - 9500);
+      const expiredAt = null; // Not expired; asserted below to read back as null
+
+      // Create the main task run with every asserted field populated (scheduleId unset, expiredAt null)
+      const taskRun = await prisma.taskRun.create({
+        data: {
+          // Core identifiers
+          friendlyId: "run_exhaustive_test",
+          taskIdentifier: "exhaustive-task",
+
+          // Environment/project/org
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "PRODUCTION",
+
+          // Engine and execution
+          engine: "V2",
+          status: "COMPLETED_SUCCESSFULLY",
+          attemptNumber: 3,
+          queue: "exhaustive-queue",
+          workerQueue: "exhaustive-worker-queue",
+
+          // Relationships
+          // Note: scheduleId is deliberately left unset; schedule_id is asserted to be "" below
+          batchId: batch.id,
+          rootTaskRunId: rootRun.id,
+          parentTaskRunId: parentRun.id,
+          depth: 2,
+
+          // Timestamps
+          createdAt,
+          updatedAt,
+          startedAt,
+          executedAt,
+          completedAt,
+          delayUntil,
+          queuedAt,
+          expiredAt,
+
+          // Payload and output
+          payload: JSON.stringify({ input: "test-payload" }),
+          payloadType: "application/json",
+          output: JSON.stringify({ result: "test-output" }),
+          outputType: "application/json",
+          error: { message: "test error", name: "TestError" },
+
+          // Tracing
+          traceId: "exhaustive-trace-id-12345",
+          spanId: "exhaustive-span-id-67890",
+
+          // Versioning
+          taskVersion: "1.2.3",
+          sdkVersion: "3.0.0",
+          cliVersion: "2.5.1",
+
+          // Execution settings
+          machinePreset: "large-1x",
+          idempotencyKey: "exhaustive-idempotency-key-hashed",
+          idempotencyKeyOptions: {
+            key: "exhaustive-idempotency-key",
+            scope: "run",
+          },
+          ttl: "1h",
+          isTest: true,
+          concurrencyKey: "exhaustive-concurrency-key",
+          maxDurationInSeconds: 3600,
+
+          // Tags and bulk actions
+          runTags: ["tag1", "tag2", "exhaustive-tag"],
+          bulkActionGroupIds: ["bulk-group-1", "bulk-group-2"],
+
+          // Usage metrics
+          usageDurationMs: 12345,
+          costInCents: 50,
+          baseCostInCents: 25,
+        },
+      });
+
+      // Fixed 1.5s wait for replication before querying (no polling or retry)
+      await setTimeout(1500);
+
+      // Query ClickHouse directly for the main run's row, selecting all columns
+      const queryRuns = clickhouse.reader.query({
+        name: "exhaustive-replication-test",
+        query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}",
+        schema: z.any(), // rows are not schema-validated; columns are read untyped below
+        params: z.object({ run_id: z.string() }),
+      });
+
+      const [queryError, result] = await queryRuns({ run_id: taskRun.id });
+
+      expect(queryError).toBeNull();
+      expect(result).toHaveLength(1);
+
+      const clickhouseRun = result![0];
+
+      // Exhaustively verify each column, grouped in the same order as the create() call above
+      // Core identifiers
+      expect(clickhouseRun.run_id).toBe(taskRun.id);
+      expect(clickhouseRun.friendly_id).toBe("run_exhaustive_test");
+      expect(clickhouseRun.task_identifier).toBe("exhaustive-task");
+
+      // Environment/project/org
+      expect(clickhouseRun.environment_id).toBe(runtimeEnvironment.id);
+      expect(clickhouseRun.project_id).toBe(project.id);
+      expect(clickhouseRun.organization_id).toBe(organization.id);
+      expect(clickhouseRun.environment_type).toBe("PRODUCTION");
+
+      // Engine and execution
+      expect(clickhouseRun.engine).toBe("V2");
+      expect(clickhouseRun.status).toBe("COMPLETED_SUCCESSFULLY");
+      expect(clickhouseRun.attempt).toBe(3);
+      expect(clickhouseRun.queue).toBe("exhaustive-queue");
+      expect(clickhouseRun.worker_queue).toBe("exhaustive-worker-queue");
+
+      // Relationships
+      expect(clickhouseRun.schedule_id).toBe(""); // Empty when not set
+      expect(clickhouseRun.batch_id).toBe(batch.id);
+      expect(clickhouseRun.root_run_id).toBe(rootRun.id);
+      expect(clickhouseRun.parent_run_id).toBe(parentRun.id);
+      expect(clickhouseRun.depth).toBe(2);
+
+      // Timestamps (ClickHouse returns DateTime64 as strings in UTC without 'Z' suffix)
+      // Helper: ClickHouse timestamp string -> epoch milliseconds, or null for null / the zero-epoch string
+      function parseClickhouseTimestamp(ts: string | null): number | null {
+        if (ts === null || ts === "1970-01-01 00:00:00.000") return null;
+        return new Date(ts + "Z").getTime(); // "Z" appended so the string is parsed as UTC
+      }
+
+      expect(parseClickhouseTimestamp(clickhouseRun.created_at)).toBe(createdAt.getTime());
+      expect(parseClickhouseTimestamp(clickhouseRun.updated_at)).toBe(updatedAt.getTime());
+      expect(parseClickhouseTimestamp(clickhouseRun.started_at)).toBe(startedAt.getTime());
+      expect(parseClickhouseTimestamp(clickhouseRun.executed_at)).toBe(executedAt.getTime());
+      expect(parseClickhouseTimestamp(clickhouseRun.completed_at)).toBe(completedAt.getTime());
+      expect(parseClickhouseTimestamp(clickhouseRun.delay_until)).toBe(delayUntil.getTime());
+      expect(parseClickhouseTimestamp(clickhouseRun.queued_at)).toBe(queuedAt.getTime());
+      expect(parseClickhouseTimestamp(clickhouseRun.expired_at)).toBeNull();
+
+      // Output: the JSON string written above is expected back as an object wrapped under `data`
+      expect(clickhouseRun.output).toEqual({ data: { result: "test-output" } });
+
+      // Error: expected back with the same `data` wrapping as output
+      expect(clickhouseRun.error).toEqual({
+        data: { message: "test error", name: "TestError" },
+      });
+
+      // Tracing
+      expect(clickhouseRun.trace_id).toBe("exhaustive-trace-id-12345");
+      expect(clickhouseRun.span_id).toBe("exhaustive-span-id-67890");
+
+      // Versioning
+      expect(clickhouseRun.task_version).toBe("1.2.3");
+      expect(clickhouseRun.sdk_version).toBe("3.0.0");
+      expect(clickhouseRun.cli_version).toBe("2.5.1");
+
+      // Execution settings
+      expect(clickhouseRun.machine_preset).toBe("large-1x");
+      expect(clickhouseRun.idempotency_key).toBe("exhaustive-idempotency-key-hashed");
+      expect(clickhouseRun.idempotency_key_user).toBe("exhaustive-idempotency-key");
+      expect(clickhouseRun.idempotency_key_scope).toBe("run");
+      expect(clickhouseRun.expiration_ttl).toBe("1h");
+      expect(clickhouseRun.is_test).toBe(1); // ClickHouse returns booleans as integers
+      expect(clickhouseRun.concurrency_key).toBe("exhaustive-concurrency-key");
+      expect(clickhouseRun.max_duration_in_seconds).toBe(3600);
+
+      // Tags and bulk actions
+      expect(clickhouseRun.tags).toEqual(["tag1", "tag2", "exhaustive-tag"]);
+      expect(clickhouseRun.bulk_action_group_ids).toEqual(["bulk-group-1", "bulk-group-2"]);
+
+      // Usage metrics
+      expect(clickhouseRun.usage_duration_ms).toBe(12345);
+      expect(clickhouseRun.cost_in_cents).toBe(50);
+      expect(clickhouseRun.base_cost_in_cents).toBe(25);
+
+      // Internal ClickHouse columns (underscore-prefixed, not set by the create() call above)
+      expect(clickhouseRun._is_deleted).toBe(0);
+      expect(clickhouseRun._version).toBeDefined();
+      expect(typeof clickhouseRun._version).toBe("number"); // ClickHouse returns UInt64 as number
+
+      // Also verify the payload row in the payloads table (payload is wrapped under `data` too)
+      const queryPayloads = clickhouse.reader.query({
+        name: "exhaustive-payload-test",
+        query: "SELECT * FROM trigger_dev.raw_task_runs_payload_v1 WHERE run_id = {run_id:String}",
+        schema: z.any(),
+        params: z.object({ run_id: z.string() }),
+      });
+
+      const [payloadError, payloadResult] = await queryPayloads({ run_id: taskRun.id });
+
+      expect(payloadError).toBeNull();
+      expect(payloadResult).toHaveLength(1);
+      expect(payloadResult![0].run_id).toBe(taskRun.id);
+      expect(parseClickhouseTimestamp(payloadResult![0].created_at)).toBe(createdAt.getTime());
+      expect(payloadResult![0].payload).toEqual({ data: { input: "test-payload" } });
+
+      await runsReplicationService.stop(); // NOTE(review): skipped if an assertion above throws
+    }
+  );
+});
diff --git a/apps/webapp/test/runsReplicationService.part7.test.ts b/apps/webapp/test/runsReplicationService.part7.test.ts
new file mode 100644
index 00000000000..4f091d8eb4c
--- /dev/null
+++ b/apps/webapp/test/runsReplicationService.part7.test.ts
@@ -0,0 +1,120 @@
+import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse";
+import { replicationContainerTest } from "@internal/testcontainers";
+import { Logger } from "@trigger.dev/core/logger";
+import { readFile } from "node:fs/promises";
+import { setTimeout } from "node:timers/promises";
+import { z } from "zod";
+import { RunsReplicationService } from "~/services/runsReplicationService.server";
+import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings";
+import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+describe("RunsReplicationService (part 7/7)", () => {
+  replicationContainerTest(
+    "should be able to handle processing transactions for a long period of time",
+    { timeout: 60_000 * 5 }, // 5 minutes; the test itself sleeps for just over 1 minute
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`);
+
+      const clickhouse = new ClickHouse({
+        url: clickhouseContainer.getConnectionUrl(),
+        name: "runs-replication-long-tx",
+        logLevel: "warn",
+      });
+
+      const runsReplicationService = new RunsReplicationService({
+        clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse),
+        pgConnectionUrl: postgresContainer.getConnectionUri(),
+        serviceName: "runs-replication-long-tx",
+        slotName: "task_runs_to_clickhouse_v1",
+        publicationName: "task_runs_to_clickhouse_v1_publication",
+        redisOptions,
+        maxFlushConcurrency: 1,
+        flushIntervalMs: 100,
+        flushBatchSize: 10,
+        leaderLockTimeoutMs: 5000,
+        leaderLockExtendIntervalMs: 1000,
+        ackIntervalSeconds: 5,
+        logLevel: "warn",
+      });
+
+      await runsReplicationService.start(); // started before any rows are inserted
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test-long-tx",
+          slug: "test-long-tx",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test-long-tx",
+          slug: "test-long-tx",
+          organizationId: organization.id,
+          externalRef: "test-long-tx",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test-long-tx",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test-long-tx",
+          pkApiKey: "test-long-tx",
+          shortcode: "test-long-tx",
+        },
+      });
+
+      // Start an interval that creates a new run every 500ms until it is cleared below
+      const interval = setInterval(async () => { // global setInterval; only setTimeout is the promise-based import
+        await prisma.taskRun.create({
+          data: {
+            friendlyId: `run_long_tx_${Date.now()}`,
+            taskIdentifier: "my-task-long-tx",
+            payload: JSON.stringify({ long: 1 }),
+            payloadType: "application/json",
+            traceId: `long-${Date.now()}`,
+            spanId: `long-${Date.now()}`,
+            queue: "test-long-tx",
+            runtimeEnvironmentId: runtimeEnvironment.id,
+            projectId: project.id,
+            organizationId: organization.id,
+            environmentType: "DEVELOPMENT",
+            engine: "V2",
+            status: "PENDING",
+            attemptNumber: 1,
+            createdAt: new Date(),
+            updatedAt: new Date(),
+          },
+        });
+      }, 500);
+
+      // Let the interval run for 1 minute (up to ~120 ticks at 500ms)
+      await setTimeout(1 * 60 * 1000);
+
+      // Stop the interval
+      clearInterval(interval);
+
+      // Fixed 1s wait for the last inserts to replicate (no polling or retry)
+      await setTimeout(1000);
+
+      // Query ClickHouse for all runs using FINAL
+      const queryRuns = clickhouse.reader.query({
+        name: "runs-replication-long-tx",
+        query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL`,
+        schema: z.any(),
+      });
+
+      const [queryError, result] = await queryRuns({});
+      expect(queryError).toBeNull();
+
+      expect(result?.length).toBeGreaterThanOrEqual(50); // loose lower bound, well under the ~120 ticks
+
+      await runsReplicationService.stop(); // NOTE(review): skipped if an assertion above throws
+    }
+  );
+});
diff --git a/apps/webapp/test/runsRepository.part1.test.ts b/apps/webapp/test/runsRepository.part1.test.ts
index 45d91ad44e7..e33f4464db3 100644
--- a/apps/webapp/test/runsRepository.part1.test.ts
+++ b/apps/webapp/test/runsRepository.part1.test.ts
@@ -6,15 +6,15 @@ vi.mock("~/db.server", () => ({
   $replica: {},
 }));
 
-import { containerTest } from "@internal/testcontainers";
+import { replicationContainerTest } from "@internal/testcontainers";
 import { setTimeout } from "node:timers/promises";
 import { RunsRepository } from "~/services/runsRepository/runsRepository.server";
 import { setupClickhouseReplication } from "./utils/replicationUtils";
 
 vi.setConfig({ testTimeout: 60_000 });
 
-describe("RunsRepository (part 1/2)", () => {
-  containerTest(
+describe("RunsRepository (part 1/4)", () => {
+  replicationContainerTest(
     "should list runs, using clickhouse as the source",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       const { clickhouse } = await setupClickhouseReplication({
@@ -90,7 +90,7 @@ describe("RunsRepository (part 1/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should filter runs by task identifiers",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       const { clickhouse } = await setupClickhouseReplication({
@@ -198,7 +198,7 @@ describe("RunsRepository (part 1/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should filter runs by task versions",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       const { clickhouse } = await setupClickhouseReplication({
@@ -309,7 +309,7 @@ describe("RunsRepository (part 1/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should filter runs by status",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       const { clickhouse } = await setupClickhouseReplication({
@@ -419,331 +419,4 @@ describe("RunsRepository (part 1/2)", () => {
       expect(runs.map((r) => r.status).sort()).toEqual(["COMPLETED_SUCCESSFULLY", "PENDING"]);
     }
   );
-
-  containerTest(
-    "should filter runs by tags",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      const { clickhouse } = await setupClickhouseReplication({
-        prisma,
-        databaseUrl: postgresContainer.getConnectionUri(),
-        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
-        redisOptions,
-      });
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test",
-          slug: "test",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test",
-          slug: "test",
-          organizationId: organization.id,
-          externalRef: "test",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test",
-          pkApiKey: "test",
-          shortcode: "test",
-        },
-      });
-
-      // Create runs with different tags
-      const taskRun1 = await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_urgent",
-          taskIdentifier: "my-task",
-          runTags: ["urgent", "production"],
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1234",
-          spanId: "1234",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      const taskRun2 = await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_regular",
-          taskIdentifier: "my-task",
-          runTags: ["regular", "development"],
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1235",
-          spanId: "1235",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      const taskRun3 = await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_urgent_dev",
-          taskIdentifier: "my-task",
-          runTags: ["urgent", "development"],
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1236",
-          spanId: "1236",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await setTimeout(1000);
-
-      const runsRepository = new RunsRepository({
-        prisma,
-        clickhouse,
-      });
-
-      // Test filtering by tags
-      const { runs } = await runsRepository.listRuns({
-        page: { size: 10 },
-        projectId: project.id,
-        environmentId: runtimeEnvironment.id,
-        organizationId: organization.id,
-        tags: ["urgent"],
-      });
-
-      expect(runs).toHaveLength(2);
-      expect(runs.map((r) => r.friendlyId).sort()).toEqual(["run_urgent", "run_urgent_dev"]);
-    }
-  );
-
-  containerTest(
-    "should filter runs by scheduleId",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      const { clickhouse } = await setupClickhouseReplication({
-        prisma,
-        databaseUrl: postgresContainer.getConnectionUri(),
-        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
-        redisOptions,
-      });
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test",
-          slug: "test",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test",
-          slug: "test",
-          organizationId: organization.id,
-          externalRef: "test",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test",
-          pkApiKey: "test",
-          shortcode: "test",
-        },
-      });
-
-      // Create runs with different schedule IDs
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_scheduled_1",
-          taskIdentifier: "my-task",
-          scheduleId: "schedule_1",
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1234",
-          spanId: "1234",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_scheduled_2",
-          taskIdentifier: "my-task",
-          scheduleId: "schedule_2",
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1235",
-          spanId: "1235",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_unscheduled",
-          taskIdentifier: "my-task",
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1236",
-          spanId: "1236",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await setTimeout(1000);
-
-      const runsRepository = new RunsRepository({
-        prisma,
-        clickhouse,
-      });
-
-      // Test filtering by schedule ID
-      const { runs } = await runsRepository.listRuns({
-        page: { size: 10 },
-        projectId: project.id,
-        environmentId: runtimeEnvironment.id,
-        organizationId: organization.id,
-        scheduleId: "schedule_1",
-      });
-
-      expect(runs).toHaveLength(1);
-      expect(runs[0].friendlyId).toBe("run_scheduled_1");
-    }
-  );
-
-  containerTest(
-    "should filter runs by isTest flag",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      const { clickhouse } = await setupClickhouseReplication({
-        prisma,
-        databaseUrl: postgresContainer.getConnectionUri(),
-        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
-        redisOptions,
-      });
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test",
-          slug: "test",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test",
-          slug: "test",
-          organizationId: organization.id,
-          externalRef: "test",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test",
-          pkApiKey: "test",
-          shortcode: "test",
-        },
-      });
-
-      // Create test and non-test runs
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_test",
-          taskIdentifier: "my-task",
-          isTest: true,
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1234",
-          spanId: "1234",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_production",
-          taskIdentifier: "my-task",
-          isTest: false,
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1235",
-          spanId: "1235",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await setTimeout(1000);
-
-      const runsRepository = new RunsRepository({
-        prisma,
-        clickhouse,
-      });
-
-      // Test filtering by isTest=true
-      const testRuns = await runsRepository.listRuns({
-        page: { size: 10 },
-        projectId: project.id,
-        environmentId: runtimeEnvironment.id,
-        organizationId: organization.id,
-        isTest: true,
-      });
-
-      expect(testRuns.runs).toHaveLength(1);
-      expect(testRuns.runs[0].friendlyId).toBe("run_test");
-
-      // Test filtering by isTest=false
-      const productionRuns = await runsRepository.listRuns({
-        page: { size: 10 },
-        projectId: project.id,
-        environmentId: runtimeEnvironment.id,
-        organizationId: organization.id,
-        isTest: false,
-      });
-
-      expect(productionRuns.runs).toHaveLength(1);
-      expect(productionRuns.runs[0].friendlyId).toBe("run_production");
-    }
-  );
-});
\ No newline at end of file
+});
diff --git a/apps/webapp/test/runsRepository.part2.test.ts b/apps/webapp/test/runsRepository.part2.test.ts
index 793e19236c0..55cba6854f6 100644
--- a/apps/webapp/test/runsRepository.part2.test.ts
+++ b/apps/webapp/test/runsRepository.part2.test.ts
@@ -6,15 +6,15 @@ vi.mock("~/db.server", () => ({
   $replica: {},
 }));
 
-import { containerTest } from "@internal/testcontainers";
+import { replicationContainerTest } from "@internal/testcontainers";
 import { setTimeout } from "node:timers/promises";
 import { RunsRepository } from "~/services/runsRepository/runsRepository.server";
 import { setupClickhouseReplication } from "./utils/replicationUtils";
 
 vi.setConfig({ testTimeout: 60_000 });
 
-describe("RunsRepository (part 2/2)", () => {
-  containerTest(
+describe("RunsRepository (part 2/4)", () => {
+  replicationContainerTest(
     "should filter runs by rootOnly flag",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       const { clickhouse } = await setupClickhouseReplication({
@@ -108,7 +108,7 @@ describe("RunsRepository (part 2/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should filter runs by batchId",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       const { clickhouse } = await setupClickhouseReplication({
@@ -238,7 +238,7 @@ describe("RunsRepository (part 2/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should filter runs by runFriendlyIds",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       const { clickhouse } = await setupClickhouseReplication({
@@ -346,7 +346,7 @@ describe("RunsRepository (part 2/2)", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "should filter runs by runIds",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       const { clickhouse } = await setupClickhouseReplication({
@@ -453,443 +453,4 @@ describe("RunsRepository (part 2/2)", () => {
       expect(runs.map((r) => r.id).sort()).toEqual([run1.id, run3.id].sort());
     }
   );
-
-  containerTest(
-    "should filter runs by date range (from/to)",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      const { clickhouse } = await setupClickhouseReplication({
-        prisma,
-        databaseUrl: postgresContainer.getConnectionUri(),
-        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
-        redisOptions,
-      });
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test",
-          slug: "test",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test",
-          slug: "test",
-          organizationId: organization.id,
-          externalRef: "test",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test",
-          pkApiKey: "test",
-          shortcode: "test",
-        },
-      });
-
-      const now = new Date();
-      const yesterday = new Date(now.getTime() - 24 * 60 * 60 * 1000);
-      const tomorrow = new Date(now.getTime() + 24 * 60 * 60 * 1000);
-
-      // Create runs with different creation dates
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_yesterday",
-          taskIdentifier: "my-task",
-          createdAt: yesterday,
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1234",
-          spanId: "1234",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_today",
-          taskIdentifier: "my-task",
-          createdAt: now,
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1235",
-          spanId: "1235",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_tomorrow",
-          taskIdentifier: "my-task",
-          createdAt: tomorrow,
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1236",
-          spanId: "1236",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await setTimeout(1000);
-
-      const runsRepository = new RunsRepository({
-        prisma,
-        clickhouse,
-      });
-
-      // Test filtering by date range (from yesterday to today)
-      const { runs } = await runsRepository.listRuns({
-        page: { size: 10 },
-        projectId: project.id,
-        environmentId: runtimeEnvironment.id,
-        organizationId: organization.id,
-        from: yesterday.getTime(),
-        to: now.getTime(),
-      });
-
-      expect(runs).toHaveLength(2);
-      expect(runs.map((r) => r.friendlyId).sort()).toEqual(["run_today", "run_yesterday"]);
-    }
-  );
-
-  containerTest(
-    "should handle multiple filters combined",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      const { clickhouse } = await setupClickhouseReplication({
-        prisma,
-        databaseUrl: postgresContainer.getConnectionUri(),
-        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
-        redisOptions,
-      });
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test",
-          slug: "test",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test",
-          slug: "test",
-          organizationId: organization.id,
-          externalRef: "test",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test",
-          pkApiKey: "test",
-          shortcode: "test",
-        },
-      });
-
-      // Create runs with different combinations of properties
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_match",
-          taskIdentifier: "task-1",
-          taskVersion: "1.0.0",
-          status: "COMPLETED_SUCCESSFULLY",
-          isTest: false,
-          runTags: ["urgent"],
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1234",
-          spanId: "1234",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_no_match_task",
-          taskIdentifier: "task-2", // Different task
-          taskVersion: "1.0.0",
-          status: "COMPLETED_SUCCESSFULLY",
-          isTest: false,
-          runTags: ["urgent"],
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1235",
-          spanId: "1235",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_no_match_status",
-          taskIdentifier: "task-1",
-          taskVersion: "1.0.0",
-          status: "PENDING", // Different status
-          isTest: false,
-          runTags: ["urgent"],
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1236",
-          spanId: "1236",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await setTimeout(1000);
-
-      const runsRepository = new RunsRepository({
-        prisma,
-        clickhouse,
-      });
-
-      // Test combining multiple filters
-      const { runs } = await runsRepository.listRuns({
-        page: { size: 10 },
-        projectId: project.id,
-        environmentId: runtimeEnvironment.id,
-        organizationId: organization.id,
-        tasks: ["task-1"],
-        versions: ["1.0.0"],
-        statuses: ["COMPLETED_SUCCESSFULLY"],
-        isTest: false,
-        tags: ["urgent"],
-      });
-
-      expect(runs).toHaveLength(1);
-      expect(runs[0].friendlyId).toBe("run_match");
-    }
-  );
-
-  containerTest(
-    "should handle pagination correctly",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      const { clickhouse } = await setupClickhouseReplication({
-        prisma,
-        databaseUrl: postgresContainer.getConnectionUri(),
-        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
-        redisOptions,
-      });
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test",
-          slug: "test",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test",
-          slug: "test",
-          organizationId: organization.id,
-          externalRef: "test",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test",
-          pkApiKey: "test",
-          shortcode: "test",
-        },
-      });
-
-      // Create multiple runs for pagination testing
-      const runs = [];
-      for (let i = 1; i <= 5; i++) {
-        const run = await prisma.taskRun.create({
-          data: {
-            friendlyId: `run_${i}`,
-            taskIdentifier: "my-task",
-            payload: JSON.stringify({ foo: "bar" }),
-            traceId: `123${i}`,
-            spanId: `123${i}`,
-            queue: "test",
-            runtimeEnvironmentId: runtimeEnvironment.id,
-            projectId: project.id,
-            organizationId: organization.id,
-            environmentType: "DEVELOPMENT",
-            engine: "V2",
-          },
-        });
-        runs.push(run);
-      }
-
-      await setTimeout(1000);
-
-      const runsRepository = new RunsRepository({
-        prisma,
-        clickhouse,
-      });
-
-      // Test first page
-      const firstPage = await runsRepository.listRuns({
-        page: { size: 2 },
-        projectId: project.id,
-        environmentId: runtimeEnvironment.id,
-        organizationId: organization.id,
-      });
-
-      expect(firstPage.runs).toHaveLength(2);
-      expect(firstPage.pagination.nextCursor).toBeTruthy();
-      expect(firstPage.pagination.previousCursor).toBe(null);
-
-      // Test next page using cursor
-      const secondPage = await runsRepository.listRuns({
-        page: {
-          size: 2,
-          cursor: firstPage.pagination.nextCursor!,
-          direction: "forward",
-        },
-        projectId: project.id,
-        environmentId: runtimeEnvironment.id,
-        organizationId: organization.id,
-      });
-
-      expect(secondPage.runs).toHaveLength(2);
-      expect(secondPage.pagination.nextCursor).toBeTruthy();
-      expect(secondPage.pagination.previousCursor).toBeTruthy();
-    }
-  );
-
-  containerTest(
-    "should count new runs with listRunIds",
-    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
-      const { clickhouse } = await setupClickhouseReplication({
-        prisma,
-        databaseUrl: postgresContainer.getConnectionUri(),
-        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
-        redisOptions,
-      });
-
-      const organization = await prisma.organization.create({
-        data: {
-          title: "test",
-          slug: "test",
-        },
-      });
-
-      const project = await prisma.project.create({
-        data: {
-          name: "test",
-          slug: "test",
-          organizationId: organization.id,
-          externalRef: "test",
-        },
-      });
-
-      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
-        data: {
-          slug: "test",
-          type: "DEVELOPMENT",
-          projectId: project.id,
-          organizationId: organization.id,
-          apiKey: "test",
-          pkApiKey: "test",
-          shortcode: "test",
-        },
-      });
-
-      const taskRun = await prisma.taskRun.create({
-        data: {
-          friendlyId: "run_has_new",
-          taskIdentifier: "my-task",
-          payload: JSON.stringify({ foo: "bar" }),
-          traceId: "1234",
-          spanId: "1234",
-          queue: "test",
-          runtimeEnvironmentId: runtimeEnvironment.id,
-          projectId: project.id,
-          organizationId: organization.id,
-          environmentType: "DEVELOPMENT",
-          engine: "V2",
-        },
-      });
-
-      await setTimeout(1000);
-
-      const runsRepository = new RunsRepository({
-        prisma,
-        clickhouse,
-      });
-
-      const baseOptions = {
-        projectId: project.id,
-        environmentId: runtimeEnvironment.id,
-        organizationId: organization.id,
-      };
-
-      const createdAtMs = taskRun.createdAt.getTime();
-
-      const newRunIdsBefore = await runsRepository.listRunIds({
-        ...baseOptions,
-        from: createdAtMs - 1,
-        page: { size: 100 },
-      });
-      expect(newRunIdsBefore.length).toBeGreaterThanOrEqual(1);
-
-      const newRunIdsAfter = await runsRepository.listRunIds({
-        ...baseOptions,
-        from: createdAtMs + 60_000,
-        page: { size: 100 },
-      });
-      expect(newRunIdsAfter).toHaveLength(0);
-
-      const fromBeforeRun = createdAtMs - 1;
-
-      const matchingTaskIds = await runsRepository.listRunIds({
-        ...baseOptions,
-        from: fromBeforeRun,
-        tasks: ["my-task"],
-        page: { size: 100 },
-      });
-      expect(matchingTaskIds.length).toBeGreaterThanOrEqual(1);
-
-      const otherTaskIds = await runsRepository.listRunIds({
-        ...baseOptions,
-        from: fromBeforeRun,
-        tasks: ["other-task"],
-        page: { size: 100 },
-      });
-      expect(otherTaskIds).toHaveLength(0);
-    }
-  );
 });
diff --git a/apps/webapp/test/runsRepository.part3.test.ts b/apps/webapp/test/runsRepository.part3.test.ts
new file mode 100644
index 00000000000..543ce47a018
--- /dev/null
+++ b/apps/webapp/test/runsRepository.part3.test.ts
@@ -0,0 +1,343 @@
+import { describe, expect, vi } from "vitest";
+
+// Mock the db prisma client
+vi.mock("~/db.server", () => ({
+  prisma: {},
+  $replica: {},
+}));
+
+import { replicationContainerTest } from "@internal/testcontainers";
+import { setTimeout } from "node:timers/promises";
+import { RunsRepository } from "~/services/runsRepository/runsRepository.server";
+import { setupClickhouseReplication } from "./utils/replicationUtils";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+describe("RunsRepository (part 3/4)", () => {
+  replicationContainerTest(
+    "should filter runs by tags",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      const { clickhouse } = await setupClickhouseReplication({
+        prisma,
+        databaseUrl: postgresContainer.getConnectionUri(),
+        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
+        redisOptions,
+      });
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test",
+          slug: "test",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test",
+          slug: "test",
+          organizationId: organization.id,
+          externalRef: "test",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test",
+          pkApiKey: "test",
+          shortcode: "test",
+        },
+      });
+
+      // Seed three runs; only two of them carry the "urgent" tag (created rows are not read back here)
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_urgent",
+          taskIdentifier: "my-task",
+          runTags: ["urgent", "production"],
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1234",
+          spanId: "1234",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_regular",
+          taskIdentifier: "my-task",
+          runTags: ["regular", "development"],
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1235",
+          spanId: "1235",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_urgent_dev",
+          taskIdentifier: "my-task",
+          runTags: ["urgent", "development"],
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1236",
+          spanId: "1236",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await setTimeout(1000); // fixed 1s pause before querying; presumably lets replication catch up (TODO confirm)
+
+      const runsRepository = new RunsRepository({
+        prisma,
+        clickhouse,
+      });
+
+      // Filtering on "urgent" must return exactly the two runs tagged with it
+      const { runs } = await runsRepository.listRuns({
+        page: { size: 10 },
+        projectId: project.id,
+        environmentId: runtimeEnvironment.id,
+        organizationId: organization.id,
+        tags: ["urgent"],
+      });
+
+      expect(runs).toHaveLength(2);
+      expect(runs.map((r) => r.friendlyId).sort()).toEqual(["run_urgent", "run_urgent_dev"]);
+    }
+  );
+
+  replicationContainerTest(
+    "should filter runs by scheduleId",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      const { clickhouse } = await setupClickhouseReplication({
+        prisma,
+        databaseUrl: postgresContainer.getConnectionUri(),
+        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
+        redisOptions,
+      });
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test",
+          slug: "test",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test",
+          slug: "test",
+          organizationId: organization.id,
+          externalRef: "test",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test",
+          pkApiKey: "test",
+          shortcode: "test",
+        },
+      });
+
+      // Seed three runs: one per schedule ("schedule_1", "schedule_2") and one with no scheduleId
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_scheduled_1",
+          taskIdentifier: "my-task",
+          scheduleId: "schedule_1",
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1234",
+          spanId: "1234",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_scheduled_2",
+          taskIdentifier: "my-task",
+          scheduleId: "schedule_2",
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1235",
+          spanId: "1235",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_unscheduled",
+          taskIdentifier: "my-task",
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1236",
+          spanId: "1236",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await setTimeout(1000); // fixed 1s pause before querying; presumably lets replication catch up (TODO confirm)
+
+      const runsRepository = new RunsRepository({
+        prisma,
+        clickhouse,
+      });
+
+      // Filtering on "schedule_1" must return only the run created with that scheduleId
+      const { runs } = await runsRepository.listRuns({
+        page: { size: 10 },
+        projectId: project.id,
+        environmentId: runtimeEnvironment.id,
+        organizationId: organization.id,
+        scheduleId: "schedule_1",
+      });
+
+      expect(runs).toHaveLength(1);
+      expect(runs[0].friendlyId).toBe("run_scheduled_1");
+    }
+  );
+
+  replicationContainerTest(
+    "should filter runs by isTest flag",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      const { clickhouse } = await setupClickhouseReplication({
+        prisma,
+        databaseUrl: postgresContainer.getConnectionUri(),
+        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
+        redisOptions,
+      });
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test",
+          slug: "test",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test",
+          slug: "test",
+          organizationId: organization.id,
+          externalRef: "test",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test",
+          pkApiKey: "test",
+          shortcode: "test",
+        },
+      });
+
+      // Seed exactly one run with isTest=true and one with isTest=false
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_test",
+          taskIdentifier: "my-task",
+          isTest: true,
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1234",
+          spanId: "1234",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_production",
+          taskIdentifier: "my-task",
+          isTest: false,
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1235",
+          spanId: "1235",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await setTimeout(1000); // fixed 1s pause before querying; presumably lets replication catch up (TODO confirm)
+
+      const runsRepository = new RunsRepository({
+        prisma,
+        clickhouse,
+      });
+
+      // isTest=true must return only "run_test"
+      const testRuns = await runsRepository.listRuns({
+        page: { size: 10 },
+        projectId: project.id,
+        environmentId: runtimeEnvironment.id,
+        organizationId: organization.id,
+        isTest: true,
+      });
+
+      expect(testRuns.runs).toHaveLength(1);
+      expect(testRuns.runs[0].friendlyId).toBe("run_test");
+
+      // isTest=false must return only "run_production" (the complementary case)
+      const productionRuns = await runsRepository.listRuns({
+        page: { size: 10 },
+        projectId: project.id,
+        environmentId: runtimeEnvironment.id,
+        organizationId: organization.id,
+        isTest: false,
+      });
+
+      expect(productionRuns.runs).toHaveLength(1);
+      expect(productionRuns.runs[0].friendlyId).toBe("run_production");
+    }
+  );
+});
diff --git a/apps/webapp/test/runsRepository.part4.test.ts b/apps/webapp/test/runsRepository.part4.test.ts
new file mode 100644
index 00000000000..b79e41397ee
--- /dev/null
+++ b/apps/webapp/test/runsRepository.part4.test.ts
@@ -0,0 +1,455 @@
+import { describe, expect, vi } from "vitest";
+
+// Mock the db prisma client
+vi.mock("~/db.server", () => ({
+  prisma: {},
+  $replica: {},
+}));
+
+import { replicationContainerTest } from "@internal/testcontainers";
+import { setTimeout } from "node:timers/promises";
+import { RunsRepository } from "~/services/runsRepository/runsRepository.server";
+import { setupClickhouseReplication } from "./utils/replicationUtils";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+describe("RunsRepository (part 4/4)", () => {
+  replicationContainerTest(
+    "should filter runs by date range (from/to)",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      const { clickhouse } = await setupClickhouseReplication({
+        prisma,
+        databaseUrl: postgresContainer.getConnectionUri(),
+        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
+        redisOptions,
+      });
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test",
+          slug: "test",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test",
+          slug: "test",
+          organizationId: organization.id,
+          externalRef: "test",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test",
+          pkApiKey: "test",
+          shortcode: "test",
+        },
+      });
+
+      const now = new Date();
+      const yesterday = new Date(now.getTime() - 24 * 60 * 60 * 1000); // now minus 24 hours
+      const tomorrow = new Date(now.getTime() + 24 * 60 * 60 * 1000); // now plus 24 hours
+
+      // Seed one run per timestamp by setting createdAt explicitly: yesterday, now and tomorrow
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_yesterday",
+          taskIdentifier: "my-task",
+          createdAt: yesterday,
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1234",
+          spanId: "1234",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_today",
+          taskIdentifier: "my-task",
+          createdAt: now,
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1235",
+          spanId: "1235",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_tomorrow",
+          taskIdentifier: "my-task",
+          createdAt: tomorrow,
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1236",
+          spanId: "1236",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await setTimeout(1000); // fixed 1s pause before querying; presumably lets replication catch up (TODO confirm)
+
+      const runsRepository = new RunsRepository({
+        prisma,
+        clickhouse,
+      });
+
+      // Query from=yesterday to=now; both boundary runs are expected back and "run_tomorrow" is not
+      const { runs } = await runsRepository.listRuns({
+        page: { size: 10 },
+        projectId: project.id,
+        environmentId: runtimeEnvironment.id,
+        organizationId: organization.id,
+        from: yesterday.getTime(),
+        to: now.getTime(),
+      });
+
+      expect(runs).toHaveLength(2);
+      expect(runs.map((r) => r.friendlyId).sort()).toEqual(["run_today", "run_yesterday"]);
+    }
+  );
+
+  replicationContainerTest(
+    "should handle multiple filters combined",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      const { clickhouse } = await setupClickhouseReplication({
+        prisma,
+        databaseUrl: postgresContainer.getConnectionUri(),
+        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
+        redisOptions,
+      });
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test",
+          slug: "test",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test",
+          slug: "test",
+          organizationId: organization.id,
+          externalRef: "test",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test",
+          pkApiKey: "test",
+          shortcode: "test",
+        },
+      });
+
+      // Seed one run that satisfies every filter below, plus two that each differ in a single property
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_match",
+          taskIdentifier: "task-1",
+          taskVersion: "1.0.0",
+          status: "COMPLETED_SUCCESSFULLY",
+          isTest: false,
+          runTags: ["urgent"],
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1234",
+          spanId: "1234",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_no_match_task",
+          taskIdentifier: "task-2", // Different task
+          taskVersion: "1.0.0",
+          status: "COMPLETED_SUCCESSFULLY",
+          isTest: false,
+          runTags: ["urgent"],
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1235",
+          spanId: "1235",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_no_match_status",
+          taskIdentifier: "task-1",
+          taskVersion: "1.0.0",
+          status: "PENDING", // Different status
+          isTest: false,
+          runTags: ["urgent"],
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1236",
+          spanId: "1236",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await setTimeout(1000); // fixed 1s pause before querying; presumably lets replication catch up (TODO confirm)
+
+      const runsRepository = new RunsRepository({
+        prisma,
+        clickhouse,
+      });
+
+      // Pass task, version, status, isTest and tag filters together; only "run_match" is expected back
+      const { runs } = await runsRepository.listRuns({
+        page: { size: 10 },
+        projectId: project.id,
+        environmentId: runtimeEnvironment.id,
+        organizationId: organization.id,
+        tasks: ["task-1"],
+        versions: ["1.0.0"],
+        statuses: ["COMPLETED_SUCCESSFULLY"],
+        isTest: false,
+        tags: ["urgent"],
+      });
+
+      expect(runs).toHaveLength(1);
+      expect(runs[0].friendlyId).toBe("run_match");
+    }
+  );
+
+  replicationContainerTest(
+    "should handle pagination correctly",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      const { clickhouse } = await setupClickhouseReplication({
+        prisma,
+        databaseUrl: postgresContainer.getConnectionUri(),
+        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
+        redisOptions,
+      });
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test",
+          slug: "test",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test",
+          slug: "test",
+          organizationId: organization.id,
+          externalRef: "test",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test",
+          pkApiKey: "test",
+          shortcode: "test",
+        },
+      });
+
+      // Seed five runs (run_1..run_5) in Postgres so that a page size of 2 spans several pages
+      const runs = []; // NOTE(review): only pushed to below; never read by the assertions
+      for (let i = 1; i <= 5; i++) {
+        const run = await prisma.taskRun.create({
+          data: {
+            friendlyId: `run_${i}`,
+            taskIdentifier: "my-task",
+            payload: JSON.stringify({ foo: "bar" }),
+            traceId: `123${i}`,
+            spanId: `123${i}`,
+            queue: "test",
+            runtimeEnvironmentId: runtimeEnvironment.id,
+            projectId: project.id,
+            organizationId: organization.id,
+            environmentType: "DEVELOPMENT",
+            engine: "V2",
+          },
+        });
+        runs.push(run);
+      }
+
+      await setTimeout(1000); // presumably lets replication reach ClickHouse - TODO confirm
+
+      const runsRepository = new RunsRepository({
+        prisma,
+        clickhouse,
+      });
+
+      // First page: no cursor is supplied, so a next cursor is expected and no previous cursor
+      const firstPage = await runsRepository.listRuns({
+        page: { size: 2 },
+        projectId: project.id,
+        environmentId: runtimeEnvironment.id,
+        organizationId: organization.id,
+      });
+
+      expect(firstPage.runs).toHaveLength(2);
+      expect(firstPage.pagination.nextCursor).toBeTruthy();
+      expect(firstPage.pagination.previousCursor).toBe(null);
+
+      // Second page: resume from the first page's nextCursor, paging forward
+      const secondPage = await runsRepository.listRuns({
+        page: {
+          size: 2,
+          cursor: firstPage.pagination.nextCursor!, // non-null: asserted truthy above
+          direction: "forward",
+        },
+        projectId: project.id,
+        environmentId: runtimeEnvironment.id,
+        organizationId: organization.id,
+      });
+
+      expect(secondPage.runs).toHaveLength(2);
+      expect(secondPage.pagination.nextCursor).toBeTruthy(); // 4 of the 5 seeded runs listed so far
+      expect(secondPage.pagination.previousCursor).toBeTruthy();
+    }
+  );
+
+  replicationContainerTest(
+    "should count new runs with listRunIds",
+    async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
+      const { clickhouse } = await setupClickhouseReplication({
+        prisma,
+        databaseUrl: postgresContainer.getConnectionUri(),
+        clickhouseUrl: clickhouseContainer.getConnectionUrl(),
+        redisOptions,
+      });
+
+      const organization = await prisma.organization.create({
+        data: {
+          title: "test",
+          slug: "test",
+        },
+      });
+
+      const project = await prisma.project.create({
+        data: {
+          name: "test",
+          slug: "test",
+          organizationId: organization.id,
+          externalRef: "test",
+        },
+      });
+
+      const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+        data: {
+          slug: "test",
+          type: "DEVELOPMENT",
+          projectId: project.id,
+          organizationId: organization.id,
+          apiKey: "test",
+          pkApiKey: "test",
+          shortcode: "test",
+        },
+      });
+
+      const taskRun = await prisma.taskRun.create({
+        data: {
+          friendlyId: "run_has_new",
+          taskIdentifier: "my-task",
+          payload: JSON.stringify({ foo: "bar" }),
+          traceId: "1234",
+          spanId: "1234",
+          queue: "test",
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          environmentType: "DEVELOPMENT",
+          engine: "V2",
+        },
+      });
+
+      await setTimeout(1000); // presumably lets replication reach ClickHouse - TODO confirm
+
+      const runsRepository = new RunsRepository({
+        prisma,
+        clickhouse,
+      });
+
+      const baseOptions = { // scoping shared by every listRunIds call below
+        projectId: project.id,
+        environmentId: runtimeEnvironment.id,
+        organizationId: organization.id,
+      };
+
+      const createdAtMs = taskRun.createdAt.getTime(); // epoch milliseconds of the seeded run
+
+      const newRunIdsBefore = await runsRepository.listRunIds({
+        ...baseOptions,
+        from: createdAtMs - 1, // 1 ms before the seeded run was created
+        page: { size: 100 },
+      });
+      expect(newRunIdsBefore.length).toBeGreaterThanOrEqual(1); // the seeded run is listed
+
+      const newRunIdsAfter = await runsRepository.listRunIds({
+        ...baseOptions,
+        from: createdAtMs + 60_000, // 60 s after the seeded run was created
+        page: { size: 100 },
+      });
+      expect(newRunIdsAfter).toHaveLength(0); // no run was created that late
+
+      const fromBeforeRun = createdAtMs - 1; // same lower bound as the first query above
+
+      const matchingTaskIds = await runsRepository.listRunIds({
+        ...baseOptions,
+        from: fromBeforeRun,
+        tasks: ["my-task"], // the seeded run's taskIdentifier
+        page: { size: 100 },
+      });
+      expect(matchingTaskIds.length).toBeGreaterThanOrEqual(1);
+
+      const otherTaskIds = await runsRepository.listRunIds({
+        ...baseOptions,
+        from: fromBeforeRun,
+        tasks: ["other-task"], // no run in this test uses this taskIdentifier
+        page: { size: 100 },
+      });
+      expect(otherTaskIds).toHaveLength(0);
+    }
+  );
+});
diff --git a/apps/webapp/test/sessionsReplicationService.test.ts b/apps/webapp/test/sessionsReplicationService.test.ts
index 8b5dfe22fe1..1d3c761e813 100644
--- a/apps/webapp/test/sessionsReplicationService.test.ts
+++ b/apps/webapp/test/sessionsReplicationService.test.ts
@@ -1,5 +1,5 @@
 import { ClickHouse } from "@internal/clickhouse";
-import { containerTest } from "@internal/testcontainers";
+import { replicationContainerTest } from "@internal/testcontainers";
 import { setTimeout } from "node:timers/promises";
 import { z } from "zod";
 import { SessionsReplicationService } from "~/services/sessionsReplicationService.server";
@@ -8,7 +8,7 @@ import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickho
 vi.setConfig({ testTimeout: 60_000 });
 
 describe("SessionsReplicationService", () => {
-  containerTest(
+  replicationContainerTest(
     "replicates an insert from Postgres Session → ClickHouse sessions_v1",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       // Logical replication needs full-row images for DELETE events.
@@ -116,7 +116,7 @@ describe("SessionsReplicationService", () => {
     }
   );
 
-  containerTest(
+  replicationContainerTest(
     "replicates an update (close) from Postgres → ClickHouse",
     async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => {
       await prisma.$executeRawUnsafe(`ALTER TABLE public."Session" REPLICA IDENTITY FULL;`);
diff --git a/apps/webapp/vitest.config.ts b/apps/webapp/vitest.config.ts
index 6a6b550fc64..69eb980732f 100644
--- a/apps/webapp/vitest.config.ts
+++ b/apps/webapp/vitest.config.ts
@@ -1,8 +1,10 @@
 import { defineConfig } from "vitest/config";
+import { DurationShardingSequencer } from "@internal/testcontainers/sequencer";
 import tsconfigPaths from "vite-tsconfig-paths";
 
 export default defineConfig({
   test: {
+    sequence: { sequencer: DurationShardingSequencer },
     include: ["test/**/*.test.ts"],
     // *.e2e.test.ts: smoke matrix, run via vitest.e2e.config.ts.
     // *.e2e.full.test.ts: full auth suite, runs via vitest.e2e.full.config.ts
diff --git a/internal-packages/clickhouse/vitest.config.ts b/internal-packages/clickhouse/vitest.config.ts
index f3687eb4098..26c9ecebf11 100644
--- a/internal-packages/clickhouse/vitest.config.ts
+++ b/internal-packages/clickhouse/vitest.config.ts
@@ -1,7 +1,9 @@
 import { defineConfig } from "vitest/config";
+import { DurationShardingSequencer } from "@internal/testcontainers/sequencer";
 
 export default defineConfig({
   test: {
+    sequence: { sequencer: DurationShardingSequencer },
     include: ["**/*.test.ts"],
     globals: true,
     isolate: true,
diff --git a/internal-packages/llm-model-catalog/vitest.config.ts b/internal-packages/llm-model-catalog/vitest.config.ts
index 9ba46467cad..88831ee2ae5 100644
--- a/internal-packages/llm-model-catalog/vitest.config.ts
+++ b/internal-packages/llm-model-catalog/vitest.config.ts
@@ -1,7 +1,9 @@
 import { defineConfig } from "vitest/config";
+import { DurationShardingSequencer } from "@internal/testcontainers/sequencer";
 
 export default defineConfig({
   test: {
+    sequence: { sequencer: DurationShardingSequencer },
     include: ["**/*.test.ts"],
     globals: true,
     isolate: true,
diff --git a/internal-packages/replication/vitest.config.ts b/internal-packages/replication/vitest.config.ts
index f3687eb4098..26c9ecebf11 100644
--- a/internal-packages/replication/vitest.config.ts
+++ b/internal-packages/replication/vitest.config.ts
@@ -1,7 +1,9 @@
 import { defineConfig } from "vitest/config";
+import { DurationShardingSequencer } from "@internal/testcontainers/sequencer";
 
 export default defineConfig({
   test: {
+    sequence: { sequencer: DurationShardingSequencer },
     include: ["**/*.test.ts"],
     globals: true,
     isolate: true,
diff --git a/internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts b/internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts
index 3fe9d3348a0..a632c707390 100644
--- a/internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts
+++ b/internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts
@@ -1,4 +1,7 @@
-import { assertNonNullable, containerTest } from "@internal/testcontainers";
+import {
+  assertNonNullable,
+  containerTestWithIsolatedRedis as containerTest,
+} from "@internal/testcontainers";
 import { trace } from "@internal/tracing";
 import { expect, describe } from "vitest";
 import { RunEngine } from "../index.js";
diff --git a/internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts b/internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts
index 6208560a56a..8471c07844b 100644
--- a/internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts
+++ b/internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts
@@ -1,4 +1,7 @@
-import { assertNonNullable, containerTest } from "@internal/testcontainers";
+import {
+  assertNonNullable,
+  containerTestWithIsolatedRedis as containerTest,
+} from "@internal/testcontainers";
 import { trace } from "@internal/tracing";
 import { expect, describe, vi } from "vitest";
 import { RunEngine } from "../index.js";
diff --git a/internal-packages/run-engine/src/engine/tests/ttl.test.ts b/internal-packages/run-engine/src/engine/tests/ttl.test.ts
index e787d916f8a..13d4c55b669 100644
--- a/internal-packages/run-engine/src/engine/tests/ttl.test.ts
+++ b/internal-packages/run-engine/src/engine/tests/ttl.test.ts
@@ -142,177 +142,177 @@ describe("RunEngine ttl", () => {
     }
   });
 
-  containerTest("First enqueue from trigger includes ttlExpiresAt in message", async ({
-    prisma,
-    redisOptions,
-  }) => {
-    const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+  containerTest(
+    "First enqueue from trigger includes ttlExpiresAt in message",
+    async ({ prisma, redisOptions }) => {
+      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
 
-    const engine = new RunEngine({
-      prisma,
-      worker: {
-        redis: redisOptions,
-        workers: 1,
-        tasksPerWorker: 10,
-        pollIntervalMs: 100,
-      },
-      queue: {
-        redis: redisOptions,
-        processWorkerQueueDebounceMs: 50,
-        masterQueueConsumersDisabled: true,
-        ttlSystem: {
+      const engine = new RunEngine({
+        prisma,
+        worker: {
+          redis: redisOptions,
+          workers: 1,
+          tasksPerWorker: 10,
           pollIntervalMs: 100,
-          batchSize: 10,
-          batchMaxWaitMs: 100,
         },
-      },
-      runLock: {
-        redis: redisOptions,
-      },
-      machines: {
-        defaultMachine: "small-1x",
+        queue: {
+          redis: redisOptions,
+          processWorkerQueueDebounceMs: 50,
+          masterQueueConsumersDisabled: true,
+          ttlSystem: {
+            pollIntervalMs: 100,
+            batchSize: 10,
+            batchMaxWaitMs: 100,
+          },
+        },
+        runLock: {
+          redis: redisOptions,
+        },
         machines: {
-          "small-1x": {
-            name: "small-1x" as const,
-            cpu: 0.5,
-            memory: 0.5,
-            centsPerMs: 0.0001,
+          defaultMachine: "small-1x",
+          machines: {
+            "small-1x": {
+              name: "small-1x" as const,
+              cpu: 0.5,
+              memory: 0.5,
+              centsPerMs: 0.0001,
+            },
           },
+          baseCostInCents: 0.0001,
         },
-        baseCostInCents: 0.0001,
-      },
-      tracer: trace.getTracer("test", "0.0.0"),
-    });
+        tracer: trace.getTracer("test", "0.0.0"),
+      });
 
-    try {
-      const taskIdentifier = "test-task";
-      await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
+      try {
+        const taskIdentifier = "test-task";
+        await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
 
-      const run = await engine.trigger(
-        {
-          number: 1,
-          friendlyId: "run_ttlmsg1",
-          environment: authenticatedEnvironment,
-          taskIdentifier,
-          payload: "{}",
-          payloadType: "application/json",
-          context: {},
-          traceContext: {},
-          traceId: "t_ttl",
-          spanId: "s_ttl",
-          workerQueue: "main",
-          queue: "task/test-task",
-          isTest: false,
-          tags: [],
-          ttl: "1s",
-        },
-        prisma
-      );
+        const run = await engine.trigger(
+          {
+            number: 1,
+            friendlyId: "run_ttlmsg1",
+            environment: authenticatedEnvironment,
+            taskIdentifier,
+            payload: "{}",
+            payloadType: "application/json",
+            context: {},
+            traceContext: {},
+            traceId: "t_ttl",
+            spanId: "s_ttl",
+            workerQueue: "main",
+            queue: "task/test-task",
+            isTest: false,
+            tags: [],
+            ttl: "1s",
+          },
+          prisma
+        );
 
-      const message = await engine.runQueue.readMessage(
-        authenticatedEnvironment.organization.id,
-        run.id
-      );
-      assertNonNullable(message);
-      expect(message.ttlExpiresAt).toBeDefined();
-      expect(typeof message.ttlExpiresAt).toBe("number");
-    } finally {
-      await engine.quit();
+        const message = await engine.runQueue.readMessage(
+          authenticatedEnvironment.organization.id,
+          run.id
+        );
+        assertNonNullable(message);
+        expect(message.ttlExpiresAt).toBeDefined();
+        expect(typeof message.ttlExpiresAt).toBe("number");
+      } finally {
+        await engine.quit();
+      }
     }
-  });
+  );
 
-  containerTest("Re-enqueue with includeTtl false does not set ttlExpiresAt", async ({
-    prisma,
-    redisOptions,
-  }) => {
-    const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+  containerTest(
+    "Re-enqueue with includeTtl false does not set ttlExpiresAt",
+    async ({ prisma, redisOptions }) => {
+      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
 
-    const engine = new RunEngine({
-      prisma,
-      worker: {
-        redis: redisOptions,
-        workers: 1,
-        tasksPerWorker: 10,
-        pollIntervalMs: 100,
-      },
-      queue: {
-        redis: redisOptions,
-        processWorkerQueueDebounceMs: 50,
-        masterQueueConsumersDisabled: true,
-        ttlSystem: {
+      const engine = new RunEngine({
+        prisma,
+        worker: {
+          redis: redisOptions,
+          workers: 1,
+          tasksPerWorker: 10,
           pollIntervalMs: 100,
-          batchSize: 10,
-          batchMaxWaitMs: 100,
         },
-      },
-      runLock: {
-        redis: redisOptions,
-      },
-      machines: {
-        defaultMachine: "small-1x",
+        queue: {
+          redis: redisOptions,
+          processWorkerQueueDebounceMs: 50,
+          masterQueueConsumersDisabled: true,
+          ttlSystem: {
+            pollIntervalMs: 100,
+            batchSize: 10,
+            batchMaxWaitMs: 100,
+          },
+        },
+        runLock: {
+          redis: redisOptions,
+        },
         machines: {
-          "small-1x": {
-            name: "small-1x" as const,
-            cpu: 0.5,
-            memory: 0.5,
-            centsPerMs: 0.0001,
+          defaultMachine: "small-1x",
+          machines: {
+            "small-1x": {
+              name: "small-1x" as const,
+              cpu: 0.5,
+              memory: 0.5,
+              centsPerMs: 0.0001,
+            },
           },
+          baseCostInCents: 0.0001,
         },
-        baseCostInCents: 0.0001,
-      },
-      tracer: trace.getTracer("test", "0.0.0"),
-    });
+        tracer: trace.getTracer("test", "0.0.0"),
+      });
 
-    try {
-      const taskIdentifier = "test-task";
-      await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
+      try {
+        const taskIdentifier = "test-task";
+        await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
 
-      const run = await engine.trigger(
-        {
-          number: 1,
-          friendlyId: "run_reenq01",
-          environment: authenticatedEnvironment,
-          taskIdentifier,
-          payload: "{}",
-          payloadType: "application/json",
-          context: {},
-          traceContext: {},
-          traceId: "t_re",
-          spanId: "s_re",
-          workerQueue: "main",
-          queue: "task/test-task",
-          isTest: false,
-          tags: [],
-          ttl: "1s",
-        },
-        prisma
-      );
+        const run = await engine.trigger(
+          {
+            number: 1,
+            friendlyId: "run_reenq01",
+            environment: authenticatedEnvironment,
+            taskIdentifier,
+            payload: "{}",
+            payloadType: "application/json",
+            context: {},
+            traceContext: {},
+            traceId: "t_re",
+            spanId: "s_re",
+            workerQueue: "main",
+            queue: "task/test-task",
+            isTest: false,
+            tags: [],
+            ttl: "1s",
+          },
+          prisma
+        );
 
-      const messageAfterTrigger = await engine.runQueue.readMessage(
-        authenticatedEnvironment.organization.id,
-        run.id
-      );
-      assertNonNullable(messageAfterTrigger);
-      expect(messageAfterTrigger.ttlExpiresAt).toBeDefined();
-
-      await engine.enqueueSystem.enqueueRun({
-        run,
-        env: authenticatedEnvironment,
-        tx: prisma,
-        skipRunLock: true,
-        includeTtl: false,
-      });
+        const messageAfterTrigger = await engine.runQueue.readMessage(
+          authenticatedEnvironment.organization.id,
+          run.id
+        );
+        assertNonNullable(messageAfterTrigger);
+        expect(messageAfterTrigger.ttlExpiresAt).toBeDefined();
 
-      const messageAfterReenqueue = await engine.runQueue.readMessage(
-        authenticatedEnvironment.organization.id,
-        run.id
-      );
-      assertNonNullable(messageAfterReenqueue);
-      expect(messageAfterReenqueue.ttlExpiresAt).toBeUndefined();
-    } finally {
-      await engine.quit();
+        await engine.enqueueSystem.enqueueRun({
+          run,
+          env: authenticatedEnvironment,
+          tx: prisma,
+          skipRunLock: true,
+          includeTtl: false,
+        });
+
+        const messageAfterReenqueue = await engine.runQueue.readMessage(
+          authenticatedEnvironment.organization.id,
+          run.id
+        );
+        assertNonNullable(messageAfterReenqueue);
+        expect(messageAfterReenqueue.ttlExpiresAt).toBeUndefined();
+      } finally {
+        await engine.quit();
+      }
     }
-  });
+  );
 
   containerTest(
     "Re-enqueued runs are not expired by TTL once they have started",
@@ -392,10 +392,7 @@ describe("RunEngine ttl", () => {
 
         // Dequeue the run — this simulates the run starting to execute, which
         // ZREMs its TTL set entry.
-        await engine.runQueue.processMasterQueueForEnvironment(
-          authenticatedEnvironment.id,
-          10
-        );
+        await engine.runQueue.processMasterQueueForEnvironment(authenticatedEnvironment.id, 10);
         const dequeued = await engine.dequeueFromWorkerQueue({
           consumerId: "test-consumer",
           workerQueue: "main",
@@ -854,11 +851,9 @@ describe("RunEngine ttl", () => {
           consumerId: "test-consumer",
           workerQueue: "main",
           maxRunCount: 1,
-          backgroundWorkerId: (
-            await prisma.backgroundWorker.findFirst({
-              where: { runtimeEnvironmentId: authenticatedEnvironment.id },
-            })
-          )!.id,
+          backgroundWorkerId: (await prisma.backgroundWorker.findFirst({
+            where: { runtimeEnvironmentId: authenticatedEnvironment.id },
+          }))!.id,
         });
 
         expect(dequeued.length).toBe(0);
@@ -957,10 +952,7 @@ describe("RunEngine ttl", () => {
         // Manually process the master queue - the dequeue Lua script should
         // encounter the expired message and skip it (removing from queue sorted
         // sets but leaving messageKey and ttlQueueKey for TTL consumer)
-        await engine.runQueue.processMasterQueueForEnvironment(
-          authenticatedEnvironment.id,
-          10
-        );
+        await engine.runQueue.processMasterQueueForEnvironment(authenticatedEnvironment.id, 10);
 
         // Try to dequeue from worker queue - nothing should be there since
         // the expired message was skipped by the Lua script
@@ -976,12 +968,13 @@ describe("RunEngine ttl", () => {
         assertNonNullable(executionData2);
         expect(executionData2.run.status).toBe("PENDING");
 
-        // Now wait for the TTL consumer to poll and expire the run
-        // (pollIntervalMs is 5000 for TTL scan + up to 5000ms batch maxWaitMs + processing)
-        await setTimeout(13_000);
-
-        // The TTL consumer should have found and expired the run
-        expect(expiredEvents.length).toBe(1);
+        // Wait (event-driven) for the TTL consumer to poll and expire the run. pollIntervalMs is
+        // 5000ms here so the consumer fires only after the dequeue-skip assertions above; waitFor
+        // resolves as soon as the event lands instead of a fixed worst-case sleep.
+        await vi.waitFor(() => expect(expiredEvents.length).toBe(1), {
+          timeout: 15_000,
+          interval: 100,
+        });
         expect(expiredEvents[0]?.run.id).toBe(run.id);
 
         // Check the run status directly from the database (the batch TTL path
@@ -1006,8 +999,7 @@ describe("RunEngine ttl", () => {
   containerTest(
     "TTL expiration clears env concurrency keys with proj segment",
     async ({ prisma, redisOptions }) => {
-      const authenticatedEnvironment =
-        await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
 
       const engine = new RunEngine({
         prisma,
@@ -1071,14 +1063,9 @@ describe("RunEngine ttl", () => {
           prisma
         );
 
-        const queue = engine.runQueue.keys.queueKey(
-          authenticatedEnvironment,
-          "task/test-task"
-        );
-        const envConcurrencyKey =
-          engine.runQueue.keys.envCurrentConcurrencyKeyFromQueue(queue);
-        const envDequeuedKey =
-          engine.runQueue.keys.envCurrentDequeuedKeyFromQueue(queue);
+        const queue = engine.runQueue.keys.queueKey(authenticatedEnvironment, "task/test-task");
+        const envConcurrencyKey = engine.runQueue.keys.envCurrentConcurrencyKeyFromQueue(queue);
+        const envDequeuedKey = engine.runQueue.keys.envCurrentDequeuedKeyFromQueue(queue);
 
         await engine.runQueue.redis.sadd(envConcurrencyKey, run.id);
         await engine.runQueue.redis.sadd(envDequeuedKey, run.id);
@@ -1089,28 +1076,26 @@ describe("RunEngine ttl", () => {
         expect(concurrencyBefore).toContain(run.id);
 
         await setTimeout(1_500);
-        await engine.runQueue.processMasterQueueForEnvironment(
-          authenticatedEnvironment.id,
-          10
+        await engine.runQueue.processMasterQueueForEnvironment(authenticatedEnvironment.id, 10);
+        // Wait (event-driven) for the TTL consumer to expire the run; resolves as soon as the DB
+        // reflects EXPIRED instead of a fixed worst-case sleep (pollIntervalMs is 5000ms here).
+        await vi.waitFor(
+          async () => {
+            const expiredRun = await prisma.taskRun.findUnique({
+              where: { id: run.id },
+              select: { status: true },
+            });
+            expect(expiredRun?.status).toBe("EXPIRED");
+          },
+          { timeout: 15_000, interval: 200 }
         );
-        // Wait for TTL scan (5000ms) + batch maxWaitMs (5000ms) + processing buffer
-        await setTimeout(13_000);
-
-        const expiredRun = await prisma.taskRun.findUnique({
-          where: { id: run.id },
-          select: { status: true },
-        });
-        expect(expiredRun?.status).toBe("EXPIRED");
 
         const concurrencyAfter = await engine.runQueue.getCurrentConcurrencyOfEnvironment(
           authenticatedEnvironment
         );
         expect(concurrencyAfter).not.toContain(run.id);
 
-        const stillInDequeued = await engine.runQueue.redis.sismember(
-          envDequeuedKey,
-          run.id
-        );
+        const stillInDequeued = await engine.runQueue.redis.sismember(envDequeuedKey, run.id);
         expect(stillInDequeued).toBe(0);
       } finally {
         await engine.quit();
@@ -1215,10 +1200,7 @@ describe("RunEngine ttl", () => {
 
         // Manually process the master queue - the Lua script should skip the
         // expired message and dequeue only the non-expired one to the worker queue
-        await engine.runQueue.processMasterQueueForEnvironment(
-          authenticatedEnvironment.id,
-          10
-        );
+        await engine.runQueue.processMasterQueueForEnvironment(authenticatedEnvironment.id, 10);
 
         // Dequeue from worker queue - only the non-expired run should be there
         const dequeued = await engine.dequeueFromWorkerQueue({
@@ -1238,95 +1220,92 @@ describe("RunEngine ttl", () => {
     }
   );
 
-  containerTest(
-    "expireRunsBatch skips runs that are locked",
-    async ({ prisma, redisOptions }) => {
-      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+  containerTest("expireRunsBatch skips runs that are locked", async ({ prisma, redisOptions }) => {
+    const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
 
-      const engine = new RunEngine({
-        prisma,
-        worker: {
-          redis: redisOptions,
-          workers: 1,
-          tasksPerWorker: 10,
-          pollIntervalMs: 100,
-        },
-        queue: {
-          redis: redisOptions,
-          processWorkerQueueDebounceMs: 50,
-          masterQueueConsumersDisabled: true,
-          ttlSystem: {
-            disabled: true, // We'll manually test the batch function
-          },
-        },
-        runLock: {
-          redis: redisOptions,
+    const engine = new RunEngine({
+      prisma,
+      worker: {
+        redis: redisOptions,
+        workers: 1,
+        tasksPerWorker: 10,
+        pollIntervalMs: 100,
+      },
+      queue: {
+        redis: redisOptions,
+        processWorkerQueueDebounceMs: 50,
+        masterQueueConsumersDisabled: true,
+        ttlSystem: {
+          disabled: true, // We'll manually test the batch function
         },
+      },
+      runLock: {
+        redis: redisOptions,
+      },
+      machines: {
+        defaultMachine: "small-1x",
         machines: {
-          defaultMachine: "small-1x",
-          machines: {
-            "small-1x": {
-              name: "small-1x" as const,
-              cpu: 0.5,
-              memory: 0.5,
-              centsPerMs: 0.0001,
-            },
+          "small-1x": {
+            name: "small-1x" as const,
+            cpu: 0.5,
+            memory: 0.5,
+            centsPerMs: 0.0001,
           },
-          baseCostInCents: 0.0001,
         },
-        tracer: trace.getTracer("test", "0.0.0"),
-      });
+        baseCostInCents: 0.0001,
+      },
+      tracer: trace.getTracer("test", "0.0.0"),
+    });
 
-      try {
-        const taskIdentifier = "test-task";
+    try {
+      const taskIdentifier = "test-task";
 
-        await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
+      await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier);
 
-        // Trigger a run with TTL
-        const run = await engine.trigger(
-          {
-            number: 1,
-            friendlyId: "run_l1234",
-            environment: authenticatedEnvironment,
-            taskIdentifier,
-            payload: "{}",
-            payloadType: "application/json",
-            context: {},
-            traceContext: {},
-            traceId: "t1",
-            spanId: "s1",
-            workerQueue: "main",
-            queue: "task/test-task",
-            isTest: false,
-            tags: [],
-            ttl: "1s",
-          },
-          prisma
-        );
+      // Trigger a run with TTL
+      const run = await engine.trigger(
+        {
+          number: 1,
+          friendlyId: "run_l1234",
+          environment: authenticatedEnvironment,
+          taskIdentifier,
+          payload: "{}",
+          payloadType: "application/json",
+          context: {},
+          traceContext: {},
+          traceId: "t1",
+          spanId: "s1",
+          workerQueue: "main",
+          queue: "task/test-task",
+          isTest: false,
+          tags: [],
+          ttl: "1s",
+        },
+        prisma
+      );
 
-        // Manually lock the run (simulating it being about to execute)
-        await prisma.taskRun.update({
-          where: { id: run.id },
-          data: { lockedAt: new Date() },
-        });
+      // Manually lock the run (simulating it being about to execute)
+      await prisma.taskRun.update({
+        where: { id: run.id },
+        data: { lockedAt: new Date() },
+      });
 
-        // Try to expire the run via batch
-        const result = await engine.ttlSystem.expireRunsBatch([run.id]);
+      // Try to expire the run via batch
+      const result = await engine.ttlSystem.expireRunsBatch([run.id]);
 
-        // Should be skipped because it's locked
-        expect(result.expired.length).toBe(0);
-        expect(result.skipped.length).toBe(1);
-        expect(result.skipped[0]?.reason).toBe("locked");
+      // Should be skipped because it's locked
+      expect(result.expired.length).toBe(0);
+      expect(result.skipped.length).toBe(1);
+      expect(result.skipped[0]?.reason).toBe("locked");
 
-        // Run should still be PENDING
-        const executionData = await engine.getRunExecutionData({ runId: run.id });
-        assertNonNullable(executionData);
-        expect(executionData.run.status).toBe("PENDING");
-      } finally {
-        await engine.quit();
-      }
+      // Run should still be PENDING
+      const executionData = await engine.getRunExecutionData({ runId: run.id });
+      assertNonNullable(executionData);
+      expect(executionData.run.status).toBe("PENDING");
+    } finally {
+      await engine.quit();
     }
-  );
+  });
 
   containerTest(
     "expireRunsBatch skips runs with non-PENDING status",
@@ -1417,58 +1396,55 @@ describe("RunEngine ttl", () => {
     }
   );
 
-  containerTest(
-    "expireRunsBatch handles non-existent runs",
-    async ({ prisma, redisOptions }) => {
-      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+  containerTest("expireRunsBatch handles non-existent runs", async ({ prisma, redisOptions }) => {
+    const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
 
-      const engine = new RunEngine({
-        prisma,
-        worker: {
-          redis: redisOptions,
-          workers: 1,
-          tasksPerWorker: 10,
-          pollIntervalMs: 100,
-        },
-        queue: {
-          redis: redisOptions,
-          processWorkerQueueDebounceMs: 50,
-          masterQueueConsumersDisabled: true,
-          ttlSystem: {
-            disabled: true,
-          },
-        },
-        runLock: {
-          redis: redisOptions,
+    const engine = new RunEngine({
+      prisma,
+      worker: {
+        redis: redisOptions,
+        workers: 1,
+        tasksPerWorker: 10,
+        pollIntervalMs: 100,
+      },
+      queue: {
+        redis: redisOptions,
+        processWorkerQueueDebounceMs: 50,
+        masterQueueConsumersDisabled: true,
+        ttlSystem: {
+          disabled: true,
         },
+      },
+      runLock: {
+        redis: redisOptions,
+      },
+      machines: {
+        defaultMachine: "small-1x",
         machines: {
-          defaultMachine: "small-1x",
-          machines: {
-            "small-1x": {
-              name: "small-1x" as const,
-              cpu: 0.5,
-              memory: 0.5,
-              centsPerMs: 0.0001,
-            },
+          "small-1x": {
+            name: "small-1x" as const,
+            cpu: 0.5,
+            memory: 0.5,
+            centsPerMs: 0.0001,
           },
-          baseCostInCents: 0.0001,
         },
-        tracer: trace.getTracer("test", "0.0.0"),
-      });
+        baseCostInCents: 0.0001,
+      },
+      tracer: trace.getTracer("test", "0.0.0"),
+    });
 
-      try {
-        // Try to expire a non-existent run
-        const result = await engine.ttlSystem.expireRunsBatch(["non_existent_run_id"]);
+    try {
+      // Try to expire a non-existent run
+      const result = await engine.ttlSystem.expireRunsBatch(["non_existent_run_id"]);
 
-        // Should be skipped as not found
-        expect(result.expired.length).toBe(0);
-        expect(result.skipped.length).toBe(1);
-        expect(result.skipped[0]?.reason).toBe("not_found");
-      } finally {
-        await engine.quit();
-      }
+      // Should be skipped as not found
+      expect(result.expired.length).toBe(0);
+      expect(result.skipped.length).toBe(1);
+      expect(result.skipped[0]?.reason).toBe("not_found");
+    } finally {
+      await engine.quit();
     }
-  );
+  });
 
   containerTest(
     "TTL-expired child run completes waitpoint and resumes parent",
@@ -1639,54 +1615,51 @@ describe("RunEngine ttl", () => {
     }
   );
 
-  containerTest(
-    "expireRunsBatch handles empty array",
-    async ({ prisma, redisOptions }) => {
-      const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+  containerTest("expireRunsBatch handles empty array", async ({ prisma, redisOptions }) => {
+    const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
 
-      const engine = new RunEngine({
-        prisma,
-        worker: {
-          redis: redisOptions,
-          workers: 1,
-          tasksPerWorker: 10,
-          pollIntervalMs: 100,
-        },
-        queue: {
-          redis: redisOptions,
-          processWorkerQueueDebounceMs: 50,
-          masterQueueConsumersDisabled: true,
-          ttlSystem: {
-            disabled: true,
-          },
-        },
-        runLock: {
-          redis: redisOptions,
+    const engine = new RunEngine({
+      prisma,
+      worker: {
+        redis: redisOptions,
+        workers: 1,
+        tasksPerWorker: 10,
+        pollIntervalMs: 100,
+      },
+      queue: {
+        redis: redisOptions,
+        processWorkerQueueDebounceMs: 50,
+        masterQueueConsumersDisabled: true,
+        ttlSystem: {
+          disabled: true,
         },
+      },
+      runLock: {
+        redis: redisOptions,
+      },
+      machines: {
+        defaultMachine: "small-1x",
         machines: {
-          defaultMachine: "small-1x",
-          machines: {
-            "small-1x": {
-              name: "small-1x" as const,
-              cpu: 0.5,
-              memory: 0.5,
-              centsPerMs: 0.0001,
-            },
+          "small-1x": {
+            name: "small-1x" as const,
+            cpu: 0.5,
+            memory: 0.5,
+            centsPerMs: 0.0001,
           },
-          baseCostInCents: 0.0001,
         },
-        tracer: trace.getTracer("test", "0.0.0"),
-      });
+        baseCostInCents: 0.0001,
+      },
+      tracer: trace.getTracer("test", "0.0.0"),
+    });
 
-      try {
-        // Try to expire an empty array
-        const result = await engine.ttlSystem.expireRunsBatch([]);
+    try {
+      // Try to expire an empty array
+      const result = await engine.ttlSystem.expireRunsBatch([]);
 
-        expect(result.expired.length).toBe(0);
-        expect(result.skipped.length).toBe(0);
-      } finally {
-        await engine.quit();
-      }
+      expect(result.expired.length).toBe(0);
+      expect(result.skipped.length).toBe(0);
+    } finally {
+      await engine.quit();
     }
-  );
+  });
 });
diff --git a/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts b/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts
index 9937314d799..55e1b0d0836 100644
--- a/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts
+++ b/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts
@@ -107,7 +107,16 @@ describe("RunEngine Waitpoints", () => {
       const executionData = await engine.getRunExecutionData({ runId: run.id });
       expect(executionData?.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS");
 
-      await setTimeout(2_000);
+      // Event-driven wait: the run resumes once the datetime waitpoint (~1s out) completes and the
+      // worker unblocks it. Gate on the final state the test asserts (run EXECUTING), not just the
+      // waitpoint status, which flips slightly earlier.
+      await vi.waitFor(
+        async () => {
+          const ed = await engine.getRunExecutionData({ runId: run.id });
+          expect(ed?.snapshot.executionStatus).toBe("EXECUTING");
+        },
+        { timeout: 10_000, interval: 100 }
+      );
 
       const waitpoint2 = await prisma.waitpoint.findFirst({
         where: {
@@ -497,7 +506,14 @@ describe("RunEngine Waitpoints", () => {
       const executionData = await engine.getRunExecutionData({ runId: run.id });
       expect(executionData?.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS");
 
-      await setTimeout(750);
+      // Event-driven wait: resume as soon as the waitpoint completes, no fixed margin.
+      await vi.waitFor(
+        async () => {
+          const ed = await engine.getRunExecutionData({ runId: run.id });
+          expect(ed?.snapshot.executionStatus).toBe("EXECUTING");
+        },
+        { timeout: 10_000, interval: 100 }
+      );
 
       const executionData2 = await engine.getRunExecutionData({ runId: run.id });
       expect(executionData2?.snapshot.executionStatus).toBe("EXECUTING");
@@ -781,7 +797,16 @@ describe("RunEngine Waitpoints", () => {
           event = result;
         });
 
-        await setTimeout(1_250);
+        // Event-driven wait: resume as soon as the timeout fires and the worker notifies, instead
+        // of a fixed 1250ms margin against the ~1s worker poll (the original flaky race).
+        await vi.waitFor(
+          async () => {
+            const ed = await engine.getRunExecutionData({ runId: run.id });
+            expect(ed?.snapshot.executionStatus).toBe("EXECUTING");
+            assertNonNullable(event);
+          },
+          { timeout: 10_000, interval: 100 }
+        );
 
         const executionData2 = await engine.getRunExecutionData({ runId: run.id });
         expect(executionData2?.snapshot.executionStatus).toBe("EXECUTING");
diff --git a/internal-packages/run-engine/vitest.config.ts b/internal-packages/run-engine/vitest.config.ts
index fc9a8f271e5..cb048f00927 100644
--- a/internal-packages/run-engine/vitest.config.ts
+++ b/internal-packages/run-engine/vitest.config.ts
@@ -1,7 +1,9 @@
 import { defineConfig } from "vitest/config";
+import { DurationShardingSequencer } from "@internal/testcontainers/sequencer";
 
 export default defineConfig({
   test: {
+    sequence: { sequencer: DurationShardingSequencer },
     include: ["**/*.test.ts"],
     globals: true,
     // CI-only: absorbs timing races (real-clock waits vs worker poll interval) under shard CPU contention
diff --git a/internal-packages/schedule-engine/test/scheduleEngine.test.ts b/internal-packages/schedule-engine/test/scheduleEngine.test.ts
index 4063521d823..c261697dacc 100644
--- a/internal-packages/schedule-engine/test/scheduleEngine.test.ts
+++ b/internal-packages/schedule-engine/test/scheduleEngine.test.ts
@@ -239,108 +239,4 @@ describe("ScheduleEngine Integration", () => {
       }
     }
   );
-
-  // Deploy-moment backward compatibility. At deploy time, in-flight Redis jobs
-  // were enqueued by the old engine — their payload has no `lastScheduleTime`
-  // field — and `instance.lastScheduledTimestamp` is still populated (last
-  // written by the old engine pre-deploy). The new engine must report that DB
-  // value as `payload.lastTimestamp` so customers don't see a transient
-  // `undefined` for the one fire per schedule that drains the legacy queue.
-  containerTest(
-    "should fall back to instance.lastScheduledTimestamp when payload lacks lastScheduleTime",
-    { timeout: 30_000 },
-    async ({ prisma, redisOptions }) => {
-      const triggerCalls: TriggerScheduledTaskParams[] = [];
-      const engine = new ScheduleEngine({
-        prisma,
-        redis: redisOptions,
-        distributionWindow: { seconds: 10 },
-        worker: {
-          concurrency: 1,
-          disabled: true, // Don't actually run the worker — calling triggerScheduledTask directly
-          pollIntervalMs: 1000,
-        },
-        tracer: trace.getTracer("test", "0.0.0"),
-        onTriggerScheduledTask: async (params) => {
-          triggerCalls.push(params);
-          return { success: true };
-        },
-        isDevEnvironmentConnectedHandler: vi.fn().mockResolvedValue(true),
-      });
-
-      try {
-        const organization = await prisma.organization.create({
-          data: { title: "Legacy Payload Org", slug: "legacy-payload-org" },
-        });
-
-        const project = await prisma.project.create({
-          data: {
-            name: "Legacy Payload Project",
-            slug: "legacy-payload-project",
-            externalRef: "legacy-payload-ref",
-            organizationId: organization.id,
-          },
-        });
-
-        const environment = await prisma.runtimeEnvironment.create({
-          data: {
-            slug: "legacy-payload-env",
-            type: "PRODUCTION",
-            projectId: project.id,
-            organizationId: organization.id,
-            apiKey: "tr_legacy_1234",
-            pkApiKey: "pk_legacy_1234",
-            shortcode: "legacy-short",
-          },
-        });
-
-        const taskSchedule = await prisma.taskSchedule.create({
-          data: {
-            friendlyId: "sched_legacy_payload",
-            taskIdentifier: "legacy-payload-task",
-            projectId: project.id,
-            deduplicationKey: "legacy-payload-dedup",
-            userProvidedDeduplicationKey: false,
-            generatorExpression: "*/5 * * * *",
-            generatorDescription: "Every 5 minutes",
-            timezone: "UTC",
-            type: "DECLARATIVE",
-            active: true,
-            externalId: "legacy-ext",
-          },
-        });
-
-        // Pre-populate lastScheduledTimestamp on the instance — simulates the
-        // value the old engine wrote to the DB before this PR deployed.
-        const preDeployLastFire = new Date("2026-04-30T10:00:00.000Z");
-        const scheduleInstance = await prisma.taskScheduleInstance.create({
-          data: {
-            taskScheduleId: taskSchedule.id,
-            environmentId: environment.id,
-            projectId: project.id,
-            active: true,
-            lastScheduledTimestamp: preDeployLastFire,
-          },
-        });
-
-        // Call triggerScheduledTask directly without lastScheduleTime,
-        // simulating an in-flight Redis job enqueued by the old engine.
-        const exactScheduleTime = new Date("2026-04-30T10:05:00.000Z");
-        await engine.triggerScheduledTask({
-          instanceId: scheduleInstance.id,
-          finalAttempt: false,
-          exactScheduleTime,
-          // lastScheduleTime intentionally omitted — legacy payload shape
-        });
-
-        expect(triggerCalls.length).toBe(1);
-        expect(triggerCalls[0].payload.timestamp).toEqual(exactScheduleTime);
-        // Falls back to instance.lastScheduledTimestamp from the DB rather
-        // than reporting undefined for this one transitional fire.
-        expect(triggerCalls[0].payload.lastTimestamp).toEqual(preDeployLastFire);
-      } finally {
-        await engine.quit();
-      }
-    }
-  );
 });
diff --git a/internal-packages/schedule-engine/test/scheduleEngine2.test.ts b/internal-packages/schedule-engine/test/scheduleEngine2.test.ts
new file mode 100644
index 00000000000..64936a89152
--- /dev/null
+++ b/internal-packages/schedule-engine/test/scheduleEngine2.test.ts
@@ -0,0 +1,112 @@
+import { containerTest } from "@internal/testcontainers";
+import { trace } from "@internal/tracing";
+import { setTimeout } from "timers/promises";
+import { describe, expect, vi } from "vitest";
+import { TriggerScheduledTaskParams } from "../src/engine/types.js";
+import { ScheduleEngine } from "../src/index.js";
+
+describe("ScheduleEngine Integration (part 2)", () => {
+  // Deploy-moment backward compatibility. At deploy time, in-flight Redis jobs
+  // were enqueued by the old engine — their payload has no `lastScheduleTime`
+  // field — and `instance.lastScheduledTimestamp` is still populated (last
+  // written by the old engine pre-deploy). The new engine must report that DB
+  // value as `payload.lastTimestamp` so customers don't see a transient
+  // `undefined` for the one fire per schedule that drains the legacy queue.
+  containerTest(
+    "should fall back to instance.lastScheduledTimestamp when payload lacks lastScheduleTime",
+    { timeout: 30_000 },
+    async ({ prisma, redisOptions }) => {
+      const triggerCalls: TriggerScheduledTaskParams[] = [];
+      const engine = new ScheduleEngine({
+        prisma,
+        redis: redisOptions,
+        distributionWindow: { seconds: 10 },
+        worker: {
+          concurrency: 1,
+          disabled: true, // Don't actually run the worker — calling triggerScheduledTask directly
+          pollIntervalMs: 1000,
+        },
+        tracer: trace.getTracer("test", "0.0.0"),
+        onTriggerScheduledTask: async (params) => {
+          triggerCalls.push(params);
+          return { success: true };
+        },
+        isDevEnvironmentConnectedHandler: vi.fn().mockResolvedValue(true),
+      });
+
+      try {
+        const organization = await prisma.organization.create({
+          data: { title: "Legacy Payload Org", slug: "legacy-payload-org" },
+        });
+
+        const project = await prisma.project.create({
+          data: {
+            name: "Legacy Payload Project",
+            slug: "legacy-payload-project",
+            externalRef: "legacy-payload-ref",
+            organizationId: organization.id,
+          },
+        });
+
+        const environment = await prisma.runtimeEnvironment.create({
+          data: {
+            slug: "legacy-payload-env",
+            type: "PRODUCTION",
+            projectId: project.id,
+            organizationId: organization.id,
+            apiKey: "tr_legacy_1234",
+            pkApiKey: "pk_legacy_1234",
+            shortcode: "legacy-short",
+          },
+        });
+
+        const taskSchedule = await prisma.taskSchedule.create({
+          data: {
+            friendlyId: "sched_legacy_payload",
+            taskIdentifier: "legacy-payload-task",
+            projectId: project.id,
+            deduplicationKey: "legacy-payload-dedup",
+            userProvidedDeduplicationKey: false,
+            generatorExpression: "*/5 * * * *",
+            generatorDescription: "Every 5 minutes",
+            timezone: "UTC",
+            type: "DECLARATIVE",
+            active: true,
+            externalId: "legacy-ext",
+          },
+        });
+
+        // Pre-populate lastScheduledTimestamp on the instance — simulates the
+        // value the old engine wrote to the DB before the new engine was deployed.
+        const preDeployLastFire = new Date("2026-04-30T10:00:00.000Z");
+        const scheduleInstance = await prisma.taskScheduleInstance.create({
+          data: {
+            taskScheduleId: taskSchedule.id,
+            environmentId: environment.id,
+            projectId: project.id,
+            active: true,
+            lastScheduledTimestamp: preDeployLastFire,
+          },
+        });
+
+        // Call triggerScheduledTask directly without lastScheduleTime,
+        // simulating an in-flight Redis job enqueued by the old engine.
+        const exactScheduleTime = new Date("2026-04-30T10:05:00.000Z");
+        await engine.triggerScheduledTask({
+          instanceId: scheduleInstance.id,
+          finalAttempt: false,
+          exactScheduleTime,
+          // lastScheduleTime intentionally omitted — legacy payload shape
+        });
+
+        expect(triggerCalls.length).toBe(1);
+        expect(triggerCalls[0].payload.timestamp).toEqual(exactScheduleTime);
+        // Falls back to instance.lastScheduledTimestamp from the DB rather
+        // than reporting undefined for this one transitional fire.
+        expect(triggerCalls[0].payload.lastTimestamp).toEqual(preDeployLastFire);
+      } finally {
+        await engine.quit();
+      }
+    }
+  );
+});
diff --git a/internal-packages/schedule-engine/vitest.config.ts b/internal-packages/schedule-engine/vitest.config.ts
index e773ec6d163..8f9b6a01a9a 100644
--- a/internal-packages/schedule-engine/vitest.config.ts
+++ b/internal-packages/schedule-engine/vitest.config.ts
@@ -1,7 +1,9 @@
 import { defineConfig } from "vitest/config";
+import { DurationShardingSequencer } from "@internal/testcontainers/sequencer";
 
 export default defineConfig({
   test: {
+    sequence: { sequencer: DurationShardingSequencer },
     globals: true,
     // CI-only: absorbs timing races (real-clock waits vs worker poll interval) under shard CPU contention
     retry: process.env.CI ? 2 : 0,
diff --git a/internal-packages/testcontainers/README.md b/internal-packages/testcontainers/README.md
index 51c2240d6c9..8f74b1137f3 100644
--- a/internal-packages/testcontainers/README.md
+++ b/internal-packages/testcontainers/README.md
@@ -1,3 +1,78 @@
-# Test container
+# Test containers
 
-This is package exposes some useful vitest utilities for writing tests with Postgres, Prisma, and Redis.
+Vitest utilities for writing tests against real Postgres, Prisma, Redis and ClickHouse - we don't mock
+(see the root `CLAUDE.md`), we boot containers. Also exposes a duration-weighted shard sequencer for
+splitting slow suites across CI shards.
+
+## Choosing a fixture
+
+Most tests share one set of containers per vitest worker (booted once, reset between tests) - this is
+much faster than a container per test. Reach for an isolated variant only when a test needs it.
+
+| Fixture                          | Postgres       | Redis    | ClickHouse | Use for                                 |
+| -------------------------------- | -------------- | -------- | ---------- | --------------------------------------- |
+| `redisTest`                      | -              | shared   | -          | redis-only tests                        |
+| `postgresTest`                   | shared (clone) | -        | -          | db-only tests                           |
+| `containerTest`                  | shared (clone) | shared   | shared     | the default - needs all three           |
+| `isolatedRedisTest`              | -              | per-test | -          | background redis work (see below)       |
+| `containerTestWithIsolatedRedis` | shared (clone) | per-test | shared     | background redis work + db/clickhouse   |
+| `replicationContainerTest`       | per-test       | per-test | shared     | Postgres→ClickHouse logical replication |
+
+"shared (clone)" = one Postgres per worker with a template database; each test gets a fast `CREATE
+DATABASE ... TEMPLATE` clone, so schema isn't re-pushed per test.
+
+### The background-work gotcha
+
+If a test spawns work that **outlives the test body** - a `RunEngine`, a `redis-worker` Worker, a
+`BatchQueue` - and that work isn't fully drained before the test ends, you **must** use an isolated
+redis fixture (`isolatedRedisTest` / `containerTestWithIsolatedRedis`).
+
+On the shared fixture, the leaked background loop keeps polling the one worker-scoped redis after the
+test's clients close, bleeding into the next test. The symptom is an intermittent `"Connection is
+closed"` error or a test that hangs until its timeout. `FLUSHALL` between tests does **not** fix this -
+it clears data, not live connections/loops, so per-test key prefixes won't help either. A plain
+db/redis test with no lingering background work is fine on the shared fixtures.
+
+## Sharding (`./sequencer`)
+
+CI splits the slow suites with `vitest --shard=i/N`. `DurationShardingSequencer` replaces vitest's
+default file-count split with a duration-weighted one: it reads `test-timings.json` at the repo root
+(`{ "<repo-relative path>": <ms> }`) and greedily bin-packs files so each shard does roughly equal
+_work_, not an equal _number of files_. The packing is deterministic, so every shard computes the same
+bins and runs each file exactly once.
+
+Configs opt in via:
+
+```ts
+import { DurationShardingSequencer } from "@internal/testcontainers/sequencer";
+// in defineConfig:
+test: {
+  sequence: {
+    sequencer: DurationShardingSequencer,
+  },
+}
+```
+
+### Adding tests - nothing to do
+
+New test files are discovered by vitest's glob and sharded automatically. A file with no entry in
+`test-timings.json` is given the **median** duration as a fallback, so it's still placed on exactly one
+shard - correctness never depends on the timings being present or current.
+
+What the timings affect is **balance**. A new heavy test estimated at the median can be under-weighted
+and land on an already-full shard, making that shard slower. There's headroom between the current
+makespan and the CI budget to absorb this, so it tolerates drift - but if a shard creeps toward the
+budget, refresh the timings.
+
+### Refreshing `test-timings.json`
+
+Measure each shard with the JSON reporter and write per-file `endTime - startTime` (ms), keyed by
+repo-relative path, back into `test-timings.json`. Set `GITHUB_ACTIONS=true` so suites that
+`skipIf(CI)` are excluded, matching what actually runs on CI:
+
+```bash
+GITHUB_ACTIONS=true pnpm exec vitest run --reporter=json --outputFile=/tmp/run.json
+```
+
+Stale entries for deleted/renamed files are harmless (they're simply ignored). This is a periodic
+chore, not a per-PR one.
diff --git a/internal-packages/testcontainers/TESTING.md b/internal-packages/testcontainers/TESTING.md
new file mode 100644
index 00000000000..8ca6c335789
--- /dev/null
+++ b/internal-packages/testcontainers/TESTING.md
@@ -0,0 +1,60 @@
+# Fast local testing loop
+
+These tests use real Docker containers (Postgres, ClickHouse, Redis, Electric, MinIO) via testcontainers - never mocks. This guide describes the fast inner loop for working on them.
+
+## Prerequisites
+
+- **Docker daemon running.** That's it - testcontainers boots its own containers. You do **not** need `pnpm run docker` (that compose stack is for running the app, and is separate).
+
+## The loop
+
+```bash
+# 1. Build upstream deps once (turbo-caches them; only re-runs when a dep changes)
+pnpm run build --filter @internal/run-engine
+
+# 2. Iterate by running vitest DIRECTLY in the package - not via `turbo run test`
+cd internal-packages/run-engine
+pnpm exec vitest run src/engine/tests/ttl.test.ts        # one file
+pnpm exec vitest src/engine/tests/ttl.test.ts            # watch mode, tightest loop
+pnpm exec vitest run src/engine/tests/ --reporter=verbose # per-test timings
+```
+
+> **Why run vitest directly, not `turbo run test`?** The `test` turbo task is cacheable
+> (`outputs: []`). A second `turbo run test` with no input change replays the cached
+> result in ~0ms instead of executing - useless when you're measuring timing. Run vitest
+> directly (or `turbo run test --force`) so tests actually run.
+
+## Measuring container boot/teardown vs test time
+
+Container lifecycle (boot + migrate + teardown) dominates these suites. To see the split:
+
+```bash
+# JSON timing lines are gated on TESTCONTAINERS_TIMING locally (always on in CI),
+# and need --disableConsoleIntercept so vitest doesn't swallow them.
+TESTCONTAINERS_TIMING=1 pnpm exec vitest run <file> --disableConsoleIntercept
+```
+
+## Approximating the 2-core CI runner locally (flake repro)
+
+To reproduce CI-like CPU pressure on a beefy local machine - useful when a test only flakes under
+the 2-core CI runner:
+
+```bash
+# cap each testcontainer's CPU/mem (TESTCONTAINERS_CPU = cores, TESTCONTAINERS_MEMORY_GB = GB),
+# and pin the test runner to 2 cores (taskset is Linux-only). The caps are off unless the env vars are set.
+TESTCONTAINERS_CPU=2 TESTCONTAINERS_MEMORY_GB=2 taskset -c 0,1 pnpm exec vitest run <file>
+```
+
+Note: in practice the scoped tests here are latency/IO/sleep-bound, not CPU-bound, so this changes
+timings little - the original CI slowness was per-test container _boots_, which worker-scoping removed.
+Keep it for the cases that genuinely starve on CPU (e.g. timing races against a worker poll).
+
+## Timing harness
+
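+Instead of running the `TESTCONTAINERS_TIMING` command from "Measuring container boot/teardown vs test time" by hand, use the harness script, which runs it for you and prints wall time plus teardown time per resource for each run: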
+
+```bash
+node internal-packages/testcontainers/scripts/measure-test-timing.mjs \
+  src/client/client.test.ts --cwd internal-packages/clickhouse --runs 3
+# -> run 1/3  passed=true  wall=10.58s  teardown=0.67s ...
+```
diff --git a/internal-packages/testcontainers/package.json b/internal-packages/testcontainers/package.json
index 1c57952509c..4ea83344c34 100644
--- a/internal-packages/testcontainers/package.json
+++ b/internal-packages/testcontainers/package.json
@@ -6,7 +6,11 @@
   "types": "./src/index.ts",
   "exports": {
     ".": "./src/index.ts",
-    "./webapp": "./src/webapp.ts"
+    "./webapp": "./src/webapp.ts",
+    "./sequencer": {
+      "types": "./src/sequencer.d.cts",
+      "default": "./src/sequencer.cjs"
+    }
   },
   "dependencies": {
     "@clickhouse/client": "^1.11.1",
@@ -25,4 +29,4 @@
   "scripts": {
     "typecheck": "tsc --noEmit"
   }
-}
\ No newline at end of file
+}
diff --git a/internal-packages/testcontainers/scripts/measure-test-timing.mjs b/internal-packages/testcontainers/scripts/measure-test-timing.mjs
new file mode 100644
index 00000000000..fd71a2b93a3
--- /dev/null
+++ b/internal-packages/testcontainers/scripts/measure-test-timing.mjs
@@ -0,0 +1,113 @@
+#!/usr/bin/env node
+// Measure testcontainers boot/teardown vs test time for a single test file.
+//
+// Usage (from any package dir, or pass --cwd):
+//   node <path>/measure-test-timing.mjs <testFile> [--cwd <packageDir>] [--runs N]
+//
+// Relies on the TESTCONTAINERS_TIMING log gate in src/logs.ts and runs vitest with
+// --disableConsoleIntercept so the JSON timing lines reach stdout.
+
+import { spawn } from "node:child_process";
+
+const USAGE = "usage: measure-test-timing.mjs <testFile> [--cwd dir] [--runs N]";
+// Flags whose following argument is their value, not a positional argument.
+const VALUE_FLAGS = new Set(["--cwd", "--runs"]);
+
+const args = process.argv.slice(2);
+// First positional argument. Values of --cwd/--runs are skipped so that flags may come before
+// the test file (otherwise "--cwd dir file" would pick "dir" as the test file).
+const testFile = args.find((a, i) => !a.startsWith("--") && !VALUE_FLAGS.has(args[i - 1]));
+const cwd = valueOf("--cwd") ?? process.cwd();
+const runs = Number(valueOf("--runs") ?? "1");
+
+// Returns the argument that follows `flag`, or undefined when the flag is not present.
+function valueOf(flag) {
+  const i = args.indexOf(flag);
+  return i >= 0 ? args[i + 1] : undefined;
+}
+
+if (!testFile) {
+  console.error(USAGE);
+  process.exit(1);
+}
+
+// Number("abc") is NaN, which would make the run loop below silently execute zero times.
+if (!Number.isInteger(runs) || runs < 1) {
+  console.error(`--runs must be a positive integer\n${USAGE}`);
+  process.exit(1);
+}
+
+// Runs vitest once on the test file with timing logs enabled and parses its combined
+// stdout/stderr. Resolves with { cleanups, duration, passed }: the parsed "cleanup" timing events,
+// the wall time in seconds from the "Duration" line (null if absent), and whether the run
+// passed. Rejects if the vitest process cannot be spawned.
+function runOnce() {
+  return new Promise((resolve, reject) => {
+    const child = spawn("pnpm", ["exec", "vitest", "run", testFile, "--disableConsoleIntercept"], {
+      cwd,
+      env: { ...process.env, TESTCONTAINERS_TIMING: "1" },
+    });
+
+    let out = "";
+    const collect = (buf) => (out += buf.toString());
+    child.stdout.on("data", collect);
+    child.stderr.on("data", collect);
+
+    // Without this listener a failed spawn (pnpm missing, bad --cwd) surfaces as an unhandled
+    // 'error' event instead of a rejection the caller can report.
+    child.on("error", reject);
+
+    child.on("close", (code) => {
+      const cleanups = [];
+      let duration = null;
+      for (const line of out.split("\n")) {
+        const trimmed = line.trim();
+        if (trimmed.startsWith("{")) {
+          try {
+            const ev = JSON.parse(trimmed);
+            if (ev.type === "cleanup") cleanups.push(ev);
+          } catch {
+            // Not a JSON timing line, just output that happens to start with "{".
+          }
+        }
+        // Accept a Duration given in seconds ("10.58s") or milliseconds ("850ms"); store seconds.
+        const m = trimmed.match(/Duration\s+([\d.]+)(ms|s)\b/);
+        if (m) duration = m[2] === "ms" ? Number(m[1]) / 1000 : Number(m[1]);
+      }
+      // The summary line alone is not enough: also require a zero exit code, so a run that
+      // printed "N passed" but still exited non-zero is reported as failed.
+      const passed = code === 0 && /Tests\s+\d+ passed/.test(out);
+      resolve({ cleanups, duration, passed });
+    });
+  });
+}
+
+for (let i = 0; i < runs; i++) {
+  let result;
+  try {
+    result = await runOnce();
+  } catch (err) {
+    console.error(`failed to run vitest in ${cwd}: ${err.message}`);
+    process.exit(1);
+  }
+  const { cleanups, duration, passed } = result;
+
+  // Sum teardown time per resource kind (the part of `resource` before the first ":").
+  const byResource = {};
+  for (const c of cleanups) {
+    const key = String(c.resource ?? "unknown").split(":")[0];
+    byResource[key] ??= { totalMs: 0, count: 0 };
+    byResource[key].totalMs += c.durationMs ?? 0;
+    byResource[key].count += 1;
+  }
+  const teardownMs = Object.values(byResource).reduce((a, r) => a + r.totalMs, 0);
+  const wall = duration === null ? "n/a" : `${duration}s`;
+  console.log(
+    `\nrun ${i + 1}/${runs}  passed=${passed}  wall=${wall}  teardown=${(
+      teardownMs / 1000
+    ).toFixed(2)}s`
+  );
+  for (const [res, r] of Object.entries(byResource)) {
+    console.log(`  teardown ${res}: ${(r.totalMs / 1000).toFixed(2)}s over ${r.count}`);
+  }
+}
diff --git a/internal-packages/testcontainers/src/clickhouse.ts b/internal-packages/testcontainers/src/clickhouse.ts
index 577111af3d5..1bd7f758e02 100644
--- a/internal-packages/testcontainers/src/clickhouse.ts
+++ b/internal-packages/testcontainers/src/clickhouse.ts
@@ -144,6 +144,24 @@ export class StartedClickHouseContainer extends AbstractStartedContainer {
   }
 }
 
+/**
+ * Resets data between tests on a reused ClickHouse container by truncating every base table
+ * (MergeTree etc.) in the migrated database. Views/materialized views are skipped - their target
+ * tables are base tables and get truncated too, which clears MV state. Cheaper than dropping +
+ * re-migrating, and these migrations aren't version-tracked so they can't simply be re-run.
+ */
+export async function truncateClickhouseTables(client: ClickHouseClient, database = "trigger_dev") {
+  const result = await client.query({
+    query: `SELECT name FROM system.tables WHERE database = '${database}' AND engine NOT LIKE '%View%'`,
+    format: "JSONEachRow", // parsed below as one { name } object per table
+  });
+  const tables = await result.json<{ name: string }>();
+  // NOTE: `database` (and each table name) is interpolated into the SQL as-is - trusted names only.
+  for (const { name } of tables) {
+    await client.command({ query: `TRUNCATE TABLE \`${database}\`.\`${name}\`` });
+  }
+}
+
 export async function runClickhouseMigrations(client: ClickHouseClient, migrationsPath: string) {
   // Get all the *.sql files in the migrations path
   const queries = await getAllClickhouseMigrationQueries(migrationsPath);
diff --git a/internal-packages/testcontainers/src/index.ts b/internal-packages/testcontainers/src/index.ts
index 95b4ab48e8c..8b687402f6d 100644
--- a/internal-packages/testcontainers/src/index.ts
+++ b/internal-packages/testcontainers/src/index.ts
@@ -1,7 +1,7 @@
-import { StartedPostgreSqlContainer } from "@testcontainers/postgresql";
+import { PostgreSqlContainer, StartedPostgreSqlContainer } from "@testcontainers/postgresql";
 import { StartedRedisContainer } from "@testcontainers/redis";
 import { PrismaClient } from "@trigger.dev/database";
-import { RedisOptions } from "ioredis";
+import Redis, { RedisOptions } from "ioredis";
 import { Network, type StartedNetwork } from "testcontainers";
 import { TestContext, test } from "vitest";
 import {
@@ -9,13 +9,21 @@ import {
   createElectricContainer,
   createPostgresContainer,
   createRedisContainer,
-  createMinIOContainer,
+  postgresUriWithDatabase,
+  pushDatabaseSchema,
   useContainer,
+  withCiResourceLimits,
   withContainerSetup,
 } from "./utils";
 import { getTaskMetadata, logCleanup, logSetup } from "./logs";
-import { StartedClickHouseContainer } from "./clickhouse";
-import { StartedMinIOContainer, type MinIOConnectionConfig } from "./minio";
+import path from "path";
+import {
+  ClickHouseContainer,
+  StartedClickHouseContainer,
+  runClickhouseMigrations,
+  truncateClickhouseTables,
+} from "./clickhouse";
+import { MinIOContainer, StartedMinIOContainer, type MinIOConnectionConfig } from "./minio";
 import { ClickHouseClient, createClient } from "@clickhouse/client";
 
 export { assertNonNullable, createPostgresContainer } from "./utils";
@@ -120,7 +128,112 @@ export const prisma = async (
   }
 };
 
-export const postgresTest = test.extend<PostgresContext>({ network, postgresContainer, prisma });
+const POSTGRES_TEMPLATE_DB = "template_db";
+let pgCloneCounter = 0;
+
+type PostgresTestContext = {
+  postgresContainer: StartedPostgreSqlContainer;
+  prisma: PrismaClient;
+};
+
+// --- Worker-scoped + per-test-isolated fixtures (shared by the standalone *Test and containerTest) ---
+// The pattern: boot each container ONCE per worker; isolate per test cheaply (postgres = template
+// clone, redis = FLUSHALL, clickhouse = TRUNCATE) instead of re-booting. Reset fixtures are `auto`
+// so they run for every test even if it doesn't destructure them.
+
+// Boot postgres ONCE per worker (module singleton, reaped by Ryuk on worker exit) and push the
+// schema into a dedicated template db that nothing else connects to (so CREATE DATABASE ... TEMPLATE
+// never trips on an active session).
+let workerPostgresContainer: Promise<StartedPostgreSqlContainer> | undefined;
+const getWorkerPostgresContainer = () => {
+  if (!workerPostgresContainer) {
+    workerPostgresContainer = (async () => {
+      const container = await withCiResourceLimits(new PostgreSqlContainer("docker.io/postgres:14"))
+        .withCommand(["-c", "listen_addresses=*", "-c", "wal_level=logical"])
+        .start();
+      // Create the template db explicitly via an admin connection (the same primitive the per-test
+      // clone uses) instead of relying on `prisma db push` to create a missing database. That
+      // create-if-missing path behaves differently on CI and - because push errors were swallowed -
+      // surfaced only later as a confusing "template database template_db does not exist" at clone
+      // time. Pushing into an already-existing db is the path the pre-worker-scope code always used.
+      const adminUrl = postgresUriWithDatabase(container.getConnectionUri(), "postgres");
+      const admin = new PrismaClient({ datasources: { db: { url: adminUrl } } });
+      try {
+        await admin.$executeRawUnsafe(`CREATE DATABASE "${POSTGRES_TEMPLATE_DB}"`);
+      } finally {
+        await admin.$disconnect(); // released even if CREATE DATABASE fails
+      }
+      await pushDatabaseSchema(
+        postgresUriWithDatabase(container.getConnectionUri(), POSTGRES_TEMPLATE_DB)
+      );
+      return container;
+    })();
+  }
+  return workerPostgresContainer;
+};
+
+// Per test: clone a fresh database from the template (fast filesystem copy), then hand back a view
+// of the shared container whose connection points at the clone. This keeps prisma AND any code that
+// reads postgresContainer.getConnectionUri()/getDatabase() (e.g. logical replication) on the SAME
+// isolated database - and it's parallel-ready (each test owns its db).
+const clonedPostgresContainer = async ({}, use: Use<StartedPostgreSqlContainer>) => {
+  const container = await getWorkerPostgresContainer();
+  const baseUri = container.getConnectionUri();
+  const adminUri = postgresUriWithDatabase(baseUri, "postgres");
+  const cloneDb = `test_${pgCloneCounter++}`;
+  const createSql = `CREATE DATABASE "${cloneDb}" TEMPLATE "${POSTGRES_TEMPLATE_DB}"`;
+
+  const admin = new PrismaClient({ datasources: { db: { url: adminUri } } });
+  try {
+    await admin.$executeRawUnsafe(createSql);
+  } finally {
+    await admin.$disconnect(); // released even when the clone fails, so no connection leaks
+  }
+
+  const cloneUri = postgresUriWithDatabase(baseUri, cloneDb);
+  const view = new Proxy(container, {
+    get(target, prop, receiver) {
+      if (prop === "getConnectionUri") return () => cloneUri;
+      if (prop === "getDatabase") return () => cloneDb;
+      const value = Reflect.get(target, prop, receiver);
+      return typeof value === "function" ? value.bind(target) : value;
+    },
+  });
+
+  try {
+    await use(view);
+  } finally {
+    // Best-effort drop so clones don't pile up over a long suite; WITH (FORCE) terminates lingering
+    // backends (pg 13+). A failed drop is swallowed - the container is reaped on worker exit anyway.
+    const cleanup = new PrismaClient({ datasources: { db: { url: adminUri } } });
+    try {
+      await cleanup.$executeRawUnsafe(`DROP DATABASE IF EXISTS "${cloneDb}" WITH (FORCE)`);
+    } catch {
+      // ignore - reaped with the container anyway
+    } finally {
+      await cleanup.$disconnect();
+    }
+  }
+};
+
+const prismaFromContainer = async (
+  { postgresContainer }: { postgresContainer: StartedPostgreSqlContainer },
+  use: Use<PrismaClient>
+) => {
+  const prisma = new PrismaClient({
+    datasources: { db: { url: postgresContainer.getConnectionUri() } },
+  });
+  try {
+    await use(prisma);
+  } finally {
+    await logCleanup("prisma", prisma.$disconnect());
+  }
+};
+
+export const postgresTest = test.extend<PostgresTestContext>({
+  postgresContainer: clonedPostgresContainer,
+  prisma: prismaFromContainer,
+});
 
 export const redisContainer = async (
   { network, task }: { network: StartedNetwork } & TestContext,
@@ -173,7 +286,58 @@ export const redisOptions = async (
   await use(options);
 };
 
-export const redisTest = test.extend<RedisContext>({ network, redisContainer, redisOptions });
+// Worker-scoped redis: booted once per worker, FLUSHALL per test. Big win for redis-heavy files
+// (buffer.test.ts: 88 boots -> 1). Safe ONLY for tests that don't leave background redis work
+// (a Worker loop, BatchQueue) running past the test body - use isolatedRedisTest for those.
+const bootWorkerRedis = async ({}, use: Use<StartedRedisContainer>) => {
+  const { container } = await createRedisContainer({ port: 6379 });
+  try {
+    await use(container);
+  } finally {
+    await container.stop({ timeout: 0 });
+  }
+};
+
+const flushRedis = async (
+  { redisContainer }: { redisContainer: StartedRedisContainer },
+  use: Use<void>
+) => {
+  const redis = new Redis({
+    host: redisContainer.getHost(),
+    port: redisContainer.getPort(),
+    password: redisContainer.getPassword(),
+    maxRetriesPerRequest: 3,
+  });
+  try {
+    await redis.flushall();
+  } finally {
+    redis.disconnect();
+  }
+  await use();
+};
+
+type RedisTestContext = {
+  redisContainer: StartedRedisContainer;
+  resetRedis: void;
+  redisOptions: RedisOptions;
+};
+
+// Worker-scoped redis (boots once, FLUSHALL between tests). Use isolatedRedisTest for tests that run
+// background redis work (redis-worker Workers, BatchQueue) past the test body - see its note + README.
+export const redisTest = test.extend<RedisTestContext>({
+  redisContainer: [bootWorkerRedis, { scope: "worker" }],
+  resetRedis: [flushRedis, { auto: true }],
+  redisOptions,
+});
+
+// Per-test redis for tests with background redis work (redis-worker Workers, BatchQueue) that can
+// outlive the test body - a shared redis would let leaked work hit a closed connection / next test
+// ("Connection is closed"). Boot is kept fast (see createRedisContainer).
+export const isolatedRedisTest = test.extend<RedisContext>({
+  network,
+  redisContainer,
+  redisOptions,
+});
 
 const electricOrigin = async (
   {
@@ -225,12 +389,60 @@ type ClickhouseContext = {
   clickhouseClient: ClickHouseClient;
 };
 
-export const clickhouseTest = test.extend<ClickhouseContext>({
-  network,
-  clickhouseContainer,
-  clickhouseClient,
+const clickhouseMigrationsPath = path.resolve(__dirname, "../../clickhouse/schema");
+
+type ClickhouseTestContext = {
+  clickhouseContainer: StartedClickHouseContainer;
+  resetClickhouse: void;
+  clickhouseClient: ClickHouseClient;
+};
+
+// Boot + migrate clickhouse once per worker.
+const bootWorkerClickhouse = async ({}, use: Use<StartedClickHouseContainer>) => {
+  const container = await withCiResourceLimits(new ClickHouseContainer()).start();
+  try {
+    const client = createClient({ url: container.getConnectionUrl() });
+    const migrate = () => runClickhouseMigrations(client, clickhouseMigrationsPath);
+    // Close the client and (via the outer finally) stop the container even if boot fails.
+    await client.ping().then(migrate).finally(() => client.close());
+    await use(container);
+  } finally {
+    await container.stop({ timeout: 0 });
+  }
+};
+
+// Per test: truncate all tables on the shared clickhouse (auto fixture so it runs for every test).
+const truncateClickhouseFixture = async (
+  { clickhouseContainer }: { clickhouseContainer: StartedClickHouseContainer },
+  use: Use<void>
+) => {
+  const client = createClient({ url: clickhouseContainer.getConnectionUrl() });
+  // Close the client even when a TRUNCATE fails, so a failed reset doesn't leak the connection.
+  await truncateClickhouseTables(client).finally(() => client.close());
+  await use();
+};
+
+const scopedClickhouseClient = async (
+  { clickhouseContainer }: { clickhouseContainer: StartedClickHouseContainer },
+  use: Use<ClickHouseClient>
+) => {
+  const client = createClient({ url: clickhouseContainer.getConnectionUrl() });
+  try {
+    await use(client);
+  } finally {
+    await logCleanup("clickhouseClient", client.close());
+  }
+};
+
+export const clickhouseTest = test.extend<ClickhouseTestContext>({
+  clickhouseContainer: [bootWorkerClickhouse, { scope: "worker" }],
+  resetClickhouse: [truncateClickhouseFixture, { auto: true }],
+  clickhouseClient: scopedClickhouseClient,
 });
 
+// NOTE: per-test containers (not worker-scoped) - the replication package does logical replication
+// (slots/publications/REPLICA IDENTITY), which doesn't play nicely with a shared container +
+// template-clone. A dedicated container per test is the correct, isolated choice here.
 export const postgresAndRedisTest = test.extend<PostgresAndRedisContext>({
   network,
   postgresContainer,
@@ -239,14 +451,81 @@ export const postgresAndRedisTest = test.extend<PostgresAndRedisContext>({
   redisOptions,
 });
 
-export const containerTest = test.extend<ContainerContext>({
+type ContainerTestContext = {
+  postgresContainer: StartedPostgreSqlContainer;
+  prisma: PrismaClient;
+  redisContainer: StartedRedisContainer;
+  resetRedis: void;
+  redisOptions: RedisOptions;
+  clickhouseContainer: StartedClickHouseContainer;
+  resetClickhouse: void;
+  clickhouseClient: ClickHouseClient;
+};
+
+// The workhorse fixture (~36 files). Postgres (template-clone), Redis (FLUSHALL) and ClickHouse
+// (truncate) all boot once per worker - no per-test container boots. Use containerTestWithIsolatedRedis
+// for tests that run background redis work (BatchQueue, redis-worker Workers) past the test body.
+export const containerTest = test.extend<ContainerTestContext>({
+  postgresContainer: clonedPostgresContainer,
+  prisma: prismaFromContainer,
+  redisContainer: [bootWorkerRedis, { scope: "worker" }],
+  resetRedis: [flushRedis, { auto: true }],
+  redisOptions,
+  clickhouseContainer: [bootWorkerClickhouse, { scope: "worker" }],
+  resetClickhouse: [truncateClickhouseFixture, { auto: true }],
+  clickhouseClient: scopedClickhouseClient,
+});
+
+type ContainerWithIsolatedRedisContext = {
+  network: StartedNetwork;
+  postgresContainer: StartedPostgreSqlContainer;
+  prisma: PrismaClient;
+  redisContainer: StartedRedisContainer;
+  redisOptions: RedisOptions;
+  clickhouseContainer: StartedClickHouseContainer;
+  resetClickhouse: void;
+  clickhouseClient: ClickHouseClient;
+};
+
+// Same as containerTest but Redis is PER-TEST - for tests whose background redis work (BatchQueue,
+// Workers) outlives the test body and would otherwise hit a closed/shared connection.
+export const containerTestWithIsolatedRedis = test.extend<ContainerWithIsolatedRedisContext>({
+  network,
+  postgresContainer: clonedPostgresContainer,
+  prisma: prismaFromContainer,
+  redisContainer,
+  redisOptions,
+  clickhouseContainer: [bootWorkerClickhouse, { scope: "worker" }],
+  resetClickhouse: [truncateClickhouseFixture, { auto: true }],
+  clickhouseClient: scopedClickhouseClient,
+});
+
+// For tests that exercise the Postgres -> ClickHouse logical-replication pipeline (WAL slots,
+// publications, REPLICA IDENTITY). These need a dedicated Postgres per test - the worker-scoped +
+// template-clone model used by containerTest doesn't carry logical replication across cloned dbs.
+// Postgres is per-test (the WAL slot/publication lives in the db it writes to); ClickHouse is
+// worker-scoped + truncated (the pipeline writes pg->clickhouse and a shared+truncated clickhouse is
+// fine). Redis is per-test too (background work safety, same as containerTestWithIsolatedRedis).
+type ReplicationContainerTestContext = {
+  network: StartedNetwork;
+  postgresContainer: StartedPostgreSqlContainer;
+  prisma: PrismaClient;
+  redisContainer: StartedRedisContainer;
+  redisOptions: RedisOptions;
+  clickhouseContainer: StartedClickHouseContainer;
+  resetClickhouse: void;
+  clickhouseClient: ClickHouseClient;
+};
+
+export const replicationContainerTest = test.extend<ReplicationContainerTestContext>({
   network,
   postgresContainer,
   prisma,
   redisContainer,
   redisOptions,
-  clickhouseContainer,
-  clickhouseClient,
+  clickhouseContainer: [bootWorkerClickhouse, { scope: "worker" }],
+  resetClickhouse: [truncateClickhouseFixture, { auto: true }],
+  clickhouseClient: scopedClickhouseClient,
 });
 
 export const containerWithElectricTest = test.extend<ContainerWithElectricContext>({
@@ -267,17 +546,22 @@ export const containerWithElectricAndRedisTest = test.extend<ContainerWithElectr
   clickhouseClient,
 });
 
-const minioContainer = async (
-  { network, task }: { network: StartedNetwork } & TestContext,
-  use: Use<StartedMinIOContainer>
-) => {
-  const { container, metadata } = await withContainerSetup({
-    name: "minioContainer",
-    task,
-    setup: createMinIOContainer(network),
-  });
+// Boot minio once per worker; reset the bucket per test (auto fixture).
+const bootWorkerMinio = async ({}, use: Use<StartedMinIOContainer>) => {
+  const container = await withCiResourceLimits(new MinIOContainer()).start();
+  try {
+    await use(container);
+  } finally {
+    await container.stop({ timeout: 0 });
+  }
+};
 
-  await useContainer("minioContainer", { container, task, use: () => use(container) });
+const minioReset = async (
+  { minioContainer }: { minioContainer: StartedMinIOContainer },
+  use: Use<void>
+) => {
+  await minioContainer.resetBucket();
+  await use();
 };
 
 const minioConfig = async (
@@ -287,18 +571,30 @@ const minioConfig = async (
   await use(minioContainer.getConnectionConfig());
 };
 
-export const minioTest = test.extend<MinIOContext>({
-  network,
-  minioContainer,
+type MinioTestContext = {
+  minioContainer: StartedMinIOContainer;
+  resetMinio: void;
+  minioConfig: MinIOConnectionConfig;
+};
+
+export const minioTest = test.extend<MinioTestContext>({
+  minioContainer: [bootWorkerMinio, { scope: "worker" }],
+  resetMinio: [minioReset, { auto: true }],
   minioConfig,
 });
 
-type PostgresAndMinIOContext = NetworkContext & PostgresContext & MinIOContext;
+type PostgresAndMinioTestContext = {
+  postgresContainer: StartedPostgreSqlContainer;
+  prisma: PrismaClient;
+  minioContainer: StartedMinIOContainer;
+  resetMinio: void;
+  minioConfig: MinIOConnectionConfig;
+};
 
-export const postgresAndMinioTest = test.extend<PostgresAndMinIOContext>({
-  network,
-  postgresContainer,
-  prisma,
-  minioContainer,
+export const postgresAndMinioTest = test.extend<PostgresAndMinioTestContext>({
+  postgresContainer: clonedPostgresContainer,
+  prisma: prismaFromContainer,
+  minioContainer: [bootWorkerMinio, { scope: "worker" }],
+  resetMinio: [minioReset, { auto: true }],
   minioConfig,
 });
diff --git a/internal-packages/testcontainers/src/logs.ts b/internal-packages/testcontainers/src/logs.ts
index 865a6592bd5..3ea3e5fe8a6 100644
--- a/internal-packages/testcontainers/src/logs.ts
+++ b/internal-packages/testcontainers/src/logs.ts
@@ -5,10 +5,13 @@ import { StartedTestContainer } from "testcontainers";
 
 let setupOrder = 0;
 
+// Emit timing JSON in CI, or locally when TESTCONTAINERS_TIMING is set (drives the local timing harness)
+const emitTimingLogs = isCI || !!env.TESTCONTAINERS_TIMING;
+
 export function logSetup(resource: string, metadata: Record<string, unknown>) {
   const order = setupOrder++;
 
-  if (!isCI) {
+  if (!emitTimingLogs) {
     return;
   }
 
@@ -67,7 +70,7 @@ export async function logCleanup(
   const activeAtEnd = --activeCleanups;
   const parallel = activeAtStart > 1 || activeAtEnd > 0;
 
-  if (!isCI) {
+  if (!emitTimingLogs) {
     return;
   }
 
diff --git a/internal-packages/testcontainers/src/minio.ts b/internal-packages/testcontainers/src/minio.ts
index 4f85149b7a4..f7ef2d1275e 100644
--- a/internal-packages/testcontainers/src/minio.ts
+++ b/internal-packages/testcontainers/src/minio.ts
@@ -68,11 +68,9 @@ export class MinIOContainer extends GenericContainer {
       { throwOnError: true }
     );
 
-    await x(
-      "docker",
-      ["exec", startedContainer.getId(), "mc", "mb", "local/packets"],
-      { throwOnError: true }
-    );
+    await x("docker", ["exec", startedContainer.getId(), "mc", "mb", "local/packets"], {
+      throwOnError: true,
+    });
 
     return new StartedMinIOContainer(
       startedContainer,
@@ -120,6 +118,23 @@ export class StartedMinIOContainer extends AbstractStartedContainer {
     return `${protocol}://${host}:${port}`;
   }
 
+  /**
+   * Empties the bucket between tests on a reused container (the "local" mc alias and the bucket are
+   * created at boot). Recreates the bucket so each test starts from the same empty state.
+   */
+  public async resetBucket(bucket = "packets"): Promise<void> {
+    await x(
+      "docker",
+      ["exec", this.getId(), "mc", "rm", "--recursive", "--force", `local/${bucket}`],
+      {
+        throwOnError: false,
+      }
+    );
+    await x("docker", ["exec", this.getId(), "mc", "mb", "--ignore-existing", `local/${bucket}`], {
+      throwOnError: true,
+    });
+  }
+
   /**
    * Gets connection configuration suitable for object storage clients.
    */
diff --git a/internal-packages/testcontainers/src/sequencer.cjs b/internal-packages/testcontainers/src/sequencer.cjs
new file mode 100644
index 00000000000..14084e6e84d
--- /dev/null
+++ b/internal-packages/testcontainers/src/sequencer.cjs
@@ -0,0 +1,129 @@
+// Authored as plain CommonJS (NOT .ts) on purpose. vitest loads each package's vitest.config.ts by
+// bundling it, and it EXTERNALIZES this workspace subpath - node then loads this file verbatim. A .ts
+// here reaches node as raw TypeScript and crashes config loading on CI's pinned node 20 (no type
+// stripping: `SyntaxError`). Keeping it dependency-free JS - and importing nothing from the ESM-only
+// `vitest/node` - makes it loadable on every node. Types for consumers live in sequencer.d.cts.
+
+const { existsSync, readFileSync } = require("node:fs");
+const path = require("node:path");
+
+// Walk up from the package dir (cwd at config-load time) to the monorepo root (pnpm-workspace.yaml).
+function findRepoRoot(start) {
+  for (let current = start, hops = 0; hops < 20; hops++) {
+    if (existsSync(path.join(current, "pnpm-workspace.yaml"))) return current;
+    const above = path.dirname(current);
+    // dirname() is a fixed point at the filesystem root: nothing left to climb.
+    if (above === current) return start;
+    current = above;
+  }
+  // Marker not found within 20 levels - fall back to the starting directory.
+  return start;
+}
+
+// test-timings.json lives at the monorepo root: { "<repo-relative path>": <ms> }
+const REPO_ROOT = findRepoRoot(process.cwd());
+const TIMINGS_PATH = path.resolve(REPO_ROOT, "test-timings.json");
+
+let cachedTimings;
+
+function loadTimings() {
+  if (cachedTimings) return cachedTimings;
+  // A MISSING file is a legitimate state (no timings configured yet => count-based split). But a
+  // file that EXISTS and isn't a JSON object is a real problem with a committed artifact we control -
+  // fail loud rather than silently degrading sharding (silent fallbacks are what hid earlier bugs).
+  if (!existsSync(TIMINGS_PATH)) return (cachedTimings = {});
+  let parsed;
+  try {
+    parsed = JSON.parse(readFileSync(TIMINGS_PATH, "utf-8"));
+  } catch (error) {
+    throw new Error(`Failed to parse ${TIMINGS_PATH}: ${error?.message ?? error}`);
+  }
+  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
+    throw new Error(`${TIMINGS_PATH} must contain a JSON object of { "<path>": <ms> }`);
+  }
+  return (cachedTimings = parsed);
+}
+
+function median(nums) {
+  if (!nums.length) return 1;
+  const ordered = Array.from(nums).sort((x, y) => x - y);
+  const upper = ordered.length >> 1;
+  return ordered.length & 1 ? ordered[upper] : (ordered[upper - 1] + ordered[upper]) / 2;
+}
+
+// Stable per-package offset (derived from the package dir) so each package's heaviest file - which
+// LPT always drops into bin 0 - maps to a DIFFERENT shard. Without it, a serial multi-package job
+// (`turbo --concurrency=1 --filter "@internal/*"`) stacks every package's heaviest file into shard 1.
+// It's a rotation of the bin->shard mapping, so coverage stays exact (each file runs once).
+function packageOffset(specs, count) {
+  if (specs.length === 0) return 0;
+  // The key is the first two repo-relative path segments of the first spec's moduleId.
+  const segments = path.relative(REPO_ROOT, specs[0].moduleId).split(path.sep);
+  const key = segments.slice(0, 2).join("/");
+  // FNV-1a - spreads similar sibling package names (e.g. internal-packages/*) far better than a
+  // simple polynomial hash mod count, which collided run-engine + schedule-engine onto one shard.
+  let hash = 2166136261;
+  for (let i = 0; i < key.length; i++) {
+    hash = Math.imul(hash ^ key.charCodeAt(i), 16777619);
+  }
+  return (hash >>> 0) % count;
+}
+
+/**
+ * Duration-weighted interpretation of `--shard=i/N`. Instead of vitest's default file-count split,
+ * this greedily bin-packs test files by recorded duration (test-timings.json at the repo root;
+ * unknown/new files get the median) so each shard does roughly equal work.
+ *
+ * The packing is fully deterministic (sort by duration desc, then moduleId) so every shard computes
+ * the identical bins and just takes its own - no file runs twice or gets dropped. Falls back to the
+ * full set when no shard is configured, and to ~count-based when no timings exist.
+ *
+ * Implemented as a standalone TestSequencer (not extending BaseSequencer) so this file never imports
+ * `vitest/node` - see the header note.
+ */
+class DurationShardingSequencer {
+  constructor(ctx) {
+    this.ctx = ctx;
+  }
+
+  // Deterministic order (heaviest first, then moduleId) - stable across shards and a sensible
+  // in-shard run order, replacing BaseSequencer's default sort we no longer inherit.
+  async sort(files) {
+    const timings = loadTimings();
+    const fallback = median(Object.values(timings));
+    return [...files].sort((a, b) => {
+      const am = timings[path.relative(REPO_ROOT, a.moduleId)] ?? fallback;
+      const bm = timings[path.relative(REPO_ROOT, b.moduleId)] ?? fallback;
+      return bm - am || a.moduleId.localeCompare(b.moduleId);
+    });
+  }
+
+  async shard(specs) {
+    const shard = this.ctx.config.shard;
+    if (!shard || specs.length === 0) {
+      return specs;
+    }
+
+    const timings = loadTimings();
+    const fallback = median(Object.values(timings));
+
+    const weighted = specs
+      .map((spec) => ({
+        spec,
+        ms: timings[path.relative(REPO_ROOT, spec.moduleId)] ?? fallback,
+      }))
+      .sort((a, b) => b.ms - a.ms || a.spec.moduleId.localeCompare(b.spec.moduleId));
+
+    const bins = Array.from({ length: shard.count }, () => ({ total: 0, specs: [] }));
+
+    for (const { spec, ms } of weighted) {
+      const lightest = bins.reduce((min, bin) => (bin.total < min.total ? bin : min));
+      lightest.total += ms;
+      lightest.specs.push(spec);
+    }
+
+    const offset = packageOffset(specs, shard.count);
+    return bins[(shard.index - 1 + offset) % shard.count].specs;
+  }
+}
+
+module.exports = { DurationShardingSequencer };
diff --git a/internal-packages/testcontainers/src/sequencer.d.cts b/internal-packages/testcontainers/src/sequencer.d.cts
new file mode 100644
index 00000000000..2fbecc89ccd
--- /dev/null
+++ b/internal-packages/testcontainers/src/sequencer.d.cts
@@ -0,0 +1,13 @@
+import type { TestSequencer, TestSpecification, Vitest } from "vitest/node";
+
+/**
+ * Duration-weighted `--shard=i/N`: bin-packs test files by recorded duration (test-timings.json at
+ * the repo root) so each shard does roughly equal work. The runtime lives in `sequencer.cjs` (plain
+ * JS, so vitest config loading can load it on any node - see that file's header); this declaration
+ * supplies the types for configs that wire it via `sequence: { sequencer: DurationShardingSequencer }`.
+ */
+export declare class DurationShardingSequencer implements TestSequencer {
+  constructor(ctx: Vitest);
+  sort(files: TestSpecification[]): Promise<TestSpecification[]>;
+  shard(files: TestSpecification[]): Promise<TestSpecification[]>;
+}
diff --git a/internal-packages/testcontainers/src/utils.ts b/internal-packages/testcontainers/src/utils.ts
index 5f689bc5bfe..4183e85b40b 100644
--- a/internal-packages/testcontainers/src/utils.ts
+++ b/internal-packages/testcontainers/src/utils.ts
@@ -12,17 +12,20 @@ import { ClickHouseContainer, runClickhouseMigrations } from "./clickhouse";
 import { MinIOContainer } from "./minio";
 import { getContainerMetadata, getTaskMetadata, logCleanup, logSetup } from "./logs";
 
-export async function createPostgresContainer(network: StartedNetwork) {
-  const container = await new PostgreSqlContainer("docker.io/postgres:14")
-    .withNetwork(network)
-    .withNetworkAliases("database")
-    .withCommand(["-c", "listen_addresses=*", "-c", "wal_level=logical"])
-    .start();
+/** Rewrites `uri` so its path names `database`; every other URL component is kept as parsed. */
+export function postgresUriWithDatabase(uri: string, database: string): string {
+  const parsed = new URL(uri);
+  parsed.pathname = "/" + database;
+  return parsed.href;
+}
 
-  // Run migrations
+/** Pushes the Prisma schema into the database at `databaseUrl` (which must already exist). */
+export async function pushDatabaseSchema(databaseUrl: string) {
   const databasePath = path.resolve(__dirname, "../../database");
 
-  await x(
+  // throwOnError is essential: without it tinyexec swallows a non-zero `prisma db push`, so a failed
+  // push looks like success and only surfaces much later as a confusing downstream error.
+  const result = await x(
     `${databasePath}/node_modules/.bin/prisma`,
     [
       "db",
@@ -34,21 +37,65 @@ export async function createPostgresContainer(network: StartedNetwork) {
       `${databasePath}/prisma/schema.prisma`,
     ],
     {
+      throwOnError: true,
       nodeOptions: {
         env: {
           ...process.env,
-          DATABASE_URL: container.getConnectionUri(),
-          DIRECT_URL: container.getConnectionUri(),
+          DATABASE_URL: databaseUrl,
+          DIRECT_URL: databaseUrl,
         },
       },
     }
   );
 
+  return result;
+}
+
+/**
+ * Caps each container's CPU/memory to approximate the 2-core CI runner locally (for timing + flake
+ * reproduction). Set TESTCONTAINERS_CPU (cores per container, e.g. "2") and/or
+ * TESTCONTAINERS_MEMORY_GB (GB per container). Pair with running the runner under `taskset -c 0,1`.
+ * No-op when neither is set. (testcontainers v11 has no cpuset pinning, only this quota cap.)
+ */
+export function withCiResourceLimits<T extends GenericContainer>(container: T): T {
+  // Both values are parsed (and validated) up front, so a malformed env var throws before any
+  // quota is applied to the container.
+  const cpu = parsePositiveNumberEnv("TESTCONTAINERS_CPU");
+  const memory = parsePositiveNumberEnv("TESTCONTAINERS_MEMORY_GB");
+  const quota: { cpu?: number; memory?: number } = {};
+  if (cpu !== undefined) quota.cpu = cpu;
+  if (memory !== undefined) quota.memory = memory;
+  // Nothing configured: hand the container back untouched.
+  return cpu === undefined && memory === undefined ? container : container.withResourcesQuota(quota);
+}
+
+// Fail fast on a malformed value rather than letting NaN reach the container runtime as a cryptic error.
+function parsePositiveNumberEnv(name: string): number | undefined {
+  const raw = process.env[name];
+  // Unset and empty both mean "not configured".
+  if (raw === undefined || raw === "") return undefined;
+  const parsed = Number(raw);
+  const valid = Number.isFinite(parsed) && parsed > 0;
+  if (valid) return parsed;
+  throw new Error(`${name} must be a positive number, got "${raw}"`);
+}
+
+export async function createPostgresContainer(network: StartedNetwork) {
+  const container = await withCiResourceLimits(new PostgreSqlContainer("docker.io/postgres:14"))
+    .withNetwork(network)
+    .withNetworkAliases("database")
+    .withCommand(["-c", "listen_addresses=*", "-c", "wal_level=logical"])
+    .start();
+
+  await pushDatabaseSchema(container.getConnectionUri());
+
   return { url: container.getConnectionUri(), container, network };
 }
 
 export async function createClickHouseContainer(network: StartedNetwork) {
-  const container = await new ClickHouseContainer().withNetwork(network).start();
+  const container = await withCiResourceLimits(new ClickHouseContainer())
+    .withNetwork(network)
+    .start();
 
   const client = createClient({
     url: container.getConnectionUrl(),
@@ -75,7 +122,7 @@ export async function createRedisContainer({
   port?: number;
   network?: StartedNetwork;
 }) {
-  let container = new RedisContainer("redis:7.2")
+  let container = withCiResourceLimits(new RedisContainer("redis:7.2"))
     .withExposedPorts(port ?? 6379)
     .withStartupTimeout(120_000); // 2 minutes
 
@@ -83,16 +130,11 @@ export async function createRedisContainer({
     container = container.withNetwork(network).withNetworkAliases("redis");
   }
 
+  // Wait only on the readiness log (RedisContainer's default) - the previous Docker healthcheck added
+  // a full poll-cycle of latency per boot, which dominates per-test redis. verifyRedisConnection
+  // below still confirms the container actually accepts connections before we hand it to the test.
   const startedContainer = await container
-    .withHealthCheck({
-      test: ["CMD", "redis-cli", "ping"],
-      interval: 1000,
-      timeout: 3000,
-      retries: 5,
-    })
-    .withWaitStrategy(
-      Wait.forAll([Wait.forHealthCheck(), Wait.forLogMessage("Ready to accept connections")])
-    )
+    .withWaitStrategy(Wait.forLogMessage("Ready to accept connections"))
     .start();
 
   // Add a verification step
@@ -156,8 +198,10 @@ export async function createElectricContainer(
     network.getName()
   )}:5432/${postgresContainer.getDatabase()}?sslmode=disable`;
 
-  const container = await new GenericContainer(
-    "electricsql/electric:1.2.4@sha256:20da3d0b0e74926c5623392db67fd56698b9e374c4aeb6cb5cadeb8fea171c36"
+  const container = await withCiResourceLimits(
+    new GenericContainer(
+      "electricsql/electric:1.2.4@sha256:20da3d0b0e74926c5623392db67fd56698b9e374c4aeb6cb5cadeb8fea171c36"
+    )
   )
     .withExposedPorts(3000)
     .withNetwork(network)
@@ -174,7 +218,7 @@ export async function createElectricContainer(
 }
 
 export async function createMinIOContainer(network: StartedNetwork) {
-  const container = await new MinIOContainer()
+  const container = await withCiResourceLimits(new MinIOContainer())
     .withNetwork(network)
     .withNetworkAliases("minio")
     .start();
@@ -250,8 +294,9 @@ export async function useContainer<TContainer extends StartedTestContainer>(
     const useDurationMs = Date.now() - start;
     metadata.useDurationMs = useDurationMs;
   } finally {
-    // WARNING: Testcontainers by default will not wait until the container has stopped. It will simply issue the stop command and return immediately.
-    // If you need to wait for the container to be stopped, you can provide a timeout. The unit of timeout option here is milliseconds (changed from seconds in testcontainers v11)
-    await logCleanup(name, container.stop({ timeout: 10_000 }), metadata);
+    // Containers are throwaway, so we force-kill (SIGKILL) instead of waiting for a graceful
+    // shutdown - ClickHouse alone spends ~5s/test gracefully stopping. timeout: 0 = immediate kill.
+    // We still await it (no pileup); logCleanup swallows any teardown-time connection errors.
+    await logCleanup(name, container.stop({ timeout: 0 }), metadata);
   }
 }
diff --git a/packages/redis-worker/src/worker.test.ts b/packages/redis-worker/src/worker.test.ts
index e4b6fd3e858..bd6c70b9676 100644
--- a/packages/redis-worker/src/worker.test.ts
+++ b/packages/redis-worker/src/worker.test.ts
@@ -1,4 +1,4 @@
-import { redisTest } from "@internal/testcontainers";
+import { isolatedRedisTest as redisTest } from "@internal/testcontainers";
 import { Logger } from "@trigger.dev/core/logger";
 import { describe } from "node:test";
 import { expect } from "vitest";
diff --git a/packages/redis-worker/vitest.config.ts b/packages/redis-worker/vitest.config.ts
index 452a66037d2..b52b49a0dc4 100644
--- a/packages/redis-worker/vitest.config.ts
+++ b/packages/redis-worker/vitest.config.ts
@@ -1,7 +1,9 @@
 import { defineConfig } from "vitest/config";
+import { DurationShardingSequencer } from "@internal/testcontainers/sequencer";
 
 export default defineConfig({
   test: {
+    sequence: { sequencer: DurationShardingSequencer },
     include: ["**/*.test.ts"],
     globals: true,
     // CI-only: absorbs timing races (real-clock waits vs worker poll interval) under shard CPU contention
diff --git a/test-timings.json b/test-timings.json
new file mode 100644
index 00000000000..1c1504c029d
--- /dev/null
+++ b/test-timings.json
@@ -0,0 +1,222 @@
+{
+  "apps/webapp/test/EnvironmentVariablesPresenter.test.ts": 10249,
+  "apps/webapp/test/GCRARateLimiter.test.ts": 4984,
+  "apps/webapp/test/authorizationRateLimitMiddleware.test.ts": 1,
+  "apps/webapp/test/bufferedTriggerPayload.test.ts": 3,
+  "apps/webapp/test/calculateNextSchedule.test.ts": 345,
+  "apps/webapp/test/chat-snapshot-integration.test.ts": 2326,
+  "apps/webapp/test/clickhouseFactory.test.ts": 13885,
+  "apps/webapp/test/concurrentFlushScheduler.test.ts": 361,
+  "apps/webapp/test/createDeploymentWithNextVersion.test.ts": 17889,
+  "apps/webapp/test/detectbadJsonStrings.test.ts": 98,
+  "apps/webapp/test/environmentVariableDeduplication.test.ts": 3,
+  "apps/webapp/test/environmentVariableRules.test.ts": 3,
+  "apps/webapp/test/environmentVariablesEnvironments.test.ts": 10355,
+  "apps/webapp/test/environmentVariablesRepository.test.ts": 18320,
+  "apps/webapp/test/errorFingerprinting.test.ts": 16,
+  "apps/webapp/test/errorGroupWebhook.test.ts": 7,
+  "apps/webapp/test/fairDequeuingStrategy.test.ts": 5705,
+  "apps/webapp/test/findOrCreateBackgroundWorker.test.ts": 17442,
+  "apps/webapp/test/getDeploymentImageRef.test.ts": 7,
+  "apps/webapp/test/httpErrors.test.ts": 25,
+  "apps/webapp/test/marqsKeyProducer.test.ts": 7,
+  "apps/webapp/test/metadataRouteOperationsLogging.test.ts": 6,
+  "apps/webapp/test/mollifierApplyMetadataMutation.test.ts": 497,
+  "apps/webapp/test/mollifierClaimResolution.test.ts": 6,
+  "apps/webapp/test/mollifierDrainerHandler.test.ts": 21,
+  "apps/webapp/test/mollifierDrainerWorker.test.ts": 5,
+  "apps/webapp/test/mollifierDrainingGauge.test.ts": 449,
+  "apps/webapp/test/mollifierGate.test.ts": 16,
+  "apps/webapp/test/mollifierIdempotencyClaim.test.ts": 13,
+  "apps/webapp/test/mollifierMollify.test.ts": 5,
+  "apps/webapp/test/mollifierMutateWithFallback.test.ts": 18,
+  "apps/webapp/test/mollifierReadFallback.test.ts": 15,
+  "apps/webapp/test/mollifierReplayPayloadShape.test.ts": 2,
+  "apps/webapp/test/mollifierResetIdempotencyKey.test.ts": 9,
+  "apps/webapp/test/mollifierResolveRunForMutation.test.ts": 7,
+  "apps/webapp/test/mollifierStaleSweep.test.ts": 969,
+  "apps/webapp/test/mollifierSynthesiseFoundRun.test.ts": 5,
+  "apps/webapp/test/mollifierSyntheticApiResponses.test.ts": 5,
+  "apps/webapp/test/mollifierSyntheticRedirectInfo.test.ts": 805,
+  "apps/webapp/test/mollifierSyntheticReplayTaskRun.test.ts": 2,
+  "apps/webapp/test/mollifierSyntheticRunHeader.test.ts": 4,
+  "apps/webapp/test/mollifierSyntheticSpanRun.test.ts": 8,
+  "apps/webapp/test/mollifierSyntheticTrace.test.ts": 6,
+  "apps/webapp/test/mollifierTripEvaluator.test.ts": 621,
+  "apps/webapp/test/objectStore.test.ts": 15979,
+  "apps/webapp/test/organizationDataStoresRegistry.test.ts": 16732,
+  "apps/webapp/test/otlpExporter.test.ts": 13,
+  "apps/webapp/test/otlpUtf16Sanitization.integration.test.ts": 6540,
+  "apps/webapp/test/realtimeClient.test.ts": 1,
+  "apps/webapp/test/redisRealtimeStreams.test.ts": 6214,
+  "apps/webapp/test/registryConfig.test.ts": 652,
+  "apps/webapp/test/replay-after-crash.test.ts": 2233,
+  "apps/webapp/test/runsBackfiller.test.ts": 15478,
+  "apps/webapp/test/runsReplicationBenchmark.test.ts": 0,
+  "apps/webapp/test/runsRepository.part1.test.ts": 53000,
+  "apps/webapp/test/runsRepository.part2.test.ts": 57000,
+  "apps/webapp/test/sanitizeRowsOnParseError.test.ts": 8,
+  "apps/webapp/test/sentryTenantContext.test.ts": 5,
+  "apps/webapp/test/sentryTraceContext.server.test.ts": 12,
+  "apps/webapp/test/sessionDuration.test.ts": 18416,
+  "apps/webapp/test/sessionsReplicationService.test.ts": 30000,
+  "apps/webapp/test/shouldRevalidateRunsList.test.ts": 5,
+  "apps/webapp/test/slackErrorAlerts.test.ts": 0,
+  "apps/webapp/test/tenantContext.test.ts": 26,
+  "apps/webapp/test/tenantContextFromAuthEnvironment.test.ts": 2,
+  "apps/webapp/test/tenantContextResolver.test.ts": 19,
+  "apps/webapp/test/timeGranularity.test.ts": 3,
+  "apps/webapp/test/timelineSpanEvents.test.ts": 6,
+  "apps/webapp/test/updateMetadata.test.ts": 26380,
+  "apps/webapp/test/validateGitBranchName.test.ts": 7,
+  "apps/webapp/test/vercelUrls.test.ts": 3,
+  "apps/webapp/test/webhookErrorAlerts.test.ts": 5,
+  "apps/webapp/test/workerQueueSplit.test.ts": 3,
+  "apps/webapp/test/components/DateTime.test.ts": 24,
+  "apps/webapp/test/engine/batchPayloads.test.ts": 5018,
+  "apps/webapp/test/engine/streamBatchItems.test.ts": 45000,
+  "apps/webapp/test/engine/taskIdentifierRegistry.test.ts": 13152,
+  "apps/webapp/test/engine/triggerTask.test.ts": 31630,
+  "apps/webapp/test/presenters/mapRunToLiveFields.test.ts": 3,
+  "apps/webapp/test/services/organizationAccessToken.test.ts": 9,
+  "apps/webapp/test/services/personalAccessToken.test.ts": 8,
+  "apps/webapp/test/components/code/tsql/tsqlCompletion.test.ts": 10,
+  "apps/webapp/test/components/code/tsql/tsqlLinter.test.ts": 237,
+  "apps/webapp/test/components/runs/v3/RunTag.test.ts": 5,
+  "packages/trigger-sdk/test/chat-snapshot.test.ts": 22,
+  "packages/trigger-sdk/test/chatHandover.test.ts": 1658,
+  "packages/trigger-sdk/test/merge-by-id.test.ts": 9,
+  "packages/trigger-sdk/test/mockChatAgent.test.ts": 2254,
+  "packages/trigger-sdk/test/recovery-boot.test.ts": 671,
+  "packages/trigger-sdk/test/replay-session-in.test.ts": 12,
+  "packages/trigger-sdk/test/replay-session-out.test.ts": 40,
+  "packages/trigger-sdk/test/skill.test.ts": 16,
+  "packages/trigger-sdk/test/skillsRuntime.test.ts": 131,
+  "packages/trigger-sdk/test/wire-shape.test.ts": 15,
+  "packages/trigger-sdk/src/v3/chat-server.test.ts": 185,
+  "packages/trigger-sdk/src/v3/chat-tab-coordinator.test.ts": 14,
+  "packages/trigger-sdk/src/v3/chat.test.ts": 77,
+  "packages/trigger-sdk/src/v3/createStartSessionAction.test.ts": 5,
+  "packages/trigger-sdk/src/v3/sessions.test.ts": 31,
+  "packages/trigger-sdk/src/v3/shared.test.ts": 68,
+  "packages/trigger-sdk/src/v3/streams.test.ts": 6,
+  "packages/trigger-sdk/src/v3/triggerClient.test.ts": 66,
+  "packages/trigger-sdk/src/v3/triggerClient.types.test.ts": 11,
+  "packages/redis-worker/src/cron.test.ts": 27371,
+  "packages/redis-worker/src/queue.test.ts": 3435,
+  "packages/redis-worker/src/worker.test.ts": 32870,
+  "packages/redis-worker/src/mollifier/buffer.test.ts": 3091,
+  "packages/redis-worker/src/mollifier/drainer.test.ts": 8403,
+  "packages/redis-worker/src/fair-queue/tests/concurrency.test.ts": 1341,
+  "packages/redis-worker/src/fair-queue/tests/drr.test.ts": 1203,
+  "packages/redis-worker/src/fair-queue/tests/fairQueue.test.ts": 7728,
+  "packages/redis-worker/src/fair-queue/tests/raceConditions.test.ts": 14961,
+  "packages/redis-worker/src/fair-queue/tests/retry.test.ts": 10,
+  "packages/redis-worker/src/fair-queue/tests/tenantDispatch.test.ts": 5769,
+  "packages/redis-worker/src/fair-queue/tests/visibility.test.ts": 3783,
+  "packages/redis-worker/src/fair-queue/tests/workerQueue.test.ts": 1116,
+  "packages/core/test/duration.test.ts": 42,
+  "packages/core/test/errors.test.ts": 51,
+  "packages/core/test/eventFilterMatches.test.ts": 38,
+  "packages/core/test/externalSpanExporterWrapper.test.ts": 13,
+  "packages/core/test/flattenAttributes.test.ts": 59,
+  "packages/core/test/ioSerialization.test.ts": 306,
+  "packages/core/test/jumpHash.test.ts": 385,
+  "packages/core/test/mockTaskContext.test.ts": 61,
+  "packages/core/test/recordSpanException.test.ts": 65,
+  "packages/core/test/resourceCatalog.test.ts": 71,
+  "packages/core/test/runStream.test.ts": 245,
+  "packages/core/test/skillCatalog.test.ts": 17,
+  "packages/core/test/standardMetadataManager.test.ts": 456,
+  "packages/core/test/streamsWriterV1.test.ts": 84112,
+  "packages/core/test/taskExecutor.test.ts": 364,
+  "packages/core/test/utils.test.ts": 15,
+  "packages/core/src/v3/apiClient/runStream.test.ts": 1893,
+  "packages/core/src/v3/apiClient/streamBatchItems.test.ts": 278,
+  "packages/core/src/v3/build/flags.test.ts": 12,
+  "packages/core/src/v3/idempotency-key-catalog/lruIdempotencyKeyCatalog.test.ts": 21,
+  "packages/core/src/v3/machines/max-old-space.test.ts": 18,
+  "packages/core/src/v3/realtimeStreams/manager.test.ts": 28,
+  "packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts": 91,
+  "packages/core/src/v3/schemas/api-type.test.ts": 33,
+  "packages/core/src/v3/schemas/batchItemNDJSON.test.ts": 15,
+  "packages/core/src/v3/schemas/idempotencyKey.test.ts": 68,
+  "packages/core/src/v3/sessionStreams/manager.test.ts": 126,
+  "packages/core/src/v3/serverOnly/shutdownManager.test.ts": 57,
+  "packages/core/src/v3/taskContext/index.test.ts": 23,
+  "packages/core/src/v3/utils/reconnectBackoff.test.ts": 45,
+  "packages/core/src/v3/runEngineWorker/supervisor/consumerPool.test.ts": 123,
+  "packages/core/src/v3/runEngineWorker/supervisor/queueMetricsProcessor.test.ts": 98,
+  "packages/core/src/v3/runEngineWorker/supervisor/scalingStrategies.test.ts": 92,
+  "packages/schema-to-json/tests/index.test.ts": 17,
+  "internal-packages/run-engine/src/run-queue/index.test.ts": 82296,
+  "internal-packages/run-engine/src/batch-queue/tests/index.test.ts": 5462,
+  "internal-packages/run-engine/src/engine/tests/attemptFailures.test.ts": 35471,
+  "internal-packages/run-engine/src/engine/tests/batchTrigger.test.ts": 33127,
+  "internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts": 33681,
+  "internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts": 28159,
+  "internal-packages/run-engine/src/engine/tests/cancelling.test.ts": 26240,
+  "internal-packages/run-engine/src/engine/tests/checkpoints.test.ts": 29420,
+  "internal-packages/run-engine/src/engine/tests/createCancelledRun.test.ts": 31807,
+  "internal-packages/run-engine/src/engine/tests/createFailedTaskRun.test.ts": 23573,
+  "internal-packages/run-engine/src/engine/tests/debounce.test.ts": 58554,
+  "internal-packages/run-engine/src/engine/tests/delays.test.ts": 42748,
+  "internal-packages/run-engine/src/engine/tests/dequeuing.test.ts": 25542,
+  "internal-packages/run-engine/src/engine/tests/getSnapshotsSince.test.ts": 32158,
+  "internal-packages/run-engine/src/engine/tests/heartbeats.test.ts": 39634,
+  "internal-packages/run-engine/src/engine/tests/lazyWaitpoint.test.ts": 34757,
+  "internal-packages/run-engine/src/engine/tests/locking.test.ts": 51090,
+  "internal-packages/run-engine/src/engine/tests/pendingVersion.test.ts": 28762,
+  "internal-packages/run-engine/src/engine/tests/priority.test.ts": 28808,
+  "internal-packages/run-engine/src/engine/tests/trigger.test.ts": 30601,
+  "internal-packages/run-engine/src/engine/tests/triggerAndWait.test.ts": 28893,
+  "internal-packages/run-engine/src/engine/tests/ttl.test.ts": 61981,
+  "internal-packages/run-engine/src/engine/tests/waitpointRace.test.ts": 22855,
+  "internal-packages/run-engine/src/engine/tests/waitpoints.test.ts": 39521,
+  "internal-packages/run-engine/src/run-queue/tests/ack.test.ts": 3156,
+  "internal-packages/run-engine/src/run-queue/tests/ckCounters.test.ts": 16715,
+  "internal-packages/run-engine/src/run-queue/tests/ckIndex.test.ts": 8916,
+  "internal-packages/run-engine/src/run-queue/tests/concurrencySweeper.test.ts": 6379,
+  "internal-packages/run-engine/src/run-queue/tests/dequeueMessageFromWorkerQueue.test.ts": 66701,
+  "internal-packages/run-engine/src/run-queue/tests/enqueueMessage.test.ts": 9108,
+  "internal-packages/run-engine/src/run-queue/tests/fairQueueSelectionStrategy.test.ts": 7997,
+  "internal-packages/run-engine/src/run-queue/tests/keyProducer.test.ts": 19,
+  "internal-packages/run-engine/src/run-queue/tests/migrateLegacyMasterQueue.test.ts": 1728,
+  "internal-packages/run-engine/src/run-queue/tests/nack.test.ts": 6184,
+  "internal-packages/run-engine/src/run-queue/tests/releaseConcurrency.test.ts": 3018,
+  "internal-packages/run-engine/src/run-queue/tests/workerQueueResolver.test.ts": 38,
+  "internal-packages/cache/src/stores/lruMemory.test.ts": 44,
+  "internal-packages/schedule-engine/test/scheduleEngine.test.ts": 43000,
+  "internal-packages/schedule-engine/test/scheduleRecovery.test.ts": 17396,
+  "internal-packages/replication/src/client.test.ts": 31306,
+  "internal-packages/tsql/src/index.test.ts": 246,
+  "internal-packages/tsql/src/grammar/parser.test.ts": 150,
+  "internal-packages/tsql/src/query/escape.test.ts": 9,
+  "internal-packages/tsql/src/query/parser.test.ts": 368,
+  "internal-packages/tsql/src/query/printer.test.ts": 942,
+  "internal-packages/tsql/src/query/results.test.ts": 4,
+  "internal-packages/tsql/src/query/schema.test.ts": 18,
+  "internal-packages/tsql/src/query/security.test.ts": 487,
+  "internal-packages/tsql/src/query/time_buckets.test.ts": 5,
+  "internal-packages/tsql/src/query/validator.test.ts": 250,
+  "internal-packages/rbac/src/ability.test.ts": 6,
+  "internal-packages/rbac/src/loader.test.ts": 3,
+  "internal-packages/llm-model-catalog/src/registry.test.ts": 15,
+  "internal-packages/llm-model-catalog/src/sync.test.ts": 15852,
+  "internal-packages/clickhouse/src/taskRuns.test.ts": 6813,
+  "internal-packages/clickhouse/src/tsql.test.ts": 9021,
+  "internal-packages/clickhouse/src/tsqlFunctions.test.ts": 12971,
+  "internal-packages/clickhouse/src/client/client.test.ts": 9138,
+  "internal-packages/sdk-compat-tests/src/tests/bundler.test.ts": 348,
+  "internal-packages/sdk-compat-tests/src/tests/import.test.ts": 4742,
+  "apps/webapp/test/runsReplicationService.part1.test.ts": 74000,
+  "apps/webapp/test/runsReplicationService.part2.test.ts": 64000,
+  "apps/webapp/test/runsReplicationService.part3.test.ts": 30000,
+  "apps/webapp/test/runsReplicationService.part4.test.ts": 70000,
+  "apps/webapp/test/runsReplicationService.part5.test.ts": 43000,
+  "apps/webapp/test/runsReplicationService.part6.test.ts": 32000,
+  "apps/webapp/test/runsRepository.part3.test.ts": 43000,
+  "apps/webapp/test/runsRepository.part4.test.ts": 57000,
+  "apps/webapp/test/runsReplicationService.part7.test.ts": 43000,
+  "internal-packages/schedule-engine/test/scheduleEngine2.test.ts": 43000
+}