diff --git a/.github/workflows/unit-tests-internal.yml b/.github/workflows/unit-tests-internal.yml index 6f2b32f620f..e2aae11b846 100644 --- a/.github/workflows/unit-tests-internal.yml +++ b/.github/workflows/unit-tests-internal.yml @@ -19,8 +19,8 @@ jobs: # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard fail-fast: false matrix: - shardIndex: [1, 2, 3, 4, 5, 6, 7, 8] - shardTotal: [8] + shardIndex: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + shardTotal: [12] env: DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} SHARD_INDEX: ${{ matrix.shardIndex }} @@ -83,12 +83,23 @@ jobs: - name: 🐳 Pre-pull testcontainer images if: ${{ env.DOCKERHUB_USERNAME }} run: | + # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake. Warn and back off only when another attempt follows. + pull() { + for attempt in 1 2 3; do + docker pull "$1" && return 0 + [ "${attempt}" -lt 3 ] || break + echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s" + sleep 10 + done + echo "::error::docker pull $1 failed after 3 attempts" + return 1 + } echo "Pre-pulling Docker images with authenticated session..." 
- docker pull postgres:14 - docker pull clickhouse/clickhouse-server:25.4-alpine - docker pull redis:7.2 - docker pull testcontainers/ryuk:0.14.0 - docker pull electricsql/electric:1.2.4 + pull postgres:14 + pull clickhouse/clickhouse-server:25.4-alpine + pull redis:7.2 + pull testcontainers/ryuk:0.14.0 + pull electricsql/electric:1.2.4 echo "Image pre-pull complete" - name: 📥 Download deps diff --git a/.github/workflows/unit-tests-packages.yml b/.github/workflows/unit-tests-packages.yml index 5251a993313..6642f2443c4 100644 --- a/.github/workflows/unit-tests-packages.yml +++ b/.github/workflows/unit-tests-packages.yml @@ -16,9 +16,11 @@ jobs: name: "🧪 Unit Tests: Packages" runs-on: ubuntu-latest strategy: + # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard + fail-fast: false matrix: - shardIndex: [1] - shardTotal: [1] + shardIndex: [1, 2, 3] + shardTotal: [3] env: DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} SHARD_INDEX: ${{ matrix.shardIndex }} @@ -81,12 +83,23 @@ jobs: - name: 🐳 Pre-pull testcontainer images if: ${{ env.DOCKERHUB_USERNAME }} run: | + # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake. Warn and back off only when another attempt follows. + pull() { + for attempt in 1 2 3; do + docker pull "$1" && return 0 + [ "${attempt}" -lt 3 ] || break + echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s" + sleep 10 + done + echo "::error::docker pull $1 failed after 3 attempts" + return 1 + } echo "Pre-pulling Docker images with authenticated session..." 
- docker pull postgres:14 - docker pull clickhouse/clickhouse-server:25.4-alpine - docker pull redis:7.2 - docker pull testcontainers/ryuk:0.14.0 - docker pull electricsql/electric:1.2.4 + pull postgres:14 + pull clickhouse/clickhouse-server:25.4-alpine + pull redis:7.2 + pull testcontainers/ryuk:0.14.0 + pull electricsql/electric:1.2.4 echo "Image pre-pull complete" - name: 📥 Download deps diff --git a/.github/workflows/unit-tests-webapp.yml b/.github/workflows/unit-tests-webapp.yml index 3517afbba49..dc1cc978f35 100644 --- a/.github/workflows/unit-tests-webapp.yml +++ b/.github/workflows/unit-tests-webapp.yml @@ -19,8 +19,8 @@ jobs: # one flaky shard shouldn't cancel its siblings - lets us re-run only the failed shard fail-fast: false matrix: - shardIndex: [1, 2, 3, 4, 5, 6, 7, 8] - shardTotal: [8] + shardIndex: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + shardTotal: [10] env: DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} SHARD_INDEX: ${{ matrix.shardIndex }} @@ -83,13 +83,24 @@ jobs: - name: 🐳 Pre-pull testcontainer images if: ${{ env.DOCKERHUB_USERNAME }} run: | + # Retry each pull - DockerHub registry timeouts are a recurring transient CI flake. Warn and back off only when another attempt follows. + pull() { + for attempt in 1 2 3; do + docker pull "$1" && return 0 + [ "${attempt}" -lt 3 ] || break + echo "::warning::docker pull $1 failed (attempt ${attempt}/3); retrying in 10s" + sleep 10 + done + echo "::error::docker pull $1 failed after 3 attempts" + return 1 + } echo "Pre-pulling Docker images with authenticated session..." 
- docker pull postgres:14 - docker pull clickhouse/clickhouse-server:25.4-alpine - docker pull redis:7.2 - docker pull testcontainers/ryuk:0.14.0 - docker pull electricsql/electric:1.2.4 - docker pull minio/minio:latest + pull postgres:14 + pull clickhouse/clickhouse-server:25.4-alpine + pull redis:7.2 + pull testcontainers/ryuk:0.14.0 + pull electricsql/electric:1.2.4 + pull minio/minio:latest echo "Image pre-pull complete" - name: 📥 Download deps diff --git a/.gitignore b/.gitignore index d071d5ae4e3..d5f0c945ad1 100644 --- a/.gitignore +++ b/.gitignore @@ -72,4 +72,6 @@ apps/**/public/build .mcp.log .mcp.json .cursor/debug.log -ailogger-output.log \ No newline at end of file +ailogger-output.log +# per-package vitest timing capture (transient; merged into root test-timings.json) +.vitest-timing.json diff --git a/apps/webapp/test/engine/streamBatchItems.test.ts b/apps/webapp/test/engine/streamBatchItems.test.ts index f5348d71b98..f4c2f21f8de 100644 --- a/apps/webapp/test/engine/streamBatchItems.test.ts +++ b/apps/webapp/test/engine/streamBatchItems.test.ts @@ -16,7 +16,11 @@ vi.mock("~/services/platform.v3.server", async (importOriginal) => { import { RunEngine } from "@internal/run-engine"; import { setupAuthenticatedEnvironment } from "@internal/run-engine/tests"; -import { containerTest } from "@internal/testcontainers"; +// Per-test redis (isolated): each test spins up its own RunEngine and runs batch work, which leaves +// background activity on redis that outlives the test - sharing a worker redis across the 16 cases +// here caused cross-test interference and 30s seal-timeout flakes. Same carve-out as the run-engine +// batch tests. 
+import { containerTestWithIsolatedRedis as containerTest } from "@internal/testcontainers"; import { trace } from "@opentelemetry/api"; import { PrismaClient } from "@trigger.dev/database"; import { BatchId } from "@trigger.dev/core/v3/isomorphic"; @@ -1584,10 +1588,7 @@ describe("createNdjsonParserStream", () => { const parser = createNdjsonParserStream(1024); const results = await collectStream(stream.pipeThrough(parser)); - expect(results).toEqual([ - { payload: "line1\nline2\nline3" }, - { payload: "no newlines" }, - ]); + expect(results).toEqual([{ payload: "line1\nline2\nline3" }, { payload: "no newlines" }]); }); it("should skip empty lines", async () => { @@ -1888,7 +1889,9 @@ describe("extractIndexAndTask", () => { }); it("should not match nested keys", () => { - const bytes = encoder.encode('{"nested":{"index":999,"task":"inner"},"index":5,"task":"outer"}'); + const bytes = encoder.encode( + '{"nested":{"index":999,"task":"inner"},"index":5,"task":"outer"}' + ); const result = extractIndexAndTask(bytes); expect(result.index).toBe(5); expect(result.task).toBe("outer"); diff --git a/apps/webapp/test/runsBackfiller.test.ts b/apps/webapp/test/runsBackfiller.test.ts index 87bc3822d98..fbdb16a4a7b 100644 --- a/apps/webapp/test/runsBackfiller.test.ts +++ b/apps/webapp/test/runsBackfiller.test.ts @@ -7,7 +7,7 @@ vi.mock("~/db.server", () => ({ })); import { ClickHouse } from "@internal/clickhouse"; -import { containerTest } from "@internal/testcontainers"; +import { replicationContainerTest } from "@internal/testcontainers"; import { z } from "zod"; import { RunsBackfillerService } from "~/services/runsBackfiller.server"; import { RunsReplicationService } from "~/services/runsReplicationService.server"; @@ -17,7 +17,7 @@ import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickho vi.setConfig({ testTimeout: 60_000 }); describe("RunsBackfillerService", () => { - containerTest( + replicationContainerTest( "should backfill completed runs to 
clickhouse", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const clickhouse = new ClickHouse({ diff --git a/apps/webapp/test/runsReplicationBenchmark.test.ts b/apps/webapp/test/runsReplicationBenchmark.test.ts index e17d6b41212..d1b80d06af0 100644 --- a/apps/webapp/test/runsReplicationBenchmark.test.ts +++ b/apps/webapp/test/runsReplicationBenchmark.test.ts @@ -1,5 +1,5 @@ import { ClickHouse } from "@internal/clickhouse"; -import { containerTest } from "@internal/testcontainers"; +import { replicationContainerTest } from "@internal/testcontainers"; import { fork, type ChildProcess } from "node:child_process"; import { performance, PerformanceObserver } from "node:perf_hooks"; import { setTimeout } from "node:timers/promises"; @@ -501,7 +501,7 @@ function compareBenchmarks(baseline: BenchmarkResult, comparison: BenchmarkResul } describe("RunsReplicationService Benchmark", () => { - containerTest.skipIf(process.env.BENCHMARKS_ENABLED !== "1")( + replicationContainerTest.skipIf(process.env.BENCHMARKS_ENABLED !== "1")( "should benchmark error fingerprinting performance impact", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { // Enable replica identity for TaskRun table diff --git a/apps/webapp/test/runsReplicationService.part1.test.ts b/apps/webapp/test/runsReplicationService.part1.test.ts index d2a3c1b7627..5a085944a61 100644 --- a/apps/webapp/test/runsReplicationService.part1.test.ts +++ b/apps/webapp/test/runsReplicationService.part1.test.ts @@ -1,5 +1,5 @@ import { ClickHouse } from "@internal/clickhouse"; -import { containerTest } from "@internal/testcontainers"; +import { replicationContainerTest } from "@internal/testcontainers"; import { setTimeout } from "node:timers/promises"; import { z } from "zod"; import { TaskRunStatus } from "~/database-types"; @@ -10,8 +10,8 @@ import superjson from "superjson"; vi.setConfig({ testTimeout: 60_000 }); -describe("RunsReplicationService (part 1/2)", () => { - 
containerTest( +describe("RunsReplicationService (part 1/7)", () => { + replicationContainerTest( "should replicate runs to clickhouse", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); @@ -135,7 +135,7 @@ describe("RunsReplicationService (part 1/2)", () => { } ); - containerTest( + replicationContainerTest( "should replicate runs with super json payloads to clickhouse", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); @@ -276,7 +276,7 @@ describe("RunsReplicationService (part 1/2)", () => { } ); - containerTest( + replicationContainerTest( "should not produce any flush spans when no TaskRun events are produced", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); @@ -348,7 +348,7 @@ describe("RunsReplicationService (part 1/2)", () => { } ); - containerTest( + replicationContainerTest( "should replicate a new TaskRun to ClickHouse using batching insert strategy", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); @@ -452,7 +452,7 @@ describe("RunsReplicationService (part 1/2)", () => { } ); - containerTest( + replicationContainerTest( "should insert the payload into ClickHouse when a TaskRun is created", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); @@ -553,7 +553,7 @@ describe("RunsReplicationService (part 1/2)", () => { } ); - containerTest( + replicationContainerTest( "should insert the payload even if it's very large into ClickHouse when a TaskRun is created", async ({ clickhouseContainer, 
redisOptions, postgresContainer, prisma }) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); @@ -658,701 +658,4 @@ describe("RunsReplicationService (part 1/2)", () => { await runsReplicationService.stop(); } ); - - containerTest( - "should replicate updates to an existing TaskRun to ClickHouse", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-update", - logLevel: "warn", - }); - - const runsReplicationService = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-update", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 1, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - await runsReplicationService.start(); - - const organization = await prisma.organization.create({ - data: { - title: "test-update", - slug: "test-update", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-update", - slug: "test-update", - organizationId: organization.id, - externalRef: "test-update", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-update", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-update", - pkApiKey: "test-update", - shortcode: "test-update", - }, - }); - - const uniqueFriendlyId = `run_update_${Date.now()}`; - const taskRun = await prisma.taskRun.create({ - data: { - friendlyId: uniqueFriendlyId, - taskIdentifier: 
"my-task-update", - payload: JSON.stringify({ foo: "update-test" }), - payloadType: "application/json", - traceId: "update-1234", - spanId: "update-1234", - queue: "test-update", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - }, - }); - - await setTimeout(1000); - - await prisma.taskRun.update({ - where: { id: taskRun.id }, - data: { status: TaskRunStatus.COMPLETED_SUCCESSFULLY }, - }); - - await setTimeout(1000); - - const queryRuns = clickhouse.reader.query({ - name: "runs-replication-update", - query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}", - schema: z.any(), - params: z.object({ run_id: z.string() }), - }); - - const [queryError, result] = await queryRuns({ run_id: taskRun.id }); - - expect(queryError).toBeNull(); - expect(result?.length).toBe(1); - expect(result?.[0]).toEqual( - expect.objectContaining({ - run_id: taskRun.id, - status: TaskRunStatus.COMPLETED_SUCCESSFULLY, - }) - ); - - await runsReplicationService.stop(); - } - ); - - containerTest( - "should replicate deletions of a TaskRun to ClickHouse and mark as deleted", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-delete", - logLevel: "warn", - }); - - const runsReplicationService = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-delete", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 1, - leaderLockTimeoutMs: 5000, - 
leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - await runsReplicationService.start(); - - const organization = await prisma.organization.create({ - data: { - title: "test-delete", - slug: "test-delete", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-delete", - slug: "test-delete", - organizationId: organization.id, - externalRef: "test-delete", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-delete", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-delete", - pkApiKey: "test-delete", - shortcode: "test-delete", - }, - }); - - const uniqueFriendlyId = `run_delete_${Date.now()}`; - const taskRun = await prisma.taskRun.create({ - data: { - friendlyId: uniqueFriendlyId, - taskIdentifier: "my-task-delete", - payload: JSON.stringify({ foo: "delete-test" }), - payloadType: "application/json", - traceId: "delete-1234", - spanId: "delete-1234", - queue: "test-delete", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - }, - }); - - await setTimeout(1000); - - await prisma.taskRun.delete({ - where: { id: taskRun.id }, - }); - - await setTimeout(1000); - - const queryRuns = clickhouse.reader.query({ - name: "runs-replication-delete", - query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}", - schema: z.any(), - params: z.object({ run_id: z.string() }), - }); - - const [queryError, result] = await queryRuns({ run_id: taskRun.id }); - - expect(queryError).toBeNull(); - expect(result?.length).toBe(0); - - await runsReplicationService.stop(); - } - ); - - containerTest( - "should gracefully shutdown and allow a new service to pick up from the correct LSN (handover)", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) 
=> { - await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-shutdown-handover", - logLevel: "warn", - }); - - // Service A - const runsReplicationServiceA = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-shutdown-handover", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 1, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - await runsReplicationServiceA.start(); - - const organization = await prisma.organization.create({ - data: { - title: "test-shutdown-handover", - slug: "test-shutdown-handover", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-shutdown-handover", - slug: "test-shutdown-handover", - organizationId: organization.id, - externalRef: "test-shutdown-handover", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-shutdown-handover", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-shutdown-handover", - pkApiKey: "test-shutdown-handover", - shortcode: "test-shutdown-handover", - }, - }); - - const run1Id = `run_shutdown_handover_1_${Date.now()}`; - - runsReplicationServiceA.events.on("message", async ({ message, service }) => { - if (message.tag === "insert") { - await service.shutdown(); - } - }); - - const taskRun1 = await prisma.taskRun.create({ - data: { - friendlyId: run1Id, - taskIdentifier: "my-task-shutdown-handover-1", - payload: JSON.stringify({ foo: "handover-1" }), - payloadType: "application/json", - traceId: 
"handover-1-1234", - spanId: "handover-1-1234", - queue: "test-shutdown-handover", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - }, - }); - - const run2Id = `run_shutdown_handover_2_${Date.now()}`; - const taskRun2 = await prisma.taskRun.create({ - data: { - friendlyId: run2Id, - taskIdentifier: "my-task-shutdown-handover-2", - payload: JSON.stringify({ foo: "handover-2" }), - payloadType: "application/json", - traceId: "handover-2-1234", - spanId: "handover-2-1234", - queue: "test-shutdown-handover", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - }, - }); - - await setTimeout(1000); - - const queryRuns = clickhouse.reader.query({ - name: "runs-replication-shutdown-handover", - query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL ORDER BY created_at ASC", - schema: z.any(), - }); - const [queryError, result] = await queryRuns({}); - expect(queryError).toBeNull(); - expect(result?.length).toBe(1); - expect(result?.[0]).toEqual(expect.objectContaining({ run_id: taskRun1.id })); - - // Service B - const runsReplicationServiceB = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-shutdown-handover", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 1, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - await runsReplicationServiceB.start(); - - await setTimeout(1000); - - const [queryErrorB, resultB] = await queryRuns({}); - - expect(queryErrorB).toBeNull(); - 
expect(resultB?.length).toBe(2); - expect(resultB).toEqual( - expect.arrayContaining([ - expect.objectContaining({ run_id: taskRun1.id }), - expect.objectContaining({ run_id: taskRun2.id }), - ]) - ); - - await runsReplicationServiceB.stop(); - } - ); - - containerTest( - "should not re-process already handled data if shutdown is called after all transactions are processed", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-shutdown-after-processed", - logLevel: "warn", - }); - - // Service A - const runsReplicationServiceA = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-shutdown-after-processed", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 1, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - await runsReplicationServiceA.start(); - - const organization = await prisma.organization.create({ - data: { - title: "test-shutdown-after-processed", - slug: "test-shutdown-after-processed", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-shutdown-after-processed", - slug: "test-shutdown-after-processed", - organizationId: organization.id, - externalRef: "test-shutdown-after-processed", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-shutdown-after-processed", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-shutdown-after-processed", - pkApiKey: 
"test-shutdown-after-processed", - shortcode: "test-shutdown-after-processed", - }, - }); - - const run1Id = `run_shutdown_after_processed_${Date.now()}`; - const taskRun1 = await prisma.taskRun.create({ - data: { - friendlyId: run1Id, - taskIdentifier: "my-task-shutdown-after-processed", - payload: JSON.stringify({ foo: "after-processed" }), - payloadType: "application/json", - traceId: "after-processed-1234", - spanId: "after-processed-1234", - queue: "test-shutdown-after-processed", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - }, - }); - - await setTimeout(1000); - - const queryRuns = clickhouse.reader.query({ - name: "runs-replication-shutdown-after-processed", - query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}", - schema: z.any(), - params: z.object({ run_id: z.string() }), - }); - - const [queryErrorA, resultA] = await queryRuns({ run_id: taskRun1.id }); - expect(queryErrorA).toBeNull(); - expect(resultA?.length).toBe(1); - expect(resultA?.[0]).toEqual(expect.objectContaining({ run_id: taskRun1.id })); - - await runsReplicationServiceA.shutdown(); - - await setTimeout(500); - - const taskRun2 = await prisma.taskRun.create({ - data: { - friendlyId: `run_shutdown_after_processed_${Date.now()}`, - taskIdentifier: "my-task-shutdown-after-processed", - payload: JSON.stringify({ foo: "after-processed-2" }), - payloadType: "application/json", - traceId: "after-processed-2-1234", - spanId: "after-processed-2-1234", - queue: "test-shutdown-after-processed", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - }, - }); - - // Service B - const runsReplicationServiceB = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - 
pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-shutdown-after-processed", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 1, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - await runsReplicationServiceB.start(); - - await setTimeout(1000); - - const [queryErrorB, resultB] = await queryRuns({ run_id: taskRun2.id }); - expect(queryErrorB).toBeNull(); - expect(resultB?.length).toBe(1); - expect(resultB?.[0]).toEqual(expect.objectContaining({ run_id: taskRun2.id })); - - await runsReplicationServiceB.stop(); - } - ); - - containerTest( - "should record metrics with correct values when replicating runs", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-metrics", - logLevel: "warn", - }); - - const { tracer } = createInMemoryTracing(); - const metricsHelper = createInMemoryMetrics(); - - const runsReplicationService = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-metrics", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 2, - flushIntervalMs: 100, - flushBatchSize: 5, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - tracer, - meter: metricsHelper.meter, - logLevel: "warn", - }); - - await runsReplicationService.start(); - - const organization = await prisma.organization.create({ - data: { - title: "test-metrics", - slug: 
"test-metrics", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-metrics", - slug: "test-metrics", - organizationId: organization.id, - externalRef: "test-metrics", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-metrics", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-metrics", - pkApiKey: "test-metrics", - shortcode: "test-metrics", - }, - }); - - const now = Date.now(); - const createdRuns: string[] = []; - - for (let i = 0; i < 5; i++) { - const run = await prisma.taskRun.create({ - data: { - friendlyId: `run_metrics_${now}_${i}`, - taskIdentifier: "my-task-metrics", - payload: JSON.stringify({ index: i }), - payloadType: "application/json", - traceId: `metrics-${now}-${i}`, - spanId: `metrics-${now}-${i}`, - queue: "test-metrics", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - }, - }); - createdRuns.push(run.id); - } - - await setTimeout(1000); - - for (let i = 0; i < 3; i++) { - await prisma.taskRun.update({ - where: { id: createdRuns[i] }, - data: { status: "EXECUTING" }, - }); - } - - await setTimeout(1000); - - for (let i = 0; i < 2; i++) { - await prisma.taskRun.update({ - where: { id: createdRuns[i] }, - data: { - status: "COMPLETED_SUCCESSFULLY", - completedAt: new Date(), - output: JSON.stringify({ result: "success" }), - outputType: "application/json", - }, - }); - } - - await setTimeout(1000); - - const metrics = await metricsHelper.getMetrics(); - - function getMetricData(name: string) { - for (const resourceMetrics of metrics) { - for (const scopeMetrics of resourceMetrics.scopeMetrics) { - for (const metric of scopeMetrics.metrics) { - if (metric.descriptor.name === name) { - return metric; - } - } - } - } - return null; - } - - function sumCounterValues(metric: any): 
number { - if (!metric?.dataPoints) return 0; - return metric.dataPoints.reduce((sum: number, dp: any) => sum + (dp.value || 0), 0); - } - - function histogramHasData(metric: any): boolean { - if (!metric?.dataPoints || metric.dataPoints.length === 0) return false; - return metric.dataPoints.some((dp: any) => { - return ( - (typeof dp.count === "number" && dp.count > 0) || - (typeof dp.value?.count === "number" && dp.value.count > 0) || - (Array.isArray(dp.buckets?.counts) && dp.buckets.counts.some((c: number) => c > 0)) || - (typeof dp.sum === "number" && dp.sum > 0) || - typeof dp.min === "number" || - typeof dp.max === "number" - ); - }); - } - - function getCounterAttributeValues(metric: any, attributeName: string): unknown[] { - if (!metric?.dataPoints) return []; - return metric.dataPoints - .filter((dp: any) => dp.attributes?.[attributeName] !== undefined) - .map((dp: any) => dp.attributes[attributeName]); - } - - const batchesFlushed = getMetricData("runs_replication.batches_flushed"); - expect(batchesFlushed).not.toBeNull(); - const totalBatchesFlushed = sumCounterValues(batchesFlushed); - expect(totalBatchesFlushed).toBeGreaterThanOrEqual(1); - - const successAttributeValues = getCounterAttributeValues(batchesFlushed, "success"); - expect(successAttributeValues.length).toBeGreaterThanOrEqual(1); - - const taskRunsInserted = getMetricData("runs_replication.task_runs_inserted"); - expect(taskRunsInserted).not.toBeNull(); - const totalTaskRunsInserted = sumCounterValues(taskRunsInserted); - expect(totalTaskRunsInserted).toBeGreaterThanOrEqual(5); - - const payloadsInserted = getMetricData("runs_replication.payloads_inserted"); - expect(payloadsInserted).not.toBeNull(); - const totalPayloadsInserted = sumCounterValues(payloadsInserted); - expect(totalPayloadsInserted).toBeGreaterThanOrEqual(1); - - const eventsProcessed = getMetricData("runs_replication.events_processed"); - expect(eventsProcessed).not.toBeNull(); - const totalEventsProcessed = 
sumCounterValues(eventsProcessed); - expect(totalEventsProcessed).toBeGreaterThanOrEqual(1); - - const eventTypes = getCounterAttributeValues(eventsProcessed, "event_type"); - expect(eventTypes.length).toBeGreaterThanOrEqual(1); - expect(eventTypes).toContain("insert"); - - const batchSize = getMetricData("runs_replication.batch_size"); - expect(batchSize).not.toBeNull(); - expect(histogramHasData(batchSize)).toBe(true); - - const replicationLag = getMetricData("runs_replication.replication_lag_ms"); - expect(replicationLag).not.toBeNull(); - expect(histogramHasData(replicationLag)).toBe(true); - - const flushDuration = getMetricData("runs_replication.flush_duration_ms"); - expect(flushDuration).not.toBeNull(); - expect(histogramHasData(flushDuration)).toBe(true); - - await runsReplicationService.stop(); - await metricsHelper.shutdown(); - } - ); }); diff --git a/apps/webapp/test/runsReplicationService.part2.test.ts b/apps/webapp/test/runsReplicationService.part2.test.ts index bd7348186b3..90be5b18322 100644 --- a/apps/webapp/test/runsReplicationService.part2.test.ts +++ b/apps/webapp/test/runsReplicationService.part2.test.ts @@ -1,5 +1,5 @@ import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse"; -import { containerTest } from "@internal/testcontainers"; +import { replicationContainerTest } from "@internal/testcontainers"; import { Logger } from "@trigger.dev/core/logger"; import { readFile } from "node:fs/promises"; import { setTimeout } from "node:timers/promises"; @@ -10,8 +10,8 @@ import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickho vi.setConfig({ testTimeout: 60_000 }); -describe("RunsReplicationService (part 2/2)", () => { - containerTest( +describe("RunsReplicationService (part 2/7)", () => { + replicationContainerTest( "should handover leadership to a second service, and the second service should be able to extend the leader lock", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma 
}) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); @@ -141,7 +141,7 @@ describe("RunsReplicationService (part 2/2)", () => { } ); - containerTest( + replicationContainerTest( "should replicate all 1,000 TaskRuns inserted in bulk to ClickHouse", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); @@ -256,7 +256,7 @@ describe("RunsReplicationService (part 2/2)", () => { } ); - containerTest( + replicationContainerTest( "should replicate all 1,000 TaskRuns inserted in bulk to ClickHouse with updates", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); @@ -376,1062 +376,4 @@ describe("RunsReplicationService (part 2/2)", () => { await runsReplicationService.stop(); } ); - - containerTest( - "should replicate all events in a single transaction (insert, update)", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-multi-event-tx", - logLevel: "warn", - }); - - const runsReplicationService = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-multi-event-tx", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 10, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - await runsReplicationService.start(); - - const organization = await 
prisma.organization.create({ - data: { - title: "test-multi-event-tx", - slug: "test-multi-event-tx", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-multi-event-tx", - slug: "test-multi-event-tx", - organizationId: organization.id, - externalRef: "test-multi-event-tx", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-multi-event-tx", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-multi-event-tx", - pkApiKey: "test-multi-event-tx", - shortcode: "test-multi-event-tx", - }, - }); - - // Start a transaction - const [run1, run2] = await prisma.$transaction(async (tx) => { - const run1 = await tx.taskRun.create({ - data: { - friendlyId: `run_multi_event_1_${Date.now()}`, - taskIdentifier: "my-task-multi-event-1", - payload: JSON.stringify({ multi: 1 }), - payloadType: "application/json", - traceId: `multi-1-${Date.now()}`, - spanId: `multi-1-${Date.now()}`, - queue: "test-multi-event-tx", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - attemptNumber: 1, - createdAt: new Date(), - updatedAt: new Date(), - }, - }); - const run2 = await tx.taskRun.create({ - data: { - friendlyId: `run_multi_event_2_${Date.now()}`, - taskIdentifier: "my-task-multi-event-2", - payload: JSON.stringify({ multi: 2 }), - payloadType: "application/json", - traceId: `multi-2-${Date.now()}`, - spanId: `multi-2-${Date.now()}`, - queue: "test-multi-event-tx", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - attemptNumber: 1, - createdAt: new Date(), - updatedAt: new Date(), - }, - }); - await tx.taskRun.update({ - where: { id: run1.id }, - data: { status: "COMPLETED_SUCCESSFULLY" }, - }); - - 
return [run1, run2]; - }); - - // Wait for replication - await setTimeout(1000); - - // Query ClickHouse for both runs using FINAL - const queryRuns = clickhouse.reader.query({ - name: "runs-replication-multi-event-tx", - query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id IN ({run_id_1:String}, {run_id_2:String})`, - schema: z.any(), - params: z.object({ run_id_1: z.string(), run_id_2: z.string() }), - }); - - const [queryError, result] = await queryRuns({ run_id_1: run1.id, run_id_2: run2.id }); - expect(queryError).toBeNull(); - expect(result?.length).toBe(2); - const run1Result = result?.find((r: any) => r.run_id === run1.id); - const run2Result = result?.find((r: any) => r.run_id === run2.id); - expect(run1Result).toBeDefined(); - expect(run1Result).toEqual( - expect.objectContaining({ run_id: run1.id, status: "COMPLETED_SUCCESSFULLY" }) - ); - expect(run2Result).toBeDefined(); - expect(run2Result).toEqual(expect.objectContaining({ run_id: run2.id })); - - await runsReplicationService.stop(); - } - ); - - containerTest( - "should be able to handle processing transactions for a long period of time", - { timeout: 60_000 * 5 }, - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-long-tx", - logLevel: "warn", - }); - - const runsReplicationService = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-long-tx", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 10, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - 
logLevel: "warn", - }); - - await runsReplicationService.start(); - - const organization = await prisma.organization.create({ - data: { - title: "test-long-tx", - slug: "test-long-tx", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-long-tx", - slug: "test-long-tx", - organizationId: organization.id, - externalRef: "test-long-tx", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-long-tx", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-long-tx", - pkApiKey: "test-long-tx", - shortcode: "test-long-tx", - }, - }); - - // Start an interval that will create a new run every 500ms for 4 minutes - const interval = setInterval(async () => { - await prisma.taskRun.create({ - data: { - friendlyId: `run_long_tx_${Date.now()}`, - taskIdentifier: "my-task-long-tx", - payload: JSON.stringify({ long: 1 }), - payloadType: "application/json", - traceId: `long-${Date.now()}`, - spanId: `long-${Date.now()}`, - queue: "test-long-tx", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - attemptNumber: 1, - createdAt: new Date(), - updatedAt: new Date(), - }, - }); - }, 500); - - // Wait for 1 minute - await setTimeout(1 * 60 * 1000); - - // Stop the interval - clearInterval(interval); - - // Wait for replication - await setTimeout(1000); - - // Query ClickHouse for all runs using FINAL - const queryRuns = clickhouse.reader.query({ - name: "runs-replication-long-tx", - query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL`, - schema: z.any(), - }); - - const [queryError, result] = await queryRuns({}); - expect(queryError).toBeNull(); - - expect(result?.length).toBeGreaterThanOrEqual(50); - - await runsReplicationService.stop(); - } - ); - - containerTest( - "should insert TaskRuns even if there are incomplete Unicode 
escape sequences in the JSON", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-stress-bulk-insert", - logLevel: "warn", - }); - - const runsReplicationService = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-stress-bulk-insert", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 10, - flushIntervalMs: 100, - flushBatchSize: 50, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - await runsReplicationService.start(); - - const organization = await prisma.organization.create({ - data: { - title: "test-stress-bulk-insert", - slug: "test-stress-bulk-insert", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-stress-bulk-insert", - slug: "test-stress-bulk-insert", - organizationId: organization.id, - externalRef: "test-stress-bulk-insert", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-stress-bulk-insert", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-stress-bulk-insert", - pkApiKey: "test-stress-bulk-insert", - shortcode: "test-stress-bulk-insert", - }, - }); - - // Prepare 9 unique TaskRuns - const now = Date.now(); - const runsData = Array.from({ length: 9 }, (_, i) => ({ - friendlyId: `run_bulk_${now}_${i}`, - taskIdentifier: `my-task-bulk`, - payload: `{"title": "hello"}`, - payloadType: "application/json", - traceId: `bulk-${i}`, - spanId: `bulk-${i}`, - queue: "test-stress-bulk-insert", - 
runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT" as const, - engine: "V2" as const, - status: "PENDING" as const, - attemptNumber: 1, - createdAt: new Date(now + i), - updatedAt: new Date(now + i), - })); - - //add a run with incomplete Unicode escape sequences - const badPayload = await readFile(`${__dirname}/bad-clickhouse-output.json`, "utf-8"); - const hasProblems = detectBadJsonStrings(badPayload); - expect(hasProblems).toBe(true); - - runsData.push({ - friendlyId: `run_bulk_${now}_10`, - taskIdentifier: `my-task-bulk`, - payload: badPayload, - payloadType: "application/json", - traceId: `bulk-10`, - spanId: `bulk-10`, - queue: "test-stress-bulk-insert", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT" as const, - engine: "V2" as const, - status: "PENDING" as const, - attemptNumber: 1, - createdAt: new Date(now + 10), - updatedAt: new Date(now + 10), - }); - - // Bulk insert - const created = await prisma.taskRun.createMany({ data: runsData }); - expect(created.count).toBe(10); - - // Update the runs (not the 10th one) - await prisma.taskRun.updateMany({ - where: { - spanId: { not: "bulk-10" }, - }, - data: { - status: "COMPLETED_SUCCESSFULLY", - output: `{"foo":"bar"}`, - outputType: "application/json", - }, - }); - - // Give the 10th one a bad payload - await prisma.taskRun.updateMany({ - where: { - spanId: "bulk-10", - }, - data: { - status: "COMPLETED_SUCCESSFULLY", - output: badPayload, - outputType: "application/json", - }, - }); - - // Wait for replication - await setTimeout(5000); - - // Query ClickHouse for all runs using FINAL - const queryRuns = clickhouse.reader.query({ - name: "runs-replication-stress-bulk-insert", - query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL`, - schema: z.any(), - }); - - const [queryError, result] = await queryRuns({}); - 
expect(queryError).toBeNull(); - expect(result?.length).toBe(10); - - // Check a few random runs for correctness - for (let i = 0; i < 9; i++) { - const expected = runsData[i]; - const found = result?.find((r: any) => r.friendly_id === expected.friendlyId); - expect(found).toBeDefined(); - expect(found).toEqual( - expect.objectContaining({ - friendly_id: expected.friendlyId, - trace_id: expected.traceId, - task_identifier: expected.taskIdentifier, - status: "COMPLETED_SUCCESSFULLY", - }) - ); - expect(found?.output).toBeDefined(); - } - - // Check the run with the bad JSON - const foundBad = result?.find((r: any) => r.span_id === "bulk-10"); - expect(foundBad).toBeDefined(); - expect(foundBad?.output).toStrictEqual({}); - - await runsReplicationService.stop(); - } - ); - - containerTest( - "should merge duplicate event+run.id combinations keeping the latest version", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public.\"TaskRun\" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-merge-batch", - logLevel: "warn", - }); - - const runsReplicationService = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-merge-batch", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 10, // Higher batch size to test merging - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - // Listen to batchFlushed events to verify merging - const batchFlushedEvents: Array<{ - flushId: string; - taskRunInserts: any[]; - payloadInserts: any[]; - }> = []; - - 
runsReplicationService.events.on("batchFlushed", (event) => { - batchFlushedEvents.push(event); - }); - - await runsReplicationService.start(); - - const organization = await prisma.organization.create({ - data: { - title: "test-merge-batch", - slug: "test-merge-batch", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-merge-batch", - slug: "test-merge-batch", - organizationId: organization.id, - externalRef: "test-merge-batch", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-merge-batch", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-merge-batch", - pkApiKey: "test-merge-batch", - shortcode: "test-merge-batch", - }, - }); - - // Create a run and rapidly update it multiple times in a transaction - // This should create multiple events for the same run that get merged - const run = await prisma.taskRun.create({ - data: { - friendlyId: `run_merge_${Date.now()}`, - taskIdentifier: "my-task-merge", - payload: JSON.stringify({ version: 1 }), - payloadType: "application/json", - traceId: `merge-${Date.now()}`, - spanId: `merge-${Date.now()}`, - queue: "test-merge-batch", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING_VERSION", - }, - }); - await prisma.taskRun.update({ - where: { id: run.id }, - data: { status: "DEQUEUED" }, - }); - await prisma.taskRun.update({ - where: { id: run.id }, - data: { status: "EXECUTING" }, - }); - await prisma.taskRun.update({ - where: { id: run.id }, - data: { status: "PAUSED" }, - }); - await prisma.taskRun.update({ - where: { id: run.id }, - data: { status: "EXECUTING" }, - }); - await prisma.taskRun.update({ - where: { id: run.id }, - data: { status: "COMPLETED_SUCCESSFULLY" }, - }); - - await setTimeout(1000); - - 
expect(batchFlushedEvents?.[0].taskRunInserts).toHaveLength(2); - // Use getTaskRunField for type-safe array access - expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[0], "run_id")).toEqual(run.id); - expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[0], "status")).toEqual( - "PENDING_VERSION" - ); - expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[1], "run_id")).toEqual(run.id); - expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[1], "status")).toEqual( - "COMPLETED_SUCCESSFULLY" - ); - - await runsReplicationService.stop(); - } - ); - - containerTest( - "should sort batch inserts according to table schema ordering for optimal performance", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public.\"TaskRun\" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-sorting", - logLevel: "warn", - }); - - const runsReplicationService = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-sorting", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 10, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - // Listen to batchFlushed events to verify sorting - const batchFlushedEvents: Array<{ - flushId: string; - taskRunInserts: any[]; - payloadInserts: any[]; - }> = []; - - runsReplicationService.events.on("batchFlushed", (event) => { - batchFlushedEvents.push(event); - }); - - await runsReplicationService.start(); - - // Create two organizations to test sorting by organization_id - const org1 = await prisma.organization.create({ - 
data: { title: "org-z", slug: "org-z" }, - }); - - const org2 = await prisma.organization.create({ - data: { title: "org-a", slug: "org-a" }, - }); - - const project1 = await prisma.project.create({ - data: { - name: "test-sorting-z", - slug: "test-sorting-z", - organizationId: org1.id, - externalRef: "test-sorting-z", - }, - }); - - const project2 = await prisma.project.create({ - data: { - name: "test-sorting-a", - slug: "test-sorting-a", - organizationId: org2.id, - externalRef: "test-sorting-a", - }, - }); - - const env1 = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-sorting-z", - type: "DEVELOPMENT", - projectId: project1.id, - organizationId: org1.id, - apiKey: "test-sorting-z", - pkApiKey: "test-sorting-z", - shortcode: "test-sorting-z", - }, - }); - - const env2 = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-sorting-a", - type: "DEVELOPMENT", - projectId: project2.id, - organizationId: org2.id, - apiKey: "test-sorting-a", - pkApiKey: "test-sorting-a", - shortcode: "test-sorting-a", - }, - }); - - const now = Date.now(); - - const run1 = await prisma.taskRun.create({ - data: { - friendlyId: `run_sort_org_z_${now}`, - taskIdentifier: "my-task-sort", - payload: JSON.stringify({ org: "z" }), - payloadType: "application/json", - traceId: `sort-z-${now}`, - spanId: `sort-z-${now}`, - queue: "test-sorting", - runtimeEnvironmentId: env1.id, - projectId: project1.id, - organizationId: org1.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - createdAt: new Date(now + 2000), - }, - }); - await prisma.taskRun.update({ - where: { id: run1.id }, - data: { status: "DEQUEUED" }, - }); - - await prisma.taskRun.create({ - data: { - friendlyId: `run_sort_org_a_${now}`, - taskIdentifier: "my-task-sort", - payload: JSON.stringify({ org: "a" }), - payloadType: "application/json", - traceId: `sort-a-${now}`, - spanId: `sort-a-${now}`, - queue: "test-sorting", - runtimeEnvironmentId: env2.id, - projectId: 
project2.id, - organizationId: org2.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - createdAt: new Date(now + 1000), - }, - }); - - await prisma.taskRun.create({ - data: { - friendlyId: `run_sort_org_a_${now}_2`, - taskIdentifier: "my-task-sort", - payload: JSON.stringify({ org: "a" }), - payloadType: "application/json", - traceId: `sort-a-${now}`, - spanId: `sort-a-${now}`, - queue: "test-sorting", - runtimeEnvironmentId: env2.id, - projectId: project2.id, - organizationId: org2.id, - environmentType: "DEVELOPMENT", - engine: "V2", - status: "PENDING", - createdAt: new Date(now), - }, - }); - - await setTimeout(1000); - - expect(batchFlushedEvents[0]?.taskRunInserts.length).toBeGreaterThan(1); - expect(batchFlushedEvents[0]?.payloadInserts.length).toBeGreaterThan(1); - - // Verify sorting order: organization_id, project_id, environment_id, created_at, run_id - for (let i = 1; i < batchFlushedEvents[0]?.taskRunInserts.length; i++) { - const prev = batchFlushedEvents[0]!.taskRunInserts[i - 1]; - const curr = batchFlushedEvents[0]!.taskRunInserts[i]; - - const prevKey = [ - getTaskRunField(prev, "organization_id"), - getTaskRunField(prev, "project_id"), - getTaskRunField(prev, "environment_id"), - getTaskRunField(prev, "created_at"), - getTaskRunField(prev, "run_id"), - ]; - const currKey = [ - getTaskRunField(curr, "organization_id"), - getTaskRunField(curr, "project_id"), - getTaskRunField(curr, "environment_id"), - getTaskRunField(curr, "created_at"), - getTaskRunField(curr, "run_id"), - ]; - - const keysAreEqual = prevKey.every((val, idx) => val === currKey[idx]); - if (keysAreEqual) { - // Also valid order - continue; - } - - // Compare tuples lexicographically - let isCorrectOrder = false; - for (let j = 0; j < prevKey.length; j++) { - if (prevKey[j] < currKey[j]) { - isCorrectOrder = true; - break; - } - if (prevKey[j] > currKey[j]) { - isCorrectOrder = false; - break; - } - // If equal, continue to next field - } - - 
expect(isCorrectOrder).toBeTruthy(); - } - - // Verify payloadInserts are also sorted by run_id - for (let i = 1; i < batchFlushedEvents[0]?.payloadInserts.length; i++) { - const prev = batchFlushedEvents[0]!.payloadInserts[i - 1]; - const curr = batchFlushedEvents[0]!.payloadInserts[i]; - expect(getPayloadField(prev, "run_id") <= getPayloadField(curr, "run_id")).toBeTruthy(); - } - - await runsReplicationService.stop(); - } - ); - - containerTest( - "should exhaustively replicate all TaskRun columns to ClickHouse", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); - - const clickhouse = new ClickHouse({ - url: clickhouseContainer.getConnectionUrl(), - name: "runs-replication-exhaustive", - logLevel: "warn", - }); - - const runsReplicationService = new RunsReplicationService({ - clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), - pgConnectionUrl: postgresContainer.getConnectionUri(), - serviceName: "runs-replication-exhaustive", - slotName: "task_runs_to_clickhouse_v1", - publicationName: "task_runs_to_clickhouse_v1_publication", - redisOptions, - maxFlushConcurrency: 1, - flushIntervalMs: 100, - flushBatchSize: 1, - leaderLockTimeoutMs: 5000, - leaderLockExtendIntervalMs: 1000, - ackIntervalSeconds: 5, - logLevel: "warn", - }); - - await runsReplicationService.start(); - - const organization = await prisma.organization.create({ - data: { - title: "test-exhaustive", - slug: "test-exhaustive", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test-exhaustive", - slug: "test-exhaustive", - organizationId: organization.id, - externalRef: "test-exhaustive", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test-exhaustive", - type: "PRODUCTION", - projectId: project.id, - organizationId: organization.id, - apiKey: "test-exhaustive", - pkApiKey: 
"test-exhaustive", - shortcode: "test-exhaustive", - }, - }); - - // Create a batch for the batchId field - const batch = await prisma.batchTaskRun.create({ - data: { - friendlyId: "batch_exhaustive", - runtimeEnvironmentId: runtimeEnvironment.id, - status: "PENDING", - }, - }); - - // Create a root run for the rootTaskRunId field - const rootRun = await prisma.taskRun.create({ - data: { - friendlyId: "run_root_exhaustive", - taskIdentifier: "root-task", - payload: JSON.stringify({ root: true }), - traceId: "root-trace-id", - spanId: "root-span-id", - queue: "root-queue", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "PRODUCTION", - engine: "V2", - }, - }); - - // Create a parent run for the parentTaskRunId field - const parentRun = await prisma.taskRun.create({ - data: { - friendlyId: "run_parent_exhaustive", - taskIdentifier: "parent-task", - payload: JSON.stringify({ parent: true }), - traceId: "parent-trace-id", - spanId: "parent-span-id", - queue: "parent-queue", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "PRODUCTION", - engine: "V2", - rootTaskRunId: rootRun.id, - depth: 1, - }, - }); - - // Set up all the dates we'll use - const now = new Date(); - const createdAt = new Date(now.getTime() - 10000); - const updatedAt = new Date(now.getTime() - 5000); - const startedAt = new Date(now.getTime() - 8000); - const executedAt = new Date(now.getTime() - 7500); - const completedAt = new Date(now.getTime() - 6000); - const delayUntil = new Date(now.getTime() - 9000); - const queuedAt = new Date(now.getTime() - 9500); - const expiredAt = null; // Not expired - - // Create the main task run with ALL fields populated - const taskRun = await prisma.taskRun.create({ - data: { - // Core identifiers - friendlyId: "run_exhaustive_test", - taskIdentifier: "exhaustive-task", - - // Environment/project/org - 
runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "PRODUCTION", - - // Engine and execution - engine: "V2", - status: "COMPLETED_SUCCESSFULLY", - attemptNumber: 3, - queue: "exhaustive-queue", - workerQueue: "exhaustive-worker-queue", - - // Relationships - // Note: scheduleId is not set to test empty string handling - batchId: batch.id, - rootTaskRunId: rootRun.id, - parentTaskRunId: parentRun.id, - depth: 2, - - // Timestamps - createdAt, - updatedAt, - startedAt, - executedAt, - completedAt, - delayUntil, - queuedAt, - expiredAt, - - // Payload and output - payload: JSON.stringify({ input: "test-payload" }), - payloadType: "application/json", - output: JSON.stringify({ result: "test-output" }), - outputType: "application/json", - error: { message: "test error", name: "TestError" }, - - // Tracing - traceId: "exhaustive-trace-id-12345", - spanId: "exhaustive-span-id-67890", - - // Versioning - taskVersion: "1.2.3", - sdkVersion: "3.0.0", - cliVersion: "2.5.1", - - // Execution settings - machinePreset: "large-1x", - idempotencyKey: "exhaustive-idempotency-key-hashed", - idempotencyKeyOptions: { - key: "exhaustive-idempotency-key", - scope: "run", - }, - ttl: "1h", - isTest: true, - concurrencyKey: "exhaustive-concurrency-key", - maxDurationInSeconds: 3600, - - // Tags and bulk actions - runTags: ["tag1", "tag2", "exhaustive-tag"], - bulkActionGroupIds: ["bulk-group-1", "bulk-group-2"], - - // Usage metrics - usageDurationMs: 12345, - costInCents: 50, - baseCostInCents: 25, - }, - }); - - // Wait for replication - await setTimeout(1500); - - // Query ClickHouse directly to get all columns - const queryRuns = clickhouse.reader.query({ - name: "exhaustive-replication-test", - query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}", - schema: z.any(), - params: z.object({ run_id: z.string() }), - }); - - const [queryError, result] = await queryRuns({ run_id: 
taskRun.id }); - - expect(queryError).toBeNull(); - expect(result).toHaveLength(1); - - const clickhouseRun = result![0]; - - // Exhaustively verify each column - // Core identifiers - expect(clickhouseRun.run_id).toBe(taskRun.id); - expect(clickhouseRun.friendly_id).toBe("run_exhaustive_test"); - expect(clickhouseRun.task_identifier).toBe("exhaustive-task"); - - // Environment/project/org - expect(clickhouseRun.environment_id).toBe(runtimeEnvironment.id); - expect(clickhouseRun.project_id).toBe(project.id); - expect(clickhouseRun.organization_id).toBe(organization.id); - expect(clickhouseRun.environment_type).toBe("PRODUCTION"); - - // Engine and execution - expect(clickhouseRun.engine).toBe("V2"); - expect(clickhouseRun.status).toBe("COMPLETED_SUCCESSFULLY"); - expect(clickhouseRun.attempt).toBe(3); - expect(clickhouseRun.queue).toBe("exhaustive-queue"); - expect(clickhouseRun.worker_queue).toBe("exhaustive-worker-queue"); - - // Relationships - expect(clickhouseRun.schedule_id).toBe(""); // Empty when not set - expect(clickhouseRun.batch_id).toBe(batch.id); - expect(clickhouseRun.root_run_id).toBe(rootRun.id); - expect(clickhouseRun.parent_run_id).toBe(parentRun.id); - expect(clickhouseRun.depth).toBe(2); - - // Timestamps (ClickHouse returns DateTime64 as strings in UTC without 'Z' suffix) - // Helper to parse ClickHouse timestamp strings to milliseconds - function parseClickhouseTimestamp(ts: string | null): number | null { - if (ts === null || ts === "1970-01-01 00:00:00.000") return null; - return new Date(ts + "Z").getTime(); - } - - expect(parseClickhouseTimestamp(clickhouseRun.created_at)).toBe(createdAt.getTime()); - expect(parseClickhouseTimestamp(clickhouseRun.updated_at)).toBe(updatedAt.getTime()); - expect(parseClickhouseTimestamp(clickhouseRun.started_at)).toBe(startedAt.getTime()); - expect(parseClickhouseTimestamp(clickhouseRun.executed_at)).toBe(executedAt.getTime()); - 
expect(parseClickhouseTimestamp(clickhouseRun.completed_at)).toBe(completedAt.getTime()); - expect(parseClickhouseTimestamp(clickhouseRun.delay_until)).toBe(delayUntil.getTime()); - expect(parseClickhouseTimestamp(clickhouseRun.queued_at)).toBe(queuedAt.getTime()); - expect(parseClickhouseTimestamp(clickhouseRun.expired_at)).toBeNull(); - - // Output (parsed JSON) - expect(clickhouseRun.output).toEqual({ data: { result: "test-output" } }); - - // Error - expect(clickhouseRun.error).toEqual({ - data: { message: "test error", name: "TestError" }, - }); - - // Tracing - expect(clickhouseRun.trace_id).toBe("exhaustive-trace-id-12345"); - expect(clickhouseRun.span_id).toBe("exhaustive-span-id-67890"); - - // Versioning - expect(clickhouseRun.task_version).toBe("1.2.3"); - expect(clickhouseRun.sdk_version).toBe("3.0.0"); - expect(clickhouseRun.cli_version).toBe("2.5.1"); - - // Execution settings - expect(clickhouseRun.machine_preset).toBe("large-1x"); - expect(clickhouseRun.idempotency_key).toBe("exhaustive-idempotency-key-hashed"); - expect(clickhouseRun.idempotency_key_user).toBe("exhaustive-idempotency-key"); - expect(clickhouseRun.idempotency_key_scope).toBe("run"); - expect(clickhouseRun.expiration_ttl).toBe("1h"); - expect(clickhouseRun.is_test).toBe(1); // ClickHouse returns booleans as integers - expect(clickhouseRun.concurrency_key).toBe("exhaustive-concurrency-key"); - expect(clickhouseRun.max_duration_in_seconds).toBe(3600); - - // Tags and bulk actions - expect(clickhouseRun.tags).toEqual(["tag1", "tag2", "exhaustive-tag"]); - expect(clickhouseRun.bulk_action_group_ids).toEqual(["bulk-group-1", "bulk-group-2"]); - - // Usage metrics - expect(clickhouseRun.usage_duration_ms).toBe(12345); - expect(clickhouseRun.cost_in_cents).toBe(50); - expect(clickhouseRun.base_cost_in_cents).toBe(25); - - // Internal ClickHouse columns - expect(clickhouseRun._is_deleted).toBe(0); - expect(clickhouseRun._version).toBeDefined(); - expect(typeof 
clickhouseRun._version).toBe("number"); // ClickHouse returns UInt64 as number - - // Also verify the payload was inserted into the payloads table - const queryPayloads = clickhouse.reader.query({ - name: "exhaustive-payload-test", - query: "SELECT * FROM trigger_dev.raw_task_runs_payload_v1 WHERE run_id = {run_id:String}", - schema: z.any(), - params: z.object({ run_id: z.string() }), - }); - - const [payloadError, payloadResult] = await queryPayloads({ run_id: taskRun.id }); - - expect(payloadError).toBeNull(); - expect(payloadResult).toHaveLength(1); - expect(payloadResult![0].run_id).toBe(taskRun.id); - expect(parseClickhouseTimestamp(payloadResult![0].created_at)).toBe(createdAt.getTime()); - expect(payloadResult![0].payload).toEqual({ data: { input: "test-payload" } }); - - await runsReplicationService.stop(); - } - ); }); diff --git a/apps/webapp/test/runsReplicationService.part3.test.ts b/apps/webapp/test/runsReplicationService.part3.test.ts new file mode 100644 index 00000000000..1261be3b513 --- /dev/null +++ b/apps/webapp/test/runsReplicationService.part3.test.ts @@ -0,0 +1,307 @@ +import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse"; +import { replicationContainerTest } from "@internal/testcontainers"; +import { Logger } from "@trigger.dev/core/logger"; +import { readFile } from "node:fs/promises"; +import { setTimeout } from "node:timers/promises"; +import { z } from "zod"; +import { RunsReplicationService } from "~/services/runsReplicationService.server"; +import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings"; +import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("RunsReplicationService (part 3/7)", () => { + replicationContainerTest( + "should insert TaskRuns even if there are incomplete Unicode escape sequences in the JSON", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await 
prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-stress-bulk-insert", + logLevel: "warn", + }); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-stress-bulk-insert", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 10, + flushIntervalMs: 100, + flushBatchSize: 50, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await runsReplicationService.start(); + + const organization = await prisma.organization.create({ + data: { + title: "test-stress-bulk-insert", + slug: "test-stress-bulk-insert", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-stress-bulk-insert", + slug: "test-stress-bulk-insert", + organizationId: organization.id, + externalRef: "test-stress-bulk-insert", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-stress-bulk-insert", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-stress-bulk-insert", + pkApiKey: "test-stress-bulk-insert", + shortcode: "test-stress-bulk-insert", + }, + }); + + // Prepare 9 unique TaskRuns + const now = Date.now(); + const runsData = Array.from({ length: 9 }, (_, i) => ({ + friendlyId: `run_bulk_${now}_${i}`, + taskIdentifier: `my-task-bulk`, + payload: `{"title": "hello"}`, + payloadType: "application/json", + traceId: `bulk-${i}`, + spanId: `bulk-${i}`, + queue: "test-stress-bulk-insert", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: 
"DEVELOPMENT" as const, + engine: "V2" as const, + status: "PENDING" as const, + attemptNumber: 1, + createdAt: new Date(now + i), + updatedAt: new Date(now + i), + })); + + //add a run with incomplete Unicode escape sequences + const badPayload = await readFile(`${__dirname}/bad-clickhouse-output.json`, "utf-8"); + const hasProblems = detectBadJsonStrings(badPayload); + expect(hasProblems).toBe(true); + + runsData.push({ + friendlyId: `run_bulk_${now}_10`, + taskIdentifier: `my-task-bulk`, + payload: badPayload, + payloadType: "application/json", + traceId: `bulk-10`, + spanId: `bulk-10`, + queue: "test-stress-bulk-insert", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT" as const, + engine: "V2" as const, + status: "PENDING" as const, + attemptNumber: 1, + createdAt: new Date(now + 10), + updatedAt: new Date(now + 10), + }); + + // Bulk insert + const created = await prisma.taskRun.createMany({ data: runsData }); + expect(created.count).toBe(10); + + // Update the runs (not the 10th one) + await prisma.taskRun.updateMany({ + where: { + spanId: { not: "bulk-10" }, + }, + data: { + status: "COMPLETED_SUCCESSFULLY", + output: `{"foo":"bar"}`, + outputType: "application/json", + }, + }); + + // Give the 10th one a bad payload + await prisma.taskRun.updateMany({ + where: { + spanId: "bulk-10", + }, + data: { + status: "COMPLETED_SUCCESSFULLY", + output: badPayload, + outputType: "application/json", + }, + }); + + // Wait for replication + await setTimeout(5000); + + // Query ClickHouse for all runs using FINAL + const queryRuns = clickhouse.reader.query({ + name: "runs-replication-stress-bulk-insert", + query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL`, + schema: z.any(), + }); + + const [queryError, result] = await queryRuns({}); + expect(queryError).toBeNull(); + expect(result?.length).toBe(10); + + // Check a few random runs for correctness + for (let i = 0; i < 9; i++) { 
+ const expected = runsData[i]; + const found = result?.find((r: any) => r.friendly_id === expected.friendlyId); + expect(found).toBeDefined(); + expect(found).toEqual( + expect.objectContaining({ + friendly_id: expected.friendlyId, + trace_id: expected.traceId, + task_identifier: expected.taskIdentifier, + status: "COMPLETED_SUCCESSFULLY", + }) + ); + expect(found?.output).toBeDefined(); + } + + // Check the run with the bad JSON + const foundBad = result?.find((r: any) => r.span_id === "bulk-10"); + expect(foundBad).toBeDefined(); + expect(foundBad?.output).toStrictEqual({}); + + await runsReplicationService.stop(); + } + ); + + replicationContainerTest( + "should merge duplicate event+run.id combinations keeping the latest version", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public.\"TaskRun\" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-merge-batch", + logLevel: "warn", + }); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-merge-batch", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 10, // Higher batch size to test merging + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + // Listen to batchFlushed events to verify merging + const batchFlushedEvents: Array<{ + flushId: string; + taskRunInserts: any[]; + payloadInserts: any[]; + }> = []; + + runsReplicationService.events.on("batchFlushed", (event) => { + batchFlushedEvents.push(event); + }); + + await runsReplicationService.start(); + + const organization = 
await prisma.organization.create({ + data: { + title: "test-merge-batch", + slug: "test-merge-batch", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-merge-batch", + slug: "test-merge-batch", + organizationId: organization.id, + externalRef: "test-merge-batch", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-merge-batch", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-merge-batch", + pkApiKey: "test-merge-batch", + shortcode: "test-merge-batch", + }, + }); + + // Create a run and rapidly update it multiple times in a transaction + // This should create multiple events for the same run that get merged + const run = await prisma.taskRun.create({ + data: { + friendlyId: `run_merge_${Date.now()}`, + taskIdentifier: "my-task-merge", + payload: JSON.stringify({ version: 1 }), + payloadType: "application/json", + traceId: `merge-${Date.now()}`, + spanId: `merge-${Date.now()}`, + queue: "test-merge-batch", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING_VERSION", + }, + }); + await prisma.taskRun.update({ + where: { id: run.id }, + data: { status: "DEQUEUED" }, + }); + await prisma.taskRun.update({ + where: { id: run.id }, + data: { status: "EXECUTING" }, + }); + await prisma.taskRun.update({ + where: { id: run.id }, + data: { status: "PAUSED" }, + }); + await prisma.taskRun.update({ + where: { id: run.id }, + data: { status: "EXECUTING" }, + }); + await prisma.taskRun.update({ + where: { id: run.id }, + data: { status: "COMPLETED_SUCCESSFULLY" }, + }); + + await setTimeout(1000); + + expect(batchFlushedEvents?.[0].taskRunInserts).toHaveLength(2); + // Use getTaskRunField for type-safe array access + expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[0], "run_id")).toEqual(run.id); + 
expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[0], "status")).toEqual( + "PENDING_VERSION" + ); + expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[1], "run_id")).toEqual(run.id); + expect(getTaskRunField(batchFlushedEvents![0].taskRunInserts[1], "status")).toEqual( + "COMPLETED_SUCCESSFULLY" + ); + + await runsReplicationService.stop(); + } + ); +}); diff --git a/apps/webapp/test/runsReplicationService.part4.test.ts b/apps/webapp/test/runsReplicationService.part4.test.ts new file mode 100644 index 00000000000..835192ad0fb --- /dev/null +++ b/apps/webapp/test/runsReplicationService.part4.test.ts @@ -0,0 +1,710 @@ +import { ClickHouse } from "@internal/clickhouse"; +import { replicationContainerTest } from "@internal/testcontainers"; +import { setTimeout } from "node:timers/promises"; +import { z } from "zod"; +import { TaskRunStatus } from "~/database-types"; +import { RunsReplicationService } from "~/services/runsReplicationService.server"; +import { createInMemoryTracing, createInMemoryMetrics } from "./utils/tracing"; +import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory"; +import superjson from "superjson"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("RunsReplicationService (part 4/7)", () => { + replicationContainerTest( + "should replicate updates to an existing TaskRun to ClickHouse", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-update", + logLevel: "warn", + }); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-update", + slotName: "task_runs_to_clickhouse_v1", + publicationName: 
"task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 1, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await runsReplicationService.start(); + + const organization = await prisma.organization.create({ + data: { + title: "test-update", + slug: "test-update", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-update", + slug: "test-update", + organizationId: organization.id, + externalRef: "test-update", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-update", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-update", + pkApiKey: "test-update", + shortcode: "test-update", + }, + }); + + const uniqueFriendlyId = `run_update_${Date.now()}`; + const taskRun = await prisma.taskRun.create({ + data: { + friendlyId: uniqueFriendlyId, + taskIdentifier: "my-task-update", + payload: JSON.stringify({ foo: "update-test" }), + payloadType: "application/json", + traceId: "update-1234", + spanId: "update-1234", + queue: "test-update", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + }, + }); + + await setTimeout(1000); + + await prisma.taskRun.update({ + where: { id: taskRun.id }, + data: { status: TaskRunStatus.COMPLETED_SUCCESSFULLY }, + }); + + await setTimeout(1000); + + const queryRuns = clickhouse.reader.query({ + name: "runs-replication-update", + query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}", + schema: z.any(), + params: z.object({ run_id: z.string() }), + }); + + const [queryError, result] = await queryRuns({ run_id: taskRun.id }); + + expect(queryError).toBeNull(); + expect(result?.length).toBe(1); + 
expect(result?.[0]).toEqual( + expect.objectContaining({ + run_id: taskRun.id, + status: TaskRunStatus.COMPLETED_SUCCESSFULLY, + }) + ); + + await runsReplicationService.stop(); + } + ); + + replicationContainerTest( + "should replicate deletions of a TaskRun to ClickHouse and mark as deleted", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-delete", + logLevel: "warn", + }); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-delete", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 1, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await runsReplicationService.start(); + + const organization = await prisma.organization.create({ + data: { + title: "test-delete", + slug: "test-delete", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-delete", + slug: "test-delete", + organizationId: organization.id, + externalRef: "test-delete", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-delete", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-delete", + pkApiKey: "test-delete", + shortcode: "test-delete", + }, + }); + + const uniqueFriendlyId = `run_delete_${Date.now()}`; + const taskRun = await prisma.taskRun.create({ + data: { + friendlyId: uniqueFriendlyId, + taskIdentifier: "my-task-delete", + payload: JSON.stringify({ foo: 
"delete-test" }), + payloadType: "application/json", + traceId: "delete-1234", + spanId: "delete-1234", + queue: "test-delete", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + }, + }); + + await setTimeout(1000); + + await prisma.taskRun.delete({ + where: { id: taskRun.id }, + }); + + await setTimeout(1000); + + const queryRuns = clickhouse.reader.query({ + name: "runs-replication-delete", + query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}", + schema: z.any(), + params: z.object({ run_id: z.string() }), + }); + + const [queryError, result] = await queryRuns({ run_id: taskRun.id }); + + expect(queryError).toBeNull(); + expect(result?.length).toBe(0); + + await runsReplicationService.stop(); + } + ); + + replicationContainerTest( + "should gracefully shutdown and allow a new service to pick up from the correct LSN (handover)", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-shutdown-handover", + logLevel: "warn", + }); + + // Service A + const runsReplicationServiceA = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-shutdown-handover", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 1, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await runsReplicationServiceA.start(); + + const organization = await 
prisma.organization.create({ + data: { + title: "test-shutdown-handover", + slug: "test-shutdown-handover", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-shutdown-handover", + slug: "test-shutdown-handover", + organizationId: organization.id, + externalRef: "test-shutdown-handover", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-shutdown-handover", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-shutdown-handover", + pkApiKey: "test-shutdown-handover", + shortcode: "test-shutdown-handover", + }, + }); + + const run1Id = `run_shutdown_handover_1_${Date.now()}`; + + runsReplicationServiceA.events.on("message", async ({ message, service }) => { + if (message.tag === "insert") { + await service.shutdown(); + } + }); + + const taskRun1 = await prisma.taskRun.create({ + data: { + friendlyId: run1Id, + taskIdentifier: "my-task-shutdown-handover-1", + payload: JSON.stringify({ foo: "handover-1" }), + payloadType: "application/json", + traceId: "handover-1-1234", + spanId: "handover-1-1234", + queue: "test-shutdown-handover", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + }, + }); + + const run2Id = `run_shutdown_handover_2_${Date.now()}`; + const taskRun2 = await prisma.taskRun.create({ + data: { + friendlyId: run2Id, + taskIdentifier: "my-task-shutdown-handover-2", + payload: JSON.stringify({ foo: "handover-2" }), + payloadType: "application/json", + traceId: "handover-2-1234", + spanId: "handover-2-1234", + queue: "test-shutdown-handover", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + }, + }); + + await setTimeout(1000); + + const queryRuns = 
clickhouse.reader.query({ + name: "runs-replication-shutdown-handover", + query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL ORDER BY created_at ASC", + schema: z.any(), + }); + const [queryError, result] = await queryRuns({}); + expect(queryError).toBeNull(); + expect(result?.length).toBe(1); + expect(result?.[0]).toEqual(expect.objectContaining({ run_id: taskRun1.id })); + + // Service B + const runsReplicationServiceB = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-shutdown-handover", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 1, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await runsReplicationServiceB.start(); + + await setTimeout(1000); + + const [queryErrorB, resultB] = await queryRuns({}); + + expect(queryErrorB).toBeNull(); + expect(resultB?.length).toBe(2); + expect(resultB).toEqual( + expect.arrayContaining([ + expect.objectContaining({ run_id: taskRun1.id }), + expect.objectContaining({ run_id: taskRun2.id }), + ]) + ); + + await runsReplicationServiceB.stop(); + } + ); + + replicationContainerTest( + "should not re-process already handled data if shutdown is called after all transactions are processed", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-shutdown-after-processed", + logLevel: "warn", + }); + + // Service A + const runsReplicationServiceA = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + 
pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-shutdown-after-processed", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 1, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await runsReplicationServiceA.start(); + + const organization = await prisma.organization.create({ + data: { + title: "test-shutdown-after-processed", + slug: "test-shutdown-after-processed", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-shutdown-after-processed", + slug: "test-shutdown-after-processed", + organizationId: organization.id, + externalRef: "test-shutdown-after-processed", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-shutdown-after-processed", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-shutdown-after-processed", + pkApiKey: "test-shutdown-after-processed", + shortcode: "test-shutdown-after-processed", + }, + }); + + const run1Id = `run_shutdown_after_processed_${Date.now()}`; + const taskRun1 = await prisma.taskRun.create({ + data: { + friendlyId: run1Id, + taskIdentifier: "my-task-shutdown-after-processed", + payload: JSON.stringify({ foo: "after-processed" }), + payloadType: "application/json", + traceId: "after-processed-1234", + spanId: "after-processed-1234", + queue: "test-shutdown-after-processed", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + }, + }); + + await setTimeout(1000); + + const queryRuns = clickhouse.reader.query({ + name: "runs-replication-shutdown-after-processed", + query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE 
run_id = {run_id:String}", + schema: z.any(), + params: z.object({ run_id: z.string() }), + }); + + const [queryErrorA, resultA] = await queryRuns({ run_id: taskRun1.id }); + expect(queryErrorA).toBeNull(); + expect(resultA?.length).toBe(1); + expect(resultA?.[0]).toEqual(expect.objectContaining({ run_id: taskRun1.id })); + + await runsReplicationServiceA.shutdown(); + + await setTimeout(500); + + const taskRun2 = await prisma.taskRun.create({ + data: { + friendlyId: `run_shutdown_after_processed_${Date.now()}`, + taskIdentifier: "my-task-shutdown-after-processed", + payload: JSON.stringify({ foo: "after-processed-2" }), + payloadType: "application/json", + traceId: "after-processed-2-1234", + spanId: "after-processed-2-1234", + queue: "test-shutdown-after-processed", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + }, + }); + + // Service B + const runsReplicationServiceB = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-shutdown-after-processed", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 1, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await runsReplicationServiceB.start(); + + await setTimeout(1000); + + const [queryErrorB, resultB] = await queryRuns({ run_id: taskRun2.id }); + expect(queryErrorB).toBeNull(); + expect(resultB?.length).toBe(1); + expect(resultB?.[0]).toEqual(expect.objectContaining({ run_id: taskRun2.id })); + + await runsReplicationServiceB.stop(); + } + ); + + replicationContainerTest( + "should record metrics with correct values when replicating runs", + async 
({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-metrics", + logLevel: "warn", + }); + + const { tracer } = createInMemoryTracing(); + const metricsHelper = createInMemoryMetrics(); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-metrics", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 2, + flushIntervalMs: 100, + flushBatchSize: 5, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + tracer, + meter: metricsHelper.meter, + logLevel: "warn", + }); + + await runsReplicationService.start(); + + const organization = await prisma.organization.create({ + data: { + title: "test-metrics", + slug: "test-metrics", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-metrics", + slug: "test-metrics", + organizationId: organization.id, + externalRef: "test-metrics", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-metrics", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-metrics", + pkApiKey: "test-metrics", + shortcode: "test-metrics", + }, + }); + + const now = Date.now(); + const createdRuns: string[] = []; + + for (let i = 0; i < 5; i++) { + const run = await prisma.taskRun.create({ + data: { + friendlyId: `run_metrics_${now}_${i}`, + taskIdentifier: "my-task-metrics", + payload: JSON.stringify({ index: i }), + payloadType: "application/json", + traceId: `metrics-${now}-${i}`, + spanId: 
`metrics-${now}-${i}`, + queue: "test-metrics", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + }, + }); + createdRuns.push(run.id); + } + + await setTimeout(1000); + + for (let i = 0; i < 3; i++) { + await prisma.taskRun.update({ + where: { id: createdRuns[i] }, + data: { status: "EXECUTING" }, + }); + } + + await setTimeout(1000); + + for (let i = 0; i < 2; i++) { + await prisma.taskRun.update({ + where: { id: createdRuns[i] }, + data: { + status: "COMPLETED_SUCCESSFULLY", + completedAt: new Date(), + output: JSON.stringify({ result: "success" }), + outputType: "application/json", + }, + }); + } + + await setTimeout(1000); + + const metrics = await metricsHelper.getMetrics(); + + function getMetricData(name: string) { + for (const resourceMetrics of metrics) { + for (const scopeMetrics of resourceMetrics.scopeMetrics) { + for (const metric of scopeMetrics.metrics) { + if (metric.descriptor.name === name) { + return metric; + } + } + } + } + return null; + } + + function sumCounterValues(metric: any): number { + if (!metric?.dataPoints) return 0; + return metric.dataPoints.reduce((sum: number, dp: any) => sum + (dp.value || 0), 0); + } + + function histogramHasData(metric: any): boolean { + if (!metric?.dataPoints || metric.dataPoints.length === 0) return false; + return metric.dataPoints.some((dp: any) => { + return ( + (typeof dp.count === "number" && dp.count > 0) || + (typeof dp.value?.count === "number" && dp.value.count > 0) || + (Array.isArray(dp.buckets?.counts) && dp.buckets.counts.some((c: number) => c > 0)) || + (typeof dp.sum === "number" && dp.sum > 0) || + typeof dp.min === "number" || + typeof dp.max === "number" + ); + }); + } + + function getCounterAttributeValues(metric: any, attributeName: string): unknown[] { + if (!metric?.dataPoints) return []; + return metric.dataPoints + .filter((dp: any) => 
dp.attributes?.[attributeName] !== undefined) + .map((dp: any) => dp.attributes[attributeName]); + } + + const batchesFlushed = getMetricData("runs_replication.batches_flushed"); + expect(batchesFlushed).not.toBeNull(); + const totalBatchesFlushed = sumCounterValues(batchesFlushed); + expect(totalBatchesFlushed).toBeGreaterThanOrEqual(1); + + const successAttributeValues = getCounterAttributeValues(batchesFlushed, "success"); + expect(successAttributeValues.length).toBeGreaterThanOrEqual(1); + + const taskRunsInserted = getMetricData("runs_replication.task_runs_inserted"); + expect(taskRunsInserted).not.toBeNull(); + const totalTaskRunsInserted = sumCounterValues(taskRunsInserted); + expect(totalTaskRunsInserted).toBeGreaterThanOrEqual(5); + + const payloadsInserted = getMetricData("runs_replication.payloads_inserted"); + expect(payloadsInserted).not.toBeNull(); + const totalPayloadsInserted = sumCounterValues(payloadsInserted); + expect(totalPayloadsInserted).toBeGreaterThanOrEqual(1); + + const eventsProcessed = getMetricData("runs_replication.events_processed"); + expect(eventsProcessed).not.toBeNull(); + const totalEventsProcessed = sumCounterValues(eventsProcessed); + expect(totalEventsProcessed).toBeGreaterThanOrEqual(1); + + const eventTypes = getCounterAttributeValues(eventsProcessed, "event_type"); + expect(eventTypes.length).toBeGreaterThanOrEqual(1); + expect(eventTypes).toContain("insert"); + + const batchSize = getMetricData("runs_replication.batch_size"); + expect(batchSize).not.toBeNull(); + expect(histogramHasData(batchSize)).toBe(true); + + const replicationLag = getMetricData("runs_replication.replication_lag_ms"); + expect(replicationLag).not.toBeNull(); + expect(histogramHasData(replicationLag)).toBe(true); + + const flushDuration = getMetricData("runs_replication.flush_duration_ms"); + expect(flushDuration).not.toBeNull(); + expect(histogramHasData(flushDuration)).toBe(true); + + await runsReplicationService.stop(); + await 
metricsHelper.shutdown(); + } + ); +}); diff --git a/apps/webapp/test/runsReplicationService.part5.test.ts b/apps/webapp/test/runsReplicationService.part5.test.ts new file mode 100644 index 00000000000..3263efae7b8 --- /dev/null +++ b/apps/webapp/test/runsReplicationService.part5.test.ts @@ -0,0 +1,147 @@ +import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse"; +import { replicationContainerTest } from "@internal/testcontainers"; +import { Logger } from "@trigger.dev/core/logger"; +import { readFile } from "node:fs/promises"; +import { setTimeout } from "node:timers/promises"; +import { z } from "zod"; +import { RunsReplicationService } from "~/services/runsReplicationService.server"; +import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings"; +import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("RunsReplicationService (part 5/7)", () => { + replicationContainerTest( + "should replicate all events in a single transaction (insert, update)", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-multi-event-tx", + logLevel: "warn", + }); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-multi-event-tx", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 10, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await 
runsReplicationService.start(); + + const organization = await prisma.organization.create({ + data: { + title: "test-multi-event-tx", + slug: "test-multi-event-tx", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-multi-event-tx", + slug: "test-multi-event-tx", + organizationId: organization.id, + externalRef: "test-multi-event-tx", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-multi-event-tx", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-multi-event-tx", + pkApiKey: "test-multi-event-tx", + shortcode: "test-multi-event-tx", + }, + }); + + // Start a transaction + const [run1, run2] = await prisma.$transaction(async (tx) => { + const run1 = await tx.taskRun.create({ + data: { + friendlyId: `run_multi_event_1_${Date.now()}`, + taskIdentifier: "my-task-multi-event-1", + payload: JSON.stringify({ multi: 1 }), + payloadType: "application/json", + traceId: `multi-1-${Date.now()}`, + spanId: `multi-1-${Date.now()}`, + queue: "test-multi-event-tx", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + attemptNumber: 1, + createdAt: new Date(), + updatedAt: new Date(), + }, + }); + const run2 = await tx.taskRun.create({ + data: { + friendlyId: `run_multi_event_2_${Date.now()}`, + taskIdentifier: "my-task-multi-event-2", + payload: JSON.stringify({ multi: 2 }), + payloadType: "application/json", + traceId: `multi-2-${Date.now()}`, + spanId: `multi-2-${Date.now()}`, + queue: "test-multi-event-tx", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + attemptNumber: 1, + createdAt: new Date(), + updatedAt: new Date(), + }, + }); + await tx.taskRun.update({ + where: { id: run1.id 
}, + data: { status: "COMPLETED_SUCCESSFULLY" }, + }); + + return [run1, run2]; + }); + + // Wait for replication + await setTimeout(1000); + + // Query ClickHouse for both runs using FINAL + const queryRuns = clickhouse.reader.query({ + name: "runs-replication-multi-event-tx", + query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id IN ({run_id_1:String}, {run_id_2:String})`, + schema: z.any(), + params: z.object({ run_id_1: z.string(), run_id_2: z.string() }), + }); + + const [queryError, result] = await queryRuns({ run_id_1: run1.id, run_id_2: run2.id }); + expect(queryError).toBeNull(); + expect(result?.length).toBe(2); + const run1Result = result?.find((r: any) => r.run_id === run1.id); + const run2Result = result?.find((r: any) => r.run_id === run2.id); + expect(run1Result).toBeDefined(); + expect(run1Result).toEqual( + expect.objectContaining({ run_id: run1.id, status: "COMPLETED_SUCCESSFULLY" }) + ); + expect(run2Result).toBeDefined(); + expect(run2Result).toEqual(expect.objectContaining({ run_id: run2.id })); + + await runsReplicationService.stop(); + } + ); +}); diff --git a/apps/webapp/test/runsReplicationService.part6.test.ts b/apps/webapp/test/runsReplicationService.part6.test.ts new file mode 100644 index 00000000000..276920f8491 --- /dev/null +++ b/apps/webapp/test/runsReplicationService.part6.test.ts @@ -0,0 +1,536 @@ +import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse"; +import { replicationContainerTest } from "@internal/testcontainers"; +import { Logger } from "@trigger.dev/core/logger"; +import { readFile } from "node:fs/promises"; +import { setTimeout } from "node:timers/promises"; +import { z } from "zod"; +import { RunsReplicationService } from "~/services/runsReplicationService.server"; +import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings"; +import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory"; + +vi.setConfig({ testTimeout: 60_000 }); + 
+describe("RunsReplicationService (part 6/7)", () => { + replicationContainerTest( + "should sort batch inserts according to table schema ordering for optimal performance", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public.\"TaskRun\" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-sorting", + logLevel: "warn", + }); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-sorting", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 10, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + // Listen to batchFlushed events to verify sorting + const batchFlushedEvents: Array<{ + flushId: string; + taskRunInserts: any[]; + payloadInserts: any[]; + }> = []; + + runsReplicationService.events.on("batchFlushed", (event) => { + batchFlushedEvents.push(event); + }); + + await runsReplicationService.start(); + + // Create two organizations to test sorting by organization_id + const org1 = await prisma.organization.create({ + data: { title: "org-z", slug: "org-z" }, + }); + + const org2 = await prisma.organization.create({ + data: { title: "org-a", slug: "org-a" }, + }); + + const project1 = await prisma.project.create({ + data: { + name: "test-sorting-z", + slug: "test-sorting-z", + organizationId: org1.id, + externalRef: "test-sorting-z", + }, + }); + + const project2 = await prisma.project.create({ + data: { + name: "test-sorting-a", + slug: "test-sorting-a", + organizationId: org2.id, + externalRef: "test-sorting-a", + }, + }); + + const 
env1 = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-sorting-z", + type: "DEVELOPMENT", + projectId: project1.id, + organizationId: org1.id, + apiKey: "test-sorting-z", + pkApiKey: "test-sorting-z", + shortcode: "test-sorting-z", + }, + }); + + const env2 = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-sorting-a", + type: "DEVELOPMENT", + projectId: project2.id, + organizationId: org2.id, + apiKey: "test-sorting-a", + pkApiKey: "test-sorting-a", + shortcode: "test-sorting-a", + }, + }); + + const now = Date.now(); + + const run1 = await prisma.taskRun.create({ + data: { + friendlyId: `run_sort_org_z_${now}`, + taskIdentifier: "my-task-sort", + payload: JSON.stringify({ org: "z" }), + payloadType: "application/json", + traceId: `sort-z-${now}`, + spanId: `sort-z-${now}`, + queue: "test-sorting", + runtimeEnvironmentId: env1.id, + projectId: project1.id, + organizationId: org1.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + createdAt: new Date(now + 2000), + }, + }); + await prisma.taskRun.update({ + where: { id: run1.id }, + data: { status: "DEQUEUED" }, + }); + + await prisma.taskRun.create({ + data: { + friendlyId: `run_sort_org_a_${now}`, + taskIdentifier: "my-task-sort", + payload: JSON.stringify({ org: "a" }), + payloadType: "application/json", + traceId: `sort-a-${now}`, + spanId: `sort-a-${now}`, + queue: "test-sorting", + runtimeEnvironmentId: env2.id, + projectId: project2.id, + organizationId: org2.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + createdAt: new Date(now + 1000), + }, + }); + + await prisma.taskRun.create({ + data: { + friendlyId: `run_sort_org_a_${now}_2`, + taskIdentifier: "my-task-sort", + payload: JSON.stringify({ org: "a" }), + payloadType: "application/json", + traceId: `sort-a-${now}`, + spanId: `sort-a-${now}`, + queue: "test-sorting", + runtimeEnvironmentId: env2.id, + projectId: project2.id, + organizationId: org2.id, + environmentType: 
"DEVELOPMENT", + engine: "V2", + status: "PENDING", + createdAt: new Date(now), + }, + }); + + await setTimeout(1000); + + expect(batchFlushedEvents[0]?.taskRunInserts.length).toBeGreaterThan(1); + expect(batchFlushedEvents[0]?.payloadInserts.length).toBeGreaterThan(1); + + // Verify sorting order: organization_id, project_id, environment_id, created_at, run_id + for (let i = 1; i < batchFlushedEvents[0]?.taskRunInserts.length; i++) { + const prev = batchFlushedEvents[0]!.taskRunInserts[i - 1]; + const curr = batchFlushedEvents[0]!.taskRunInserts[i]; + + const prevKey = [ + getTaskRunField(prev, "organization_id"), + getTaskRunField(prev, "project_id"), + getTaskRunField(prev, "environment_id"), + getTaskRunField(prev, "created_at"), + getTaskRunField(prev, "run_id"), + ]; + const currKey = [ + getTaskRunField(curr, "organization_id"), + getTaskRunField(curr, "project_id"), + getTaskRunField(curr, "environment_id"), + getTaskRunField(curr, "created_at"), + getTaskRunField(curr, "run_id"), + ]; + + const keysAreEqual = prevKey.every((val, idx) => val === currKey[idx]); + if (keysAreEqual) { + // Also valid order + continue; + } + + // Compare tuples lexicographically + let isCorrectOrder = false; + for (let j = 0; j < prevKey.length; j++) { + if (prevKey[j] < currKey[j]) { + isCorrectOrder = true; + break; + } + if (prevKey[j] > currKey[j]) { + isCorrectOrder = false; + break; + } + // If equal, continue to next field + } + + expect(isCorrectOrder).toBeTruthy(); + } + + // Verify payloadInserts are also sorted by run_id + for (let i = 1; i < batchFlushedEvents[0]?.payloadInserts.length; i++) { + const prev = batchFlushedEvents[0]!.payloadInserts[i - 1]; + const curr = batchFlushedEvents[0]!.payloadInserts[i]; + expect(getPayloadField(prev, "run_id") <= getPayloadField(curr, "run_id")).toBeTruthy(); + } + + await runsReplicationService.stop(); + } + ); + + replicationContainerTest( + "should exhaustively replicate all TaskRun columns to ClickHouse", + async ({ 
clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-exhaustive", + logLevel: "warn", + }); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-exhaustive", + slotName: "task_runs_to_clickhouse_v1", + publicationName: "task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 1, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await runsReplicationService.start(); + + const organization = await prisma.organization.create({ + data: { + title: "test-exhaustive", + slug: "test-exhaustive", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-exhaustive", + slug: "test-exhaustive", + organizationId: organization.id, + externalRef: "test-exhaustive", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-exhaustive", + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-exhaustive", + pkApiKey: "test-exhaustive", + shortcode: "test-exhaustive", + }, + }); + + // Create a batch for the batchId field + const batch = await prisma.batchTaskRun.create({ + data: { + friendlyId: "batch_exhaustive", + runtimeEnvironmentId: runtimeEnvironment.id, + status: "PENDING", + }, + }); + + // Create a root run for the rootTaskRunId field + const rootRun = await prisma.taskRun.create({ + data: { + friendlyId: "run_root_exhaustive", + taskIdentifier: "root-task", + payload: JSON.stringify({ root: true }), + traceId: "root-trace-id", + spanId: 
"root-span-id", + queue: "root-queue", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "PRODUCTION", + engine: "V2", + }, + }); + + // Create a parent run for the parentTaskRunId field + const parentRun = await prisma.taskRun.create({ + data: { + friendlyId: "run_parent_exhaustive", + taskIdentifier: "parent-task", + payload: JSON.stringify({ parent: true }), + traceId: "parent-trace-id", + spanId: "parent-span-id", + queue: "parent-queue", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "PRODUCTION", + engine: "V2", + rootTaskRunId: rootRun.id, + depth: 1, + }, + }); + + // Set up all the dates we'll use + const now = new Date(); + const createdAt = new Date(now.getTime() - 10000); + const updatedAt = new Date(now.getTime() - 5000); + const startedAt = new Date(now.getTime() - 8000); + const executedAt = new Date(now.getTime() - 7500); + const completedAt = new Date(now.getTime() - 6000); + const delayUntil = new Date(now.getTime() - 9000); + const queuedAt = new Date(now.getTime() - 9500); + const expiredAt = null; // Not expired + + // Create the main task run with ALL fields populated + const taskRun = await prisma.taskRun.create({ + data: { + // Core identifiers + friendlyId: "run_exhaustive_test", + taskIdentifier: "exhaustive-task", + + // Environment/project/org + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "PRODUCTION", + + // Engine and execution + engine: "V2", + status: "COMPLETED_SUCCESSFULLY", + attemptNumber: 3, + queue: "exhaustive-queue", + workerQueue: "exhaustive-worker-queue", + + // Relationships + // Note: scheduleId is not set to test empty string handling + batchId: batch.id, + rootTaskRunId: rootRun.id, + parentTaskRunId: parentRun.id, + depth: 2, + + // Timestamps + createdAt, + updatedAt, + startedAt, + 
executedAt, + completedAt, + delayUntil, + queuedAt, + expiredAt, + + // Payload and output + payload: JSON.stringify({ input: "test-payload" }), + payloadType: "application/json", + output: JSON.stringify({ result: "test-output" }), + outputType: "application/json", + error: { message: "test error", name: "TestError" }, + + // Tracing + traceId: "exhaustive-trace-id-12345", + spanId: "exhaustive-span-id-67890", + + // Versioning + taskVersion: "1.2.3", + sdkVersion: "3.0.0", + cliVersion: "2.5.1", + + // Execution settings + machinePreset: "large-1x", + idempotencyKey: "exhaustive-idempotency-key-hashed", + idempotencyKeyOptions: { + key: "exhaustive-idempotency-key", + scope: "run", + }, + ttl: "1h", + isTest: true, + concurrencyKey: "exhaustive-concurrency-key", + maxDurationInSeconds: 3600, + + // Tags and bulk actions + runTags: ["tag1", "tag2", "exhaustive-tag"], + bulkActionGroupIds: ["bulk-group-1", "bulk-group-2"], + + // Usage metrics + usageDurationMs: 12345, + costInCents: 50, + baseCostInCents: 25, + }, + }); + + // Wait for replication + await setTimeout(1500); + + // Query ClickHouse directly to get all columns + const queryRuns = clickhouse.reader.query({ + name: "exhaustive-replication-test", + query: "SELECT * FROM trigger_dev.task_runs_v2 FINAL WHERE run_id = {run_id:String}", + schema: z.any(), + params: z.object({ run_id: z.string() }), + }); + + const [queryError, result] = await queryRuns({ run_id: taskRun.id }); + + expect(queryError).toBeNull(); + expect(result).toHaveLength(1); + + const clickhouseRun = result![0]; + + // Exhaustively verify each column + // Core identifiers + expect(clickhouseRun.run_id).toBe(taskRun.id); + expect(clickhouseRun.friendly_id).toBe("run_exhaustive_test"); + expect(clickhouseRun.task_identifier).toBe("exhaustive-task"); + + // Environment/project/org + expect(clickhouseRun.environment_id).toBe(runtimeEnvironment.id); + expect(clickhouseRun.project_id).toBe(project.id); + 
expect(clickhouseRun.organization_id).toBe(organization.id); + expect(clickhouseRun.environment_type).toBe("PRODUCTION"); + + // Engine and execution + expect(clickhouseRun.engine).toBe("V2"); + expect(clickhouseRun.status).toBe("COMPLETED_SUCCESSFULLY"); + expect(clickhouseRun.attempt).toBe(3); + expect(clickhouseRun.queue).toBe("exhaustive-queue"); + expect(clickhouseRun.worker_queue).toBe("exhaustive-worker-queue"); + + // Relationships + expect(clickhouseRun.schedule_id).toBe(""); // Empty when not set + expect(clickhouseRun.batch_id).toBe(batch.id); + expect(clickhouseRun.root_run_id).toBe(rootRun.id); + expect(clickhouseRun.parent_run_id).toBe(parentRun.id); + expect(clickhouseRun.depth).toBe(2); + + // Timestamps (ClickHouse returns DateTime64 as strings in UTC without 'Z' suffix) + // Helper to parse ClickHouse timestamp strings to milliseconds + function parseClickhouseTimestamp(ts: string | null): number | null { + if (ts === null || ts === "1970-01-01 00:00:00.000") return null; + return new Date(ts + "Z").getTime(); + } + + expect(parseClickhouseTimestamp(clickhouseRun.created_at)).toBe(createdAt.getTime()); + expect(parseClickhouseTimestamp(clickhouseRun.updated_at)).toBe(updatedAt.getTime()); + expect(parseClickhouseTimestamp(clickhouseRun.started_at)).toBe(startedAt.getTime()); + expect(parseClickhouseTimestamp(clickhouseRun.executed_at)).toBe(executedAt.getTime()); + expect(parseClickhouseTimestamp(clickhouseRun.completed_at)).toBe(completedAt.getTime()); + expect(parseClickhouseTimestamp(clickhouseRun.delay_until)).toBe(delayUntil.getTime()); + expect(parseClickhouseTimestamp(clickhouseRun.queued_at)).toBe(queuedAt.getTime()); + expect(parseClickhouseTimestamp(clickhouseRun.expired_at)).toBeNull(); + + // Output (parsed JSON) + expect(clickhouseRun.output).toEqual({ data: { result: "test-output" } }); + + // Error + expect(clickhouseRun.error).toEqual({ + data: { message: "test error", name: "TestError" }, + }); + + // Tracing + 
expect(clickhouseRun.trace_id).toBe("exhaustive-trace-id-12345"); + expect(clickhouseRun.span_id).toBe("exhaustive-span-id-67890"); + + // Versioning + expect(clickhouseRun.task_version).toBe("1.2.3"); + expect(clickhouseRun.sdk_version).toBe("3.0.0"); + expect(clickhouseRun.cli_version).toBe("2.5.1"); + + // Execution settings + expect(clickhouseRun.machine_preset).toBe("large-1x"); + expect(clickhouseRun.idempotency_key).toBe("exhaustive-idempotency-key-hashed"); + expect(clickhouseRun.idempotency_key_user).toBe("exhaustive-idempotency-key"); + expect(clickhouseRun.idempotency_key_scope).toBe("run"); + expect(clickhouseRun.expiration_ttl).toBe("1h"); + expect(clickhouseRun.is_test).toBe(1); // ClickHouse returns booleans as integers + expect(clickhouseRun.concurrency_key).toBe("exhaustive-concurrency-key"); + expect(clickhouseRun.max_duration_in_seconds).toBe(3600); + + // Tags and bulk actions + expect(clickhouseRun.tags).toEqual(["tag1", "tag2", "exhaustive-tag"]); + expect(clickhouseRun.bulk_action_group_ids).toEqual(["bulk-group-1", "bulk-group-2"]); + + // Usage metrics + expect(clickhouseRun.usage_duration_ms).toBe(12345); + expect(clickhouseRun.cost_in_cents).toBe(50); + expect(clickhouseRun.base_cost_in_cents).toBe(25); + + // Internal ClickHouse columns + expect(clickhouseRun._is_deleted).toBe(0); + expect(clickhouseRun._version).toBeDefined(); + expect(typeof clickhouseRun._version).toBe("number"); // ClickHouse returns UInt64 as number + + // Also verify the payload was inserted into the payloads table + const queryPayloads = clickhouse.reader.query({ + name: "exhaustive-payload-test", + query: "SELECT * FROM trigger_dev.raw_task_runs_payload_v1 WHERE run_id = {run_id:String}", + schema: z.any(), + params: z.object({ run_id: z.string() }), + }); + + const [payloadError, payloadResult] = await queryPayloads({ run_id: taskRun.id }); + + expect(payloadError).toBeNull(); + expect(payloadResult).toHaveLength(1); + 
expect(payloadResult![0].run_id).toBe(taskRun.id); + expect(parseClickhouseTimestamp(payloadResult![0].created_at)).toBe(createdAt.getTime()); + expect(payloadResult![0].payload).toEqual({ data: { input: "test-payload" } }); + + await runsReplicationService.stop(); + } + ); +}); diff --git a/apps/webapp/test/runsReplicationService.part7.test.ts b/apps/webapp/test/runsReplicationService.part7.test.ts new file mode 100644 index 00000000000..4f091d8eb4c --- /dev/null +++ b/apps/webapp/test/runsReplicationService.part7.test.ts @@ -0,0 +1,120 @@ +import { ClickHouse, getTaskRunField, getPayloadField } from "@internal/clickhouse"; +import { replicationContainerTest } from "@internal/testcontainers"; +import { Logger } from "@trigger.dev/core/logger"; +import { readFile } from "node:fs/promises"; +import { setTimeout } from "node:timers/promises"; +import { z } from "zod"; +import { RunsReplicationService } from "~/services/runsReplicationService.server"; +import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings"; +import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickhouseFactory"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("RunsReplicationService (part 7/7)", () => { + replicationContainerTest( + "should be able to handle processing transactions for a long period of time", + { timeout: 60_000 * 5 }, + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + await prisma.$executeRawUnsafe(`ALTER TABLE public."TaskRun" REPLICA IDENTITY FULL;`); + + const clickhouse = new ClickHouse({ + url: clickhouseContainer.getConnectionUrl(), + name: "runs-replication-long-tx", + logLevel: "warn", + }); + + const runsReplicationService = new RunsReplicationService({ + clickhouseFactory: new TestReplicationClickhouseFactory(clickhouse), + pgConnectionUrl: postgresContainer.getConnectionUri(), + serviceName: "runs-replication-long-tx", + slotName: "task_runs_to_clickhouse_v1", + publicationName: 
"task_runs_to_clickhouse_v1_publication", + redisOptions, + maxFlushConcurrency: 1, + flushIntervalMs: 100, + flushBatchSize: 10, + leaderLockTimeoutMs: 5000, + leaderLockExtendIntervalMs: 1000, + ackIntervalSeconds: 5, + logLevel: "warn", + }); + + await runsReplicationService.start(); + + const organization = await prisma.organization.create({ + data: { + title: "test-long-tx", + slug: "test-long-tx", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test-long-tx", + slug: "test-long-tx", + organizationId: organization.id, + externalRef: "test-long-tx", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test-long-tx", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test-long-tx", + pkApiKey: "test-long-tx", + shortcode: "test-long-tx", + }, + }); + + // Start an interval that creates a new run every 500ms until it is cleared after the 1 minute wait below + const interval = setInterval(async () => { + await prisma.taskRun.create({ + data: { + friendlyId: `run_long_tx_${Date.now()}`, + taskIdentifier: "my-task-long-tx", + payload: JSON.stringify({ long: 1 }), + payloadType: "application/json", + traceId: `long-${Date.now()}`, + spanId: `long-${Date.now()}`, + queue: "test-long-tx", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + status: "PENDING", + attemptNumber: 1, + createdAt: new Date(), + updatedAt: new Date(), + }, + }); + }, 500); + + // Wait for 1 minute + await setTimeout(1 * 60 * 1000); + + // Stop the interval + clearInterval(interval); + + // Wait for replication + await setTimeout(1000); + + // Query ClickHouse for all runs using FINAL + const queryRuns = clickhouse.reader.query({ + name: "runs-replication-long-tx", + query: `SELECT * FROM trigger_dev.task_runs_v2 FINAL`, + schema: z.any(), + }); + + const [queryError, result] = await queryRuns({}); 
+ expect(queryError).toBeNull(); + + expect(result?.length).toBeGreaterThanOrEqual(50); + + await runsReplicationService.stop(); + } + ); +}); diff --git a/apps/webapp/test/runsRepository.part1.test.ts b/apps/webapp/test/runsRepository.part1.test.ts index 45d91ad44e7..e33f4464db3 100644 --- a/apps/webapp/test/runsRepository.part1.test.ts +++ b/apps/webapp/test/runsRepository.part1.test.ts @@ -6,15 +6,15 @@ vi.mock("~/db.server", () => ({ $replica: {}, })); -import { containerTest } from "@internal/testcontainers"; +import { replicationContainerTest } from "@internal/testcontainers"; import { setTimeout } from "node:timers/promises"; import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { setupClickhouseReplication } from "./utils/replicationUtils"; vi.setConfig({ testTimeout: 60_000 }); -describe("RunsRepository (part 1/2)", () => { - containerTest( +describe("RunsRepository (part 1/4)", () => { + replicationContainerTest( "should list runs, using clickhouse as the source", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const { clickhouse } = await setupClickhouseReplication({ @@ -90,7 +90,7 @@ describe("RunsRepository (part 1/2)", () => { } ); - containerTest( + replicationContainerTest( "should filter runs by task identifiers", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const { clickhouse } = await setupClickhouseReplication({ @@ -198,7 +198,7 @@ describe("RunsRepository (part 1/2)", () => { } ); - containerTest( + replicationContainerTest( "should filter runs by task versions", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const { clickhouse } = await setupClickhouseReplication({ @@ -309,7 +309,7 @@ describe("RunsRepository (part 1/2)", () => { } ); - containerTest( + replicationContainerTest( "should filter runs by status", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const { clickhouse } = await 
setupClickhouseReplication({ @@ -419,331 +419,4 @@ describe("RunsRepository (part 1/2)", () => { expect(runs.map((r) => r.status).sort()).toEqual(["COMPLETED_SUCCESSFULLY", "PENDING"]); } ); - - containerTest( - "should filter runs by tags", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - const { clickhouse } = await setupClickhouseReplication({ - prisma, - databaseUrl: postgresContainer.getConnectionUri(), - clickhouseUrl: clickhouseContainer.getConnectionUrl(), - redisOptions, - }); - - const organization = await prisma.organization.create({ - data: { - title: "test", - slug: "test", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test", - slug: "test", - organizationId: organization.id, - externalRef: "test", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test", - pkApiKey: "test", - shortcode: "test", - }, - }); - - // Create runs with different tags - const taskRun1 = await prisma.taskRun.create({ - data: { - friendlyId: "run_urgent", - taskIdentifier: "my-task", - runTags: ["urgent", "production"], - payload: JSON.stringify({ foo: "bar" }), - traceId: "1234", - spanId: "1234", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - const taskRun2 = await prisma.taskRun.create({ - data: { - friendlyId: "run_regular", - taskIdentifier: "my-task", - runTags: ["regular", "development"], - payload: JSON.stringify({ foo: "bar" }), - traceId: "1235", - spanId: "1235", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - const taskRun3 = await prisma.taskRun.create({ - data: { - 
friendlyId: "run_urgent_dev", - taskIdentifier: "my-task", - runTags: ["urgent", "development"], - payload: JSON.stringify({ foo: "bar" }), - traceId: "1236", - spanId: "1236", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await setTimeout(1000); - - const runsRepository = new RunsRepository({ - prisma, - clickhouse, - }); - - // Test filtering by tags - const { runs } = await runsRepository.listRuns({ - page: { size: 10 }, - projectId: project.id, - environmentId: runtimeEnvironment.id, - organizationId: organization.id, - tags: ["urgent"], - }); - - expect(runs).toHaveLength(2); - expect(runs.map((r) => r.friendlyId).sort()).toEqual(["run_urgent", "run_urgent_dev"]); - } - ); - - containerTest( - "should filter runs by scheduleId", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - const { clickhouse } = await setupClickhouseReplication({ - prisma, - databaseUrl: postgresContainer.getConnectionUri(), - clickhouseUrl: clickhouseContainer.getConnectionUrl(), - redisOptions, - }); - - const organization = await prisma.organization.create({ - data: { - title: "test", - slug: "test", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test", - slug: "test", - organizationId: organization.id, - externalRef: "test", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test", - pkApiKey: "test", - shortcode: "test", - }, - }); - - // Create runs with different schedule IDs - await prisma.taskRun.create({ - data: { - friendlyId: "run_scheduled_1", - taskIdentifier: "my-task", - scheduleId: "schedule_1", - payload: JSON.stringify({ foo: "bar" }), - traceId: "1234", - spanId: "1234", - queue: "test", - runtimeEnvironmentId: 
runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await prisma.taskRun.create({ - data: { - friendlyId: "run_scheduled_2", - taskIdentifier: "my-task", - scheduleId: "schedule_2", - payload: JSON.stringify({ foo: "bar" }), - traceId: "1235", - spanId: "1235", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await prisma.taskRun.create({ - data: { - friendlyId: "run_unscheduled", - taskIdentifier: "my-task", - payload: JSON.stringify({ foo: "bar" }), - traceId: "1236", - spanId: "1236", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await setTimeout(1000); - - const runsRepository = new RunsRepository({ - prisma, - clickhouse, - }); - - // Test filtering by schedule ID - const { runs } = await runsRepository.listRuns({ - page: { size: 10 }, - projectId: project.id, - environmentId: runtimeEnvironment.id, - organizationId: organization.id, - scheduleId: "schedule_1", - }); - - expect(runs).toHaveLength(1); - expect(runs[0].friendlyId).toBe("run_scheduled_1"); - } - ); - - containerTest( - "should filter runs by isTest flag", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - const { clickhouse } = await setupClickhouseReplication({ - prisma, - databaseUrl: postgresContainer.getConnectionUri(), - clickhouseUrl: clickhouseContainer.getConnectionUrl(), - redisOptions, - }); - - const organization = await prisma.organization.create({ - data: { - title: "test", - slug: "test", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test", - slug: "test", - organizationId: organization.id, - externalRef: "test", - }, - }); - - const 
runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test", - pkApiKey: "test", - shortcode: "test", - }, - }); - - // Create test and non-test runs - await prisma.taskRun.create({ - data: { - friendlyId: "run_test", - taskIdentifier: "my-task", - isTest: true, - payload: JSON.stringify({ foo: "bar" }), - traceId: "1234", - spanId: "1234", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await prisma.taskRun.create({ - data: { - friendlyId: "run_production", - taskIdentifier: "my-task", - isTest: false, - payload: JSON.stringify({ foo: "bar" }), - traceId: "1235", - spanId: "1235", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await setTimeout(1000); - - const runsRepository = new RunsRepository({ - prisma, - clickhouse, - }); - - // Test filtering by isTest=true - const testRuns = await runsRepository.listRuns({ - page: { size: 10 }, - projectId: project.id, - environmentId: runtimeEnvironment.id, - organizationId: organization.id, - isTest: true, - }); - - expect(testRuns.runs).toHaveLength(1); - expect(testRuns.runs[0].friendlyId).toBe("run_test"); - - // Test filtering by isTest=false - const productionRuns = await runsRepository.listRuns({ - page: { size: 10 }, - projectId: project.id, - environmentId: runtimeEnvironment.id, - organizationId: organization.id, - isTest: false, - }); - - expect(productionRuns.runs).toHaveLength(1); - expect(productionRuns.runs[0].friendlyId).toBe("run_production"); - } - ); -}); \ No newline at end of file +}); diff --git a/apps/webapp/test/runsRepository.part2.test.ts b/apps/webapp/test/runsRepository.part2.test.ts index 
793e19236c0..55cba6854f6 100644 --- a/apps/webapp/test/runsRepository.part2.test.ts +++ b/apps/webapp/test/runsRepository.part2.test.ts @@ -6,15 +6,15 @@ vi.mock("~/db.server", () => ({ $replica: {}, })); -import { containerTest } from "@internal/testcontainers"; +import { replicationContainerTest } from "@internal/testcontainers"; import { setTimeout } from "node:timers/promises"; import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; import { setupClickhouseReplication } from "./utils/replicationUtils"; vi.setConfig({ testTimeout: 60_000 }); -describe("RunsRepository (part 2/2)", () => { - containerTest( +describe("RunsRepository (part 2/4)", () => { + replicationContainerTest( "should filter runs by rootOnly flag", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const { clickhouse } = await setupClickhouseReplication({ @@ -108,7 +108,7 @@ describe("RunsRepository (part 2/2)", () => { } ); - containerTest( + replicationContainerTest( "should filter runs by batchId", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const { clickhouse } = await setupClickhouseReplication({ @@ -238,7 +238,7 @@ describe("RunsRepository (part 2/2)", () => { } ); - containerTest( + replicationContainerTest( "should filter runs by runFriendlyIds", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const { clickhouse } = await setupClickhouseReplication({ @@ -346,7 +346,7 @@ describe("RunsRepository (part 2/2)", () => { } ); - containerTest( + replicationContainerTest( "should filter runs by runIds", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const { clickhouse } = await setupClickhouseReplication({ @@ -453,443 +453,4 @@ describe("RunsRepository (part 2/2)", () => { expect(runs.map((r) => r.id).sort()).toEqual([run1.id, run3.id].sort()); } ); - - containerTest( - "should filter runs by date range (from/to)", - async ({ clickhouseContainer, 
redisOptions, postgresContainer, prisma }) => { - const { clickhouse } = await setupClickhouseReplication({ - prisma, - databaseUrl: postgresContainer.getConnectionUri(), - clickhouseUrl: clickhouseContainer.getConnectionUrl(), - redisOptions, - }); - - const organization = await prisma.organization.create({ - data: { - title: "test", - slug: "test", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test", - slug: "test", - organizationId: organization.id, - externalRef: "test", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test", - pkApiKey: "test", - shortcode: "test", - }, - }); - - const now = new Date(); - const yesterday = new Date(now.getTime() - 24 * 60 * 60 * 1000); - const tomorrow = new Date(now.getTime() + 24 * 60 * 60 * 1000); - - // Create runs with different creation dates - await prisma.taskRun.create({ - data: { - friendlyId: "run_yesterday", - taskIdentifier: "my-task", - createdAt: yesterday, - payload: JSON.stringify({ foo: "bar" }), - traceId: "1234", - spanId: "1234", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await prisma.taskRun.create({ - data: { - friendlyId: "run_today", - taskIdentifier: "my-task", - createdAt: now, - payload: JSON.stringify({ foo: "bar" }), - traceId: "1235", - spanId: "1235", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await prisma.taskRun.create({ - data: { - friendlyId: "run_tomorrow", - taskIdentifier: "my-task", - createdAt: tomorrow, - payload: JSON.stringify({ foo: "bar" }), - traceId: "1236", - spanId: "1236", - queue: "test", - 
runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await setTimeout(1000); - - const runsRepository = new RunsRepository({ - prisma, - clickhouse, - }); - - // Test filtering by date range (from yesterday to today) - const { runs } = await runsRepository.listRuns({ - page: { size: 10 }, - projectId: project.id, - environmentId: runtimeEnvironment.id, - organizationId: organization.id, - from: yesterday.getTime(), - to: now.getTime(), - }); - - expect(runs).toHaveLength(2); - expect(runs.map((r) => r.friendlyId).sort()).toEqual(["run_today", "run_yesterday"]); - } - ); - - containerTest( - "should handle multiple filters combined", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - const { clickhouse } = await setupClickhouseReplication({ - prisma, - databaseUrl: postgresContainer.getConnectionUri(), - clickhouseUrl: clickhouseContainer.getConnectionUrl(), - redisOptions, - }); - - const organization = await prisma.organization.create({ - data: { - title: "test", - slug: "test", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test", - slug: "test", - organizationId: organization.id, - externalRef: "test", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test", - pkApiKey: "test", - shortcode: "test", - }, - }); - - // Create runs with different combinations of properties - await prisma.taskRun.create({ - data: { - friendlyId: "run_match", - taskIdentifier: "task-1", - taskVersion: "1.0.0", - status: "COMPLETED_SUCCESSFULLY", - isTest: false, - runTags: ["urgent"], - payload: JSON.stringify({ foo: "bar" }), - traceId: "1234", - spanId: "1234", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - 
organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await prisma.taskRun.create({ - data: { - friendlyId: "run_no_match_task", - taskIdentifier: "task-2", // Different task - taskVersion: "1.0.0", - status: "COMPLETED_SUCCESSFULLY", - isTest: false, - runTags: ["urgent"], - payload: JSON.stringify({ foo: "bar" }), - traceId: "1235", - spanId: "1235", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await prisma.taskRun.create({ - data: { - friendlyId: "run_no_match_status", - taskIdentifier: "task-1", - taskVersion: "1.0.0", - status: "PENDING", // Different status - isTest: false, - runTags: ["urgent"], - payload: JSON.stringify({ foo: "bar" }), - traceId: "1236", - spanId: "1236", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await setTimeout(1000); - - const runsRepository = new RunsRepository({ - prisma, - clickhouse, - }); - - // Test combining multiple filters - const { runs } = await runsRepository.listRuns({ - page: { size: 10 }, - projectId: project.id, - environmentId: runtimeEnvironment.id, - organizationId: organization.id, - tasks: ["task-1"], - versions: ["1.0.0"], - statuses: ["COMPLETED_SUCCESSFULLY"], - isTest: false, - tags: ["urgent"], - }); - - expect(runs).toHaveLength(1); - expect(runs[0].friendlyId).toBe("run_match"); - } - ); - - containerTest( - "should handle pagination correctly", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { - const { clickhouse } = await setupClickhouseReplication({ - prisma, - databaseUrl: postgresContainer.getConnectionUri(), - clickhouseUrl: clickhouseContainer.getConnectionUrl(), - redisOptions, - }); - - const organization = await prisma.organization.create({ 
- data: { - title: "test", - slug: "test", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test", - slug: "test", - organizationId: organization.id, - externalRef: "test", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test", - pkApiKey: "test", - shortcode: "test", - }, - }); - - // Create multiple runs for pagination testing - const runs = []; - for (let i = 1; i <= 5; i++) { - const run = await prisma.taskRun.create({ - data: { - friendlyId: `run_${i}`, - taskIdentifier: "my-task", - payload: JSON.stringify({ foo: "bar" }), - traceId: `123${i}`, - spanId: `123${i}`, - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - runs.push(run); - } - - await setTimeout(1000); - - const runsRepository = new RunsRepository({ - prisma, - clickhouse, - }); - - // Test first page - const firstPage = await runsRepository.listRuns({ - page: { size: 2 }, - projectId: project.id, - environmentId: runtimeEnvironment.id, - organizationId: organization.id, - }); - - expect(firstPage.runs).toHaveLength(2); - expect(firstPage.pagination.nextCursor).toBeTruthy(); - expect(firstPage.pagination.previousCursor).toBe(null); - - // Test next page using cursor - const secondPage = await runsRepository.listRuns({ - page: { - size: 2, - cursor: firstPage.pagination.nextCursor!, - direction: "forward", - }, - projectId: project.id, - environmentId: runtimeEnvironment.id, - organizationId: organization.id, - }); - - expect(secondPage.runs).toHaveLength(2); - expect(secondPage.pagination.nextCursor).toBeTruthy(); - expect(secondPage.pagination.previousCursor).toBeTruthy(); - } - ); - - containerTest( - "should count new runs with listRunIds", - async ({ clickhouseContainer, 
redisOptions, postgresContainer, prisma }) => { - const { clickhouse } = await setupClickhouseReplication({ - prisma, - databaseUrl: postgresContainer.getConnectionUri(), - clickhouseUrl: clickhouseContainer.getConnectionUrl(), - redisOptions, - }); - - const organization = await prisma.organization.create({ - data: { - title: "test", - slug: "test", - }, - }); - - const project = await prisma.project.create({ - data: { - name: "test", - slug: "test", - organizationId: organization.id, - externalRef: "test", - }, - }); - - const runtimeEnvironment = await prisma.runtimeEnvironment.create({ - data: { - slug: "test", - type: "DEVELOPMENT", - projectId: project.id, - organizationId: organization.id, - apiKey: "test", - pkApiKey: "test", - shortcode: "test", - }, - }); - - const taskRun = await prisma.taskRun.create({ - data: { - friendlyId: "run_has_new", - taskIdentifier: "my-task", - payload: JSON.stringify({ foo: "bar" }), - traceId: "1234", - spanId: "1234", - queue: "test", - runtimeEnvironmentId: runtimeEnvironment.id, - projectId: project.id, - organizationId: organization.id, - environmentType: "DEVELOPMENT", - engine: "V2", - }, - }); - - await setTimeout(1000); - - const runsRepository = new RunsRepository({ - prisma, - clickhouse, - }); - - const baseOptions = { - projectId: project.id, - environmentId: runtimeEnvironment.id, - organizationId: organization.id, - }; - - const createdAtMs = taskRun.createdAt.getTime(); - - const newRunIdsBefore = await runsRepository.listRunIds({ - ...baseOptions, - from: createdAtMs - 1, - page: { size: 100 }, - }); - expect(newRunIdsBefore.length).toBeGreaterThanOrEqual(1); - - const newRunIdsAfter = await runsRepository.listRunIds({ - ...baseOptions, - from: createdAtMs + 60_000, - page: { size: 100 }, - }); - expect(newRunIdsAfter).toHaveLength(0); - - const fromBeforeRun = createdAtMs - 1; - - const matchingTaskIds = await runsRepository.listRunIds({ - ...baseOptions, - from: fromBeforeRun, - tasks: ["my-task"], - page: 
{ size: 100 }, - }); - expect(matchingTaskIds.length).toBeGreaterThanOrEqual(1); - - const otherTaskIds = await runsRepository.listRunIds({ - ...baseOptions, - from: fromBeforeRun, - tasks: ["other-task"], - page: { size: 100 }, - }); - expect(otherTaskIds).toHaveLength(0); - } - ); }); diff --git a/apps/webapp/test/runsRepository.part3.test.ts b/apps/webapp/test/runsRepository.part3.test.ts new file mode 100644 index 00000000000..543ce47a018 --- /dev/null +++ b/apps/webapp/test/runsRepository.part3.test.ts @@ -0,0 +1,343 @@ +import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +import { replicationContainerTest } from "@internal/testcontainers"; +import { setTimeout } from "node:timers/promises"; +import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; +import { setupClickhouseReplication } from "./utils/replicationUtils"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("RunsRepository (part 3/4)", () => { + replicationContainerTest( + "should filter runs by tags", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const organization = await prisma.organization.create({ + data: { + title: "test", + slug: "test", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + // Create runs with different tags + const taskRun1 = await 
prisma.taskRun.create({ + data: { + friendlyId: "run_urgent", + taskIdentifier: "my-task", + runTags: ["urgent", "production"], + payload: JSON.stringify({ foo: "bar" }), + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + const taskRun2 = await prisma.taskRun.create({ + data: { + friendlyId: "run_regular", + taskIdentifier: "my-task", + runTags: ["regular", "development"], + payload: JSON.stringify({ foo: "bar" }), + traceId: "1235", + spanId: "1235", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + const taskRun3 = await prisma.taskRun.create({ + data: { + friendlyId: "run_urgent_dev", + taskIdentifier: "my-task", + runTags: ["urgent", "development"], + payload: JSON.stringify({ foo: "bar" }), + traceId: "1236", + spanId: "1236", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await setTimeout(1000); + + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + }); + + // Test filtering by tags + const { runs } = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: project.id, + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + tags: ["urgent"], + }); + + expect(runs).toHaveLength(2); + expect(runs.map((r) => r.friendlyId).sort()).toEqual(["run_urgent", "run_urgent_dev"]); + } + ); + + replicationContainerTest( + "should filter runs by scheduleId", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: 
clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const organization = await prisma.organization.create({ + data: { + title: "test", + slug: "test", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + // Create runs with different schedule IDs + await prisma.taskRun.create({ + data: { + friendlyId: "run_scheduled_1", + taskIdentifier: "my-task", + scheduleId: "schedule_1", + payload: JSON.stringify({ foo: "bar" }), + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await prisma.taskRun.create({ + data: { + friendlyId: "run_scheduled_2", + taskIdentifier: "my-task", + scheduleId: "schedule_2", + payload: JSON.stringify({ foo: "bar" }), + traceId: "1235", + spanId: "1235", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await prisma.taskRun.create({ + data: { + friendlyId: "run_unscheduled", + taskIdentifier: "my-task", + payload: JSON.stringify({ foo: "bar" }), + traceId: "1236", + spanId: "1236", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await setTimeout(1000); + + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + }); + + // Test filtering by schedule ID + const { runs } = await runsRepository.listRuns({ + 
page: { size: 10 }, + projectId: project.id, + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + scheduleId: "schedule_1", + }); + + expect(runs).toHaveLength(1); + expect(runs[0].friendlyId).toBe("run_scheduled_1"); + } + ); + + replicationContainerTest( + "should filter runs by isTest flag", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const organization = await prisma.organization.create({ + data: { + title: "test", + slug: "test", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + // Create test and non-test runs + await prisma.taskRun.create({ + data: { + friendlyId: "run_test", + taskIdentifier: "my-task", + isTest: true, + payload: JSON.stringify({ foo: "bar" }), + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await prisma.taskRun.create({ + data: { + friendlyId: "run_production", + taskIdentifier: "my-task", + isTest: false, + payload: JSON.stringify({ foo: "bar" }), + traceId: "1235", + spanId: "1235", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await setTimeout(1000); + + const runsRepository = new 
RunsRepository({ + prisma, + clickhouse, + }); + + // Test filtering by isTest=true + const testRuns = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: project.id, + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + isTest: true, + }); + + expect(testRuns.runs).toHaveLength(1); + expect(testRuns.runs[0].friendlyId).toBe("run_test"); + + // Test filtering by isTest=false + const productionRuns = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: project.id, + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + isTest: false, + }); + + expect(productionRuns.runs).toHaveLength(1); + expect(productionRuns.runs[0].friendlyId).toBe("run_production"); + } + ); +}); diff --git a/apps/webapp/test/runsRepository.part4.test.ts b/apps/webapp/test/runsRepository.part4.test.ts new file mode 100644 index 00000000000..b79e41397ee --- /dev/null +++ b/apps/webapp/test/runsRepository.part4.test.ts @@ -0,0 +1,455 @@ +import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +import { replicationContainerTest } from "@internal/testcontainers"; +import { setTimeout } from "node:timers/promises"; +import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; +import { setupClickhouseReplication } from "./utils/replicationUtils"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("RunsRepository (part 4/4)", () => { + replicationContainerTest( + "should filter runs by date range (from/to)", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const organization = await prisma.organization.create({ + data: { + title: "test", + slug: "test", + }, + }); + + const 
project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + const now = new Date(); + const yesterday = new Date(now.getTime() - 24 * 60 * 60 * 1000); + const tomorrow = new Date(now.getTime() + 24 * 60 * 60 * 1000); + + // Create runs with different creation dates + await prisma.taskRun.create({ + data: { + friendlyId: "run_yesterday", + taskIdentifier: "my-task", + createdAt: yesterday, + payload: JSON.stringify({ foo: "bar" }), + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await prisma.taskRun.create({ + data: { + friendlyId: "run_today", + taskIdentifier: "my-task", + createdAt: now, + payload: JSON.stringify({ foo: "bar" }), + traceId: "1235", + spanId: "1235", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await prisma.taskRun.create({ + data: { + friendlyId: "run_tomorrow", + taskIdentifier: "my-task", + createdAt: tomorrow, + payload: JSON.stringify({ foo: "bar" }), + traceId: "1236", + spanId: "1236", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await setTimeout(1000); + + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + }); + + // Test filtering by date range (from yesterday to today) + const { runs } = await runsRepository.listRuns({ + 
page: { size: 10 }, + projectId: project.id, + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + from: yesterday.getTime(), + to: now.getTime(), + }); + + expect(runs).toHaveLength(2); + expect(runs.map((r) => r.friendlyId).sort()).toEqual(["run_today", "run_yesterday"]); + } + ); + + replicationContainerTest( + "should handle multiple filters combined", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const organization = await prisma.organization.create({ + data: { + title: "test", + slug: "test", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + // Create runs with different combinations of properties + await prisma.taskRun.create({ + data: { + friendlyId: "run_match", + taskIdentifier: "task-1", + taskVersion: "1.0.0", + status: "COMPLETED_SUCCESSFULLY", + isTest: false, + runTags: ["urgent"], + payload: JSON.stringify({ foo: "bar" }), + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await prisma.taskRun.create({ + data: { + friendlyId: "run_no_match_task", + taskIdentifier: "task-2", // Different task + taskVersion: "1.0.0", + status: "COMPLETED_SUCCESSFULLY", + isTest: false, + runTags: ["urgent"], + payload: JSON.stringify({ foo: "bar" }), + traceId: "1235", 
+ spanId: "1235", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await prisma.taskRun.create({ + data: { + friendlyId: "run_no_match_status", + taskIdentifier: "task-1", + taskVersion: "1.0.0", + status: "PENDING", // Different status + isTest: false, + runTags: ["urgent"], + payload: JSON.stringify({ foo: "bar" }), + traceId: "1236", + spanId: "1236", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await setTimeout(1000); + + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + }); + + // Test combining multiple filters + const { runs } = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: project.id, + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + tasks: ["task-1"], + versions: ["1.0.0"], + statuses: ["COMPLETED_SUCCESSFULLY"], + isTest: false, + tags: ["urgent"], + }); + + expect(runs).toHaveLength(1); + expect(runs[0].friendlyId).toBe("run_match"); + } + ); + + replicationContainerTest( + "should handle pagination correctly", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const organization = await prisma.organization.create({ + data: { + title: "test", + slug: "test", + }, + }); + + const project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + 
organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + // Create multiple runs for pagination testing + const runs = []; + for (let i = 1; i <= 5; i++) { + const run = await prisma.taskRun.create({ + data: { + friendlyId: `run_${i}`, + taskIdentifier: "my-task", + payload: JSON.stringify({ foo: "bar" }), + traceId: `123${i}`, + spanId: `123${i}`, + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + runs.push(run); + } + + await setTimeout(1000); + + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + }); + + // Test first page + const firstPage = await runsRepository.listRuns({ + page: { size: 2 }, + projectId: project.id, + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + }); + + expect(firstPage.runs).toHaveLength(2); + expect(firstPage.pagination.nextCursor).toBeTruthy(); + expect(firstPage.pagination.previousCursor).toBe(null); + + // Test next page using cursor + const secondPage = await runsRepository.listRuns({ + page: { + size: 2, + cursor: firstPage.pagination.nextCursor!, + direction: "forward", + }, + projectId: project.id, + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + }); + + expect(secondPage.runs).toHaveLength(2); + expect(secondPage.pagination.nextCursor).toBeTruthy(); + expect(secondPage.pagination.previousCursor).toBeTruthy(); + } + ); + + replicationContainerTest( + "should count new runs with listRunIds", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const organization = await prisma.organization.create({ + data: { + title: "test", + slug: "test", + }, + 
}); + + const project = await prisma.project.create({ + data: { + name: "test", + slug: "test", + organizationId: organization.id, + externalRef: "test", + }, + }); + + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: "test", + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: "test", + pkApiKey: "test", + shortcode: "test", + }, + }); + + const taskRun = await prisma.taskRun.create({ + data: { + friendlyId: "run_has_new", + taskIdentifier: "my-task", + payload: JSON.stringify({ foo: "bar" }), + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); + + await setTimeout(1000); + + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + }); + + const baseOptions = { + projectId: project.id, + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + }; + + const createdAtMs = taskRun.createdAt.getTime(); + + const newRunIdsBefore = await runsRepository.listRunIds({ + ...baseOptions, + from: createdAtMs - 1, + page: { size: 100 }, + }); + expect(newRunIdsBefore.length).toBeGreaterThanOrEqual(1); + + const newRunIdsAfter = await runsRepository.listRunIds({ + ...baseOptions, + from: createdAtMs + 60_000, + page: { size: 100 }, + }); + expect(newRunIdsAfter).toHaveLength(0); + + const fromBeforeRun = createdAtMs - 1; + + const matchingTaskIds = await runsRepository.listRunIds({ + ...baseOptions, + from: fromBeforeRun, + tasks: ["my-task"], + page: { size: 100 }, + }); + expect(matchingTaskIds.length).toBeGreaterThanOrEqual(1); + + const otherTaskIds = await runsRepository.listRunIds({ + ...baseOptions, + from: fromBeforeRun, + tasks: ["other-task"], + page: { size: 100 }, + }); + expect(otherTaskIds).toHaveLength(0); + } + ); +}); diff --git a/apps/webapp/test/sessionsReplicationService.test.ts 
b/apps/webapp/test/sessionsReplicationService.test.ts index 8b5dfe22fe1..1d3c761e813 100644 --- a/apps/webapp/test/sessionsReplicationService.test.ts +++ b/apps/webapp/test/sessionsReplicationService.test.ts @@ -1,5 +1,5 @@ import { ClickHouse } from "@internal/clickhouse"; -import { containerTest } from "@internal/testcontainers"; +import { replicationContainerTest } from "@internal/testcontainers"; import { setTimeout } from "node:timers/promises"; import { z } from "zod"; import { SessionsReplicationService } from "~/services/sessionsReplicationService.server"; @@ -8,7 +8,7 @@ import { TestReplicationClickhouseFactory } from "./utils/testReplicationClickho vi.setConfig({ testTimeout: 60_000 }); describe("SessionsReplicationService", () => { - containerTest( + replicationContainerTest( "replicates an insert from Postgres Session → ClickHouse sessions_v1", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { // Logical replication needs full-row images for DELETE events. @@ -116,7 +116,7 @@ describe("SessionsReplicationService", () => { } ); - containerTest( + replicationContainerTest( "replicates an update (close) from Postgres → ClickHouse", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { await prisma.$executeRawUnsafe(`ALTER TABLE public."Session" REPLICA IDENTITY FULL;`); diff --git a/apps/webapp/vitest.config.ts b/apps/webapp/vitest.config.ts index 6a6b550fc64..69eb980732f 100644 --- a/apps/webapp/vitest.config.ts +++ b/apps/webapp/vitest.config.ts @@ -1,8 +1,10 @@ import { defineConfig } from "vitest/config"; +import { DurationShardingSequencer } from "@internal/testcontainers/sequencer"; import tsconfigPaths from "vite-tsconfig-paths"; export default defineConfig({ test: { + sequence: { sequencer: DurationShardingSequencer }, include: ["test/**/*.test.ts"], // *.e2e.test.ts: smoke matrix, run via vitest.e2e.config.ts. 
// *.e2e.full.test.ts: full auth suite, runs via vitest.e2e.full.config.ts diff --git a/internal-packages/clickhouse/vitest.config.ts b/internal-packages/clickhouse/vitest.config.ts index f3687eb4098..26c9ecebf11 100644 --- a/internal-packages/clickhouse/vitest.config.ts +++ b/internal-packages/clickhouse/vitest.config.ts @@ -1,7 +1,9 @@ import { defineConfig } from "vitest/config"; +import { DurationShardingSequencer } from "@internal/testcontainers/sequencer"; export default defineConfig({ test: { + sequence: { sequencer: DurationShardingSequencer }, include: ["**/*.test.ts"], globals: true, isolate: true, diff --git a/internal-packages/llm-model-catalog/vitest.config.ts b/internal-packages/llm-model-catalog/vitest.config.ts index 9ba46467cad..88831ee2ae5 100644 --- a/internal-packages/llm-model-catalog/vitest.config.ts +++ b/internal-packages/llm-model-catalog/vitest.config.ts @@ -1,7 +1,9 @@ import { defineConfig } from "vitest/config"; +import { DurationShardingSequencer } from "@internal/testcontainers/sequencer"; export default defineConfig({ test: { + sequence: { sequencer: DurationShardingSequencer }, include: ["**/*.test.ts"], globals: true, isolate: true, diff --git a/internal-packages/replication/vitest.config.ts b/internal-packages/replication/vitest.config.ts index f3687eb4098..26c9ecebf11 100644 --- a/internal-packages/replication/vitest.config.ts +++ b/internal-packages/replication/vitest.config.ts @@ -1,7 +1,9 @@ import { defineConfig } from "vitest/config"; +import { DurationShardingSequencer } from "@internal/testcontainers/sequencer"; export default defineConfig({ test: { + sequence: { sequencer: DurationShardingSequencer }, include: ["**/*.test.ts"], globals: true, isolate: true, diff --git a/internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts b/internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts index 3fe9d3348a0..a632c707390 100644 --- 
a/internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts +++ b/internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts @@ -1,4 +1,7 @@ -import { assertNonNullable, containerTest } from "@internal/testcontainers"; +import { + assertNonNullable, + containerTestWithIsolatedRedis as containerTest, +} from "@internal/testcontainers"; import { trace } from "@internal/tracing"; import { expect, describe } from "vitest"; import { RunEngine } from "../index.js"; diff --git a/internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts b/internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts index 6208560a56a..8471c07844b 100644 --- a/internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts +++ b/internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts @@ -1,4 +1,7 @@ -import { assertNonNullable, containerTest } from "@internal/testcontainers"; +import { + assertNonNullable, + containerTestWithIsolatedRedis as containerTest, +} from "@internal/testcontainers"; import { trace } from "@internal/tracing"; import { expect, describe, vi } from "vitest"; import { RunEngine } from "../index.js"; diff --git a/internal-packages/run-engine/src/engine/tests/ttl.test.ts b/internal-packages/run-engine/src/engine/tests/ttl.test.ts index e787d916f8a..13d4c55b669 100644 --- a/internal-packages/run-engine/src/engine/tests/ttl.test.ts +++ b/internal-packages/run-engine/src/engine/tests/ttl.test.ts @@ -142,177 +142,177 @@ describe("RunEngine ttl", () => { } }); - containerTest("First enqueue from trigger includes ttlExpiresAt in message", async ({ - prisma, - redisOptions, - }) => { - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + containerTest( + "First enqueue from trigger includes ttlExpiresAt in message", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - const engine = new 
RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - processWorkerQueueDebounceMs: 50, - masterQueueConsumersDisabled: true, - ttlSystem: { + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, pollIntervalMs: 100, - batchSize: 10, - batchMaxWaitMs: 100, }, - }, - runLock: { - redis: redisOptions, - }, - machines: { - defaultMachine: "small-1x", + queue: { + redis: redisOptions, + processWorkerQueueDebounceMs: 50, + masterQueueConsumersDisabled: true, + ttlSystem: { + pollIntervalMs: 100, + batchSize: 10, + batchMaxWaitMs: 100, + }, + }, + runLock: { + redis: redisOptions, + }, machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, }, + baseCostInCents: 0.0001, }, - baseCostInCents: 0.0001, - }, - tracer: trace.getTracer("test", "0.0.0"), - }); + tracer: trace.getTracer("test", "0.0.0"), + }); - try { - const taskIdentifier = "test-task"; - await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + try { + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_ttlmsg1", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t_ttl", - spanId: "s_ttl", - workerQueue: "main", - queue: "task/test-task", - isTest: false, - tags: [], - ttl: "1s", - }, - prisma - ); + const run = await engine.trigger( + { + number: 1, + friendlyId: "run_ttlmsg1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + context: 
{}, + traceContext: {}, + traceId: "t_ttl", + spanId: "s_ttl", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + ttl: "1s", + }, + prisma + ); - const message = await engine.runQueue.readMessage( - authenticatedEnvironment.organization.id, - run.id - ); - assertNonNullable(message); - expect(message.ttlExpiresAt).toBeDefined(); - expect(typeof message.ttlExpiresAt).toBe("number"); - } finally { - await engine.quit(); + const message = await engine.runQueue.readMessage( + authenticatedEnvironment.organization.id, + run.id + ); + assertNonNullable(message); + expect(message.ttlExpiresAt).toBeDefined(); + expect(typeof message.ttlExpiresAt).toBe("number"); + } finally { + await engine.quit(); + } } - }); + ); - containerTest("Re-enqueue with includeTtl false does not set ttlExpiresAt", async ({ - prisma, - redisOptions, - }) => { - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + containerTest( + "Re-enqueue with includeTtl false does not set ttlExpiresAt", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - processWorkerQueueDebounceMs: 50, - masterQueueConsumersDisabled: true, - ttlSystem: { + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, pollIntervalMs: 100, - batchSize: 10, - batchMaxWaitMs: 100, }, - }, - runLock: { - redis: redisOptions, - }, - machines: { - defaultMachine: "small-1x", + queue: { + redis: redisOptions, + processWorkerQueueDebounceMs: 50, + masterQueueConsumersDisabled: true, + ttlSystem: { + pollIntervalMs: 100, + batchSize: 10, + batchMaxWaitMs: 100, + }, + }, + runLock: { + redis: redisOptions, + }, machines: { - "small-1x": { - name: "small-1x" as 
const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, }, + baseCostInCents: 0.0001, }, - baseCostInCents: 0.0001, - }, - tracer: trace.getTracer("test", "0.0.0"), - }); + tracer: trace.getTracer("test", "0.0.0"), + }); - try { - const taskIdentifier = "test-task"; - await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + try { + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_reenq01", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t_re", - spanId: "s_re", - workerQueue: "main", - queue: "task/test-task", - isTest: false, - tags: [], - ttl: "1s", - }, - prisma - ); + const run = await engine.trigger( + { + number: 1, + friendlyId: "run_reenq01", + environment: authenticatedEnvironment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t_re", + spanId: "s_re", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + ttl: "1s", + }, + prisma + ); - const messageAfterTrigger = await engine.runQueue.readMessage( - authenticatedEnvironment.organization.id, - run.id - ); - assertNonNullable(messageAfterTrigger); - expect(messageAfterTrigger.ttlExpiresAt).toBeDefined(); - - await engine.enqueueSystem.enqueueRun({ - run, - env: authenticatedEnvironment, - tx: prisma, - skipRunLock: true, - includeTtl: false, - }); + const messageAfterTrigger = await engine.runQueue.readMessage( + authenticatedEnvironment.organization.id, + run.id + ); + assertNonNullable(messageAfterTrigger); + expect(messageAfterTrigger.ttlExpiresAt).toBeDefined(); - const messageAfterReenqueue = await 
engine.runQueue.readMessage( - authenticatedEnvironment.organization.id, - run.id - ); - assertNonNullable(messageAfterReenqueue); - expect(messageAfterReenqueue.ttlExpiresAt).toBeUndefined(); - } finally { - await engine.quit(); + await engine.enqueueSystem.enqueueRun({ + run, + env: authenticatedEnvironment, + tx: prisma, + skipRunLock: true, + includeTtl: false, + }); + + const messageAfterReenqueue = await engine.runQueue.readMessage( + authenticatedEnvironment.organization.id, + run.id + ); + assertNonNullable(messageAfterReenqueue); + expect(messageAfterReenqueue.ttlExpiresAt).toBeUndefined(); + } finally { + await engine.quit(); + } } - }); + ); containerTest( "Re-enqueued runs are not expired by TTL once they have started", @@ -392,10 +392,7 @@ describe("RunEngine ttl", () => { // Dequeue the run — this simulates the run starting to execute, which // ZREMs its TTL set entry. - await engine.runQueue.processMasterQueueForEnvironment( - authenticatedEnvironment.id, - 10 - ); + await engine.runQueue.processMasterQueueForEnvironment(authenticatedEnvironment.id, 10); const dequeued = await engine.dequeueFromWorkerQueue({ consumerId: "test-consumer", workerQueue: "main", @@ -854,11 +851,9 @@ describe("RunEngine ttl", () => { consumerId: "test-consumer", workerQueue: "main", maxRunCount: 1, - backgroundWorkerId: ( - await prisma.backgroundWorker.findFirst({ - where: { runtimeEnvironmentId: authenticatedEnvironment.id }, - }) - )!.id, + backgroundWorkerId: (await prisma.backgroundWorker.findFirst({ + where: { runtimeEnvironmentId: authenticatedEnvironment.id }, + }))!.id, }); expect(dequeued.length).toBe(0); @@ -957,10 +952,7 @@ describe("RunEngine ttl", () => { // Manually process the master queue - the dequeue Lua script should // encounter the expired message and skip it (removing from queue sorted // sets but leaving messageKey and ttlQueueKey for TTL consumer) - await engine.runQueue.processMasterQueueForEnvironment( - authenticatedEnvironment.id, - 10 - ); + 
await engine.runQueue.processMasterQueueForEnvironment(authenticatedEnvironment.id, 10); // Try to dequeue from worker queue - nothing should be there since // the expired message was skipped by the Lua script @@ -976,12 +968,13 @@ describe("RunEngine ttl", () => { assertNonNullable(executionData2); expect(executionData2.run.status).toBe("PENDING"); - // Now wait for the TTL consumer to poll and expire the run - // (pollIntervalMs is 5000 for TTL scan + up to 5000ms batch maxWaitMs + processing) - await setTimeout(13_000); - - // The TTL consumer should have found and expired the run - expect(expiredEvents.length).toBe(1); + // Wait (event-driven) for the TTL consumer to poll and expire the run. pollIntervalMs is + // 5000ms here so the consumer fires only after the dequeue-skip assertions above; waitFor + // resolves as soon as the event lands instead of a fixed worst-case sleep. + await vi.waitFor(() => expect(expiredEvents.length).toBe(1), { + timeout: 15_000, + interval: 100, + }); expect(expiredEvents[0]?.run.id).toBe(run.id); // Check the run status directly from the database (the batch TTL path @@ -1006,8 +999,7 @@ describe("RunEngine ttl", () => { containerTest( "TTL expiration clears env concurrency keys with proj segment", async ({ prisma, redisOptions }) => { - const authenticatedEnvironment = - await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); const engine = new RunEngine({ prisma, @@ -1071,14 +1063,9 @@ describe("RunEngine ttl", () => { prisma ); - const queue = engine.runQueue.keys.queueKey( - authenticatedEnvironment, - "task/test-task" - ); - const envConcurrencyKey = - engine.runQueue.keys.envCurrentConcurrencyKeyFromQueue(queue); - const envDequeuedKey = - engine.runQueue.keys.envCurrentDequeuedKeyFromQueue(queue); + const queue = engine.runQueue.keys.queueKey(authenticatedEnvironment, "task/test-task"); + const envConcurrencyKey = 
engine.runQueue.keys.envCurrentConcurrencyKeyFromQueue(queue); + const envDequeuedKey = engine.runQueue.keys.envCurrentDequeuedKeyFromQueue(queue); await engine.runQueue.redis.sadd(envConcurrencyKey, run.id); await engine.runQueue.redis.sadd(envDequeuedKey, run.id); @@ -1089,28 +1076,26 @@ describe("RunEngine ttl", () => { expect(concurrencyBefore).toContain(run.id); await setTimeout(1_500); - await engine.runQueue.processMasterQueueForEnvironment( - authenticatedEnvironment.id, - 10 + await engine.runQueue.processMasterQueueForEnvironment(authenticatedEnvironment.id, 10); + // Wait (event-driven) for the TTL consumer to expire the run; resolves as soon as the DB + // reflects EXPIRED instead of a fixed worst-case sleep (pollIntervalMs is 5000ms here). + await vi.waitFor( + async () => { + const expiredRun = await prisma.taskRun.findUnique({ + where: { id: run.id }, + select: { status: true }, + }); + expect(expiredRun?.status).toBe("EXPIRED"); + }, + { timeout: 15_000, interval: 200 } ); - // Wait for TTL scan (5000ms) + batch maxWaitMs (5000ms) + processing buffer - await setTimeout(13_000); - - const expiredRun = await prisma.taskRun.findUnique({ - where: { id: run.id }, - select: { status: true }, - }); - expect(expiredRun?.status).toBe("EXPIRED"); const concurrencyAfter = await engine.runQueue.getCurrentConcurrencyOfEnvironment( authenticatedEnvironment ); expect(concurrencyAfter).not.toContain(run.id); - const stillInDequeued = await engine.runQueue.redis.sismember( - envDequeuedKey, - run.id - ); + const stillInDequeued = await engine.runQueue.redis.sismember(envDequeuedKey, run.id); expect(stillInDequeued).toBe(0); } finally { await engine.quit(); @@ -1215,10 +1200,7 @@ describe("RunEngine ttl", () => { // Manually process the master queue - the Lua script should skip the // expired message and dequeue only the non-expired one to the worker queue - await engine.runQueue.processMasterQueueForEnvironment( - authenticatedEnvironment.id, - 10 - ); + await 
engine.runQueue.processMasterQueueForEnvironment(authenticatedEnvironment.id, 10); // Dequeue from worker queue - only the non-expired run should be there const dequeued = await engine.dequeueFromWorkerQueue({ @@ -1238,95 +1220,92 @@ describe("RunEngine ttl", () => { } ); - containerTest( - "expireRunsBatch skips runs that are locked", - async ({ prisma, redisOptions }) => { - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + containerTest("expireRunsBatch skips runs that are locked", async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - processWorkerQueueDebounceMs: 50, - masterQueueConsumersDisabled: true, - ttlSystem: { - disabled: true, // We'll manually test the batch function - }, - }, - runLock: { - redis: redisOptions, + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + processWorkerQueueDebounceMs: 50, + masterQueueConsumersDisabled: true, + ttlSystem: { + disabled: true, // We'll manually test the batch function }, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, }, - baseCostInCents: 0.0001, }, - tracer: trace.getTracer("test", "0.0.0"), - }); + baseCostInCents: 0.0001, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); - try { - const taskIdentifier = "test-task"; + try { + const taskIdentifier = "test-task"; - await setupBackgroundWorker(engine, 
authenticatedEnvironment, taskIdentifier); + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); - // Trigger a run with TTL - const run = await engine.trigger( - { - number: 1, - friendlyId: "run_l1234", - environment: authenticatedEnvironment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - context: {}, - traceContext: {}, - traceId: "t1", - spanId: "s1", - workerQueue: "main", - queue: "task/test-task", - isTest: false, - tags: [], - ttl: "1s", - }, - prisma - ); + // Trigger a run with TTL + const run = await engine.trigger( + { + number: 1, + friendlyId: "run_l1234", + environment: authenticatedEnvironment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t1", + spanId: "s1", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + ttl: "1s", + }, + prisma + ); - // Manually lock the run (simulating it being about to execute) - await prisma.taskRun.update({ - where: { id: run.id }, - data: { lockedAt: new Date() }, - }); + // Manually lock the run (simulating it being about to execute) + await prisma.taskRun.update({ + where: { id: run.id }, + data: { lockedAt: new Date() }, + }); - // Try to expire the run via batch - const result = await engine.ttlSystem.expireRunsBatch([run.id]); + // Try to expire the run via batch + const result = await engine.ttlSystem.expireRunsBatch([run.id]); - // Should be skipped because it's locked - expect(result.expired.length).toBe(0); - expect(result.skipped.length).toBe(1); - expect(result.skipped[0]?.reason).toBe("locked"); + // Should be skipped because it's locked + expect(result.expired.length).toBe(0); + expect(result.skipped.length).toBe(1); + expect(result.skipped[0]?.reason).toBe("locked"); - // Run should still be PENDING - const executionData = await engine.getRunExecutionData({ runId: run.id }); - assertNonNullable(executionData); - 
expect(executionData.run.status).toBe("PENDING"); - } finally { - await engine.quit(); - } + // Run should still be PENDING + const executionData = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData); + expect(executionData.run.status).toBe("PENDING"); + } finally { + await engine.quit(); } - ); + }); containerTest( "expireRunsBatch skips runs with non-PENDING status", @@ -1417,58 +1396,55 @@ describe("RunEngine ttl", () => { } ); - containerTest( - "expireRunsBatch handles non-existent runs", - async ({ prisma, redisOptions }) => { - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + containerTest("expireRunsBatch handles non-existent runs", async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - processWorkerQueueDebounceMs: 50, - masterQueueConsumersDisabled: true, - ttlSystem: { - disabled: true, - }, - }, - runLock: { - redis: redisOptions, + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + processWorkerQueueDebounceMs: 50, + masterQueueConsumersDisabled: true, + ttlSystem: { + disabled: true, }, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, }, - baseCostInCents: 0.0001, }, - tracer: trace.getTracer("test", "0.0.0"), - }); + baseCostInCents: 0.0001, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); - try { - // Try 
to expire a non-existent run - const result = await engine.ttlSystem.expireRunsBatch(["non_existent_run_id"]); + try { + // Try to expire a non-existent run + const result = await engine.ttlSystem.expireRunsBatch(["non_existent_run_id"]); - // Should be skipped as not found - expect(result.expired.length).toBe(0); - expect(result.skipped.length).toBe(1); - expect(result.skipped[0]?.reason).toBe("not_found"); - } finally { - await engine.quit(); - } + // Should be skipped as not found + expect(result.expired.length).toBe(0); + expect(result.skipped.length).toBe(1); + expect(result.skipped[0]?.reason).toBe("not_found"); + } finally { + await engine.quit(); } - ); + }); containerTest( "TTL-expired child run completes waitpoint and resumes parent", @@ -1639,54 +1615,51 @@ describe("RunEngine ttl", () => { } ); - containerTest( - "expireRunsBatch handles empty array", - async ({ prisma, redisOptions }) => { - const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + containerTest("expireRunsBatch handles empty array", async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - const engine = new RunEngine({ - prisma, - worker: { - redis: redisOptions, - workers: 1, - tasksPerWorker: 10, - pollIntervalMs: 100, - }, - queue: { - redis: redisOptions, - processWorkerQueueDebounceMs: 50, - masterQueueConsumersDisabled: true, - ttlSystem: { - disabled: true, - }, - }, - runLock: { - redis: redisOptions, + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + processWorkerQueueDebounceMs: 50, + masterQueueConsumersDisabled: true, + ttlSystem: { + disabled: true, }, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", machines: { - defaultMachine: "small-1x", - machines: { - "small-1x": { - name: "small-1x" as 
const, - cpu: 0.5, - memory: 0.5, - centsPerMs: 0.0001, - }, + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, }, - baseCostInCents: 0.0001, }, - tracer: trace.getTracer("test", "0.0.0"), - }); + baseCostInCents: 0.0001, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); - try { - // Try to expire an empty array - const result = await engine.ttlSystem.expireRunsBatch([]); + try { + // Try to expire an empty array + const result = await engine.ttlSystem.expireRunsBatch([]); - expect(result.expired.length).toBe(0); - expect(result.skipped.length).toBe(0); - } finally { - await engine.quit(); - } + expect(result.expired.length).toBe(0); + expect(result.skipped.length).toBe(0); + } finally { + await engine.quit(); } - ); + }); }); diff --git a/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts b/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts index 9937314d799..55e1b0d0836 100644 --- a/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts +++ b/internal-packages/run-engine/src/engine/tests/waitpoints.test.ts @@ -107,7 +107,16 @@ describe("RunEngine Waitpoints", () => { const executionData = await engine.getRunExecutionData({ runId: run.id }); expect(executionData?.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - await setTimeout(2_000); + // Event-driven wait: the run resumes once the datetime waitpoint (~1s out) completes and the + // worker unblocks it. Gate on the final state the test asserts (run EXECUTING), not just the + // waitpoint status, which flips slightly earlier. 
+ await vi.waitFor( + async () => { + const ed = await engine.getRunExecutionData({ runId: run.id }); + expect(ed?.snapshot.executionStatus).toBe("EXECUTING"); + }, + { timeout: 10_000, interval: 100 } + ); const waitpoint2 = await prisma.waitpoint.findFirst({ where: { @@ -497,7 +506,14 @@ describe("RunEngine Waitpoints", () => { const executionData = await engine.getRunExecutionData({ runId: run.id }); expect(executionData?.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); - await setTimeout(750); + // Event-driven wait: resume as soon as the waitpoint completes, no fixed margin. + await vi.waitFor( + async () => { + const ed = await engine.getRunExecutionData({ runId: run.id }); + expect(ed?.snapshot.executionStatus).toBe("EXECUTING"); + }, + { timeout: 10_000, interval: 100 } + ); const executionData2 = await engine.getRunExecutionData({ runId: run.id }); expect(executionData2?.snapshot.executionStatus).toBe("EXECUTING"); @@ -781,7 +797,16 @@ describe("RunEngine Waitpoints", () => { event = result; }); - await setTimeout(1_250); + // Event-driven wait: resume as soon as the timeout fires and the worker notifies, instead + // of a fixed 1250ms margin against the ~1s worker poll (the original flaky race). 
+ await vi.waitFor( + async () => { + const ed = await engine.getRunExecutionData({ runId: run.id }); + expect(ed?.snapshot.executionStatus).toBe("EXECUTING"); + assertNonNullable(event); + }, + { timeout: 10_000, interval: 100 } + ); const executionData2 = await engine.getRunExecutionData({ runId: run.id }); expect(executionData2?.snapshot.executionStatus).toBe("EXECUTING"); diff --git a/internal-packages/run-engine/vitest.config.ts b/internal-packages/run-engine/vitest.config.ts index fc9a8f271e5..cb048f00927 100644 --- a/internal-packages/run-engine/vitest.config.ts +++ b/internal-packages/run-engine/vitest.config.ts @@ -1,7 +1,9 @@ import { defineConfig } from "vitest/config"; +import { DurationShardingSequencer } from "@internal/testcontainers/sequencer"; export default defineConfig({ test: { + sequence: { sequencer: DurationShardingSequencer }, include: ["**/*.test.ts"], globals: true, // CI-only: absorbs timing races (real-clock waits vs worker poll interval) under shard CPU contention diff --git a/internal-packages/schedule-engine/test/scheduleEngine.test.ts b/internal-packages/schedule-engine/test/scheduleEngine.test.ts index 4063521d823..c261697dacc 100644 --- a/internal-packages/schedule-engine/test/scheduleEngine.test.ts +++ b/internal-packages/schedule-engine/test/scheduleEngine.test.ts @@ -239,108 +239,4 @@ describe("ScheduleEngine Integration", () => { } } ); - - // Deploy-moment backward compatibility. At deploy time, in-flight Redis jobs - // were enqueued by the old engine — their payload has no `lastScheduleTime` - // field — and `instance.lastScheduledTimestamp` is still populated (last - // written by the old engine pre-deploy). The new engine must report that DB - // value as `payload.lastTimestamp` so customers don't see a transient - // `undefined` for the one fire per schedule that drains the legacy queue. 
- containerTest( - "should fall back to instance.lastScheduledTimestamp when payload lacks lastScheduleTime", - { timeout: 30_000 }, - async ({ prisma, redisOptions }) => { - const triggerCalls: TriggerScheduledTaskParams[] = []; - const engine = new ScheduleEngine({ - prisma, - redis: redisOptions, - distributionWindow: { seconds: 10 }, - worker: { - concurrency: 1, - disabled: true, // Don't actually run the worker — calling triggerScheduledTask directly - pollIntervalMs: 1000, - }, - tracer: trace.getTracer("test", "0.0.0"), - onTriggerScheduledTask: async (params) => { - triggerCalls.push(params); - return { success: true }; - }, - isDevEnvironmentConnectedHandler: vi.fn().mockResolvedValue(true), - }); - - try { - const organization = await prisma.organization.create({ - data: { title: "Legacy Payload Org", slug: "legacy-payload-org" }, - }); - - const project = await prisma.project.create({ - data: { - name: "Legacy Payload Project", - slug: "legacy-payload-project", - externalRef: "legacy-payload-ref", - organizationId: organization.id, - }, - }); - - const environment = await prisma.runtimeEnvironment.create({ - data: { - slug: "legacy-payload-env", - type: "PRODUCTION", - projectId: project.id, - organizationId: organization.id, - apiKey: "tr_legacy_1234", - pkApiKey: "pk_legacy_1234", - shortcode: "legacy-short", - }, - }); - - const taskSchedule = await prisma.taskSchedule.create({ - data: { - friendlyId: "sched_legacy_payload", - taskIdentifier: "legacy-payload-task", - projectId: project.id, - deduplicationKey: "legacy-payload-dedup", - userProvidedDeduplicationKey: false, - generatorExpression: "*/5 * * * *", - generatorDescription: "Every 5 minutes", - timezone: "UTC", - type: "DECLARATIVE", - active: true, - externalId: "legacy-ext", - }, - }); - - // Pre-populate lastScheduledTimestamp on the instance — simulates the - // value the old engine wrote to the DB before this PR deployed. 
- const preDeployLastFire = new Date("2026-04-30T10:00:00.000Z"); - const scheduleInstance = await prisma.taskScheduleInstance.create({ - data: { - taskScheduleId: taskSchedule.id, - environmentId: environment.id, - projectId: project.id, - active: true, - lastScheduledTimestamp: preDeployLastFire, - }, - }); - - // Call triggerScheduledTask directly without lastScheduleTime, - // simulating an in-flight Redis job enqueued by the old engine. - const exactScheduleTime = new Date("2026-04-30T10:05:00.000Z"); - await engine.triggerScheduledTask({ - instanceId: scheduleInstance.id, - finalAttempt: false, - exactScheduleTime, - // lastScheduleTime intentionally omitted — legacy payload shape - }); - - expect(triggerCalls.length).toBe(1); - expect(triggerCalls[0].payload.timestamp).toEqual(exactScheduleTime); - // Falls back to instance.lastScheduledTimestamp from the DB rather - // than reporting undefined for this one transitional fire. - expect(triggerCalls[0].payload.lastTimestamp).toEqual(preDeployLastFire); - } finally { - await engine.quit(); - } - } - ); }); diff --git a/internal-packages/schedule-engine/test/scheduleEngine2.test.ts b/internal-packages/schedule-engine/test/scheduleEngine2.test.ts new file mode 100644 index 00000000000..64936a89152 --- /dev/null +++ b/internal-packages/schedule-engine/test/scheduleEngine2.test.ts @@ -0,0 +1,112 @@ +import { containerTest } from "@internal/testcontainers"; +import { trace } from "@internal/tracing"; +import { setTimeout } from "timers/promises"; +import { describe, expect, vi } from "vitest"; +import { TriggerScheduledTaskParams } from "../src/engine/types.js"; +import { ScheduleEngine } from "../src/index.js"; + +describe("ScheduleEngine Integration (part 2)", () => { + // Deploy-moment backward compatibility. 
At deploy time, in-flight Redis jobs + // were enqueued by the old engine — their payload has no `lastScheduleTime` + // field — and `instance.lastScheduledTimestamp` is still populated (last + // written by the old engine pre-deploy). The new engine must report that DB + // value as `payload.lastTimestamp` so customers don't see a transient + // `undefined` for the one fire per schedule that drains the legacy queue. + containerTest( + "should fall back to instance.lastScheduledTimestamp when payload lacks lastScheduleTime", + { timeout: 30_000 }, + async ({ prisma, redisOptions }) => { + const triggerCalls: TriggerScheduledTaskParams[] = []; + const engine = new ScheduleEngine({ + prisma, + redis: redisOptions, + distributionWindow: { seconds: 10 }, + worker: { + concurrency: 1, + disabled: true, // Don't actually run the worker — calling triggerScheduledTask directly + pollIntervalMs: 1000, + }, + tracer: trace.getTracer("test", "0.0.0"), + onTriggerScheduledTask: async (params) => { + triggerCalls.push(params); + return { success: true }; + }, + isDevEnvironmentConnectedHandler: vi.fn().mockResolvedValue(true), + }); + + try { + const organization = await prisma.organization.create({ + data: { title: "Legacy Payload Org", slug: "legacy-payload-org" }, + }); + + const project = await prisma.project.create({ + data: { + name: "Legacy Payload Project", + slug: "legacy-payload-project", + externalRef: "legacy-payload-ref", + organizationId: organization.id, + }, + }); + + const environment = await prisma.runtimeEnvironment.create({ + data: { + slug: "legacy-payload-env", + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: "tr_legacy_1234", + pkApiKey: "pk_legacy_1234", + shortcode: "legacy-short", + }, + }); + + const taskSchedule = await prisma.taskSchedule.create({ + data: { + friendlyId: "sched_legacy_payload", + taskIdentifier: "legacy-payload-task", + projectId: project.id, + deduplicationKey: "legacy-payload-dedup", + 
userProvidedDeduplicationKey: false, + generatorExpression: "*/5 * * * *", + generatorDescription: "Every 5 minutes", + timezone: "UTC", + type: "DECLARATIVE", + active: true, + externalId: "legacy-ext", + }, + }); + + // Pre-populate lastScheduledTimestamp on the instance — simulates the + // value the old engine wrote to the DB before this PR deployed. + const preDeployLastFire = new Date("2026-04-30T10:00:00.000Z"); + const scheduleInstance = await prisma.taskScheduleInstance.create({ + data: { + taskScheduleId: taskSchedule.id, + environmentId: environment.id, + projectId: project.id, + active: true, + lastScheduledTimestamp: preDeployLastFire, + }, + }); + + // Call triggerScheduledTask directly without lastScheduleTime, + // simulating an in-flight Redis job enqueued by the old engine. + const exactScheduleTime = new Date("2026-04-30T10:05:00.000Z"); + await engine.triggerScheduledTask({ + instanceId: scheduleInstance.id, + finalAttempt: false, + exactScheduleTime, + // lastScheduleTime intentionally omitted — legacy payload shape + }); + + expect(triggerCalls.length).toBe(1); + expect(triggerCalls[0].payload.timestamp).toEqual(exactScheduleTime); + // Falls back to instance.lastScheduledTimestamp from the DB rather + // than reporting undefined for this one transitional fire. 
+ expect(triggerCalls[0].payload.lastTimestamp).toEqual(preDeployLastFire); + } finally { + await engine.quit(); + } + } + ); +}); diff --git a/internal-packages/schedule-engine/vitest.config.ts b/internal-packages/schedule-engine/vitest.config.ts index e773ec6d163..8f9b6a01a9a 100644 --- a/internal-packages/schedule-engine/vitest.config.ts +++ b/internal-packages/schedule-engine/vitest.config.ts @@ -1,7 +1,9 @@ import { defineConfig } from "vitest/config"; +import { DurationShardingSequencer } from "@internal/testcontainers/sequencer"; export default defineConfig({ test: { + sequence: { sequencer: DurationShardingSequencer }, globals: true, // CI-only: absorbs timing races (real-clock waits vs worker poll interval) under shard CPU contention retry: process.env.CI ? 2 : 0, diff --git a/internal-packages/testcontainers/README.md b/internal-packages/testcontainers/README.md index 51c2240d6c9..8f74b1137f3 100644 --- a/internal-packages/testcontainers/README.md +++ b/internal-packages/testcontainers/README.md @@ -1,3 +1,78 @@ -# Test container +# Test containers -This is package exposes some useful vitest utilities for writing tests with Postgres, Prisma, and Redis. +Vitest utilities for writing tests against real Postgres, Prisma, Redis and ClickHouse - we don't mock +(see the root `CLAUDE.md`), we boot containers. Also exposes a duration-weighted shard sequencer for +splitting slow suites across CI shards. + +## Choosing a fixture + +Most tests share one set of containers per vitest worker (booted once, reset between tests) - this is +much faster than a container per test. Reach for an isolated variant only when a test needs it. 
+ +| Fixture | Postgres | Redis | ClickHouse | Use for | +| -------------------------------- | -------------- | -------- | ---------- | --------------------------------------- | +| `redisTest` | - | shared | - | redis-only tests | +| `postgresTest` | shared (clone) | - | - | db-only tests | +| `containerTest` | shared (clone) | shared | shared | the default - needs all three | +| `isolatedRedisTest` | - | per-test | - | background redis work (see below) | +| `containerTestWithIsolatedRedis` | shared (clone) | per-test | shared | background redis work + db/clickhouse | +| `replicationContainerTest` | per-test | per-test | shared | Postgres→ClickHouse logical replication | + +"shared (clone)" = one Postgres per worker with a template database; each test gets a fast `CREATE +DATABASE ... TEMPLATE` clone, so schema isn't re-pushed per test. + +### The background-work gotcha + +If a test spawns work that **outlives the test body** - a `RunEngine`, a `redis-worker` Worker, a +`BatchQueue` - and that work isn't fully drained before the test ends, you **must** use an isolated +redis fixture (`isolatedRedisTest` / `containerTestWithIsolatedRedis`). + +On the shared fixture, the leaked background loop keeps polling the one worker-scoped redis after the +test's clients close, bleeding into the next test. The symptom is an intermittent `"Connection is +closed"` error or a test that hangs until its timeout. `FLUSHALL` between tests does **not** fix this - +it clears data, not live connections/loops, so per-test key prefixes won't help either. A plain +db/redis test with no lingering background work is fine on the shared fixtures. + +## Sharding (`./sequencer`) + +CI splits the slow suites with `vitest --shard=i/N`. `DurationShardingSequencer` replaces vitest's +default file-count split with a duration-weighted one: it reads `test-timings.json` at the repo root +(`{ "repo-relative/path/to/file.test.ts": durationMs }`) and greedily bin-packs files so each shard does roughly equal +_work_, not an equal _number of files_.
The packing is deterministic, so every shard computes the same +bins and runs each file exactly once. + +Configs opt in via: + +```ts +import { DurationShardingSequencer } from "@internal/testcontainers/sequencer"; +// in defineConfig: +test: { + sequence: { + sequencer: DurationShardingSequencer, + }, +} +``` + +### Adding tests - nothing to do + +New test files are discovered by vitest's glob and sharded automatically. A file with no entry in +`test-timings.json` is given the **median** duration as a fallback, so it's still placed on exactly one +shard - correctness never depends on the timings being present or current. + +What the timings affect is **balance**. A new heavy test estimated at the median can be under-weighted +and land on an already-full shard, making that shard slower. There's headroom between the current +makespan and the CI budget to absorb this, so it tolerates drift - but if a shard creeps toward the +budget, refresh the timings. + +### Refreshing `test-timings.json` + +Measure each shard with the JSON reporter and write per-file `endTime - startTime` (ms), keyed by +repo-relative path, back into `test-timings.json`. Set `GITHUB_ACTIONS=true` so suites that +`skipIf(CI)` are excluded, matching what actually runs on CI: + +```bash +GITHUB_ACTIONS=true pnpm exec vitest run --reporter=json --outputFile=/tmp/run.json +``` + +Stale entries for deleted/renamed files are harmless (they're simply ignored). This is a periodic +chore, not a per-PR one. diff --git a/internal-packages/testcontainers/TESTING.md b/internal-packages/testcontainers/TESTING.md new file mode 100644 index 00000000000..8ca6c335789 --- /dev/null +++ b/internal-packages/testcontainers/TESTING.md @@ -0,0 +1,60 @@ +# Fast local testing loop + +These tests use real Docker containers (Postgres, ClickHouse, Redis, Electric, MinIO) via testcontainers - never mocks. This guide is the fast inner loop for working on them. 
+ +## Prerequisites + +- **Docker daemon running.** That's it - testcontainers boots its own containers. You do **not** need `pnpm run docker` (that compose stack is for running the app, and is separate). + +## The loop + +```bash +# 1. Build upstream deps once (turbo-caches them; only re-runs when a dep changes) +pnpm run build --filter @internal/run-engine + +# 2. Iterate by running vitest DIRECTLY in the package - not via `turbo run test` +cd internal-packages/run-engine +pnpm exec vitest run src/engine/tests/ttl.test.ts # one file +pnpm exec vitest src/engine/tests/ttl.test.ts # watch mode, tightest loop +pnpm exec vitest run src/engine/tests/ --reporter=verbose # per-test timings +``` + +> **Why run vitest directly, not `turbo run test`?** The `test` turbo task is cacheable +> (`outputs: []`). A second `turbo run test` with no input change replays the cached +> result in ~0ms instead of executing - useless when you're measuring timing. Run vitest +> directly (or `turbo run test --force`) so tests actually run. + +## Measuring container boot/teardown vs test time + +Container lifecycle (boot + migrate + teardown) dominates these suites. To see the split: + +```bash +# JSON timing lines are gated on TESTCONTAINERS_TIMING locally (always on in CI), +# and need --disableConsoleIntercept so vitest doesn't swallow them. +TESTCONTAINERS_TIMING=1 pnpm exec vitest run --disableConsoleIntercept +``` + +## Approximating the 2-core CI runner locally (flake repro) + +To reproduce CI-like CPU pressure on a beefy local machine - useful when a test only flakes under +the 2-core CI runner: + +```bash +# cap each testcontainer's CPU/mem (TESTCONTAINERS_CPU = cores, TESTCONTAINERS_MEMORY_GB = GB), +# and pin the test runner to 2 cores. Off unless the env vars are set. 
+TESTCONTAINERS_CPU=2 TESTCONTAINERS_MEMORY_GB=2 taskset -c 0,1 pnpm exec vitest run +``` + +Note: in practice the scoped tests here are latency/IO/sleep-bound, not CPU-bound, so this changes +timings little - the original CI slowness was per-test container _boots_, which worker-scoping removed. +Keep it for the cases that genuinely starve on CPU (e.g. timing races against a worker poll). + +## Timing harness + +Or use the harness, which aggregates the split for you: + +```bash +node internal-packages/testcontainers/scripts/measure-test-timing.mjs \ + src/client/client.test.ts --cwd internal-packages/clickhouse --runs 3 +# -> run 1/3 passed=true wall=10.58s teardown=0.67s ... +``` diff --git a/internal-packages/testcontainers/package.json b/internal-packages/testcontainers/package.json index 1c57952509c..4ea83344c34 100644 --- a/internal-packages/testcontainers/package.json +++ b/internal-packages/testcontainers/package.json @@ -6,7 +6,11 @@ "types": "./src/index.ts", "exports": { ".": "./src/index.ts", - "./webapp": "./src/webapp.ts" + "./webapp": "./src/webapp.ts", + "./sequencer": { + "types": "./src/sequencer.d.cts", + "default": "./src/sequencer.cjs" + } }, "dependencies": { "@clickhouse/client": "^1.11.1", @@ -25,4 +29,4 @@ "scripts": { "typecheck": "tsc --noEmit" } -} \ No newline at end of file +} diff --git a/internal-packages/testcontainers/scripts/measure-test-timing.mjs b/internal-packages/testcontainers/scripts/measure-test-timing.mjs new file mode 100644 index 00000000000..fd71a2b93a3 --- /dev/null +++ b/internal-packages/testcontainers/scripts/measure-test-timing.mjs @@ -0,0 +1,76 @@ +#!/usr/bin/env node +// Measure testcontainers boot/teardown vs test time for a single test file. +// +// Usage (from any package dir, or pass --cwd): +// node PATH/TO/measure-test-timing.mjs TEST_FILE [--cwd DIR] [--runs N] +// +// Relies on the TESTCONTAINERS_TIMING log gate in src/logs.ts and runs vitest with +// --disableConsoleIntercept so the JSON timing lines reach stdout.
+ +import { spawn } from "node:child_process"; + +const args = process.argv.slice(2); +const testFile = args.find((a) => !a.startsWith("--")); +const cwd = valueOf("--cwd") ?? process.cwd(); +const runs = Number(valueOf("--runs") ?? "1"); + +function valueOf(flag) { + const i = args.indexOf(flag); + return i >= 0 ? args[i + 1] : undefined; +} + +if (!testFile) { + console.error("usage: measure-test-timing.mjs [--cwd dir] [--runs N]"); + process.exit(1); +} + +function runOnce() { + return new Promise((resolve) => { + const child = spawn("pnpm", ["exec", "vitest", "run", testFile, "--disableConsoleIntercept"], { + cwd, + env: { ...process.env, TESTCONTAINERS_TIMING: "1" }, + }); + + let out = ""; + const collect = (buf) => (out += buf.toString()); + child.stdout.on("data", collect); + child.stderr.on("data", collect); + + child.on("close", () => { + const cleanups = []; + let duration = null; + for (const line of out.split("\n")) { + const trimmed = line.trim(); + if (trimmed.startsWith("{")) { + try { + const ev = JSON.parse(trimmed); + if (ev.type === "cleanup") cleanups.push(ev); + } catch {} + } + const m = trimmed.match(/Duration\s+([\d.]+)s/); + if (m) duration = Number(m[1]); + } + resolve({ cleanups, duration, passed: /Tests\s+\d+ passed/.test(out) }); + }); + }); +} + +for (let i = 0; i < runs; i++) { + const { cleanups, duration, passed } = await runOnce(); + const byResource = {}; + for (const c of cleanups) { + const key = c.resource.split(":")[0]; + byResource[key] ??= { totalMs: 0, count: 0 }; + byResource[key].totalMs += c.durationMs ?? 
0; + byResource[key].count += 1; + } + const teardownMs = Object.values(byResource).reduce((a, r) => a + r.totalMs, 0); + console.log( + `\nrun ${i + 1}/${runs} passed=${passed} wall=${duration}s teardown=${( + teardownMs / 1000 + ).toFixed(2)}s` + ); + for (const [res, r] of Object.entries(byResource)) { + console.log(` teardown ${res}: ${(r.totalMs / 1000).toFixed(2)}s over ${r.count}`); + } +} diff --git a/internal-packages/testcontainers/src/clickhouse.ts b/internal-packages/testcontainers/src/clickhouse.ts index 577111af3d5..1bd7f758e02 100644 --- a/internal-packages/testcontainers/src/clickhouse.ts +++ b/internal-packages/testcontainers/src/clickhouse.ts @@ -144,6 +144,24 @@ export class StartedClickHouseContainer extends AbstractStartedContainer { } } +/** + * Resets data between tests on a reused ClickHouse container by truncating every base table + * (MergeTree etc.) in the migrated database. Views/materialized views are skipped - their target + * tables are base tables and get truncated too, which clears MV state. Cheaper than dropping + + * re-migrating, and these migrations aren't version-tracked so they can't simply be re-run. 
+ */ +export async function truncateClickhouseTables(client: ClickHouseClient, database = "trigger_dev") { + const result = await client.query({ + query: `SELECT name FROM system.tables WHERE database = '${database}' AND engine NOT LIKE '%View%'`, + format: "JSONEachRow", + }); + const tables = await result.json<{ name: string }>(); + + for (const { name } of tables) { + await client.command({ query: `TRUNCATE TABLE \`${database}\`.\`${name}\`` }); + } +} + export async function runClickhouseMigrations(client: ClickHouseClient, migrationsPath: string) { // Get all the *.sql files in the migrations path const queries = await getAllClickhouseMigrationQueries(migrationsPath); diff --git a/internal-packages/testcontainers/src/index.ts b/internal-packages/testcontainers/src/index.ts index 95b4ab48e8c..8b687402f6d 100644 --- a/internal-packages/testcontainers/src/index.ts +++ b/internal-packages/testcontainers/src/index.ts @@ -1,7 +1,7 @@ -import { StartedPostgreSqlContainer } from "@testcontainers/postgresql"; +import { PostgreSqlContainer, StartedPostgreSqlContainer } from "@testcontainers/postgresql"; import { StartedRedisContainer } from "@testcontainers/redis"; import { PrismaClient } from "@trigger.dev/database"; -import { RedisOptions } from "ioredis"; +import Redis, { RedisOptions } from "ioredis"; import { Network, type StartedNetwork } from "testcontainers"; import { TestContext, test } from "vitest"; import { @@ -9,13 +9,21 @@ import { createElectricContainer, createPostgresContainer, createRedisContainer, - createMinIOContainer, + postgresUriWithDatabase, + pushDatabaseSchema, useContainer, + withCiResourceLimits, withContainerSetup, } from "./utils"; import { getTaskMetadata, logCleanup, logSetup } from "./logs"; -import { StartedClickHouseContainer } from "./clickhouse"; -import { StartedMinIOContainer, type MinIOConnectionConfig } from "./minio"; +import path from "path"; +import { + ClickHouseContainer, + StartedClickHouseContainer, + 
runClickhouseMigrations, + truncateClickhouseTables, +} from "./clickhouse"; +import { MinIOContainer, StartedMinIOContainer, type MinIOConnectionConfig } from "./minio"; import { ClickHouseClient, createClient } from "@clickhouse/client"; export { assertNonNullable, createPostgresContainer } from "./utils"; @@ -120,7 +128,112 @@ export const prisma = async ( } }; -export const postgresTest = test.extend({ network, postgresContainer, prisma }); +const POSTGRES_TEMPLATE_DB = "template_db"; +let pgCloneCounter = 0; + +type PostgresTestContext = { + postgresContainer: StartedPostgreSqlContainer; + prisma: PrismaClient; +}; + +// --- Worker-scoped + per-test-isolated fixtures (shared by the standalone *Test and containerTest) --- +// The pattern: boot each container ONCE per worker; isolate per test cheaply (postgres = template +// clone, redis = FLUSHALL, clickhouse = TRUNCATE) instead of re-booting. Reset fixtures are `auto` +// so they run for every test even if it doesn't destructure them. + +// Boot postgres ONCE per worker (module singleton, reaped by Ryuk on worker exit) and push the +// schema into a dedicated template db that nothing else connects to (so CREATE DATABASE ... TEMPLATE +// never trips on an active session). +let workerPostgresContainer: Promise | undefined; +const getWorkerPostgresContainer = () => { + if (!workerPostgresContainer) { + workerPostgresContainer = (async () => { + const container = await withCiResourceLimits(new PostgreSqlContainer("docker.io/postgres:14")) + .withCommand(["-c", "listen_addresses=*", "-c", "wal_level=logical"]) + .start(); + // Create the template db explicitly via an admin connection (the same primitive the per-test + // clone uses) instead of relying on `prisma db push` to create a missing database. That + // create-if-missing path behaves differently on CI and - because push errors were swallowed - + // surfaced only later as a confusing "template database template_db does not exist" at clone + // time. 
Pushing into an already-existing db is the path the pre-worker-scope code always used. + const admin = new PrismaClient({ + datasources: { + db: { url: postgresUriWithDatabase(container.getConnectionUri(), "postgres") }, + }, + }); + await admin.$executeRawUnsafe(`CREATE DATABASE "${POSTGRES_TEMPLATE_DB}"`); + await admin.$disconnect(); + await pushDatabaseSchema( + postgresUriWithDatabase(container.getConnectionUri(), POSTGRES_TEMPLATE_DB) + ); + return container; + })(); + } + return workerPostgresContainer; +}; + +// Per test: clone a fresh database from the template (fast filesystem copy), then hand back a view +// of the shared container whose connection points at the clone. This keeps prisma AND any code that +// reads postgresContainer.getConnectionUri()/getDatabase() (e.g. logical replication) on the SAME +// isolated database - and it's parallel-ready (each test owns its db). +const clonedPostgresContainer = async ({}, use: Use) => { + const container = await getWorkerPostgresContainer(); + const baseUri = container.getConnectionUri(); + const cloneDb = `test_${pgCloneCounter++}`; + + const admin = new PrismaClient({ + datasources: { db: { url: postgresUriWithDatabase(baseUri, "postgres") } }, + }); + await admin.$executeRawUnsafe(`CREATE DATABASE "${cloneDb}" TEMPLATE "${POSTGRES_TEMPLATE_DB}"`); + await admin.$disconnect(); + + const cloneUri = postgresUriWithDatabase(baseUri, cloneDb); + const view = new Proxy(container, { + get(target, prop, receiver) { + if (prop === "getConnectionUri") return () => cloneUri; + if (prop === "getDatabase") return () => cloneDb; + const value = Reflect.get(target, prop, receiver); + return typeof value === "function" ? value.bind(target) : value; + }, + }); + + try { + await use(view); + } finally { + // Best-effort drop so clones don't pile up in the worker's pg over a long suite. WITH (FORCE) + // terminates any lingering backends (pg 13+). 
A failed drop is harmless - the whole container is + // reaped on worker exit - so we never let cleanup fail the test. + const cleanup = new PrismaClient({ + datasources: { db: { url: postgresUriWithDatabase(baseUri, "postgres") } }, + }); + try { + await cleanup.$executeRawUnsafe(`DROP DATABASE IF EXISTS "${cloneDb}" WITH (FORCE)`); + } catch { + // ignore - reaped with the container anyway + } finally { + await cleanup.$disconnect(); + } + } +}; + +const prismaFromContainer = async ( + { postgresContainer }: { postgresContainer: StartedPostgreSqlContainer }, + use: Use +) => { + const prisma = new PrismaClient({ + datasources: { db: { url: postgresContainer.getConnectionUri() } }, + }); + try { + await use(prisma); + } finally { + await logCleanup("prisma", prisma.$disconnect()); + } +}; + +export const postgresTest = test.extend({ + postgresContainer: clonedPostgresContainer, + prisma: prismaFromContainer, +}); export const redisContainer = async ( { network, task }: { network: StartedNetwork } & TestContext, @@ -173,7 +286,58 @@ export const redisOptions = async ( await use(options); }; -export const redisTest = test.extend({ network, redisContainer, redisOptions }); +// Worker-scoped redis: booted once per worker, FLUSHALL per test. Big win for redis-heavy files +// (buffer.test.ts: 88 boots -> 1). Safe ONLY for tests that don't leave background redis work +// (a Worker loop, BatchQueue) running past the test body - use isolatedRedisTest for those. 
+const bootWorkerRedis = async ({}, use: Use) => { + const { container } = await createRedisContainer({ port: 6379 }); + try { + await use(container); + } finally { + await container.stop({ timeout: 0 }); + } +}; + +const flushRedis = async ( + { redisContainer }: { redisContainer: StartedRedisContainer }, + use: Use +) => { + const redis = new Redis({ + host: redisContainer.getHost(), + port: redisContainer.getPort(), + password: redisContainer.getPassword(), + maxRetriesPerRequest: 3, + }); + try { + await redis.flushall(); + } finally { + redis.disconnect(); + } + await use(); +}; + +type RedisTestContext = { + redisContainer: StartedRedisContainer; + resetRedis: void; + redisOptions: RedisOptions; +}; + +// Worker-scoped redis (boots once, FLUSHALL between tests). Use isolatedRedisTest for tests that run +// background redis work (redis-worker Workers, BatchQueue) past the test body - see its note + README. +export const redisTest = test.extend({ + redisContainer: [bootWorkerRedis, { scope: "worker" }], + resetRedis: [flushRedis, { auto: true }], + redisOptions, +}); + +// Per-test redis for tests with background redis work (redis-worker Workers, BatchQueue) that can +// outlive the test body - a shared redis would let leaked work hit a closed connection / next test +// ("Connection is closed"). Boot is kept fast (see createRedisContainer). +export const isolatedRedisTest = test.extend({ + network, + redisContainer, + redisOptions, +}); const electricOrigin = async ( { @@ -225,12 +389,60 @@ type ClickhouseContext = { clickhouseClient: ClickHouseClient; }; -export const clickhouseTest = test.extend({ - network, - clickhouseContainer, - clickhouseClient, +const clickhouseMigrationsPath = path.resolve(__dirname, "../../clickhouse/schema"); + +type ClickhouseTestContext = { + clickhouseContainer: StartedClickHouseContainer; + resetClickhouse: void; + clickhouseClient: ClickHouseClient; +}; + +// Boot + migrate clickhouse once per worker. 
+const bootWorkerClickhouse = async ({}, use: Use) => { + const container = await withCiResourceLimits(new ClickHouseContainer()).start(); + const client = createClient({ url: container.getConnectionUrl() }); + await client.ping(); + await runClickhouseMigrations(client, clickhouseMigrationsPath); + await client.close(); + try { + await use(container); + } finally { + await container.stop({ timeout: 0 }); + } +}; + +// Per test: truncate all tables on the shared clickhouse (auto fixture so it runs for every test). +const truncateClickhouseFixture = async ( + { clickhouseContainer }: { clickhouseContainer: StartedClickHouseContainer }, + use: Use +) => { + const client = createClient({ url: clickhouseContainer.getConnectionUrl() }); + await truncateClickhouseTables(client); + await client.close(); + await use(); +}; + +const scopedClickhouseClient = async ( + { clickhouseContainer }: { clickhouseContainer: StartedClickHouseContainer }, + use: Use +) => { + const client = createClient({ url: clickhouseContainer.getConnectionUrl() }); + try { + await use(client); + } finally { + await logCleanup("clickhouseClient", client.close()); + } +}; + +export const clickhouseTest = test.extend({ + clickhouseContainer: [bootWorkerClickhouse, { scope: "worker" }], + resetClickhouse: [truncateClickhouseFixture, { auto: true }], + clickhouseClient: scopedClickhouseClient, }); +// NOTE: per-test containers (not worker-scoped) - the replication package does logical replication +// (slots/publications/REPLICA IDENTITY), which doesn't play nicely with a shared container + +// template-clone. A dedicated container per test is the correct, isolated choice here. 
export const postgresAndRedisTest = test.extend({ network, postgresContainer, @@ -239,14 +451,81 @@ export const postgresAndRedisTest = test.extend({ redisOptions, }); -export const containerTest = test.extend({ +type ContainerTestContext = { + postgresContainer: StartedPostgreSqlContainer; + prisma: PrismaClient; + redisContainer: StartedRedisContainer; + resetRedis: void; + redisOptions: RedisOptions; + clickhouseContainer: StartedClickHouseContainer; + resetClickhouse: void; + clickhouseClient: ClickHouseClient; +}; + +// The workhorse fixture (~36 files). Postgres (template-clone), Redis (FLUSHALL) and ClickHouse +// (truncate) all boot once per worker - no per-test container boots. Use containerTestWithIsolatedRedis +// for tests that run background redis work (BatchQueue, redis-worker Workers) past the test body. +export const containerTest = test.extend({ + postgresContainer: clonedPostgresContainer, + prisma: prismaFromContainer, + redisContainer: [bootWorkerRedis, { scope: "worker" }], + resetRedis: [flushRedis, { auto: true }], + redisOptions, + clickhouseContainer: [bootWorkerClickhouse, { scope: "worker" }], + resetClickhouse: [truncateClickhouseFixture, { auto: true }], + clickhouseClient: scopedClickhouseClient, +}); + +type ContainerWithIsolatedRedisContext = { + network: StartedNetwork; + postgresContainer: StartedPostgreSqlContainer; + prisma: PrismaClient; + redisContainer: StartedRedisContainer; + redisOptions: RedisOptions; + clickhouseContainer: StartedClickHouseContainer; + resetClickhouse: void; + clickhouseClient: ClickHouseClient; +}; + +// Same as containerTest but Redis is PER-TEST - for tests whose background redis work (BatchQueue, +// Workers) outlives the test body and would otherwise hit a closed/shared connection. 
+export const containerTestWithIsolatedRedis = test.extend({ + network, + postgresContainer: clonedPostgresContainer, + prisma: prismaFromContainer, + redisContainer, + redisOptions, + clickhouseContainer: [bootWorkerClickhouse, { scope: "worker" }], + resetClickhouse: [truncateClickhouseFixture, { auto: true }], + clickhouseClient: scopedClickhouseClient, +}); + +// For tests that exercise the Postgres -> ClickHouse logical-replication pipeline (WAL slots, +// publications, REPLICA IDENTITY). These need a dedicated Postgres per test - the worker-scoped + +// template-clone model used by containerTest doesn't carry logical replication across cloned dbs. +// Postgres is per-test (the WAL slot/publication lives in the db it writes to); ClickHouse is +// worker-scoped + truncated (the pipeline writes pg->clickhouse and a shared+truncated clickhouse is +// fine). Redis is per-test too (background work safety, same as containerTestWithIsolatedRedis). +type ReplicationContainerTestContext = { + network: StartedNetwork; + postgresContainer: StartedPostgreSqlContainer; + prisma: PrismaClient; + redisContainer: StartedRedisContainer; + redisOptions: RedisOptions; + clickhouseContainer: StartedClickHouseContainer; + resetClickhouse: void; + clickhouseClient: ClickHouseClient; +}; + +export const replicationContainerTest = test.extend({ network, postgresContainer, prisma, redisContainer, redisOptions, - clickhouseContainer, - clickhouseClient, + clickhouseContainer: [bootWorkerClickhouse, { scope: "worker" }], + resetClickhouse: [truncateClickhouseFixture, { auto: true }], + clickhouseClient: scopedClickhouseClient, }); export const containerWithElectricTest = test.extend({ @@ -267,17 +546,22 @@ export const containerWithElectricAndRedisTest = test.extend -) => { - const { container, metadata } = await withContainerSetup({ - name: "minioContainer", - task, - setup: createMinIOContainer(network), - }); +// Boot minio once per worker; reset the bucket per test (auto fixture). 
+const bootWorkerMinio = async ({}, use: Use) => { + const container = await withCiResourceLimits(new MinIOContainer()).start(); + try { + await use(container); + } finally { + await container.stop({ timeout: 0 }); + } +}; - await useContainer("minioContainer", { container, task, use: () => use(container) }); +const minioReset = async ( + { minioContainer }: { minioContainer: StartedMinIOContainer }, + use: Use +) => { + await minioContainer.resetBucket(); + await use(); }; const minioConfig = async ( @@ -287,18 +571,30 @@ const minioConfig = async ( await use(minioContainer.getConnectionConfig()); }; -export const minioTest = test.extend({ - network, - minioContainer, +type MinioTestContext = { + minioContainer: StartedMinIOContainer; + resetMinio: void; + minioConfig: MinIOConnectionConfig; +}; + +export const minioTest = test.extend({ + minioContainer: [bootWorkerMinio, { scope: "worker" }], + resetMinio: [minioReset, { auto: true }], minioConfig, }); -type PostgresAndMinIOContext = NetworkContext & PostgresContext & MinIOContext; +type PostgresAndMinioTestContext = { + postgresContainer: StartedPostgreSqlContainer; + prisma: PrismaClient; + minioContainer: StartedMinIOContainer; + resetMinio: void; + minioConfig: MinIOConnectionConfig; +}; -export const postgresAndMinioTest = test.extend({ - network, - postgresContainer, - prisma, - minioContainer, +export const postgresAndMinioTest = test.extend({ + postgresContainer: clonedPostgresContainer, + prisma: prismaFromContainer, + minioContainer: [bootWorkerMinio, { scope: "worker" }], + resetMinio: [minioReset, { auto: true }], minioConfig, }); diff --git a/internal-packages/testcontainers/src/logs.ts b/internal-packages/testcontainers/src/logs.ts index 865a6592bd5..3ea3e5fe8a6 100644 --- a/internal-packages/testcontainers/src/logs.ts +++ b/internal-packages/testcontainers/src/logs.ts @@ -5,10 +5,13 @@ import { StartedTestContainer } from "testcontainers"; let setupOrder = 0; +// Emit timing JSON in CI, or locally 
when TESTCONTAINERS_TIMING is set (drives the local timing harness) +const emitTimingLogs = isCI || !!env.TESTCONTAINERS_TIMING; + export function logSetup(resource: string, metadata: Record) { const order = setupOrder++; - if (!isCI) { + if (!emitTimingLogs) { return; } @@ -67,7 +70,7 @@ export async function logCleanup( const activeAtEnd = --activeCleanups; const parallel = activeAtStart > 1 || activeAtEnd > 0; - if (!isCI) { + if (!emitTimingLogs) { return; } diff --git a/internal-packages/testcontainers/src/minio.ts b/internal-packages/testcontainers/src/minio.ts index 4f85149b7a4..f7ef2d1275e 100644 --- a/internal-packages/testcontainers/src/minio.ts +++ b/internal-packages/testcontainers/src/minio.ts @@ -68,11 +68,9 @@ export class MinIOContainer extends GenericContainer { { throwOnError: true } ); - await x( - "docker", - ["exec", startedContainer.getId(), "mc", "mb", "local/packets"], - { throwOnError: true } - ); + await x("docker", ["exec", startedContainer.getId(), "mc", "mb", "local/packets"], { + throwOnError: true, + }); return new StartedMinIOContainer( startedContainer, @@ -120,6 +118,23 @@ export class StartedMinIOContainer extends AbstractStartedContainer { return `${protocol}://${host}:${port}`; } + /** + * Empties the bucket between tests on a reused container (the "local" mc alias and the bucket are + * created at boot). Recreates the bucket so each test starts from the same empty state. + */ + public async resetBucket(bucket = "packets"): Promise { + await x( + "docker", + ["exec", this.getId(), "mc", "rm", "--recursive", "--force", `local/${bucket}`], + { + throwOnError: false, + } + ); + await x("docker", ["exec", this.getId(), "mc", "mb", "--ignore-existing", `local/${bucket}`], { + throwOnError: true, + }); + } + /** * Gets connection configuration suitable for object storage clients. 
*/ diff --git a/internal-packages/testcontainers/src/sequencer.cjs b/internal-packages/testcontainers/src/sequencer.cjs new file mode 100644 index 00000000000..14084e6e84d --- /dev/null +++ b/internal-packages/testcontainers/src/sequencer.cjs @@ -0,0 +1,129 @@ +// Authored as plain CommonJS (NOT .ts) on purpose. vitest loads each package's vitest.config.ts by +// bundling it, and it EXTERNALIZES this workspace subpath - node then loads this file verbatim. A .ts +// here reaches node as raw TypeScript and crashes config loading on CI's pinned node 20 (no type +// stripping: `SyntaxError`). Keeping it dependency-free JS - and importing nothing from the ESM-only +// `vitest/node` - makes it loadable on every node. Types for consumers live in sequencer.d.cts. + +const { existsSync, readFileSync } = require("node:fs"); +const path = require("node:path"); + +// Walk up from the package dir (cwd at config-load time) to the monorepo root (pnpm-workspace.yaml). +function findRepoRoot(start) { + let dir = start; + for (let i = 0; i < 20; i++) { + if (existsSync(path.join(dir, "pnpm-workspace.yaml"))) return dir; + const parent = path.dirname(dir); + if (parent === dir) break; + dir = parent; + } + return start; +} + +// test-timings.json lives at the monorepo root: { "": } +const REPO_ROOT = findRepoRoot(process.cwd()); +const TIMINGS_PATH = path.resolve(REPO_ROOT, "test-timings.json"); + +let cachedTimings; + +function loadTimings() { + if (!cachedTimings) { + // A MISSING file is a legitimate state (no timings configured yet => count-based split). But a + // file that EXISTS and won't parse is a real problem with a committed artifact we control - fail + // loud rather than silently degrading sharding (silent fallbacks are what hid earlier bugs). 
+ if (!existsSync(TIMINGS_PATH)) { + cachedTimings = {}; + return cachedTimings; + } + try { + cachedTimings = JSON.parse(readFileSync(TIMINGS_PATH, "utf-8")); + } catch (error) { + throw new Error(`Failed to parse ${TIMINGS_PATH}: ${error?.message ?? error}`); + } + } + return cachedTimings; +} + +function median(nums) { + if (nums.length === 0) return 1; + const sorted = [...nums].sort((a, b) => a - b); + const mid = Math.floor(sorted.length / 2); + return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; +} + +// Stable per-package offset (derived from the package dir) so each package's heaviest file - which +// LPT always drops into bin 0 - maps to a DIFFERENT shard. Without it, a serial multi-package job +// (`turbo --concurrency=1 --filter "@internal/*"`) stacks every package's heaviest file into shard 1. +// It's a rotation of the bin->shard mapping, so coverage stays exact (each file runs once). +function packageOffset(specs, count) { + if (specs.length === 0) return 0; + const rel = path.relative(REPO_ROOT, specs[0].moduleId); + const key = rel.split(path.sep).slice(0, 2).join("/"); + // FNV-1a - spreads similar sibling package names (e.g. internal-packages/*) far better than a + // simple polynomial hash mod count, which collided run-engine + schedule-engine onto one shard. + let h = 2166136261; + for (let i = 0; i < key.length; i++) { + h ^= key.charCodeAt(i); + h = Math.imul(h, 16777619); + } + return (h >>> 0) % count; +} + +/** + * Duration-weighted interpretation of `--shard=i/N`. Instead of vitest's default file-count split, + * this greedily bin-packs test files by recorded duration (test-timings.json at the repo root; + * unknown/new files get the median) so each shard does roughly equal work. + * + * The packing is fully deterministic (sort by duration desc, then moduleId) so every shard computes + * the identical bins and just takes its own - no file runs twice or gets dropped. 
Falls back to the + * full set when no shard is configured, and to ~count-based when no timings exist. + * + * Implemented as a standalone TestSequencer (not extending BaseSequencer) so this file never imports + * `vitest/node` - see the header note. + */ +class DurationShardingSequencer { + constructor(ctx) { + this.ctx = ctx; + } + + // Deterministic order (heaviest first, then moduleId) - stable across shards and a sensible + // in-shard run order, replacing BaseSequencer's default sort we no longer inherit. + async sort(files) { + const timings = loadTimings(); + const fallback = median(Object.values(timings)); + return [...files].sort((a, b) => { + const am = timings[path.relative(REPO_ROOT, a.moduleId)] ?? fallback; + const bm = timings[path.relative(REPO_ROOT, b.moduleId)] ?? fallback; + return bm - am || a.moduleId.localeCompare(b.moduleId); + }); + } + + async shard(specs) { + const shard = this.ctx.config.shard; + if (!shard || specs.length === 0) { + return specs; + } + + const timings = loadTimings(); + const fallback = median(Object.values(timings)); + + const weighted = specs + .map((spec) => ({ + spec, + ms: timings[path.relative(REPO_ROOT, spec.moduleId)] ?? fallback, + })) + .sort((a, b) => b.ms - a.ms || a.spec.moduleId.localeCompare(b.spec.moduleId)); + + const bins = Array.from({ length: shard.count }, () => ({ total: 0, specs: [] })); + + for (const { spec, ms } of weighted) { + const lightest = bins.reduce((min, bin) => (bin.total < min.total ? 
bin : min)); + lightest.total += ms; + lightest.specs.push(spec); + } + + const offset = packageOffset(specs, shard.count); + return bins[(shard.index - 1 + offset) % shard.count].specs; + } +} + +module.exports = { DurationShardingSequencer }; diff --git a/internal-packages/testcontainers/src/sequencer.d.cts b/internal-packages/testcontainers/src/sequencer.d.cts new file mode 100644 index 00000000000..2fbecc89ccd --- /dev/null +++ b/internal-packages/testcontainers/src/sequencer.d.cts @@ -0,0 +1,13 @@ +import type { TestSequencer, TestSpecification, Vitest } from "vitest/node"; + +/** + * Duration-weighted `--shard=i/N`: bin-packs test files by recorded duration (test-timings.json at + * the repo root) so each shard does roughly equal work. The runtime lives in `sequencer.cjs` (plain + * JS, so vitest config loading can load it on any node - see that file's header); this declaration + * supplies the types for configs that wire it via `sequence: { sequencer: DurationShardingSequencer }`. 
+ */ +export declare class DurationShardingSequencer implements TestSequencer { + constructor(ctx: Vitest); + sort(files: TestSpecification[]): Promise; + shard(files: TestSpecification[]): Promise; +} diff --git a/internal-packages/testcontainers/src/utils.ts b/internal-packages/testcontainers/src/utils.ts index 5f689bc5bfe..4183e85b40b 100644 --- a/internal-packages/testcontainers/src/utils.ts +++ b/internal-packages/testcontainers/src/utils.ts @@ -12,17 +12,20 @@ import { ClickHouseContainer, runClickhouseMigrations } from "./clickhouse"; import { MinIOContainer } from "./minio"; import { getContainerMetadata, getTaskMetadata, logCleanup, logSetup } from "./logs"; -export async function createPostgresContainer(network: StartedNetwork) { - const container = await new PostgreSqlContainer("docker.io/postgres:14") - .withNetwork(network) - .withNetworkAliases("database") - .withCommand(["-c", "listen_addresses=*", "-c", "wal_level=logical"]) - .start(); +/** Returns the container's connection URI with the database path swapped to `database`. */ +export function postgresUriWithDatabase(uri: string, database: string): string { + const url = new URL(uri); + url.pathname = `/${database}`; + return url.toString(); +} - // Run migrations +/** Pushes the Prisma schema into the database at `databaseUrl` (which must already exist). */ +export async function pushDatabaseSchema(databaseUrl: string) { const databasePath = path.resolve(__dirname, "../../database"); - await x( + // throwOnError is essential: without it tinyexec swallows a non-zero `prisma db push`, so a failed + // push looks like success and only surfaces much later as a confusing downstream error. 
+ const result = await x( `${databasePath}/node_modules/.bin/prisma`, [ "db", @@ -34,21 +37,65 @@ export async function createPostgresContainer(network: StartedNetwork) { `${databasePath}/prisma/schema.prisma`, ], { + throwOnError: true, nodeOptions: { env: { ...process.env, - DATABASE_URL: container.getConnectionUri(), - DIRECT_URL: container.getConnectionUri(), + DATABASE_URL: databaseUrl, + DIRECT_URL: databaseUrl, }, }, } ); + return result; +} + +/** + * Caps each container's CPU/memory to approximate the 2-core CI runner locally (for timing + flake + * reproduction). Set TESTCONTAINERS_CPU (cores per container, e.g. "2") and/or + * TESTCONTAINERS_MEMORY_GB (GB per container). Pair with running the runner under `taskset -c 0,1`. + * No-op when neither is set. (testcontainers v11 has no cpuset pinning, only this quota cap.) + */ +export function withCiResourceLimits(container: T): T { + const cpu = parsePositiveNumberEnv("TESTCONTAINERS_CPU"); + const memory = parsePositiveNumberEnv("TESTCONTAINERS_MEMORY_GB"); + if (cpu === undefined && memory === undefined) { + return container; + } + return container.withResourcesQuota({ + ...(cpu !== undefined ? { cpu } : {}), + ...(memory !== undefined ? { memory } : {}), + }); +} + +// Fail fast on a malformed value rather than letting NaN reach the container runtime as a cryptic error. 
+function parsePositiveNumberEnv(name: string): number | undefined { + const raw = process.env[name]; + if (!raw) return undefined; + const value = Number(raw); + if (!Number.isFinite(value) || value <= 0) { + throw new Error(`${name} must be a positive number, got "${raw}"`); + } + return value; +} + +export async function createPostgresContainer(network: StartedNetwork) { + const container = await withCiResourceLimits(new PostgreSqlContainer("docker.io/postgres:14")) + .withNetwork(network) + .withNetworkAliases("database") + .withCommand(["-c", "listen_addresses=*", "-c", "wal_level=logical"]) + .start(); + + await pushDatabaseSchema(container.getConnectionUri()); + return { url: container.getConnectionUri(), container, network }; } export async function createClickHouseContainer(network: StartedNetwork) { - const container = await new ClickHouseContainer().withNetwork(network).start(); + const container = await withCiResourceLimits(new ClickHouseContainer()) + .withNetwork(network) + .start(); const client = createClient({ url: container.getConnectionUrl(), @@ -75,7 +122,7 @@ export async function createRedisContainer({ port?: number; network?: StartedNetwork; }) { - let container = new RedisContainer("redis:7.2") + let container = withCiResourceLimits(new RedisContainer("redis:7.2")) .withExposedPorts(port ?? 6379) .withStartupTimeout(120_000); // 2 minutes @@ -83,16 +130,11 @@ export async function createRedisContainer({ container = container.withNetwork(network).withNetworkAliases("redis"); } + // Wait only on the readiness log (RedisContainer's default) - the previous Docker healthcheck added + // a full poll-cycle of latency per boot, which dominates per-test redis. verifyRedisConnection + // below still confirms the container actually accepts connections before we hand it to the test. 
const startedContainer = await container - .withHealthCheck({ - test: ["CMD", "redis-cli", "ping"], - interval: 1000, - timeout: 3000, - retries: 5, - }) - .withWaitStrategy( - Wait.forAll([Wait.forHealthCheck(), Wait.forLogMessage("Ready to accept connections")]) - ) + .withWaitStrategy(Wait.forLogMessage("Ready to accept connections")) .start(); // Add a verification step @@ -156,8 +198,10 @@ export async function createElectricContainer( network.getName() )}:5432/${postgresContainer.getDatabase()}?sslmode=disable`; - const container = await new GenericContainer( - "electricsql/electric:1.2.4@sha256:20da3d0b0e74926c5623392db67fd56698b9e374c4aeb6cb5cadeb8fea171c36" + const container = await withCiResourceLimits( + new GenericContainer( + "electricsql/electric:1.2.4@sha256:20da3d0b0e74926c5623392db67fd56698b9e374c4aeb6cb5cadeb8fea171c36" + ) ) .withExposedPorts(3000) .withNetwork(network) @@ -174,7 +218,7 @@ export async function createElectricContainer( } export async function createMinIOContainer(network: StartedNetwork) { - const container = await new MinIOContainer() + const container = await withCiResourceLimits(new MinIOContainer()) .withNetwork(network) .withNetworkAliases("minio") .start(); @@ -250,8 +294,9 @@ export async function useContainer( const useDurationMs = Date.now() - start; metadata.useDurationMs = useDurationMs; } finally { - // WARNING: Testcontainers by default will not wait until the container has stopped. It will simply issue the stop command and return immediately. - // If you need to wait for the container to be stopped, you can provide a timeout. The unit of timeout option here is milliseconds (changed from seconds in testcontainers v11) - await logCleanup(name, container.stop({ timeout: 10_000 }), metadata); + // Containers are throwaway, so we force-kill (SIGKILL) instead of waiting for a graceful + // shutdown - ClickHouse alone spends ~5s/test gracefully stopping. timeout: 0 = immediate kill. 
+ // We still await it (no pileup); logCleanup swallows any teardown-time connection errors. + await logCleanup(name, container.stop({ timeout: 0 }), metadata); } } diff --git a/packages/redis-worker/src/worker.test.ts b/packages/redis-worker/src/worker.test.ts index e4b6fd3e858..bd6c70b9676 100644 --- a/packages/redis-worker/src/worker.test.ts +++ b/packages/redis-worker/src/worker.test.ts @@ -1,4 +1,4 @@ -import { redisTest } from "@internal/testcontainers"; +import { isolatedRedisTest as redisTest } from "@internal/testcontainers"; import { Logger } from "@trigger.dev/core/logger"; import { describe } from "node:test"; import { expect } from "vitest"; diff --git a/packages/redis-worker/vitest.config.ts b/packages/redis-worker/vitest.config.ts index 452a66037d2..b52b49a0dc4 100644 --- a/packages/redis-worker/vitest.config.ts +++ b/packages/redis-worker/vitest.config.ts @@ -1,7 +1,9 @@ import { defineConfig } from "vitest/config"; +import { DurationShardingSequencer } from "@internal/testcontainers/sequencer"; export default defineConfig({ test: { + sequence: { sequencer: DurationShardingSequencer }, include: ["**/*.test.ts"], globals: true, // CI-only: absorbs timing races (real-clock waits vs worker poll interval) under shard CPU contention diff --git a/test-timings.json b/test-timings.json new file mode 100644 index 00000000000..1c1504c029d --- /dev/null +++ b/test-timings.json @@ -0,0 +1,222 @@ +{ + "apps/webapp/test/EnvironmentVariablesPresenter.test.ts": 10249, + "apps/webapp/test/GCRARateLimiter.test.ts": 4984, + "apps/webapp/test/authorizationRateLimitMiddleware.test.ts": 1, + "apps/webapp/test/bufferedTriggerPayload.test.ts": 3, + "apps/webapp/test/calculateNextSchedule.test.ts": 345, + "apps/webapp/test/chat-snapshot-integration.test.ts": 2326, + "apps/webapp/test/clickhouseFactory.test.ts": 13885, + "apps/webapp/test/concurrentFlushScheduler.test.ts": 361, + "apps/webapp/test/createDeploymentWithNextVersion.test.ts": 17889, + 
"apps/webapp/test/detectbadJsonStrings.test.ts": 98, + "apps/webapp/test/environmentVariableDeduplication.test.ts": 3, + "apps/webapp/test/environmentVariableRules.test.ts": 3, + "apps/webapp/test/environmentVariablesEnvironments.test.ts": 10355, + "apps/webapp/test/environmentVariablesRepository.test.ts": 18320, + "apps/webapp/test/errorFingerprinting.test.ts": 16, + "apps/webapp/test/errorGroupWebhook.test.ts": 7, + "apps/webapp/test/fairDequeuingStrategy.test.ts": 5705, + "apps/webapp/test/findOrCreateBackgroundWorker.test.ts": 17442, + "apps/webapp/test/getDeploymentImageRef.test.ts": 7, + "apps/webapp/test/httpErrors.test.ts": 25, + "apps/webapp/test/marqsKeyProducer.test.ts": 7, + "apps/webapp/test/metadataRouteOperationsLogging.test.ts": 6, + "apps/webapp/test/mollifierApplyMetadataMutation.test.ts": 497, + "apps/webapp/test/mollifierClaimResolution.test.ts": 6, + "apps/webapp/test/mollifierDrainerHandler.test.ts": 21, + "apps/webapp/test/mollifierDrainerWorker.test.ts": 5, + "apps/webapp/test/mollifierDrainingGauge.test.ts": 449, + "apps/webapp/test/mollifierGate.test.ts": 16, + "apps/webapp/test/mollifierIdempotencyClaim.test.ts": 13, + "apps/webapp/test/mollifierMollify.test.ts": 5, + "apps/webapp/test/mollifierMutateWithFallback.test.ts": 18, + "apps/webapp/test/mollifierReadFallback.test.ts": 15, + "apps/webapp/test/mollifierReplayPayloadShape.test.ts": 2, + "apps/webapp/test/mollifierResetIdempotencyKey.test.ts": 9, + "apps/webapp/test/mollifierResolveRunForMutation.test.ts": 7, + "apps/webapp/test/mollifierStaleSweep.test.ts": 969, + "apps/webapp/test/mollifierSynthesiseFoundRun.test.ts": 5, + "apps/webapp/test/mollifierSyntheticApiResponses.test.ts": 5, + "apps/webapp/test/mollifierSyntheticRedirectInfo.test.ts": 805, + "apps/webapp/test/mollifierSyntheticReplayTaskRun.test.ts": 2, + "apps/webapp/test/mollifierSyntheticRunHeader.test.ts": 4, + "apps/webapp/test/mollifierSyntheticSpanRun.test.ts": 8, + 
"apps/webapp/test/mollifierSyntheticTrace.test.ts": 6, + "apps/webapp/test/mollifierTripEvaluator.test.ts": 621, + "apps/webapp/test/objectStore.test.ts": 15979, + "apps/webapp/test/organizationDataStoresRegistry.test.ts": 16732, + "apps/webapp/test/otlpExporter.test.ts": 13, + "apps/webapp/test/otlpUtf16Sanitization.integration.test.ts": 6540, + "apps/webapp/test/realtimeClient.test.ts": 1, + "apps/webapp/test/redisRealtimeStreams.test.ts": 6214, + "apps/webapp/test/registryConfig.test.ts": 652, + "apps/webapp/test/replay-after-crash.test.ts": 2233, + "apps/webapp/test/runsBackfiller.test.ts": 15478, + "apps/webapp/test/runsReplicationBenchmark.test.ts": 0, + "apps/webapp/test/runsRepository.part1.test.ts": 53000, + "apps/webapp/test/runsRepository.part2.test.ts": 57000, + "apps/webapp/test/sanitizeRowsOnParseError.test.ts": 8, + "apps/webapp/test/sentryTenantContext.test.ts": 5, + "apps/webapp/test/sentryTraceContext.server.test.ts": 12, + "apps/webapp/test/sessionDuration.test.ts": 18416, + "apps/webapp/test/sessionsReplicationService.test.ts": 30000, + "apps/webapp/test/shouldRevalidateRunsList.test.ts": 5, + "apps/webapp/test/slackErrorAlerts.test.ts": 0, + "apps/webapp/test/tenantContext.test.ts": 26, + "apps/webapp/test/tenantContextFromAuthEnvironment.test.ts": 2, + "apps/webapp/test/tenantContextResolver.test.ts": 19, + "apps/webapp/test/timeGranularity.test.ts": 3, + "apps/webapp/test/timelineSpanEvents.test.ts": 6, + "apps/webapp/test/updateMetadata.test.ts": 26380, + "apps/webapp/test/validateGitBranchName.test.ts": 7, + "apps/webapp/test/vercelUrls.test.ts": 3, + "apps/webapp/test/webhookErrorAlerts.test.ts": 5, + "apps/webapp/test/workerQueueSplit.test.ts": 3, + "apps/webapp/test/components/DateTime.test.ts": 24, + "apps/webapp/test/engine/batchPayloads.test.ts": 5018, + "apps/webapp/test/engine/streamBatchItems.test.ts": 45000, + "apps/webapp/test/engine/taskIdentifierRegistry.test.ts": 13152, + "apps/webapp/test/engine/triggerTask.test.ts": 31630, + 
"apps/webapp/test/presenters/mapRunToLiveFields.test.ts": 3, + "apps/webapp/test/services/organizationAccessToken.test.ts": 9, + "apps/webapp/test/services/personalAccessToken.test.ts": 8, + "apps/webapp/test/components/code/tsql/tsqlCompletion.test.ts": 10, + "apps/webapp/test/components/code/tsql/tsqlLinter.test.ts": 237, + "apps/webapp/test/components/runs/v3/RunTag.test.ts": 5, + "packages/trigger-sdk/test/chat-snapshot.test.ts": 22, + "packages/trigger-sdk/test/chatHandover.test.ts": 1658, + "packages/trigger-sdk/test/merge-by-id.test.ts": 9, + "packages/trigger-sdk/test/mockChatAgent.test.ts": 2254, + "packages/trigger-sdk/test/recovery-boot.test.ts": 671, + "packages/trigger-sdk/test/replay-session-in.test.ts": 12, + "packages/trigger-sdk/test/replay-session-out.test.ts": 40, + "packages/trigger-sdk/test/skill.test.ts": 16, + "packages/trigger-sdk/test/skillsRuntime.test.ts": 131, + "packages/trigger-sdk/test/wire-shape.test.ts": 15, + "packages/trigger-sdk/src/v3/chat-server.test.ts": 185, + "packages/trigger-sdk/src/v3/chat-tab-coordinator.test.ts": 14, + "packages/trigger-sdk/src/v3/chat.test.ts": 77, + "packages/trigger-sdk/src/v3/createStartSessionAction.test.ts": 5, + "packages/trigger-sdk/src/v3/sessions.test.ts": 31, + "packages/trigger-sdk/src/v3/shared.test.ts": 68, + "packages/trigger-sdk/src/v3/streams.test.ts": 6, + "packages/trigger-sdk/src/v3/triggerClient.test.ts": 66, + "packages/trigger-sdk/src/v3/triggerClient.types.test.ts": 11, + "packages/redis-worker/src/cron.test.ts": 27371, + "packages/redis-worker/src/queue.test.ts": 3435, + "packages/redis-worker/src/worker.test.ts": 32870, + "packages/redis-worker/src/mollifier/buffer.test.ts": 3091, + "packages/redis-worker/src/mollifier/drainer.test.ts": 8403, + "packages/redis-worker/src/fair-queue/tests/concurrency.test.ts": 1341, + "packages/redis-worker/src/fair-queue/tests/drr.test.ts": 1203, + "packages/redis-worker/src/fair-queue/tests/fairQueue.test.ts": 7728, + 
"packages/redis-worker/src/fair-queue/tests/raceConditions.test.ts": 14961, + "packages/redis-worker/src/fair-queue/tests/retry.test.ts": 10, + "packages/redis-worker/src/fair-queue/tests/tenantDispatch.test.ts": 5769, + "packages/redis-worker/src/fair-queue/tests/visibility.test.ts": 3783, + "packages/redis-worker/src/fair-queue/tests/workerQueue.test.ts": 1116, + "packages/core/test/duration.test.ts": 42, + "packages/core/test/errors.test.ts": 51, + "packages/core/test/eventFilterMatches.test.ts": 38, + "packages/core/test/externalSpanExporterWrapper.test.ts": 13, + "packages/core/test/flattenAttributes.test.ts": 59, + "packages/core/test/ioSerialization.test.ts": 306, + "packages/core/test/jumpHash.test.ts": 385, + "packages/core/test/mockTaskContext.test.ts": 61, + "packages/core/test/recordSpanException.test.ts": 65, + "packages/core/test/resourceCatalog.test.ts": 71, + "packages/core/test/runStream.test.ts": 245, + "packages/core/test/skillCatalog.test.ts": 17, + "packages/core/test/standardMetadataManager.test.ts": 456, + "packages/core/test/streamsWriterV1.test.ts": 84112, + "packages/core/test/taskExecutor.test.ts": 364, + "packages/core/test/utils.test.ts": 15, + "packages/core/src/v3/apiClient/runStream.test.ts": 1893, + "packages/core/src/v3/apiClient/streamBatchItems.test.ts": 278, + "packages/core/src/v3/build/flags.test.ts": 12, + "packages/core/src/v3/idempotency-key-catalog/lruIdempotencyKeyCatalog.test.ts": 21, + "packages/core/src/v3/machines/max-old-space.test.ts": 18, + "packages/core/src/v3/realtimeStreams/manager.test.ts": 28, + "packages/core/src/v3/realtimeStreams/streamsWriterV2.test.ts": 91, + "packages/core/src/v3/schemas/api-type.test.ts": 33, + "packages/core/src/v3/schemas/batchItemNDJSON.test.ts": 15, + "packages/core/src/v3/schemas/idempotencyKey.test.ts": 68, + "packages/core/src/v3/sessionStreams/manager.test.ts": 126, + "packages/core/src/v3/serverOnly/shutdownManager.test.ts": 57, + 
"packages/core/src/v3/taskContext/index.test.ts": 23, + "packages/core/src/v3/utils/reconnectBackoff.test.ts": 45, + "packages/core/src/v3/runEngineWorker/supervisor/consumerPool.test.ts": 123, + "packages/core/src/v3/runEngineWorker/supervisor/queueMetricsProcessor.test.ts": 98, + "packages/core/src/v3/runEngineWorker/supervisor/scalingStrategies.test.ts": 92, + "packages/schema-to-json/tests/index.test.ts": 17, + "internal-packages/run-engine/src/run-queue/index.test.ts": 82296, + "internal-packages/run-engine/src/batch-queue/tests/index.test.ts": 5462, + "internal-packages/run-engine/src/engine/tests/attemptFailures.test.ts": 35471, + "internal-packages/run-engine/src/engine/tests/batchTrigger.test.ts": 33127, + "internal-packages/run-engine/src/engine/tests/batchTriggerAndWait.test.ts": 33681, + "internal-packages/run-engine/src/engine/tests/batchTwoPhase.test.ts": 28159, + "internal-packages/run-engine/src/engine/tests/cancelling.test.ts": 26240, + "internal-packages/run-engine/src/engine/tests/checkpoints.test.ts": 29420, + "internal-packages/run-engine/src/engine/tests/createCancelledRun.test.ts": 31807, + "internal-packages/run-engine/src/engine/tests/createFailedTaskRun.test.ts": 23573, + "internal-packages/run-engine/src/engine/tests/debounce.test.ts": 58554, + "internal-packages/run-engine/src/engine/tests/delays.test.ts": 42748, + "internal-packages/run-engine/src/engine/tests/dequeuing.test.ts": 25542, + "internal-packages/run-engine/src/engine/tests/getSnapshotsSince.test.ts": 32158, + "internal-packages/run-engine/src/engine/tests/heartbeats.test.ts": 39634, + "internal-packages/run-engine/src/engine/tests/lazyWaitpoint.test.ts": 34757, + "internal-packages/run-engine/src/engine/tests/locking.test.ts": 51090, + "internal-packages/run-engine/src/engine/tests/pendingVersion.test.ts": 28762, + "internal-packages/run-engine/src/engine/tests/priority.test.ts": 28808, + "internal-packages/run-engine/src/engine/tests/trigger.test.ts": 30601, + 
"internal-packages/run-engine/src/engine/tests/triggerAndWait.test.ts": 28893, + "internal-packages/run-engine/src/engine/tests/ttl.test.ts": 61981, + "internal-packages/run-engine/src/engine/tests/waitpointRace.test.ts": 22855, + "internal-packages/run-engine/src/engine/tests/waitpoints.test.ts": 39521, + "internal-packages/run-engine/src/run-queue/tests/ack.test.ts": 3156, + "internal-packages/run-engine/src/run-queue/tests/ckCounters.test.ts": 16715, + "internal-packages/run-engine/src/run-queue/tests/ckIndex.test.ts": 8916, + "internal-packages/run-engine/src/run-queue/tests/concurrencySweeper.test.ts": 6379, + "internal-packages/run-engine/src/run-queue/tests/dequeueMessageFromWorkerQueue.test.ts": 66701, + "internal-packages/run-engine/src/run-queue/tests/enqueueMessage.test.ts": 9108, + "internal-packages/run-engine/src/run-queue/tests/fairQueueSelectionStrategy.test.ts": 7997, + "internal-packages/run-engine/src/run-queue/tests/keyProducer.test.ts": 19, + "internal-packages/run-engine/src/run-queue/tests/migrateLegacyMasterQueue.test.ts": 1728, + "internal-packages/run-engine/src/run-queue/tests/nack.test.ts": 6184, + "internal-packages/run-engine/src/run-queue/tests/releaseConcurrency.test.ts": 3018, + "internal-packages/run-engine/src/run-queue/tests/workerQueueResolver.test.ts": 38, + "internal-packages/cache/src/stores/lruMemory.test.ts": 44, + "internal-packages/schedule-engine/test/scheduleEngine.test.ts": 43000, + "internal-packages/schedule-engine/test/scheduleRecovery.test.ts": 17396, + "internal-packages/replication/src/client.test.ts": 31306, + "internal-packages/tsql/src/index.test.ts": 246, + "internal-packages/tsql/src/grammar/parser.test.ts": 150, + "internal-packages/tsql/src/query/escape.test.ts": 9, + "internal-packages/tsql/src/query/parser.test.ts": 368, + "internal-packages/tsql/src/query/printer.test.ts": 942, + "internal-packages/tsql/src/query/results.test.ts": 4, + "internal-packages/tsql/src/query/schema.test.ts": 18, + 
"internal-packages/tsql/src/query/security.test.ts": 487, + "internal-packages/tsql/src/query/time_buckets.test.ts": 5, + "internal-packages/tsql/src/query/validator.test.ts": 250, + "internal-packages/rbac/src/ability.test.ts": 6, + "internal-packages/rbac/src/loader.test.ts": 3, + "internal-packages/llm-model-catalog/src/registry.test.ts": 15, + "internal-packages/llm-model-catalog/src/sync.test.ts": 15852, + "internal-packages/clickhouse/src/taskRuns.test.ts": 6813, + "internal-packages/clickhouse/src/tsql.test.ts": 9021, + "internal-packages/clickhouse/src/tsqlFunctions.test.ts": 12971, + "internal-packages/clickhouse/src/client/client.test.ts": 9138, + "internal-packages/sdk-compat-tests/src/tests/bundler.test.ts": 348, + "internal-packages/sdk-compat-tests/src/tests/import.test.ts": 4742, + "apps/webapp/test/runsReplicationService.part1.test.ts": 74000, + "apps/webapp/test/runsReplicationService.part2.test.ts": 64000, + "apps/webapp/test/runsReplicationService.part3.test.ts": 30000, + "apps/webapp/test/runsReplicationService.part4.test.ts": 70000, + "apps/webapp/test/runsReplicationService.part5.test.ts": 43000, + "apps/webapp/test/runsReplicationService.part6.test.ts": 32000, + "apps/webapp/test/runsRepository.part3.test.ts": 43000, + "apps/webapp/test/runsRepository.part4.test.ts": 57000, + "apps/webapp/test/runsReplicationService.part7.test.ts": 43000, + "internal-packages/schedule-engine/test/scheduleEngine2.test.ts": 43000 +}