Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,13 @@ class ProjectBatchLockController(
logger.debug("Retrieving all project batch locks")

val locks = batchJobProjectLockingManager.getMap()
val lockModels = locks.map { (projectId, lockedJobId) ->
val lockStatus = when (lockedJobId) {
null -> LockStatus.UNINITIALIZED
0L -> LockStatus.UNLOCKED
val lockModels = locks.entries.map { (projectId, lockedJobIds) ->
val lockStatus = when {
lockedJobIds.isEmpty() -> LockStatus.UNLOCKED
else -> LockStatus.LOCKED
}

val jobInfo = if (lockedJobId != null && lockedJobId > 0L) {
val jobInfos = lockedJobIds.mapNotNull { lockedJobId ->
val jobDto = batchJobService.findJobDto(lockedJobId)
if (jobDto == null) {
logger.warn("Locked job $lockedJobId in project $projectId not found")
Expand All @@ -67,15 +66,13 @@ class ProjectBatchLockController(
createdAt = jobDto.createdAt
)
}
} else {
null
}

ProjectLockModel(
projectId = projectId,
lockedJobId = lockedJobId,
lockedJobIds = lockedJobIds,
lockStatus = lockStatus,
jobInfo = jobInfo
jobInfos = jobInfos
)
}

Expand Down Expand Up @@ -113,9 +110,9 @@ class ProjectBatchLockController(
*/
data class ProjectLockModel(
val projectId: Long,
val lockedJobId: Long?,
val lockedJobIds: Set<Long>,
val lockStatus: LockStatus,
val jobInfo: JobInfo?
val jobInfos: List<JobInfo>
)

/**
Expand All @@ -132,13 +129,10 @@ data class JobInfo(
* Status of the project lock
*/
enum class LockStatus {
/** Lock is explicitly cleared (value = 0L) */
/** Lock is explicitly cleared (value = empty set) */
UNLOCKED,

/** Lock has never been initialized (value = null) */
UNINITIALIZED,

/** Lock is held by a specific job (value = jobId) */
/** Lock is held by one or more jobs (value = set of job IDs) */
LOCKED
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.tolgee.api.v2.controllers.batch

import io.tolgee.batch.BatchJobProjectLockingManager
import io.tolgee.fixtures.andIsForbidden
import io.tolgee.fixtures.andIsOk
import io.tolgee.fixtures.waitFor
Expand All @@ -14,6 +15,8 @@ import kotlinx.coroutines.ensureActive
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.ValueSource
import org.mockito.kotlin.*
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.boot.test.context.SpringBootTest
Expand All @@ -30,72 +33,91 @@ class BatchJobManagementControllerCancellationTest :
@Autowired
lateinit var stuckBatchJobTestUtil: StuckBatchJobTestUtil

@Autowired
lateinit var batchJobProjectLockingManager: BatchJobProjectLockingManager

@Autowired
lateinit var batchProperties: io.tolgee.configuration.tolgee.BatchProperties

var simulateLongRunningChunkRun = true
var originalProjectConcurrency = 1

@BeforeEach
fun setup() {
simulateLongRunningChunkRun = true
originalProjectConcurrency = batchProperties.projectConcurrency
}

@AfterEach
fun clean() {
simulateLongRunningChunkRun = false
batchProperties.projectConcurrency = originalProjectConcurrency
}

@Test
@ParameterizedTest
@ValueSource(ints = [1, 2])
@ProjectJWTAuthTestMethod
fun `cancels a job`() {
batchDumper.finallyDump {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is batchDumper removed here? It's really convinient for investigation in case of a test failure

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just maybe batchDumper should be extended with smth like

  fun <T> finallyDumpAll(fn: () -> T): T {
    return try {
      fn()
    } finally {
      getAllJobs().forEach { this.dump(it.id) }
    }
  }

  fun getAllJobs(): List<BatchJob> = entityManager.createQuery("""from BatchJob""", BatchJob::class.java).resultList

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

while playing with it i actually noticed that setting batchProperties.projectConcurrency = 2 doesn't work here. It must be passed as application properties in the @SpringBootTest(

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re batchDumper, I missed this, will have a look.

re batchProperties.projectConcurrency = 2 how did you notice it doesn't work? Spring will start it with its standard configuration, and then it is overridden before the test, and then returned to the original configuration after the tests.
As the checks against the properties in BatchJobProjectLockingManager are always done dynamically, it should work.
This approach allows "@ParameterizedTest" to be used. With spring test properties, it would require instantiating a different spring boot test context with different methods.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just pushed the requested batchDumper changes.

Unfortunately tests are all failing on my machine today (due to PG setup), but the change makes sense and it all compiles... Let's see if the CI agrees :D

@bdshadow Would you have any idea why the tests might be failing locally?
The build system seems to have launched the docker containers successfully

CONTAINER ID   IMAGE                           COMMAND                  CREATED       STATUS      PORTS                                           NAMES
ed02aeb8b7a6   postgres:16.3                   "docker-entrypoint.s…"   2 weeks ago   Up 3 days   0.0.0.0:55434->5432/tcp, [::]:55434->5432/tcp   tolgee_backend_tests_postgres_ee
bf2b3485c2f4   postgres:16.3                   "docker-entrypoint.s…"   2 weeks ago   Up 3 days   0.0.0.0:55433->5432/tcp, [::]:55433->5432/tcp   tolgee_backend_tests_postgres_main

val keys = testData.addTranslationOperationData(100)
saveAndPrepare()

val keyIds = keys.map { it.id }.toList()

val count = AtomicInteger(0)

doAnswer {
if (count.incrementAndGet() > 5) {
while (simulateLongRunningChunkRun) {
// this simulates long-running operation, which checks for active context
val context = it.arguments[2] as CoroutineContext
context.ensureActive()
Thread.sleep(10)
}
fun `cancels a job`(projectConcurrency: Int) {
val keys = testData.addTranslationOperationData(100)
saveAndPrepare()

batchProperties.projectConcurrency = projectConcurrency

val keyIds = keys.map { it.id }.toList()

val count = AtomicInteger(0)

doAnswer {
if (count.incrementAndGet() > 5) {
while (simulateLongRunningChunkRun) {
// this simulates long-running operation, which checks for active context
val context = it.arguments[2] as CoroutineContext
context.ensureActive()
Thread.sleep(10)
}
it.callRealMethod()
}.whenever(machineTranslationChunkProcessor).process(any(), any(), any(), any())

performProjectAuthPost(
"start-batch-job/machine-translate",
mapOf(
"keyIds" to keyIds,
"targetLanguageIds" to
listOf(
testData.projectBuilder.getLanguageByTag("cs")!!.self.id,
),
),
).andIsOk

waitFor {
batchJobConcurrentLauncher.runningJobs.size >= 5
}
it.callRealMethod()
}.whenever(machineTranslationChunkProcessor).process(any(), any(), any(), any())

val job = util.getSingleJob()
performProjectAuthPut("batch-jobs/${job.id}/cancel")
.andIsOk
performProjectAuthPost(
"start-batch-job/machine-translate",
mapOf(
"keyIds" to keyIds,
"targetLanguageIds" to
listOf(
testData.projectBuilder.getLanguageByTag("cs")!!.self.id,
),
),
).andIsOk

waitForNotThrowing(pollTime = 100) {
executeInNewTransaction {
util.getSingleJob().status.assert.isEqualTo(BatchJobStatus.CANCELLED)
verify(batchJobActivityFinalizer, times(1)).finalizeActivityWhenJobCompleted(any())
waitFor {
batchJobConcurrentLauncher.runningJobs.size >= 5
}

// assert activity stored
entityManager.createQuery("""from ActivityRevision ar where ar.batchJob.id = :id""")
.setParameter("id", job.id).resultList
.assert.hasSize(1)
}
val job = util.getSingleJob()

// Verify the job is locked
val lockedJobs = batchJobProjectLockingManager.getLockedForProject(testData.project.id)
lockedJobs.assert.contains(job.id)

performProjectAuthPut("batch-jobs/${job.id}/cancel")
.andIsOk

waitForNotThrowing(pollTime = 100) {
executeInNewTransaction {
util.getSingleJob().status.assert.isEqualTo(BatchJobStatus.CANCELLED)
verify(batchJobActivityFinalizer, times(1)).finalizeActivityWhenJobCompleted(any())

// assert activity stored
entityManager.createQuery("""from ActivityRevision ar where ar.batchJob.id = :id""")
.setParameter("id", job.id).resultList
.assert.hasSize(1)
}
}

// Verify the job lock was released
val lockedJobsAfterCancel = batchJobProjectLockingManager.getLockedForProject(testData.project.id)
lockedJobsAfterCancel.assert.doesNotContain(job.id)
}

@Test
Expand Down Expand Up @@ -135,4 +157,106 @@ class BatchJobManagementControllerCancellationTest :

util.getSingleJob().status.assert.isEqualTo(BatchJobStatus.CANCELLED)
}

@Test
@ProjectJWTAuthTestMethod
fun `cancels one of two running jobs with projectConcurrency=2`() {
val keys = testData.addTranslationOperationData(100)
saveAndPrepare()

batchProperties.projectConcurrency = 2

val keyIds = keys.map { it.id }.toList()

val firstJobCount = AtomicInteger(0)
val secondJobCount = AtomicInteger(0)

doAnswer {
val jobId = it.getArgument<io.tolgee.batch.data.BatchJobDto>(0).id
val allJobs = executeInNewTransaction {
batchJobService.getAllByProjectId(testData.project.id)
}
val isFirstJob = allJobs.size > 0 && jobId == allJobs[0].id

val counter = if (isFirstJob) firstJobCount else secondJobCount

if (counter.incrementAndGet() > 5) {
while (simulateLongRunningChunkRun) {
// this simulates long-running operation, which checks for active context
val context = it.arguments[2] as CoroutineContext
context.ensureActive()
Thread.sleep(10)
}
}
it.callRealMethod()
}.whenever(machineTranslationChunkProcessor).process(any(), any(), any(), any())
Comment on lines +176 to +194
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

Ensure deterministic job ordering in the test.

The mock logic at line 181 assumes allJobs[0] is consistently the "first" job created, but getAllByProjectId may lack explicit ordering, leading to non-deterministic results. If the query does not guarantee order (e.g., by ID or creation timestamp), the test could be flaky.

Verify that getAllByProjectId orders jobs deterministically. If not, consider:

  1. Adding explicit ordering to the query, or
  2. Identifying jobs by storing the returned job IDs from the POST responses (lines 197-206 and 213-222) and using those IDs directly instead of relying on array index.

Example approach using stored IDs:

// After line 206
val firstJobId = performProjectAuthPost(...)
  .andIsOk.andReturn().response.contentAsString.let { 
    // parse job ID from response 
  }

Then reference firstJobId directly in the mock logic instead of checking allJobs[0].id.

Also applies to: 228-230


🏁 Script executed:

#!/bin/bash
# Find BatchJobService and its getAllByProjectId method
ast-grep --pattern 'fun getAllByProjectId($_, $_) {
  $$$
}'

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Search for getAllByProjectId usages and definitions in the codebase
rg -nP --type=kotlin 'getAllByProjectId' -B3 -A8

Length of output: 25551


🏁 Script executed:

#!/bin/bash
# Examine the test file to understand job ID storage and mock usage
cat backend/app/src/test/kotlin/io/tolgee/api/v2/controllers/batch/BatchJobManagementControllerCancellationTest.kt | head -250

Length of output: 7613


🏁 Script executed:

#!/bin/bash
# Find BatchJobRepository and its findAllByProjectId method
rg -nP --type=kotlin 'class BatchJobRepository|fun findAllByProjectId' -B2 -A10 | head -100

Length of output: 10377


🏁 Script executed:

#!/bin/bash
# Search more specifically for the repository definition and query
find . -name "BatchJobRepository.kt" -type f -exec cat {} \;

Length of output: 3402


🏁 Script executed:

#!/bin/bash
# Check the POST response to see if job IDs are returned
grep -n "performProjectAuthPost" backend/app/src/test/kotlin/io/tolgee/api/v2/controllers/batch/BatchJobManagementControllerCancellationTest.kt | head -20

Length of output: 96


🏁 Script executed:

#!/bin/bash
# Look at the batch job controller to see what the POST response contains
find . -path "*/controller/*BatchJob*Controller.kt" -type f | head -5

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check what util.getSingleJob() does and if responses contain job IDs
grep -n "getSingleJob\|startBatchJob" backend/app/src/test/kotlin/io/tolgee/api/v2/controllers/batch/*.kt | head -20

Length of output: 1243


🏁 Script executed:

#!/bin/bash
# Find the abstract test class
find . -name "AbstractBatchJobManagementControllerTest.kt" -type f -exec cat {} \;

Length of output: 2604


🏁 Script executed:

#!/bin/bash
# Search for the util field initialization and getSingleJob implementation
grep -n "class.*util\|val util\|fun getSingleJob" backend/app/src/test/kotlin/io/tolgee/api/v2/controllers/batch/*.kt -A5

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Check what the POST response looks like (check controller or DTO)
rg -nP --type=kotlin 'BatchJobDto|BatchJobResponse' backend/app/src/main/kotlin --type=kotlin -C3 | head -50

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Find BatchJobTestUtil
find . -name "BatchJobTestUtil.kt" -type f -exec cat {} \;

Length of output: 15364


🏁 Script executed:

#!/bin/bash
# Search for BatchJobTestUtil and its methods
rg -nP --type=kotlin 'class BatchJobTestUtil|fun getSingleJob' -A10

Length of output: 3610


🏁 Script executed:

#!/bin/bash
# Check the batch job data models to see what fields are available
find . -path "*/batch/data/*" -name "*.kt" -type f | head -10

Length of output: 668


🏁 Script executed:

#!/bin/bash
# Find the batch job controller endpoint for POST machine-translate
rg -nP --type=kotlin '"start-batch-job"' -B5 -A15 | head -80

Length of output: 0


🏁 Script executed:

#!/bin/bash
# Look for BatchJobDto to see if it contains ID
cat backend/data/src/main/kotlin/io/tolgee/batch/data/BatchJobDto.kt

Length of output: 1702


Deterministic job identification needed—getAllByProjectId lacks explicit ordering.

The mock logic assumes allJobs[0] is consistently the first job, but getAllByProjectId has no @query annotation, so it generates a query without ORDER BY. This makes result ordering non-deterministic and could cause the test to fail or behave unpredictably.

Store and use job IDs from the POST responses (lines 197-206, 213-222) instead of relying on array indices at lines 181, 236-237. Since BatchJobDto exposes an id field, extract and store the returned IDs:

val firstJobIdResponse = performProjectAuthPost(
  "start-batch-job/machine-translate",
  mapOf(...)
).andIsOk.andReturn().response.contentAsString
// Parse firstJobId from JSON response

Then reference stored IDs directly in the mock and assertions instead of allJobs[0].id.


// Start first job
performProjectAuthPost(
"start-batch-job/machine-translate",
mapOf(
"keyIds" to keyIds,
"targetLanguageIds" to
listOf(
testData.projectBuilder.getLanguageByTag("cs")!!.self.id,
),
),
).andIsOk

waitFor(timeout = 5000) {
batchJobConcurrentLauncher.runningJobs.size >= 5
}

// Start second job
performProjectAuthPost(
"start-batch-job/machine-translate",
mapOf(
"keyIds" to keyIds,
"targetLanguageIds" to
listOf(
testData.projectBuilder.getLanguageByTag("cs")!!.self.id,
),
),
).andIsOk

waitFor(timeout = 5000) {
batchJobConcurrentLauncher.runningJobs.size >= 10
}

val jobs = executeInNewTransaction {
batchJobService.getAllByProjectId(testData.project.id)
}
jobs.size.assert.isEqualTo(2)

// Verify both jobs are locked
val lockedJobs = batchJobProjectLockingManager.getLockedForProject(testData.project.id)
lockedJobs.size.assert.isEqualTo(2)
lockedJobs.assert.contains(jobs[0].id)
lockedJobs.assert.contains(jobs[1].id)

// Cancel the first job
val firstJob = jobs[0]
performProjectAuthPut("batch-jobs/${firstJob.id}/cancel")
.andIsOk

waitForNotThrowing(pollTime = 100) {
executeInNewTransaction {
batchJobService.getJobDto(firstJob.id).status.assert.isEqualTo(BatchJobStatus.CANCELLED)
}
}

// Verify the first job lock was released but second job is still locked
val lockedJobsAfterCancel = batchJobProjectLockingManager.getLockedForProject(testData.project.id)
lockedJobsAfterCancel.assert.doesNotContain(firstJob.id)
lockedJobsAfterCancel.assert.contains(jobs[1].id)

// Let the second job complete
simulateLongRunningChunkRun = false

waitForNotThrowing(pollTime = 100, timeout = 10000) {
executeInNewTransaction {
batchJobService.getJobDto(jobs[1].id).status.assert.isEqualTo(BatchJobStatus.SUCCESS)
}
}
}
}
Loading
Loading