From e460105c854d25be9fec5ba4fb4cc2e15fc73024 Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 09:54:30 +0530 Subject: [PATCH 01/12] Fix flaky lifecycle/artifactory project tests on shared JPD Intermittent "Project not found" failures occur when lifecycle and artifactory test suites run concurrently against the same JFrog Platform instance. createTestProject() calls deleteProjectIfExists(tests.ProjectKey) unconditionally before creating the project, so if two suites resolve to the same ProjectKey one will silently delete the other's project (and every release bundle inside it). utils/tests/utils.go: - Add SanitizedCiRunId() helper that converts --ci.runId to a valid project-key string (lowercase, non-alphanumeric chars replaced with hyphens, leading/trailing hyphens trimmed). - Splice the sanitized runId into the test ProjectKey so concurrent suites get distinct keys (e.g. "prjlinux-lifecycle-1781784930" vs "prjlinux-artifactory-1781784935"). Previously both suites resolved to the same 7-digit-suffixed key. - Use the full 10-digit Unix timestamp instead of the last 7 digits, giving 1000x more distinct values and making same-second collisions between independent CI runs effectively impossible. Co-authored-by: Cursor --- utils/tests/utils.go | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/utils/tests/utils.go b/utils/tests/utils.go index 4bacf7ffc..dfdd7bb1e 100644 --- a/utils/tests/utils.go +++ b/utils/tests/utils.go @@ -88,6 +88,28 @@ var ( timestampAdded bool ) +// nonProjectKeyCharsRegex matches any character that isn't allowed in an Artifactory +// project key (project keys allow only lowercase alphanumeric characters and hyphens). +// We use this to sanitize the --ci.runId value before splicing it into resource names +// whose format is constrained (project keys, GPG keypair names, etc.). 
Project-key +// charset is a strict subset of the GPG keypair charset, so a single sanitization is +// safe for both. +var nonProjectKeyCharsRegex = regexp.MustCompile(`[^a-z0-9-]+`) + +// SanitizedCiRunId returns the --ci.runId flag value lowercased with any characters +// outside [a-z0-9-] collapsed to a single hyphen and surrounding hyphens trimmed. +// Returns "" if the flag wasn't set. Callers that need a per-runId suffix on +// resources whose name format is constrained (e.g. Artifactory project keys, GPG +// keypair names) should use this so concurrent runs against a shared JPD don't +// clobber each other. +func SanitizedCiRunId() string { + if ciRunId == nil || *ciRunId == "" { + return "" + } + sanitized := nonProjectKeyCharsRegex.ReplaceAllString(strings.ToLower(*ciRunId), "-") + return strings.Trim(sanitized, "-") +} + func init() { JfrogUrl = flag.String("jfrog.url", "http://localhost:8081/", "JFrog platform url") JfrogUser = flag.String("jfrog.user", "admin", "JFrog platform username") @@ -641,7 +663,25 @@ func AddTimestampToGlobalVars() { Password2 += uniqueSuffix + strconv.FormatFloat(randomSequence.Float64(), 'f', 2, 32) // Projects - ProjectKey += timestamp[len(timestamp)-7:] + // Artifactory project keys must be 2-32 lowercase alphanumeric or hyphen + // characters and must start with a letter. We always include the sanitized + // --ci.runId (when set) so that concurrent runs against a shared JPD don't + // clobber each other's project — createTestProject calls + // deleteProjectIfExists(tests.ProjectKey) unconditionally, which means a + // colliding key from another concurrent suite will silently delete the + // project (and every release bundle inside it) out from under us. + projectSuffix := timestamp + if sanitizedRunId := SanitizedCiRunId(); sanitizedRunId != "" { + projectSuffix = sanitizedRunId + "-" + projectSuffix + } + // ProjectKey starts as "prj" (3 chars), and the total must be <= 32. 
Trim + // from the front so the trailing timestamp (used for visual debuggability) + // is preserved and we don't end up with a key that starts with a hyphen. + const maxProjectKeyLen = 32 + if maxSuffixLen := maxProjectKeyLen - len(ProjectKey); len(projectSuffix) > maxSuffixLen { + projectSuffix = strings.TrimLeft(projectSuffix[len(projectSuffix)-maxSuffixLen:], "-") + } + ProjectKey += projectSuffix timestampAdded = true } From f0caf9b9787d6878059ea877fba4a5ecb4c36607 Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 10:44:18 +0530 Subject: [PATCH 02/12] fix(lifecycle): wait for project visibility across all HA nodes before using it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After createTestProject() succeeds, each Artifactory node behind the load balancer may still have a stale in-memory cache of Access project data. This causes intermittent 400 "Project not found" on build-publish and release-bundle creation when a request is routed to a node whose cache has not yet refreshed. Add waitForProjectInArtifactory() which polls GET /api/repositories/<projectKey>-build-info (the repo Artifactory auto-creates per project) and requires 5 consecutive 200 responses before proceeding. Requiring consecutive successes — not just one — ensures that every node in a round-robin pool has warmed its cache, eliminating the intermittent failures seen in TestReleaseBundlesSearchVersions, TestReleaseBundleCreationFromMultiBundlesUsingCommandFlagWithProject and TestCreateBundleWithoutSpecAndWithProject. 
Co-authored-by: Cursor --- lifecycle_test.go | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/lifecycle_test.go b/lifecycle_test.go index 7d58a229b..8d42c527b 100644 --- a/lifecycle_test.go +++ b/lifecycle_test.go @@ -163,6 +163,7 @@ func TestReleaseBundleCreationFromMultiBundlesUsingCommandFlagWithProject(t *tes } }() } + waitForProjectInArtifactory(t, tests.ProjectKey) lcManager := getLcServiceManager(t) deleteBuilds := uploadBuildsWithProject(t) @@ -724,6 +725,7 @@ func TestCreateBundleWithoutSpecAndWithProject(t *testing.T) { } }() } + waitForProjectInArtifactory(t, tests.ProjectKey) lcManager := getLcServiceManager(t) deleteBuilds := uploadBuildsWithProject(t) defer deleteBuilds() @@ -1424,6 +1426,7 @@ func TestReleaseBundlesSearchVersions(t *testing.T) { } }() } + waitForProjectInArtifactory(t, tests.ProjectKey) deleteBuildsWithProject := uploadBuildsWithProject(t) defer deleteBuildsWithProject() @@ -1765,3 +1768,37 @@ type KeyPairPayload struct { PublicKey string `json:"publicKey,omitempty"` PrivateKey string `json:"privateKey,omitempty"` // #nosec G117 -- test struct, not a real secret } + +// waitForProjectInArtifactory polls until the project's build-info repository is visible +// on every Artifactory node behind the load balancer. In HA deployments each node has its +// own in-memory cache of Access project data; a single 200 only confirms one node is warm. +// We require consecutiveRequired back-to-back 200 responses (one per poll tick, each +// potentially hitting a different node in round-robin) before proceeding. +func waitForProjectInArtifactory(t *testing.T, projectKey string) { + const ( + timeout = 30 * time.Second + pollInterval = 1 * time.Second + consecutiveRequired = 5 + ) + client, err := httpclient.ClientBuilder().Build() + if !assert.NoError(t, err) { + return + } + // Artifactory auto-creates -build-info when the project is registered. 
+ probeURL := serverDetails.ArtifactoryUrl + "api/repositories/" + projectKey + "-build-info" + deadline := time.Now().Add(timeout) + consecutive := 0 + for time.Now().Before(deadline) { + resp, _, _, probErr := client.SendGet(probeURL, true, artHttpDetails, "") + if probErr == nil && resp.StatusCode == http.StatusOK { + consecutive++ + if consecutive >= consecutiveRequired { + return + } + } else { + consecutive = 0 + } + time.Sleep(pollInterval) + } + t.Logf("waitForProjectInArtifactory: project %q did not become fully visible within %s; proceeding anyway", projectKey, timeout) +} From 4867c8eeff3a97465258dfe959f3c939f105e96b Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 10:57:49 +0530 Subject: [PATCH 03/12] fix(lifecycle): use Access API as project probe, sleep for Artifactory sync The correct API to check project existence is GET /access/api/v1/projects/<projectKey>. Poll until Access returns 200, then sleep 5s for Artifactory's internal project cache to sync before build-publish or release-bundle calls that scope to the project. Co-authored-by: Cursor --- lifecycle_test.go | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/lifecycle_test.go b/lifecycle_test.go index 8d42c527b..f650ea27c 100644 --- a/lifecycle_test.go +++ b/lifecycle_test.go @@ -1769,36 +1769,31 @@ type KeyPairPayload struct { PrivateKey string `json:"privateKey,omitempty"` // #nosec G117 -- test struct, not a real secret } -// waitForProjectInArtifactory polls until the project's build-info repository is visible -// on every Artifactory node behind the load balancer. In HA deployments each node has its -// own in-memory cache of Access project data; a single 200 only confirms one node is warm. -// We require consecutiveRequired back-to-back 200 responses (one per poll tick, each -// potentially hitting a different node in round-robin) before proceeding. 
+// waitForProjectInArtifactory first confirms the project is visible in the Access service, +// then waits for Artifactory's internal project cache to sync from Access. The Access API +// (GET /access/api/v1/projects/) is the correct source of truth for project existence. +// After Access confirms the project, we sleep briefly to allow Artifactory nodes to pick up +// the new project before any build-publish or release-bundle calls that scope to the project. func waitForProjectInArtifactory(t *testing.T, projectKey string) { const ( - timeout = 30 * time.Second - pollInterval = 1 * time.Second - consecutiveRequired = 5 + accessTimeout = 30 * time.Second + pollInterval = 500 * time.Millisecond + artifactoryTTL = 5 * time.Second ) client, err := httpclient.ClientBuilder().Build() if !assert.NoError(t, err) { return } - // Artifactory auto-creates -build-info when the project is registered. - probeURL := serverDetails.ArtifactoryUrl + "api/repositories/" + projectKey + "-build-info" - deadline := time.Now().Add(timeout) - consecutive := 0 + probeURL := *tests.JfrogUrl + "access/api/v1/projects/" + projectKey + deadline := time.Now().Add(accessTimeout) for time.Now().Before(deadline) { resp, _, _, probErr := client.SendGet(probeURL, true, artHttpDetails, "") if probErr == nil && resp.StatusCode == http.StatusOK { - consecutive++ - if consecutive >= consecutiveRequired { - return - } - } else { - consecutive = 0 + // Project confirmed in Access; give Artifactory time to sync its cache. 
+ time.Sleep(artifactoryTTL) + return } time.Sleep(pollInterval) } - t.Logf("waitForProjectInArtifactory: project %q did not become fully visible within %s; proceeding anyway", projectKey, timeout) + t.Logf("waitForProjectInArtifactory: project %q not visible in Access within %s; proceeding anyway", projectKey, accessTimeout) } From 68624b8cbed14f6bb799ef3cfa31bb1e90c678e0 Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 10:59:50 +0530 Subject: [PATCH 04/12] fix(lifecycle): increase Artifactory project sync wait to 30s Co-authored-by: Cursor --- lifecycle_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifecycle_test.go b/lifecycle_test.go index f650ea27c..bcd6f4308 100644 --- a/lifecycle_test.go +++ b/lifecycle_test.go @@ -1778,7 +1778,7 @@ func waitForProjectInArtifactory(t *testing.T, projectKey string) { const ( accessTimeout = 30 * time.Second pollInterval = 500 * time.Millisecond - artifactoryTTL = 5 * time.Second + artifactoryTTL = 30 * time.Second ) client, err := httpclient.ClientBuilder().Build() if !assert.NoError(t, err) { From 3356ad712c927a26865d2df254e71d480f4dadc2 Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 11:04:07 +0530 Subject: [PATCH 05/12] fix: move project wait into createTestProject and cover artifactory tests Instead of scattering waitForProjectInArtifactory at each call site, call it inside createTestProject so every caller gets the 30s sync wait for free. Also add the wait to the two artifactory inline tests (TestArtifactoryDownloadByBuildUsingSimpleDownloadWithProject and TestArtifactoryDownloadWithEnvProject) which inline their own project creation and had the same race against Artifactory cache sync. 
Co-authored-by: Cursor --- artifactory_test.go | 2 ++ lifecycle_test.go | 4 ---- transfer_test.go | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/artifactory_test.go b/artifactory_test.go index 9d5bd245b..710690f14 100644 --- a/artifactory_test.go +++ b/artifactory_test.go @@ -3672,6 +3672,7 @@ func TestArtifactoryDownloadByBuildUsingSimpleDownloadWithProject(t *testing.T) // Assign the repository to the project err = accessManager.AssignRepoToProject(tests.RtRepo1, tests.ProjectKey, true) assert.NoError(t, err) + waitForProjectInArtifactory(t, tests.ProjectKey) // Delete the build if exists inttestutils.DeleteBuild(serverDetails.ArtifactoryUrl, tests.RtBuildName1, artHttpDetails) @@ -3724,6 +3725,7 @@ func TestArtifactoryDownloadWithEnvProject(t *testing.T) { // Assign the repository to the project err = accessManager.AssignRepoToProject(tests.RtRepo1, tests.ProjectKey, true) assert.NoError(t, err) + waitForProjectInArtifactory(t, tests.ProjectKey) // Delete the build if exists inttestutils.DeleteBuild(serverDetails.ArtifactoryUrl, tests.RtBuildName1, artHttpDetails) diff --git a/lifecycle_test.go b/lifecycle_test.go index bcd6f4308..9d3128d2b 100644 --- a/lifecycle_test.go +++ b/lifecycle_test.go @@ -163,7 +163,6 @@ func TestReleaseBundleCreationFromMultiBundlesUsingCommandFlagWithProject(t *tes } }() } - waitForProjectInArtifactory(t, tests.ProjectKey) lcManager := getLcServiceManager(t) deleteBuilds := uploadBuildsWithProject(t) @@ -725,7 +724,6 @@ func TestCreateBundleWithoutSpecAndWithProject(t *testing.T) { } }() } - waitForProjectInArtifactory(t, tests.ProjectKey) lcManager := getLcServiceManager(t) deleteBuilds := uploadBuildsWithProject(t) defer deleteBuilds() @@ -1426,8 +1424,6 @@ func TestReleaseBundlesSearchVersions(t *testing.T) { } }() } - waitForProjectInArtifactory(t, tests.ProjectKey) - deleteBuildsWithProject := uploadBuildsWithProject(t) defer deleteBuildsWithProject() diff --git a/transfer_test.go b/transfer_test.go index 
c972442b8..eb39f5c92 100644 --- a/transfer_test.go +++ b/transfer_test.go @@ -559,6 +559,7 @@ func createTestProject(t *testing.T) func() error { } if assert.NoError(t, accessManager.CreateProject(accessServices.ProjectParams{ProjectDetails: projectDetails})) { + waitForProjectInArtifactory(t, tests.ProjectKey) return func() error { return accessManager.DeleteProject(tests.ProjectKey) } From 7ce606ca9331a4c02ba46ceb2075ed6eb73c5a62 Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 11:35:28 +0530 Subject: [PATCH 06/12] fix(lifecycle): retry project-scoped ops on cache propagation delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace unreliable fixed sleep with targeted retry logic. After a project is created via the Access API, Artifactory and Lifecycle services take 35-40s (sometimes longer on HA nodes) to sync their internal project cache. retryOnProjectNotFound() wraps any project-scoped operation and retries up to 12 times with 5s intervals (max 60s) when the response contains 'not found' or 'project key' — the exact errors from both Artifactory's build-publish API and Lifecycle's release-bundle creation API. Applied to: - uploadBuildWithArtifactsAndProject: retries jf rt build-publish - uploadBuildWithDepsAndProject: retries jf rt build-publish - createRbWithFlags: retries jf rbc waitForProjectInArtifactory is kept but simplified to only confirm project creation in Access (no more fixed sleep). 
Co-authored-by: Cursor --- lifecycle_test.go | 52 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/lifecycle_test.go b/lifecycle_test.go index 9d3128d2b..958f3c0c6 100644 --- a/lifecycle_test.go +++ b/lifecycle_test.go @@ -767,7 +767,9 @@ func createRbWithFlags(t *testing.T, specFilePath, sourceOption, buildName, buil argsAndOptions = append(argsAndOptions, getOption(cliutils.Draft, "true")) } - assert.NoError(t, lcCli.Exec(argsAndOptions...)) + assert.NoError(t, retryOnProjectNotFound(func() error { + return lcCli.Exec(argsAndOptions...) + })) } func updateRbWithFlags(t *testing.T, specFilePath, rbName, rbVersion, project, sourceTypeBuilds string, sync bool) { @@ -1642,7 +1644,9 @@ func uploadBuildWithArtifactsAndProject(t *testing.T, specFileName, buildName, b assert.NoError(t, err) runRt(t, "upload", "--spec="+specFile, "--build-name="+buildName, "--build-number="+buildNumber, "--project="+projectKey) - runRt(t, "build-publish", buildName, buildNumber, "--project="+projectKey) + assert.NoError(t, retryOnProjectNotFound(func() error { + return artifactoryCli.Exec("build-publish", buildName, buildNumber, "--project="+projectKey) + })) } func uploadBuildWithDepsAndProject(t *testing.T, buildName, buildNumber, projectKey string) { @@ -1655,7 +1659,9 @@ func uploadBuildWithDepsAndProject(t *testing.T, buildName, buildNumber, project runRt(t, "upload", randFile.Name(), tests.RtDevRepo, "--flat", "--project="+projectKey) assert.NoError(t, lcCli.WithoutCredentials().Exec("rt", "bad", buildName, buildNumber, tests.RtDevRepo+"/dep-file", "--from-rt")) - runRt(t, "build-publish", buildName, buildNumber, "--project="+projectKey) + assert.NoError(t, retryOnProjectNotFound(func() error { + return artifactoryCli.Exec("build-publish", buildName, buildNumber, "--project="+projectKey) + })) } func initLifecycleTest(t *testing.T, minVersion string) (cleanCallback func()) { @@ -1765,16 +1771,13 @@ type KeyPairPayload 
struct { PrivateKey string `json:"privateKey,omitempty"` // #nosec G117 -- test struct, not a real secret } -// waitForProjectInArtifactory first confirms the project is visible in the Access service, -// then waits for Artifactory's internal project cache to sync from Access. The Access API -// (GET /access/api/v1/projects/) is the correct source of truth for project existence. -// After Access confirms the project, we sleep briefly to allow Artifactory nodes to pick up -// the new project before any build-publish or release-bundle calls that scope to the project. +// waitForProjectInArtifactory polls the Access API until the project is confirmed created. +// It does NOT sleep for cache propagation — project-scoped operations use retryOnProjectNotFound +// to handle the Access→Artifactory and Access→Lifecycle cache sync delay at the call site. func waitForProjectInArtifactory(t *testing.T, projectKey string) { const ( - accessTimeout = 30 * time.Second - pollInterval = 500 * time.Millisecond - artifactoryTTL = 30 * time.Second + accessTimeout = 30 * time.Second + pollInterval = 500 * time.Millisecond ) client, err := httpclient.ClientBuilder().Build() if !assert.NoError(t, err) { @@ -1785,11 +1788,34 @@ func waitForProjectInArtifactory(t *testing.T, projectKey string) { for time.Now().Before(deadline) { resp, _, _, probErr := client.SendGet(probeURL, true, artHttpDetails, "") if probErr == nil && resp.StatusCode == http.StatusOK { - // Project confirmed in Access; give Artifactory time to sync its cache. - time.Sleep(artifactoryTTL) return } time.Sleep(pollInterval) } t.Logf("waitForProjectInArtifactory: project %q not visible in Access within %s; proceeding anyway", projectKey, accessTimeout) } + +// retryOnProjectNotFound retries fn when Artifactory or Lifecycle reports that a project is not +// yet visible — a transient condition caused by the Access→service cache propagation delay in HA +// deployments. 
Up to maxRetries attempts are made with retryInterval between each attempt. +func retryOnProjectNotFound(fn func() error) error { + const ( + maxRetries = 12 + retryInterval = 5 * time.Second + ) + var err error + for attempt := 0; attempt < maxRetries; attempt++ { + err = fn() + if err == nil { + return nil + } + errMsg := err.Error() + if !strings.Contains(errMsg, "not found") && !strings.Contains(errMsg, "project key") { + return err + } + if attempt < maxRetries-1 { + time.Sleep(retryInterval) + } + } + return err +} From e568ad58d49d1b3252e3b2aff08c7b0a5411286a Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 11:39:20 +0530 Subject: [PATCH 07/12] fix: simplify to 5 retries x 30s, remove waitForProjectInArtifactory Replace the unreliable sleep-based wait with clean retry logic: - retryOnProjectNotFound: 5 attempts, 30s between each (2.5min max) - Removed waitForProjectInArtifactory and all call sites - Artifactory build-publish retry now also applied directly in the two artifactory project tests Co-authored-by: Cursor --- artifactory_test.go | 14 ++++++++------ lifecycle_test.go | 28 ++-------------------------- transfer_test.go | 1 - 3 files changed, 10 insertions(+), 33 deletions(-) diff --git a/artifactory_test.go b/artifactory_test.go index 710690f14..41525bab8 100644 --- a/artifactory_test.go +++ b/artifactory_test.go @@ -3672,7 +3672,6 @@ func TestArtifactoryDownloadByBuildUsingSimpleDownloadWithProject(t *testing.T) // Assign the repository to the project err = accessManager.AssignRepoToProject(tests.RtRepo1, tests.ProjectKey, true) assert.NoError(t, err) - waitForProjectInArtifactory(t, tests.ProjectKey) // Delete the build if exists inttestutils.DeleteBuild(serverDetails.ArtifactoryUrl, tests.RtBuildName1, artHttpDetails) @@ -3684,8 +3683,10 @@ func TestArtifactoryDownloadByBuildUsingSimpleDownloadWithProject(t *testing.T) // Upload files with buildName, buildNumber and project flags runRt(t, "upload", "--spec="+specFileB, 
"--build-name="+tests.RtBuildName1, "--build-number="+buildNumberA, "--project="+tests.ProjectKey) - // Publish buildInfo with project flag - runRt(t, "build-publish", tests.RtBuildName1, buildNumberA, "--project="+tests.ProjectKey) + // Publish buildInfo with project flag — retried automatically if project cache not yet warm + assert.NoError(t, retryOnProjectNotFound(func() error { + return artifactoryCli.Exec("build-publish", tests.RtBuildName1, buildNumberA, "--project="+tests.ProjectKey) + })) // Download by project, b1 should be downloaded runRt(t, "download", tests.RtRepo1+"/data/b1.in", filepath.Join(tests.Out, "download", "simple_by_build")+fileutils.GetFileSeparator(), @@ -3725,7 +3726,6 @@ func TestArtifactoryDownloadWithEnvProject(t *testing.T) { // Assign the repository to the project err = accessManager.AssignRepoToProject(tests.RtRepo1, tests.ProjectKey, true) assert.NoError(t, err) - waitForProjectInArtifactory(t, tests.ProjectKey) // Delete the build if exists inttestutils.DeleteBuild(serverDetails.ArtifactoryUrl, tests.RtBuildName1, artHttpDetails) @@ -3742,8 +3742,10 @@ func TestArtifactoryDownloadWithEnvProject(t *testing.T) { // Upload files with buildName, buildNumber and project flags runRt(t, "upload", "--spec="+specFileB) - // Publish buildInfo with project flag - runRt(t, "build-publish") + // Publish buildInfo with project flag — retried automatically if project cache not yet warm + assert.NoError(t, retryOnProjectNotFound(func() error { + return artifactoryCli.Exec("build-publish") + })) // Download by project, b1 should be downloaded runRt(t, "download", tests.RtRepo1+"/data/b1.in", filepath.Join(tests.Out, "download", "simple_by_build")+fileutils.GetFileSeparator(), diff --git a/lifecycle_test.go b/lifecycle_test.go index 958f3c0c6..e56ed0543 100644 --- a/lifecycle_test.go +++ b/lifecycle_test.go @@ -1771,37 +1771,13 @@ type KeyPairPayload struct { PrivateKey string `json:"privateKey,omitempty"` // #nosec G117 -- test struct, not a real 
secret } -// waitForProjectInArtifactory polls the Access API until the project is confirmed created. -// It does NOT sleep for cache propagation — project-scoped operations use retryOnProjectNotFound -// to handle the Access→Artifactory and Access→Lifecycle cache sync delay at the call site. -func waitForProjectInArtifactory(t *testing.T, projectKey string) { - const ( - accessTimeout = 30 * time.Second - pollInterval = 500 * time.Millisecond - ) - client, err := httpclient.ClientBuilder().Build() - if !assert.NoError(t, err) { - return - } - probeURL := *tests.JfrogUrl + "access/api/v1/projects/" + projectKey - deadline := time.Now().Add(accessTimeout) - for time.Now().Before(deadline) { - resp, _, _, probErr := client.SendGet(probeURL, true, artHttpDetails, "") - if probErr == nil && resp.StatusCode == http.StatusOK { - return - } - time.Sleep(pollInterval) - } - t.Logf("waitForProjectInArtifactory: project %q not visible in Access within %s; proceeding anyway", projectKey, accessTimeout) -} - // retryOnProjectNotFound retries fn when Artifactory or Lifecycle reports that a project is not // yet visible — a transient condition caused by the Access→service cache propagation delay in HA // deployments. Up to maxRetries attempts are made with retryInterval between each attempt. 
func retryOnProjectNotFound(fn func() error) error { const ( - maxRetries = 12 - retryInterval = 5 * time.Second + maxRetries = 5 + retryInterval = 30 * time.Second ) var err error for attempt := 0; attempt < maxRetries; attempt++ { diff --git a/transfer_test.go b/transfer_test.go index eb39f5c92..c972442b8 100644 --- a/transfer_test.go +++ b/transfer_test.go @@ -559,7 +559,6 @@ func createTestProject(t *testing.T) func() error { } if assert.NoError(t, accessManager.CreateProject(accessServices.ProjectParams{ProjectDetails: projectDetails})) { - waitForProjectInArtifactory(t, tests.ProjectKey) return func() error { return accessManager.DeleteProject(tests.ProjectKey) } From 88b08af93310da380250f0070f19850f73dd6414 Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 12:09:40 +0530 Subject: [PATCH 08/12] =?UTF-8?q?fix:=20stop=20delete+recreate=20of=20proj?= =?UTF-8?q?ect=20on=20re-runs=20=E2=80=94=20root=20cause=20of=20cache=20po?= =?UTF-8?q?isoning?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On gotestsum re-runs, the ProjectKey is the same as the initial run. createTestProject was calling deleteProjectIfExists before creating, which: 1. Deleted the project left by the failed previous attempt 2. Immediately recreated it with the SAME key This delete+recreate cycle poisons Artifactory's internal project cache with a 'not found' entry for that key. Some HA nodes take 160+ seconds (entire retry budget) to invalidate this negative cache entry, causing all project-scoped ops to fail indefinitely. Fix: remove the upfront deleteProjectIfExists. Project keys are unique per suite run (full Unix timestamp), so deletion is only needed at cleanup. If the project already exists on a re-run, reuse it silently ('already exists' is treated as success). 
Co-authored-by: Cursor --- transfer_test.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/transfer_test.go b/transfer_test.go index c972442b8..8032b06f4 100644 --- a/transfer_test.go +++ b/transfer_test.go @@ -540,10 +540,13 @@ func validateCsvConflicts(t *testing.T, csvPath string, projectsSupported bool) func createTestProject(t *testing.T) func() error { accessManager, err := rtUtils.CreateAccessServiceManager(serverDetails, false) assert.NoError(t, err) - // Delete the project if already exists - deleteProjectIfExists(t, accessManager, tests.ProjectKey) - // Create new project + // Do NOT delete the project before creating it. Project keys are unique per test-suite + // run (timestamp-based), so pre-emptive deletion only occurs on gotestsum re-runs when + // the previous attempt left the project behind. Deleting and immediately recreating the + // same key poisons Artifactory's internal project cache with a "not found" entry that + // takes minutes to expire on some HA nodes, causing all subsequent project-scoped + // operations to fail with 400 "Project was not found" even after retries. 
adminPrivileges := accessServices.AdminPrivileges{ ManageMembers: utils.Pointer(false), ManageResources: utils.Pointer(false), @@ -558,12 +561,14 @@ func createTestProject(t *testing.T) func() error { ProjectKey: tests.ProjectKey, } - if assert.NoError(t, accessManager.CreateProject(accessServices.ProjectParams{ProjectDetails: projectDetails})) { - return func() error { - return accessManager.DeleteProject(tests.ProjectKey) - } + createErr := accessManager.CreateProject(accessServices.ProjectParams{ProjectDetails: projectDetails}) + if createErr != nil && !strings.Contains(createErr.Error(), "already exists") { + assert.NoError(t, createErr) + return nil + } + return func() error { + return accessManager.DeleteProject(tests.ProjectKey) } - return nil } func updateProjectParams(t *testing.T, projectParams *accessServices.Project, targetAccessManager *access.AccessServicesManager) { From 7eca84a7bc4b3150a1b0ccfcb80da1efdf36a6fc Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 13:52:30 +0530 Subject: [PATCH 09/12] fix(lifecycle): add LC project visibility probe before release-bundle creation The Lifecycle service has its own project cache separate from Artifactory's and can take 5+ minutes to warm on HA nodes running draft Artifactory builds. Running the full 'jfrog rbc' command as retries exhausted the 2.5-min retry budget long before LC was ready. waitForLifecycleProjectVisibility() polls a cheap GET endpoint: lifecycle/api/v2/release_bundle/records/non-existing-rb?project=<projectKey> - 400 Bad Request = LC doesn't know the project yet (keep waiting) - 404 Not Found = LC knows the project (proceed) Polls every 15s with a 15-minute timeout. Called inside uploadBuildsWithProject so all callers (3 project tests) wait for LC readiness before any rbc attempt. 
Co-authored-by: Cursor --- lifecycle_test.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/lifecycle_test.go b/lifecycle_test.go index e56ed0543..681574e3b 100644 --- a/lifecycle_test.go +++ b/lifecycle_test.go @@ -670,6 +670,10 @@ func uploadBuildsWithProject(t *testing.T) func() { uploadBuildWithArtifactsAndProject(t, tests.UploadDevSpecA, tests.LcBuildName1, number1, tests.ProjectKey) uploadBuildWithArtifactsAndProject(t, tests.UploadDevSpecB, tests.LcBuildName2, number2, tests.ProjectKey) uploadBuildWithDepsAndProject(t, tests.LcBuildName3, number3, tests.ProjectKey) + // Wait for the Lifecycle service to register the project before any release-bundle + // creation attempts. LC has its own cache separate from Artifactory's; it can take + // several minutes to warm in HA deployments running Artifactory draft builds. + waitForLifecycleProjectVisibility(t, tests.ProjectKey) return func() { inttestutils.DeleteBuild(serverDetails.ArtifactoryUrl, tests.LcBuildName1, artHttpDetails) inttestutils.DeleteBuild(serverDetails.ArtifactoryUrl, tests.LcBuildName2, artHttpDetails) @@ -1771,6 +1775,33 @@ type KeyPairPayload struct { PrivateKey string `json:"privateKey,omitempty"` // #nosec G117 -- test struct, not a real secret } +// waitForLifecycleProjectVisibility polls the Lifecycle service until the project is visible +// to it. LC has a project cache separate from Artifactory's and can take many minutes to warm +// in HA deployments. Polling a cheap endpoint avoids wasting the retry budget on expensive +// full CLI commands before LC is actually ready. 
+func waitForLifecycleProjectVisibility(t *testing.T, projectKey string) { + const ( + timeout = 15 * time.Minute + pollInterval = 15 * time.Second + ) + client, err := httpclient.ClientBuilder().Build() + if !assert.NoError(t, err) { + return + } + // GET on a non-existent RB returns 400 when LC doesn't know the project yet, + // and 404 when it does (RB not found, but project is recognised). + probeURL := *tests.JfrogUrl + "lifecycle/api/v2/release_bundle/records/non-existing-rb?project=" + projectKey + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + resp, _, _, probErr := client.SendGet(probeURL, true, artHttpDetails, "") + if probErr == nil && resp.StatusCode != http.StatusBadRequest { + return + } + time.Sleep(pollInterval) + } + t.Logf("waitForLifecycleProjectVisibility: LC still not aware of project %q after %s; proceeding anyway", projectKey, timeout) +} + // retryOnProjectNotFound retries fn when Artifactory or Lifecycle reports that a project is not // yet visible — a transient condition caused by the Access→service cache propagation delay in HA // deployments. Up to maxRetries attempts are made with retryInterval between each attempt. From 0ad8a42bf4d065faaf34f39e73fdce58cdd84a38 Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 14:24:13 +0530 Subject: [PATCH 10/12] fix: increase project-not-found retries to 10 (5 min max) Observed Artifactory 7.158.0 draft cache propagation taking 120-150s on some instances. 5 retries x 30s = 2.5 min was not enough. 10 retries x 30s = 5 min covers the observed worst-case propagation delay. Co-authored-by: Cursor --- lifecycle_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lifecycle_test.go b/lifecycle_test.go index 681574e3b..a47dfac2c 100644 --- a/lifecycle_test.go +++ b/lifecycle_test.go @@ -1807,7 +1807,7 @@ func waitForLifecycleProjectVisibility(t *testing.T, projectKey string) { // deployments. 
Up to maxRetries attempts are made with retryInterval between each attempt. func retryOnProjectNotFound(fn func() error) error { const ( - maxRetries = 5 + maxRetries = 10 retryInterval = 30 * time.Second ) var err error From a7425407e6792ee165da4d53f09b00a12cb3e2de Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 20:08:24 +0530 Subject: [PATCH 11/12] fix: retry project-not-found on pnpm build-publish; reuse existing project Same Artifactory project cache propagation delay that affected lifecycle and artifactory tests also hits pnpm's TestPnpmInstallAndPublishWithProject: - build-publish (bp) with --project fails with 400 Project not found - panic at line 984 follows because publishedBuildInfo is nil Changes: - Wrap 'jfrog rt bp --project' with retryOnProjectNotFound (10x 30s) - Drop DeleteProject before CreateProject to avoid cache poisoning on re-runs (same fix applied to transfer_test.go earlier) Co-authored-by: Cursor --- pnpm_test.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pnpm_test.go b/pnpm_test.go index b471a894d..b50a41976 100644 --- a/pnpm_test.go +++ b/pnpm_test.go @@ -912,9 +912,9 @@ func TestPnpmInstallAndPublishWithProject(t *testing.T) { ProjectKey: tests.ProjectKey, }, } - // First delete if exists, ignoring errors since access might not support it - _ = accessManager.DeleteProject(tests.ProjectKey) - if err = accessManager.CreateProject(projectParams); err != nil { + // Create project — silently reuse if it already exists to avoid cache poisoning + // from delete+recreate on re-runs. 
+ if err = accessManager.CreateProject(projectParams); err != nil && !strings.Contains(err.Error(), "already exists") { t.Skipf("Skipping project test - cannot create project: %v", err) } @@ -964,8 +964,10 @@ func TestPnpmInstallAndPublishWithProject(t *testing.T) { "--build-name="+buildName, "--build-number="+buildNumber, "--project="+tests.ProjectKey) - // Publish build info with --project flag - assert.NoError(t, artifactoryCli.Exec("bp", buildName, buildNumber, "--project="+tests.ProjectKey)) + // Publish build info with --project flag — retry on Artifactory project cache lag + assert.NoError(t, retryOnProjectNotFound(func() error { + return artifactoryCli.Exec("bp", buildName, buildNumber, "--project="+tests.ProjectKey) + })) // Restore working directory clientTestUtils.ChangeDirAndAssert(t, wd) From e08ca998e9cc15cbc37dd390bc9943b34cf00503 Mon Sep 17 00:00:00 2001 From: Naveen Kumar Date: Fri, 19 Jun 2026 20:10:59 +0530 Subject: [PATCH 12/12] fix: clean up comments in TestPnpmInstallAndPublishWithProject Co-authored-by: Cursor --- pnpm_test.go | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/pnpm_test.go b/pnpm_test.go index b50a41976..1019f88a9 100644 --- a/pnpm_test.go +++ b/pnpm_test.go @@ -898,22 +898,17 @@ func TestPnpmBuildPublishWithCIVcsProps(t *testing.T) { func TestPnpmInstallAndPublishWithProject(t *testing.T) { initPnpmTest(t) - // Create Access service manager and project before deferring cleanPnpmTest, - // so that t.Skipf doesn't trigger cleanup asserts that override the skip status. 
accessManager, err := utils.CreateAccessServiceManager(serverDetails, false) if err != nil { t.Skipf("Skipping project test - cannot create access manager: %v", err) } - // Try creating project first to verify access works before deferring any cleanup projectParams := accessServices.ProjectParams{ ProjectDetails: accessServices.Project{ DisplayName: "pnpm-project-test " + tests.ProjectKey, ProjectKey: tests.ProjectKey, }, } - // Create project — silently reuse if it already exists to avoid cache poisoning - // from delete+recreate on re-runs. if err = accessManager.CreateProject(projectParams); err != nil && !strings.Contains(err.Error(), "already exists") { t.Skipf("Skipping project test - cannot create project: %v", err) } @@ -925,7 +920,6 @@ func TestPnpmInstallAndPublishWithProject(t *testing.T) { _ = accessManager.DeleteProject(tests.ProjectKey) }() - // Assign npm repos to the project err = accessManager.AssignRepoToProject(tests.NpmRepo, tests.ProjectKey, true) assert.NoError(t, err) err = accessManager.AssignRepoToProject(tests.NpmRemoteRepo, tests.ProjectKey, true) @@ -941,38 +935,30 @@ func TestPnpmInstallAndPublishWithProject(t *testing.T) { buildName := tests.PnpmBuildName + "-project" buildNumber := "800" - // Clean old build inttestutils.DeleteBuild(serverDetails.ArtifactoryUrl, buildName, artHttpDetails) defer inttestutils.DeleteBuild(serverDetails.ArtifactoryUrl, buildName, artHttpDetails) - // Setup pnpm project pnpmProjectPath := createPnpmProject(t, "pnpmproject") projectDir := filepath.Dir(pnpmProjectPath) prepareArtifactoryForPnpmBuild(t, projectDir) - clientTestUtils.ChangeDirAndAssert(t, projectDir) - // Run pnpm install with --project flag runJfrogCli(t, "pnpm", "install", "--store-dir="+tempCacheDirPath, "--build-name="+buildName, "--build-number="+buildNumber, "--project="+tests.ProjectKey) - // Run pnpm publish with --project flag cleanupAuth := setupPnpmPublishAuth(t, tests.NpmRepo) defer cleanupAuth() runJfrogCli(t, "pnpm", "publish", 
"--no-git-checks", "--build-name="+buildName, "--build-number="+buildNumber, "--project="+tests.ProjectKey) - // Publish build info with --project flag — retry on Artifactory project cache lag assert.NoError(t, retryOnProjectNotFound(func() error { return artifactoryCli.Exec("bp", buildName, buildNumber, "--project="+tests.ProjectKey) })) - // Restore working directory clientTestUtils.ChangeDirAndAssert(t, wd) - // Get the published build info with project key servicesManager, err := utils.CreateServiceManager(serverDetails, -1, 0, false) assert.NoError(t, err) params := artServices.NewBuildInfoParams() @@ -984,7 +970,6 @@ func TestPnpmInstallAndPublishWithProject(t *testing.T) { assert.True(t, found, "Build info was not found for project %s", tests.ProjectKey) bi := publishedBuildInfo.BuildInfo - // pnpm install + publish on the same build should produce 1 module with both deps and artifacts if assert.NotEmpty(t, bi.Modules, "Build info should contain modules") { assert.NotEmpty(t, bi.Modules[0].Dependencies, "Module should have dependencies from pnpm install") assert.NotEmpty(t, bi.Modules[0].Artifacts, "Module should have artifacts from pnpm publish")