Skip to content

Commit 007d981

Browse files
Handle publish divergence recovery and guidance
Co-authored-by: Andrew <andrewxhill@gmail.com>
1 parent af92988 commit 007d981

16 files changed

Lines changed: 396 additions & 25 deletions

.agents/skills/worktree/SKILL.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,17 @@ id instead of polling by branch name. Follow the submission, not the logs.
177177
Do not use sleeps, branch-name polling, `mq logs`, `mq events`, or `mq watch` as the
178178
primary way to decide whether a queued change finished.
179179

180+
If a publish request fails because remote `main` moved first, prefer retrying
181+
the publish request from the protected worktree:
182+
183+
```bash
184+
mq retry --repo ~/Projects/recallnet/mainline --publish <id>
185+
```
186+
187+
`mq` now attempts the safe case automatically by fetching upstream, rebasing the
188+
unpublished protected-branch commits onto the updated remote tip when that
189+
replay is clean, and retrying the push.
190+
180191
If the branch is behind local protected `main` or a submission blocks on a
181192
rebase conflict, use `mq rebase` as the default repair path:
182193

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,17 @@ If the wrapper expects remote landing as part of the same job, prefer:
108108
mq land --json --timeout 30m
109109
```
110110

111+
If publish loses a race because remote `main` moved first, rerun the failed
112+
publish request instead of hand-repairing protected `main`:
113+
114+
```bash
115+
mq retry --repo /path/to/protected-worktree --publish 4
116+
```
117+
118+
When the unpublished protected-branch commits can be replayed cleanly, `mq`
119+
fetches upstream, rebases the protected branch onto the updated remote tip, and
120+
retries the push automatically.
121+
111122
If a submission blocks because its topic branch is behind local protected
112123
`main`, use `mq rebase` instead of hand-rolling the Git repair:
113124

@@ -383,6 +394,7 @@ mq watch --repo /path/to/repo-root
383394
mq events --repo /path/to/repo-root --follow --json --lifecycle
384395
mq registry prune --json
385396
mq retry --repo /path/to/repo-root --submission 17
397+
mq retry --repo /path/to/protected-worktree --publish 4
386398
mq cancel --repo /path/to/repo-root --publish 4
387399
```
388400

SPEC.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,17 @@ mq wait --submission <id> --for landed --json --timeout 30m
116116
mq land --json --timeout 30m
117117
```
118118

119+
If a publish request fails because remote `main` advanced first, the supported
120+
operator recovery path is:
121+
122+
```bash
123+
mq retry --repo /path/to/protected-worktree --publish <id>
124+
```
125+
126+
Before retrying the push, `mq` may fetch upstream and rebase the protected
127+
branch onto the updated upstream tip when unpublished local commits replay
128+
cleanly.
129+
119130
### Machine-Wide Daemon Flow
120131

121132
```bash

docs/FLOWS.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,16 @@ mq submit --allow-newer-head --wait --timeout 10m --json
211211
That only permits forward movement. If the queued branch rewinds or moves to a
212212
non-descendant tip, `mq` still fails the submission and asks for a resubmit.
213213

214+
If a publish request fails because remote `main` advanced first, prefer:
215+
216+
```bash
217+
mq retry --repo /path/to/protected-worktree --publish 4
218+
```
219+
220+
That lets `mq` fetch upstream, replay unpublished protected-branch commits onto
221+
the updated remote tip when the rebase is clean, and retry the push without
222+
manual protected-worktree repair.
223+
214224
Exit codes:
215225

216226
- `0`: integrated

docs/JSON_CONTRACTS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ submission record with optional blocked-state diagnostics:
156156
- `publish_request_id`
157157
- `publish_status`
158158
- `outcome`
159+
- `publish_failure_cause`
160+
- `publish_failure_summary`
161+
- `publish_failure_error`
159162
- `queue_position`
160163
- `estimated_completion_ms`
161164
- `estimate_basis`
@@ -321,6 +324,10 @@ Optional fields:
321324
- `protected_sha`
322325
- `publish_request_id`
323326
- `publish_status`
327+
- `publish_failure_cause`
328+
- `publish_failure_summary`
329+
- `publish_failure_error`
330+
- `retry_hint`
324331
- `last_worker_result`
325332
- `error`
326333

internal/app/app_test.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"database/sql"
77
"encoding/json"
88
"errors"
9+
"fmt"
910
"os"
1011
"path/filepath"
1112
"slices"
@@ -911,6 +912,104 @@ func TestStatusJSONCorrelatesSubmissionToPublish(t *testing.T) {
911912
}
912913
}
913914

915+
// TestStatusAndWaitSurfaceRetryCommandForRejectedPublishAfterRemoteAdvance
// checks the operator guidance emitted once a publish request has been marked
// failed with a rejected-push event and remote main has moved ahead:
// `wait --json` must report the "retry-publish-after-protected-reconcile"
// retry hint, and both `wait --json` and `status --json` must include an
// `mq retry --repo ... --publish <id>` command for the failed request.
func TestStatusAndWaitSurfaceRetryCommandForRejectedPublishAfterRemoteAdvance(t *testing.T) {
916+
repoRoot, remoteDir := createTestRepoWithRemote(t)
917+
initRepoForWorker(t, repoRoot)
918+
runTestCommand(t, repoRoot, "git", "push", "origin", "main")
919+
updatePublishMode(t, repoRoot, "auto")
920+
921+
// Create a feature worktree with one commit, submit it, and call runOnce.
// The length check further down expects this to leave exactly one
// submission and one publish request in the store.
featurePath := filepath.Join(t.TempDir(), "feature-status-publish-reject")
922+
runTestCommand(t, repoRoot, "git", "worktree", "add", "-b", "feature/status-publish-reject", featurePath)
923+
writeFileAndCommit(t, featurePath, "status.txt", "status\n", "status feature")
924+
submitBranch(t, featurePath)
925+
runOnce(t, repoRoot)
926+
927+
layout, err := git.DiscoverRepositoryLayout(repoRoot)
928+
if err != nil {
929+
t.Fatalf("DiscoverRepositoryLayout: %v", err)
930+
}
931+
store := state.NewStore(state.DefaultPath(layout.GitDir))
932+
repoRecord, err := store.GetRepositoryByPath(context.Background(), layout.RepositoryRoot)
933+
if err != nil {
934+
t.Fatalf("GetRepositoryByPath: %v", err)
935+
}
936+
submissions, err := store.ListIntegrationSubmissions(context.Background(), repoRecord.ID)
937+
if err != nil {
938+
t.Fatalf("ListIntegrationSubmissions: %v", err)
939+
}
940+
requests, err := store.ListPublishRequests(context.Background(), repoRecord.ID)
941+
if err != nil {
942+
t.Fatalf("ListPublishRequests: %v", err)
943+
}
944+
if len(submissions) != 1 || len(requests) != 1 {
945+
t.Fatalf("expected 1 submission and 1 publish request, got %+v %+v", submissions, requests)
946+
}
// Force the publish request into the failed state directly in the store,
// then record a publish-failed event whose payload carries a rejected
// ("fetch first") git push error tagged with the push stage and the
// git-push-failed kind.
947+
if _, err := store.UpdatePublishRequestStatus(context.Background(), requests[0].ID, domain.PublishStatusFailed, sql.NullInt64{}); err != nil {
948+
t.Fatalf("UpdatePublishRequestStatus: %v", err)
949+
}
950+
if err := appendStateEvent(context.Background(), store, state.EventRecord{
951+
RepoID: repoRecord.ID,
952+
ItemType: domain.ItemTypePublishRequest,
953+
ItemID: state.NullInt64(requests[0].ID),
954+
EventType: domain.EventTypePublishFailed,
955+
Payload: mustJSON(map[string]string{
956+
"target_sha": requests[0].TargetSHA,
957+
"error": "git push was rejected: To github.com:recallnet/tradecore.git\n ! [rejected] main -> main (fetch first)\nerror: failed to push some refs to 'github.com:recallnet/tradecore.git'",
958+
"kind": publishFailureKindGitPushFailed,
959+
"stage": publishStagePush,
960+
}),
961+
}); err != nil {
962+
t.Fatalf("appendStateEvent: %v", err)
963+
}
964+
965+
// Advance remote main from a separate clone, then fetch origin/main in the
// repo under test so its remote-tracking ref reflects the new upstream tip.
upstreamClone := filepath.Join(t.TempDir(), "upstream-clone")
966+
runTestCommand(t, t.TempDir(), "git", "clone", remoteDir, upstreamClone)
967+
runTestCommand(t, upstreamClone, "git", "config", "user.name", "Test User")
968+
runTestCommand(t, upstreamClone, "git", "config", "user.email", "test@example.com")
969+
runTestCommand(t, upstreamClone, "git", "config", "core.hooksPath", ".git/hooks")
970+
writeFileAndCommit(t, upstreamClone, "upstream.txt", "upstream\n", "upstream advance")
971+
runTestCommand(t, upstreamClone, "git", "push", "origin", "main")
972+
runTestCommand(t, repoRoot, "git", "fetch", "origin", "main")
973+
974+
// `wait --for landed` must return an error for the failed publish, yet the
// JSON written to stdout is still decoded and checked for the retry hint
// and the retry command.
var waitOut bytes.Buffer
975+
var waitErr bytes.Buffer
976+
err = runCLI([]string{"wait", "--repo", repoRoot, "--submission", strconv.FormatInt(submissions[0].ID, 10), "--for", "landed", "--json", "--timeout", "1s"}, newStepPrinter(&waitOut), &waitErr)
977+
if err == nil {
978+
t.Fatalf("expected landed wait to fail")
979+
}
980+
981+
var waitResult submissionWaitResult
982+
if err := json.Unmarshal(waitOut.Bytes(), &waitResult); err != nil {
983+
t.Fatalf("Unmarshal wait: %v", err)
984+
}
985+
if waitResult.RetryHint != "retry-publish-after-protected-reconcile" {
986+
t.Fatalf("expected protected reconcile retry hint, got %+v", waitResult)
987+
}
988+
if !strings.Contains(waitResult.PublishFailureSummary, "mq retry --repo") || !strings.Contains(waitResult.PublishFailureSummary, fmt.Sprintf("--publish %d", requests[0].ID)) {
989+
t.Fatalf("expected retry command in wait summary, got %+v", waitResult)
990+
}
991+
992+
// `status --json` must succeed and expose the same retry command both in
// the latest submission's failure summary and in its first next action.
var statusOut bytes.Buffer
993+
var statusErr bytes.Buffer
994+
if err := runCLI([]string{"status", "--repo", repoRoot, "--json"}, newStepPrinter(&statusOut), &statusErr); err != nil {
995+
t.Fatalf("runCLI status returned error: %v", err)
996+
}
997+
998+
var statusResult statusResult
999+
if err := json.Unmarshal(statusOut.Bytes(), &statusResult); err != nil {
1000+
t.Fatalf("Unmarshal status: %v", err)
1001+
}
1002+
if statusResult.LatestSubmission == nil {
1003+
t.Fatalf("expected latest submission, got %+v", statusResult)
1004+
}
1005+
if !strings.Contains(statusResult.LatestSubmission.PublishFailureSummary, "mq retry --repo") || !strings.Contains(statusResult.LatestSubmission.PublishFailureSummary, fmt.Sprintf("--publish %d", requests[0].ID)) {
1006+
t.Fatalf("expected retry command in status summary, got %+v", statusResult.LatestSubmission)
1007+
}
1008+
if len(statusResult.LatestSubmission.NextActions) == 0 || !strings.Contains(statusResult.LatestSubmission.NextActions[0].Command, "mq retry --repo") || !strings.Contains(statusResult.LatestSubmission.NextActions[0].Command, fmt.Sprintf("--publish %d", requests[0].ID)) {
1009+
t.Fatalf("expected retry next action, got %+v", statusResult.LatestSubmission.NextActions)
1010+
}
1011+
}
1012+
9141013
func TestStatusJSONReportsPublishExecutionAndProtectedWorktreeActivity(t *testing.T) {
9151014
repoRoot, _ := createTestRepoWithRemote(t)
9161015
initRepoForWorker(t, repoRoot)

internal/app/execution_estimate.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func collectExecutionEstimate(ctx context.Context, store state.Store, repoID int
4444
integrationDurations = append(integrationDurations, succeededAt.Sub(startedAt).Milliseconds())
4545
}
4646

47-
info, err := resolveSubmissionPublishInfo(ctx, store, repoID, submission, mainEngine)
47+
info, err := resolveSubmissionPublishInfo(ctx, store, repoID, submission, mainEngine, cfg.Repo.ProtectedBranch)
4848
if err != nil {
4949
return executionEstimate{}, err
5050
}

internal/app/land.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,13 @@ func waitForLandedPublish(queued queuedSubmission, timeout time.Duration, pollIn
255255
result.Published = true
256256
return result, nil
257257
} else if result.PublishStatus == "failed" || result.PublishStatus == "cancelled" || result.PublishStatus == "superseded" {
258+
if result.PublishStatus == "failed" {
259+
info, infoErr := resolvePublishFailureInfo(ctx, queued.Store, queued.RepoRecord.ID, result.PublishRequestID, mainEngine, queued.Config.Repo.ProtectedBranch)
260+
if infoErr == nil && info.Summary != "" {
261+
result.Error = fmt.Sprintf("publish request %d failed: %s", result.PublishRequestID, info.Summary)
262+
return result, fmt.Errorf("publish request %d failed: %s", result.PublishRequestID, info.Summary)
263+
}
264+
}
258265
result.Error = fmt.Sprintf("publish request %d %s", result.PublishRequestID, result.PublishStatus)
259266
return result, fmt.Errorf("publish request %d %s", result.PublishRequestID, result.PublishStatus)
260267
}

internal/app/protected_root_recovery.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ func ensureProtectedRootHealthy(ctx context.Context, engine git.Engine, lockMana
3838
return report, nil
3939
}
4040

41-
repaired, _, repairErr := tryRepairCanonicalProtectedRootWithMode(ctx, engine, cfg, store, repoRecord, mode)
41+
repaired, repairNote, repairErr := tryRepairCanonicalProtectedRootWithMode(ctx, engine, cfg, store, repoRecord, mode)
4242
if repairErr != nil {
4343
return git.HealthReport{}, repairErr
4444
}
@@ -60,6 +60,9 @@ func ensureProtectedRootHealthy(ctx context.Context, engine git.Engine, lockMana
6060
return git.HealthReport{}, protectedWorktreeDirtyError(cfg.Repo.MainWorktree, report.ProtectedDirtyPaths, activity)
6161
}
6262
if report.HasDivergedUpstream {
63+
if repairNote != "" {
64+
return git.HealthReport{}, fmt.Errorf("%s", repairNote)
65+
}
6366
return git.HealthReport{}, fmt.Errorf("protected branch %q has diverged from upstream %s", cfg.Repo.ProtectedBranch, report.UpstreamRef)
6467
}
6568
return report, nil

internal/app/publish_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,69 @@ func TestDrainRepoUntilSettledSleepsThroughDelayedPublishRetry(t *testing.T) {
740740
}
741741
}
742742

743+
// TestRetryPublishAutoRebasesProtectedBranchAfterRemoteAdvance checks that
// retrying a failed publish request after remote main has gained a commit
// succeeds without manual repair: afterwards the remote main ref equals local
// HEAD, the parent of local HEAD is the upstream commit, and the publish
// request is recorded as succeeded.
func TestRetryPublishAutoRebasesProtectedBranchAfterRemoteAdvance(t *testing.T) {
744+
repoRoot, remoteDir := createTestRepoWithRemote(t)
745+
initRepoForWorker(t, repoRoot)
746+
runTestCommand(t, repoRoot, "git", "push", "origin", "main")
// MAINLINE_DISABLE_MUTATION_DRAIN is "1" only while the local commit is
// made and the publish is queued, and is cleared right after.
// NOTE(review): presumably this keeps the queued request from being
// processed immediately — confirm against queuePublish.
747+
t.Setenv("MAINLINE_DISABLE_MUTATION_DRAIN", "1")
748+
749+
writeFileAndCommit(t, repoRoot, "local.txt", "local\n", "main change local")
750+
queuePublish(t, repoRoot)
751+
t.Setenv("MAINLINE_DISABLE_MUTATION_DRAIN", "")
752+
753+
layout, err := git.DiscoverRepositoryLayout(repoRoot)
754+
if err != nil {
755+
t.Fatalf("DiscoverRepositoryLayout: %v", err)
756+
}
757+
store := state.NewStore(state.DefaultPath(layout.GitDir))
758+
repoRecord, err := store.GetRepositoryByPath(context.Background(), layout.RepositoryRoot)
759+
if err != nil {
760+
t.Fatalf("GetRepositoryByPath: %v", err)
761+
}
762+
requests, err := store.ListPublishRequests(context.Background(), repoRecord.ID)
763+
if err != nil {
764+
t.Fatalf("ListPublishRequests: %v", err)
765+
}
766+
if len(requests) != 1 {
767+
t.Fatalf("expected 1 publish request, got %d", len(requests))
768+
}
// Mark the single publish request failed directly in the store, passing
// through its existing SupersededBy value unchanged.
769+
if _, err := store.UpdatePublishRequestStatus(context.Background(), requests[0].ID, domain.PublishStatusFailed, requests[0].SupersededBy); err != nil {
770+
t.Fatalf("UpdatePublishRequestStatus: %v", err)
771+
}
772+
773+
// Advance remote main from a separate clone and remember the new upstream
// commit. The repo under test is not fetched here, so any knowledge of the
// new tip has to come from the retry itself.
upstreamClone := filepath.Join(t.TempDir(), "upstream-clone")
774+
runTestCommand(t, t.TempDir(), "git", "clone", remoteDir, upstreamClone)
775+
runTestCommand(t, upstreamClone, "git", "config", "user.name", "Test User")
776+
runTestCommand(t, upstreamClone, "git", "config", "user.email", "test@example.com")
777+
runTestCommand(t, upstreamClone, "git", "config", "core.hooksPath", ".git/hooks")
778+
writeFileAndCommit(t, upstreamClone, "upstream.txt", "upstream\n", "upstream advance")
779+
upstreamHead := trimNewline(runTestCommand(t, upstreamClone, "git", "rev-parse", "HEAD"))
780+
runTestCommand(t, upstreamClone, "git", "push", "origin", "main")
781+
782+
var retryOut bytes.Buffer
783+
var retryErr bytes.Buffer
784+
if err := runRetry([]string{"--repo", repoRoot, "--publish", strconv.FormatInt(requests[0].ID, 10)}, newStepPrinter(&retryOut), &retryErr); err != nil {
785+
t.Fatalf("runRetry returned error: %v", err)
786+
}
787+
788+
// The push landed: the remote's main ref must match local HEAD.
localHead := trimNewline(runTestCommand(t, repoRoot, "git", "rev-parse", "HEAD"))
789+
remoteHead := trimNewline(runTestCommand(t, remoteDir, "git", "rev-parse", "refs/heads/main"))
790+
if remoteHead != localHead {
791+
t.Fatalf("expected remote head %q, got %q", localHead, remoteHead)
792+
}
// The local commit was replayed on top of upstream: HEAD^ must be the
// commit pushed from the upstream clone.
793+
parent := trimNewline(runTestCommand(t, repoRoot, "git", "rev-parse", "HEAD^"))
794+
if parent != upstreamHead {
795+
t.Fatalf("expected rebased protected branch to sit on upstream head %q, got %q", upstreamHead, parent)
796+
}
797+
refreshed, err := store.GetPublishRequest(context.Background(), requests[0].ID)
798+
if err != nil {
799+
t.Fatalf("GetPublishRequest: %v", err)
800+
}
801+
if refreshed.Status != domain.PublishStatusSucceeded {
802+
t.Fatalf("expected publish request succeeded after retry, got %+v", refreshed)
803+
}
804+
}
805+
743806
func TestIsTransientPublishErrorRecognizesGitHTTPStatusShape(t *testing.T) {
744807
err := errors.New("fatal: unable to access 'https://github.com/acme/repo.git/': The requested URL returned error: 503")
745808
if !isTransientPublishError(err) {

0 commit comments

Comments
 (0)