|
8 | 8 | "io" |
9 | 9 | "log/slog" |
10 | 10 | "os" |
| 11 | + "strconv" |
11 | 12 | "strings" |
12 | 13 |
|
13 | 14 | "github.com/entireio/cli/cmd/entire/cli/agent" |
@@ -135,6 +136,116 @@ func (s *V2GitStore) findFullSessionArtifacts(checkpointID id.CheckpointID, sess |
135 | 136 | return fullSessionArtifacts{}, nil |
136 | 137 | } |
137 | 138 |
|
| 139 | +// FullSessionArtifactsIndex is a set of checkpoint/session keys for sessions with complete /full/* |
| 140 | +// artifacts; membership is answered by a single map lookup. Keys are produced by fullArtifactsIndexKey. |
| 141 | +// Build it once via BuildFullSessionArtifactsIndex and query it with Has. |
| 142 | +type FullSessionArtifactsIndex map[string]struct{} |
| 143 | + |
| 144 | +// Has reports whether the index holds an entry for the given checkpoint and session index, meaning a complete pair of |
| 145 | +// raw_transcript and raw_transcript_hash.txt entries was found in some /full/* ref. A nil index always reports false. |
| 146 | +func (idx FullSessionArtifactsIndex) Has(checkpointID id.CheckpointID, sessionIndex int) bool { |
| 147 | + if idx == nil { |
| 148 | + return false |
| 149 | + } |
| 150 | + _, ok := idx[fullArtifactsIndexKey(checkpointID, sessionIndex)] |
| 151 | + return ok |
| 152 | +} |
| 153 | + |
// fullArtifactsIndexKey returns the map key for one session: the checkpoint
// ID, a slash, then the session index in decimal.
| 154 | +func fullArtifactsIndexKey(checkpointID id.CheckpointID, sessionIndex int) string { |
| 155 | + return string(checkpointID) + "/" + strconv.Itoa(sessionIndex) |
| 156 | +} |
| 157 | + |
| 158 | +// BuildFullSessionArtifactsIndex walks the root tree of every ref returned by fullRefSearchOrder (the /full/* refs) once and |
| 159 | +// records sessions whose subtree contains both raw_transcript (or a raw_transcript.NNN chunk) and |
| 160 | +// raw_transcript_hash.txt. Refs that do not exist are skipped; any other read or walk error aborts the build and returns a nil index. |
| 161 | +// The result amortizes per-session HasFullSessionArtifacts calls — each of which would otherwise list every git ref and re-walk every |
| 162 | +// /full/* tree — across the rest of the run. |
| 163 | +func (s *V2GitStore) BuildFullSessionArtifactsIndex() (FullSessionArtifactsIndex, error) { |
| 164 | + refNames, err := s.fullRefSearchOrder() |
| 165 | + if err != nil { |
| 166 | + return nil, err |
| 167 | + } |
| 168 | + |
| 169 | + index := make(FullSessionArtifactsIndex) |
| 170 | + for _, refName := range refNames { |
| 171 | + _, rootTreeHash, refErr := s.GetRefState(refName) |
| 172 | + if refErr != nil { |
// A ref from the search order that does not exist contributes nothing; keep going.
| 173 | + if errors.Is(refErr, plumbing.ErrReferenceNotFound) { |
| 174 | + continue |
| 175 | + } |
| 176 | + return nil, fmt.Errorf("read %s: %w", refName, refErr) |
| 177 | + } |
| 178 | + rootTree, treeErr := s.repo.TreeObject(rootTreeHash) |
| 179 | + if treeErr != nil { |
| 180 | + return nil, fmt.Errorf("read %s root tree: %w", refName, treeErr) |
| 181 | + } |
| 182 | + keys, err := s.listFullSessionsInTree(rootTree) |
| 183 | + if err != nil { |
| 184 | + return nil, fmt.Errorf("walk %s: %w", refName, err) |
| 185 | + } |
// Merge this ref's keys; a session found in several refs collapses to one set entry.
| 186 | + for _, key := range keys { |
| 187 | + index[key] = struct{}{} |
| 188 | + } |
| 189 | + } |
| 190 | + return index, nil |
| 191 | +} |
| 192 | + |
// listFullSessionsInTree walks a root tree three levels deep (shard directory,
// checkpoint directory, session directory) and returns the index key of every
// session whose tree passes sessionHasCompleteFullArtifacts. Non-directory
// entries, shard names that are not exactly two characters long, and session
// names that do not parse as integers are skipped. A tree object that cannot
// be read aborts the walk and returns the error.
| 193 | +func (s *V2GitStore) listFullSessionsInTree(rootTree *object.Tree) ([]string, error) { |
| 194 | + var keys []string |
| 195 | + for _, shardEntry := range rootTree.Entries { |
| 196 | + if shardEntry.Mode != filemode.Dir || len(shardEntry.Name) != 2 { |
| 197 | + continue |
| 198 | + } |
| 199 | + shardTree, err := s.repo.TreeObject(shardEntry.Hash) |
| 200 | + if err != nil { |
| 201 | + return nil, fmt.Errorf("read shard %s: %w", shardEntry.Name, err) |
| 202 | + } |
| 203 | + for _, cpEntry := range shardTree.Entries { |
| 204 | + if cpEntry.Mode != filemode.Dir { |
| 205 | + continue |
| 206 | + } |
| 207 | + cpTree, err := s.repo.TreeObject(cpEntry.Hash) |
| 208 | + if err != nil { |
| 209 | + return nil, fmt.Errorf("read checkpoint tree %s/%s: %w", shardEntry.Name, cpEntry.Name, err) |
| 210 | + } |
// The checkpoint ID is the shard name followed by the checkpoint directory name.
| 211 | + cpid := id.CheckpointID(shardEntry.Name + cpEntry.Name) |
| 212 | + for _, sessionEntry := range cpTree.Entries { |
| 213 | + if sessionEntry.Mode != filemode.Dir { |
| 214 | + continue |
| 215 | + } |
// NOTE(review): strconv.Atoi also accepts a leading sign and leading zeros
// (for example +1 or 007), so such a directory name would map onto the
// canonical key for that number; confirm these names cannot occur.
| 216 | + sessionIdx, atoiErr := strconv.Atoi(sessionEntry.Name) |
| 217 | + if atoiErr != nil { |
| 218 | + continue |
| 219 | + } |
| 220 | + sessionTree, err := s.repo.TreeObject(sessionEntry.Hash) |
| 221 | + if err != nil { |
| 222 | + return nil, fmt.Errorf("read session tree %s/%s/%d: %w", shardEntry.Name, cpEntry.Name, sessionIdx, err) |
| 223 | + } |
| 224 | + if !sessionHasCompleteFullArtifacts(sessionTree.Entries) { |
| 225 | + continue |
| 226 | + } |
| 227 | + keys = append(keys, fullArtifactsIndexKey(cpid, sessionIdx)) |
| 228 | + } |
| 229 | + } |
| 230 | + } |
| 231 | + return keys, nil |
| 232 | +} |
| 233 | + |
// sessionHasCompleteFullArtifacts reports whether a session tree's entries
// include both a transcript (an entry named paths.V2RawTranscriptFileName, or
// that name followed by a dot and a suffix) and the hash file named
// paths.V2RawTranscriptHashFileName. Only entry names are compared; entry
// modes are not inspected.
| 234 | +func sessionHasCompleteFullArtifacts(entries []object.TreeEntry) bool { |
| 235 | + hasTranscript := false |
| 236 | + hasHash := false |
| 237 | + for _, entry := range entries { |
| 238 | + switch { |
| 239 | + case entry.Name == paths.V2RawTranscriptFileName, |
| 240 | + strings.HasPrefix(entry.Name, paths.V2RawTranscriptFileName+"."): |
| 241 | + hasTranscript = true |
| 242 | + case entry.Name == paths.V2RawTranscriptHashFileName: |
| 243 | + hasHash = true |
| 244 | + } |
| 245 | + } |
| 246 | + return hasTranscript && hasHash |
| 247 | +} |
| 248 | + |
138 | 249 | func (s *V2GitStore) fullRefSearchOrder() ([]plumbing.ReferenceName, error) { |
139 | 250 | refNames := []plumbing.ReferenceName{plumbing.ReferenceName(paths.V2FullCurrentRefName)} |
140 | 251 |
|
@@ -861,57 +972,3 @@ func (s *V2GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoi |
861 | 972 | commitMsg := fmt.Sprintf("Update summary for checkpoint %s (session: %s)", checkpointID, metadata.SessionID) |
862 | 973 | return s.updateRef(ctx, refName, newTreeHash, parentHash, commitMsg, authorName, authorEmail) |
863 | 974 | } |
864 | | - |
| 865 | | -// CleanupV1TranscriptFiles removes legacy v1-named transcript files (full.jsonl, |
| 866 | | -// full.jsonl.*, content_hash.txt) from /full/current for session indexes 0 through sessionCount-1 of the given checkpoint. |
| 867 | | -// Older CLI versions wrote these before the rename to raw_transcript. When any entry is removed, the checkpoint subtree is re-spliced and the ref is updated via updateRef. |
| 868 | | -// Returns nil without touching the ref if /full/current doesn't exist or no v1 files were found. |
| 869 | | -func (s *V2GitStore) CleanupV1TranscriptFiles(ctx context.Context, checkpointID id.CheckpointID, sessionCount int) error { |
| 870 | | - refName := plumbing.ReferenceName(paths.V2FullCurrentRefName) |
| 871 | | - parentHash, rootTreeHash, err := s.GetRefState(refName) |
| 872 | | - if err != nil { |
| 873 | | - if errors.Is(err, plumbing.ErrReferenceNotFound) { |
| 874 | | - return nil // /full/current doesn't exist yet — nothing to clean |
| 875 | | - } |
| 876 | | - return err |
| 877 | | - } |
| 878 | | - |
| 879 | | - checkpointPath := checkpointID.Path() |
| 880 | | - basePath := checkpointPath + "/" |
| 881 | | - |
| 882 | | - entries, err := s.gs.flattenCheckpointEntries(rootTreeHash, checkpointPath) |
| 883 | | - if err != nil { |
| 884 | | - return err |
| 885 | | - } |
| 886 | | - |
| 887 | | - changed := false |
| 888 | | - for sessionIdx := range sessionCount { |
| 889 | | - sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIdx) |
| 890 | | - v1TranscriptPath := sessionPath + paths.TranscriptFileName |
| 891 | | - v1HashPath := sessionPath + paths.ContentHashFileName |
| 892 | | - |
| 893 | | - for key := range entries { |
| 894 | | - switch { |
| 895 | | - case key == v1TranscriptPath, |
| 896 | | - strings.HasPrefix(key, v1TranscriptPath+"."), |
| 897 | | - key == v1HashPath: |
| 898 | | - delete(entries, key) |
| 899 | | - changed = true |
| 900 | | - } |
| 901 | | - } |
| 902 | | - } |
| 903 | | - |
| 904 | | - if !changed { |
| 905 | | - return nil |
| 906 | | - } |
| 907 | | - |
| 908 | | - newTreeHash, err := s.gs.spliceCheckpointSubtree(ctx, rootTreeHash, checkpointID, basePath, entries) |
| 909 | | - if err != nil { |
| 910 | | - return fmt.Errorf("tree surgery failed: %w", err) |
| 911 | | - } |
| 912 | | - |
| 913 | | - authorName, authorEmail := GetGitAuthorFromRepo(s.repo) |
| 914 | | - return s.updateRef(ctx, refName, newTreeHash, parentHash, |
| 915 | | - fmt.Sprintf("Clean up v1 transcript files for %s\n", checkpointID), |
| 916 | | - authorName, authorEmail) |
| 917 | | -} |
|
0 commit comments