Commit 3577700

gopls/internal/lsp/cache: use time-based eviction in parseCache
Our arbitrary choice of caching 200 recent files has proven to be problematic when editing very large packages (golang/go#61207). Fix this by switching to time-based eviction, but with a minimum cache size to optimize the case where editing is resumed after a while.

This caused an increase in high-water mark during initial workspace load, but that is mitigated by avoiding the parse cache when type checking for import (i.e. non-workspace files): such parsed files are almost never cache hits as they are only ever parsed once, and would instead benefit more from avoiding ParseComments.

Move the ownership of the parseCache to the cache.Session (and pass it to each View) to make its lifecycle clearer and avoid passing it around to each snapshot.

For golang/go#61207

Change-Id: I357d8b1fa36eabb516dbb7147266df0e5153ac11
Reviewed-on: https://go-review.googlesource.com/c/tools/+/511337
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Alan Donovan <[email protected]>
Run-TryBot: Robert Findley <[email protected]>
gopls-CI: kokoro <[email protected]>
1 parent 36f607b

10 files changed: +204 −64 lines
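To make the new policy concrete, here is a minimal, self-contained sketch of time-based eviction with a minimum cache size, as described in the commit message. It is illustrative only, not the gopls implementation; the entry type, field names, and thresholds below are invented for this example.

package main

import (
    "fmt"
    "sort"
    "time"
)

// entry is an illustrative stand-in for a cached parse result.
type entry struct {
    name  string
    atime time.Time // last access time
}

// evictOld drops entries whose last access is older than maxAge, but never
// shrinks the cache below minFiles entries; the oldest entries go first.
func evictOld(entries []entry, minFiles int, maxAge time.Duration, now time.Time) []entry {
    // Consider candidates in least-recently-used order.
    sort.Slice(entries, func(i, j int) bool {
        return entries[i].atime.Before(entries[j].atime)
    })
    for len(entries) > minFiles && now.Sub(entries[0].atime) > maxAge {
        entries = entries[1:]
    }
    return entries
}

func main() {
    now := time.Now()
    cache := []entry{
        {"ancient.go", now.Add(-time.Hour)},
        {"stale.go", now.Add(-5 * time.Minute)},
        {"recent.go", now.Add(-5 * time.Second)},
    }
    // With minFiles=1 and maxAge=1m, only recent.go survives; with minFiles=3,
    // everything is kept regardless of age.
    fmt.Println(evictOld(cache, 1, time.Minute, now))
}

The real cache (parse_cache.go below) keeps entries in a min-atime heap and applies this policy periodically from a background goroutine rather than on every access.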

gopls/internal/lsp/cache/cache.go (+2)

@@ -9,6 +9,7 @@ import (
 	"reflect"
 	"strconv"
 	"sync/atomic"
+	"time"
 
 	"golang.org/x/tools/gopls/internal/lsp/source"
 	"golang.org/x/tools/internal/event"
@@ -67,6 +68,7 @@ func NewSession(ctx context.Context, c *Cache, optionsOverrides func(*source.Opt
 		gocmdRunner: &gocommand.Runner{},
 		options:     options,
 		overlayFS:   newOverlayFS(c),
+		parseCache:  newParseCache(1 * time.Minute), // keep recently parsed files for a minute, to optimize typing CPU
 	}
 	event.Log(ctx, "New session", KeyCreateSession.Of(s))
 	return s
gopls/internal/lsp/cache/check.go (+27 −4)

@@ -356,7 +356,7 @@ func (s *snapshot) forEachPackage(ctx context.Context, ids []PackageID, pre preT
 // If a non-nil importGraph is provided, imports in this graph will be reused.
 func (s *snapshot) forEachPackageInternal(ctx context.Context, importGraph *importGraph, importIDs, syntaxIDs []PackageID, pre preTypeCheck, post postTypeCheck, handles map[PackageID]*packageHandle) (*typeCheckBatch, error) {
 	b := &typeCheckBatch{
-		parseCache: s.parseCache,
+		parseCache: s.view.parseCache,
 		pre:        pre,
 		post:       post,
 		handles:    handles,
@@ -593,9 +593,32 @@ func (b *typeCheckBatch) checkPackageForImport(ctx context.Context, ph *packageH
 	}
 	cfg := b.typesConfig(ctx, ph.localInputs, onError)
 	cfg.IgnoreFuncBodies = true
-	pgfs, err := b.parseCache.parseFiles(ctx, b.fset, source.ParseFull, ph.localInputs.compiledGoFiles...)
-	if err != nil {
-		return nil, err
+
+	// Parse the compiled go files, bypassing the parse cache as packages checked
+	// for import are unlikely to get cache hits. Additionally, we can optimize
+	// parsing slightly by not passing parser.ParseComments.
+	pgfs := make([]*source.ParsedGoFile, len(ph.localInputs.compiledGoFiles))
+	{
+		var group errgroup.Group
+		// Set an arbitrary concurrency limit; we want some parallelism but don't
+		// need GOMAXPROCS, as there is already a lot of concurrency among calls to
+		// checkPackageForImport.
+		//
+		// TODO(rfindley): is there a better way to limit parallelism here? We could
+		// have a global limit on the type-check batch, but would have to be very
+		// careful to avoid starvation.
+		group.SetLimit(4)
+		for i, fh := range ph.localInputs.compiledGoFiles {
+			i, fh := i, fh
+			group.Go(func() error {
+				pgf, err := parseGoImpl(ctx, b.fset, fh, parser.SkipObjectResolution)
+				pgfs[i] = pgf
+				return err
+			})
+		}
+		if err := group.Wait(); err != nil {
+			return nil, err // cancelled, or catastrophic error (e.g. missing file)
+		}
 	}
 	pkg := types.NewPackage(string(ph.localInputs.pkgPath), string(ph.localInputs.name))
 	check := types.NewChecker(cfg, b.fset, pkg, nil)
gopls/internal/lsp/cache/mod_tidy.go (+1 −1)

@@ -486,7 +486,7 @@ func missingModuleForImport(pgf *source.ParsedGoFile, imp *ast.ImportSpec, req *
 //
 // TODO(rfindley): this should key off source.ImportPath.
 func parseImports(ctx context.Context, s *snapshot, files []source.FileHandle) (map[string]bool, error) {
-	pgfs, err := s.parseCache.parseFiles(ctx, token.NewFileSet(), source.ParseHeader, files...)
+	pgfs, err := s.view.parseCache.parseFiles(ctx, token.NewFileSet(), source.ParseHeader, files...)
 	if err != nil { // e.g. context cancellation
 		return nil, err
 	}

gopls/internal/lsp/cache/parse.go (+1 −1)

@@ -27,7 +27,7 @@ import (
 // ParseGo parses the file whose contents are provided by fh, using a cache.
 // The resulting tree may have beeen fixed up.
 func (s *snapshot) ParseGo(ctx context.Context, fh source.FileHandle, mode parser.Mode) (*source.ParsedGoFile, error) {
-	pgfs, err := s.parseCache.parseFiles(ctx, token.NewFileSet(), mode, fh)
+	pgfs, err := s.view.parseCache.parseFiles(ctx, token.NewFileSet(), mode, fh)
 	if err != nil {
 		return nil, err
 	}

gopls/internal/lsp/cache/parse_cache.go (+96 −40)

@@ -14,13 +14,22 @@ import (
 	"math/bits"
 	"runtime"
 	"sync"
+	"time"
 
 	"golang.org/x/sync/errgroup"
 	"golang.org/x/tools/gopls/internal/lsp/source"
+	"golang.org/x/tools/gopls/internal/span"
 	"golang.org/x/tools/internal/memoize"
 	"golang.org/x/tools/internal/tokeninternal"
 )
 
+// This file contains an implementation of an LRU parse cache, that offsets the
+// base token.Pos value of each cached file so that they may be later described
+// by a single dedicated FileSet.
+//
+// This is achieved by tracking a monotonic offset in the token.Pos space, that
+// is incremented before parsing allow room for the resulting parsed file.
+
 // reservedForParsing defines the room in the token.Pos space reserved for
 // cached parsed files.
 //
@@ -58,21 +67,11 @@ func fileSetWithBase(base int) *token.FileSet {
 	return fset
 }
 
-// This file contains an implementation of a bounded-size parse cache, that
-// offsets the base token.Pos value of each cached file so that they may be
-// later described by a single dedicated FileSet.
-//
-// This is achieved by tracking a monotonic offset in the token.Pos space, that
-// is incremented before parsing allow room for the resulting parsed file.
-
-// Keep 200 recently parsed files, based on the following rationale:
-//   - One of the most important benefits of caching is avoiding re-parsing
-//     everything in a package when working on a single file. No packages in
-//     Kubernetes have > 200 files (only one has > 100).
-//   - Experience has shown that ~1000 parsed files can use noticeable space.
-//     200 feels like a sweet spot between limiting cache size and optimizing
-//     cache hits for low-latency operations.
-const parseCacheMaxFiles = 200
+const (
+	// Always keep 100 recent files, independent of their wall-clock age, to
+	// optimize the case where the user resumes editing after a delay.
+	parseCacheMinFiles = 100
+)
 
 // parsePadding is additional padding allocated to allow for increases in
 // length (such as appending missing braces) caused by fixAST.
@@ -89,31 +88,55 @@ const parseCacheMaxFiles = 200
 // This value is mutable for testing, so that we can exercise the slow path.
 var parsePadding = 1000 // mutable for testing
 
-// A parseCache holds a bounded number of recently accessed parsed Go files. As
-// new files are stored, older files may be evicted from the cache.
+// A parseCache holds recently accessed parsed Go files. After new files are
+// stored, older files may be evicted from the cache via garbage collection.
 //
 // The parseCache.parseFiles method exposes a batch API for parsing (and
 // caching) multiple files. This is necessary for type-checking, where files
 // must be parsed in a common fileset.
 type parseCache struct {
+	maxAge time.Duration // interval at which to collect expired cache entries
+	done   chan struct{} // closed when GC is stopped
+
 	mu       sync.Mutex
 	m        map[parseKey]*parseCacheEntry
 	lru      queue  // min-atime priority queue of *parseCacheEntry
 	clock    uint64 // clock time, incremented when the cache is updated
 	nextBase int    // base offset for the next parsed file
 }
 
+// newParseCache creates a new parse cache and starts a goroutine to garbage
+// collect old entries that are older than maxAge.
+//
+// Callers must call parseCache.stop when the parse cache is no longer in use.
+func newParseCache(maxAge time.Duration) *parseCache {
+	c := &parseCache{
+		maxAge: maxAge,
+		m:      make(map[parseKey]*parseCacheEntry),
+		done:   make(chan struct{}),
+	}
+	go c.gc()
+	return c
+}
+
+// stop causes the GC goroutine to exit.
+func (c *parseCache) stop() {
+	close(c.done)
+}
+
 // parseKey uniquely identifies a parsed Go file.
 type parseKey struct {
-	file source.FileIdentity
+	uri  span.URI
 	mode parser.Mode
 }
 
 type parseCacheEntry struct {
 	key      parseKey
+	hash     source.Hash
 	promise  *memoize.Promise // memoize.Promise[*source.ParsedGoFile]
-	atime    uint64 // clock time of last access
-	lruIndex int
+	atime    uint64    // clock time of last access, for use in LRU sorting
+	walltime time.Time // actual time of last access, for use in time-based eviction; too coarse for LRU on some systems
+	lruIndex int       // owned by the queue implementation
 }
 
 // startParse prepares a parsing pass, creating new promises in the cache for
@@ -131,6 +154,7 @@ func (c *parseCache) startParse(mode parser.Mode, fhs ...source.FileHandle) ([]*
 	//
 	// All entries parsed from a single call get the same access time.
 	c.clock++
+	walltime := time.Now()
 
 	// Read file data and collect cacheable files.
 	var (
@@ -149,15 +173,22 @@ func (c *parseCache) startParse(mode parser.Mode, fhs ...source.FileHandle) ([]*
 		data[i] = content
 
 		key := parseKey{
-			file: fh.FileIdentity(),
+			uri:  fh.URI(),
 			mode: mode,
 		}
 
-		if e, ok := c.m[key]; ok { // cache hit
-			e.atime = c.clock
-			heap.Fix(&c.lru, e.lruIndex)
-			promises[i] = e.promise
-			continue
+		if e, ok := c.m[key]; ok {
+			if e.hash == fh.FileIdentity().Hash { // cache hit
+				e.atime = c.clock
+				e.walltime = walltime
+				heap.Fix(&c.lru, e.lruIndex)
+				promises[i] = e.promise
+				continue
+			} else {
+				// A cache hit, for a different version. Delete it.
+				delete(c.m, e.key)
+				heap.Remove(&c.lru, e.lruIndex)
+			}
 		}
 
 		uri := fh.URI()
@@ -200,21 +231,14 @@ func (c *parseCache) startParse(mode parser.Mode, fhs ...source.FileHandle) ([]*
 		})
 		promises[i] = promise
 
-		var e *parseCacheEntry
-		if len(c.lru) < parseCacheMaxFiles {
-			// add new entry
-			e = new(parseCacheEntry)
-			if c.m == nil {
-				c.m = make(map[parseKey]*parseCacheEntry)
-			}
-		} else {
-			// evict oldest entry
-			e = heap.Pop(&c.lru).(*parseCacheEntry)
-			delete(c.m, e.key)
+		// add new entry; entries are gc'ed asynchronously
+		e := &parseCacheEntry{
+			key:      key,
+			hash:     fh.FileIdentity().Hash,
+			promise:  promise,
+			atime:    c.clock,
+			walltime: walltime,
 		}
-		e.key = key
-		e.promise = promise
-		e.atime = c.clock
 		c.m[e.key] = e
 		heap.Push(&c.lru, e)
 	}
@@ -226,6 +250,38 @@ func (c *parseCache) startParse(mode parser.Mode, fhs ...source.FileHandle) ([]*
 	return promises, firstReadError
 }
 
+func (c *parseCache) gc() {
+	const period = 10 * time.Second // gc period
+	timer := time.NewTicker(period)
+	defer timer.Stop()
+
+	for {
+		select {
+		case <-c.done:
+			return
+		case <-timer.C:
+		}
+
+		c.gcOnce()
+	}
+}
+
+func (c *parseCache) gcOnce() {
+	now := time.Now()
+	c.mu.Lock()
+	defer c.mu.Unlock()
+
+	for len(c.m) > parseCacheMinFiles {
+		e := heap.Pop(&c.lru).(*parseCacheEntry)
+		if now.Sub(e.walltime) > c.maxAge {
+			delete(c.m, e.key)
+		} else {
+			heap.Push(&c.lru, e)
+			break
+		}
+	}
+}
+
 // allocateSpace reserves the next n bytes of token.Pos space in the
 // cache.
 //
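The newParseCache/stop/gc trio above follows a common lifecycle pattern: the constructor starts a background goroutine that wakes on a ticker and exits when a done channel is closed. Below is a self-contained sketch of just that pattern, with the cache-specific work reduced to a callback; the sweeper type and its names are invented for illustration and are not part of gopls.

package main

import (
    "fmt"
    "sync"
    "time"
)

// sweeper periodically runs a cleanup function until stopped.
type sweeper struct {
    done     chan struct{}
    stopOnce sync.Once
}

// newSweeper starts the background goroutine. Callers must call Stop when the
// sweeper is no longer in use, or the goroutine (and its ticker) will leak.
func newSweeper(period time.Duration, clean func()) *sweeper {
    s := &sweeper{done: make(chan struct{})}
    go func() {
        ticker := time.NewTicker(period)
        defer ticker.Stop()
        for {
            select {
            case <-s.done:
                return
            case <-ticker.C:
            }
            clean()
        }
    }()
    return s
}

// Stop causes the background goroutine to exit; it is safe to call twice.
func (s *sweeper) Stop() {
    s.stopOnce.Do(func() { close(s.done) })
}

func main() {
    s := newSweeper(50*time.Millisecond, func() {
        fmt.Println("collecting expired entries")
    })
    time.Sleep(180 * time.Millisecond) // let a few sweeps run
    s.Stop()
}

Within each sweep, gcOnce in the diff pops entries from the min-atime heap and stops at the first entry newer than maxAge, or as soon as only parseCacheMinFiles entries remain, so a sweep does work proportional to the number of evictions rather than to the cache size.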