Skip to content

Commit 3e07c35

Browse files
committed
Reduce redundant file parsing by deduplicating concurrent parses
1 parent 0548de7 commit 3e07c35

4 files changed

Lines changed: 49 additions & 29 deletions

File tree

docs/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
4747
- **Replace FQCN with import.** Now replaces all occurrences of the same FQCN throughout the file in one action, not just the one under the cursor. A new "Replace all FQCNs with imports" action appears when the file contains multiple distinct FQCNs, replacing all of them at once (skipping those with import conflicts).
4848
- **LSP responsiveness.** Hover, go-to-definition, signature help, code actions, rename, and other handlers now run on background threads. Slow requests no longer block other requests or cancellations.
4949
- **Faster analysis.** Analysis time cut significantly on large projects.
50+
- **Reduced redundant file parsing.** Concurrent threads resolving the same vendor class no longer parse the file in parallel; later threads wait for the first to finish and reuse its cached result.
5051
- **Editing responsiveness.** Classes evicted from the cache after a file edit are now eagerly re-populated in dependency order.
5152
- **Diagnostic delivery model.** Editors that support pull diagnostics now get diagnostics on first file open without waiting for a debounce timer. Updates from external tools no longer re-run the entire native diagnostic pipeline.
5253
- **Virtual member resolution.** Mixins and virtual accessors are now resolved completely on every class, eliminating cases where they were missing after edits.

docs/todo/performance.md

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -220,27 +220,6 @@ path).
220220

221221
---
222222

223-
## P10. Redundant `parse_and_cache_file` from multiple threads
224-
225-
**Impact: Medium · Effort: Low**
226-
227-
When two threads simultaneously try to resolve the same vendor
228-
class, both miss `fqn_index`, both call `parse_and_cache_file`,
229-
and both parse the same file. The second parse is wasted work.
230-
This is most visible during the Phase 2 diagnostic pass when many
231-
threads resolve vendor classes for the first time.
232-
233-
### Fix
234-
235-
Add a `DashSet<String>` (or similar) of "currently being parsed"
236-
URIs. Before calling `parse_and_cache_file`, insert the URI into
237-
the set. If the insert fails (another thread is already parsing
238-
it), spin-wait or skip and let the other thread's result propagate
239-
through `fqn_index`. Remove the URI from the set after parsing
240-
completes.
241-
242-
---
243-
244223
## P11. Uncached base-resolution in `build_scope_methods_for_builder`
245224

246225
**Impact: Low-Medium · Effort: Low**

src/lib.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,13 @@ pub struct Backend {
343343
/// classmap contains a phar-based path (detected by a `!` separator,
344344
/// e.g. `/path/to/phpstan.phar!src/Type/Type.php`).
345345
pub(crate) phar_archives: Arc<RwLock<HashMap<PathBuf, phar::PharArchive>>>,
346+
/// Set of file URIs whose parse is currently in flight on some thread.
347+
///
348+
/// Used by [`parse_and_cache_file`](Self::parse_and_cache_file) to avoid
349+
/// redundant concurrent parses of the same file. Before parsing, the URI
350+
/// is inserted; if it was already present, the calling thread waits for
351+
/// the in-flight parse to finish and then reads `ast_map` instead of re-parsing.
352+
pub(crate) parse_inflight: Arc<Mutex<HashSet<String>>>,
346353
/// Embedded PHP stubs for built-in classes/interfaces (e.g. `UnitEnum`,
347354
/// `BackedEnum`, `Iterator`, `Countable`, …).
348355
/// Maps class short name → raw PHP source code.
@@ -638,6 +645,7 @@ impl Backend {
638645
class_not_found_cache: Arc::new(RwLock::new(HashSet::new())),
639646
classmap: Arc::new(RwLock::new(HashMap::new())),
640647
phar_archives: Arc::new(RwLock::new(HashMap::new())),
648+
parse_inflight: Arc::new(Mutex::new(HashSet::new())),
641649
stub_index: RwLock::new(stubs::build_stub_class_index()),
642650
stub_function_index: RwLock::new(stubs::build_stub_function_index()),
643651
stub_constant_index: RwLock::new(stubs::build_stub_constant_index()),
@@ -713,6 +721,7 @@ impl Backend {
713721
class_not_found_cache: Arc::new(RwLock::new(HashSet::new())),
714722
classmap: Arc::new(RwLock::new(HashMap::new())),
715723
phar_archives: Arc::new(RwLock::new(HashMap::new())),
724+
parse_inflight: Arc::new(Mutex::new(HashSet::new())),
716725
stub_index: RwLock::new(HashMap::new()),
717726
stub_function_index: RwLock::new(HashMap::new()),
718727
stub_constant_index: RwLock::new(HashMap::new()),
@@ -1009,6 +1018,7 @@ impl Backend {
10091018
fqn_index: Arc::clone(&self.fqn_index),
10101019
classmap: Arc::clone(&self.classmap),
10111020
phar_archives: Arc::clone(&self.phar_archives),
1021+
parse_inflight: Arc::clone(&self.parse_inflight),
10121022
class_not_found_cache: Arc::clone(&self.class_not_found_cache),
10131023
stub_index: RwLock::new(self.stub_index.read().clone()),
10141024
resolved_class_cache: Arc::clone(&self.resolved_class_cache),

src/resolution.rs

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -277,19 +277,49 @@ impl Backend {
277277
let phar_path = Path::new(&path_str[..sep]);
278278
let internal_path = &path_str[sep + 1..];
279279

280-
let archives = self.phar_archives.read();
281-
let archive = archives.get(phar_path)?;
282-
let bytes = archive.read_file(internal_path)?;
283-
let content = std::str::from_utf8(bytes).ok()?;
284-
285280
let uri = format!("phar://{}/{}", phar_path.display(), internal_path);
286-
return self.parse_and_cache_content(content, &uri);
281+
282+
// Deduplicate concurrent parses of the same phar entry.
283+
if !self.parse_inflight.lock().insert(uri.clone()) {
284+
return self.wait_for_cached_result(&uri);
285+
}
286+
let result = (|| {
287+
let archives = self.phar_archives.read();
288+
let archive = archives.get(phar_path)?;
289+
let bytes = archive.read_file(internal_path)?;
290+
let content = std::str::from_utf8(bytes).ok()?;
291+
self.parse_and_cache_content(content, &uri)
292+
})();
293+
self.parse_inflight.lock().remove(&uri);
294+
return result;
287295
}
288296

289297
// ── Regular file path ───────────────────────────────────
290-
let content = std::fs::read_to_string(file_path).ok()?;
291298
let uri = crate::util::path_to_uri(file_path);
292-
self.parse_and_cache_content(&content, &uri)
299+
300+
// Deduplicate concurrent parses of the same file.
301+
if !self.parse_inflight.lock().insert(uri.clone()) {
302+
return self.wait_for_cached_result(&uri);
303+
}
304+
let content = std::fs::read_to_string(file_path).ok();
305+
let result = content.and_then(|c| self.parse_and_cache_content(&c, &uri));
306+
self.parse_inflight.lock().remove(&uri);
307+
result
308+
}
309+
310+
/// Wait for another thread to finish parsing `uri` and return the cached
311+
/// result from `ast_map`.
///
/// This is a sleep-based poll, not a busy spin: `parse_inflight` is checked
/// up to 200 times with a 1 ms sleep after each unsuccessful check, so the
/// wait is nominally capped at about 200 ms (`thread::sleep` may sleep
/// longer than requested, so the real upper bound can be higher).
///
/// Returns a clone of the `ast_map` entry for `uri`, or `None` when there
/// is no entry. The same lookup is used both when the URI has left
/// `parse_inflight` and when the poll budget runs out.
312+
fn wait_for_cached_result(&self, uri: &str) -> Option<Vec<Arc<ClassInfo>>> {
313+
for _ in 0..200 {
314+
// Check if parsing is complete (URI removed from inflight set).
315+
if !self.parse_inflight.lock().contains(uri) {
316+
// NOTE(review): assumes the parsing thread stored its result in
// `ast_map` before removing the URI — confirm in
// `parse_and_cache_content`. A missing entry yields `None`.
return self.ast_map.read().get(uri).cloned();
317+
}
318+
// Back off briefly before re-checking the in-flight set.
std::thread::sleep(std::time::Duration::from_millis(1));
319+
}
320+
// Timeout: the other thread is still parsing. Return whatever is
321+
// in ast_map (may be stale or None).
322+
self.ast_map.read().get(uri).cloned()
293323
}
294324

295325
/// Parse PHP source text, cache the results in

0 commit comments

Comments
 (0)