|
1 | | -//! Comment and docblock position detection. |
| 1 | +//! Comment, docblock, and string position detection. |
2 | 2 | //! |
3 | 3 | //! This module provides utilities to determine whether a given cursor |
4 | | -//! position falls inside a comment or docblock. These are used early in |
5 | | -//! the completion pipeline to decide whether to suppress normal |
6 | | -//! completions (inside `//` / `/* */`) or to switch to PHPDoc tag |
7 | | -//! completion (inside `/** */`). |
| 4 | +//! position falls inside a comment, docblock, or string literal. These |
| 5 | +//! are used early in the completion pipeline to decide whether to suppress |
| 6 | +//! normal completions (inside `//` / `/* */` / string literals) or to |
| 7 | +//! switch to PHPDoc tag completion (inside `/** */`). |
8 | 8 | //! |
9 | 9 | //! The functions here are pure — they take `(content, Position)` and |
10 | 10 | //! return a result without any shared state. |
@@ -201,3 +201,293 @@ pub fn is_inside_non_doc_comment(content: &str, position: Position) -> bool { |
201 | 201 | // Cursor is at or past end of file |
202 | 202 | state == State::LineComment || state == State::BlockComment |
203 | 203 | } |
| 204 | + |
/// Classification of the string context at a given cursor position.
///
/// The type is a plain tag with no payload, so it is `Copy`: callers can
/// keep using a value after passing it on or comparing it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StringContext {
    /// The cursor is not inside any string literal.
    NotInString,
    /// The cursor is inside a string literal where interpolation is not
    /// possible (single-quoted string, nowdoc) or where the cursor is at
    /// a position that is not an interpolation site (plain text inside a
    /// double-quoted string or heredoc). Completion should be suppressed.
    InStringLiteral,
    /// The cursor is inside a simple interpolation context without braces
    /// (`"$var->"`, `"$var"`). PHP only allows property access here (no
    /// method calls, no chaining), so completion should filter to
    /// properties only.
    SimpleInterpolation,
    /// The cursor is inside a `{$…}` brace interpolation context where
    /// full PHP expressions are allowed (`"{$user->getName()}"`).
    /// Completion should proceed normally with no filtering.
    BraceInterpolation,
}
| 225 | + |
| 226 | +/// Classifies whether the cursor is inside a string and, if so, whether |
| 227 | +/// it is at an interpolation site where completion should still fire. |
| 228 | +/// |
| 229 | +/// Returns [`StringContext::InStringLiteral`] when completion should be |
| 230 | +/// suppressed, [`StringContext::InInterpolation`] when inside a PHP |
| 231 | +/// interpolation expression, and [`StringContext::NotInString`] when the |
| 232 | +/// cursor is in normal code. |
| 233 | +pub fn classify_string_context(content: &str, position: Position) -> StringContext { |
| 234 | + let target = position_to_byte_offset(content, position); |
| 235 | + let bytes = content.as_bytes(); |
| 236 | + let len = bytes.len(); |
| 237 | + let mut i = 0; |
| 238 | + |
| 239 | + #[derive(PartialEq, Clone, Copy)] |
| 240 | + enum State { |
| 241 | + Code, |
| 242 | + SingleString, |
| 243 | + DoubleString, |
| 244 | + LineComment, |
| 245 | + BlockComment, |
| 246 | + Docblock, |
| 247 | + Heredoc, |
| 248 | + Nowdoc, |
| 249 | + } |
| 250 | + |
| 251 | + let mut state = State::Code; |
| 252 | + let mut heredoc_label: Vec<u8> = Vec::new(); |
| 253 | + // Brace depth for `{$...}` interpolation inside double-quoted strings |
| 254 | + // and heredocs. Zero means we are in the string body proper; > 0 means |
| 255 | + // we are inside a `{$…}` complex interpolation expression. |
| 256 | + let mut brace_depth: u32 = 0; |
| 257 | + |
| 258 | + while i < len { |
| 259 | + if i >= target { |
| 260 | + return match state { |
| 261 | + State::SingleString | State::Nowdoc => StringContext::InStringLiteral, |
| 262 | + State::DoubleString | State::Heredoc => { |
| 263 | + if brace_depth > 0 { |
| 264 | + StringContext::BraceInterpolation |
| 265 | + } else if is_simple_interpolation_site(bytes, target) { |
| 266 | + StringContext::SimpleInterpolation |
| 267 | + } else { |
| 268 | + StringContext::InStringLiteral |
| 269 | + } |
| 270 | + } |
| 271 | + _ => StringContext::NotInString, |
| 272 | + }; |
| 273 | + } |
| 274 | + |
| 275 | + match state { |
| 276 | + State::Code => { |
| 277 | + if bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'/' { |
| 278 | + state = State::LineComment; |
| 279 | + i += 2; |
| 280 | + } else if bytes[i] == b'/' |
| 281 | + && i + 2 < len |
| 282 | + && bytes[i + 1] == b'*' |
| 283 | + && bytes[i + 2] == b'*' |
| 284 | + && (i + 3 >= len || bytes[i + 3] != b'*') |
| 285 | + { |
| 286 | + state = State::Docblock; |
| 287 | + i += 3; |
| 288 | + } else if bytes[i] == b'/' && i + 1 < len && bytes[i + 1] == b'*' { |
| 289 | + state = State::BlockComment; |
| 290 | + i += 2; |
| 291 | + } else if bytes[i] == b'\'' { |
| 292 | + state = State::SingleString; |
| 293 | + i += 1; |
| 294 | + } else if bytes[i] == b'"' { |
| 295 | + state = State::DoubleString; |
| 296 | + brace_depth = 0; |
| 297 | + i += 1; |
| 298 | + } else if bytes[i] == b'<' |
| 299 | + && i + 2 < len |
| 300 | + && bytes[i + 1] == b'<' |
| 301 | + && bytes[i + 2] == b'<' |
| 302 | + { |
| 303 | + i += 3; |
| 304 | + while i < len && bytes[i] == b' ' { |
| 305 | + i += 1; |
| 306 | + } |
| 307 | + let is_nowdoc = i < len && bytes[i] == b'\''; |
| 308 | + if is_nowdoc { |
| 309 | + i += 1; |
| 310 | + } |
| 311 | + heredoc_label.clear(); |
| 312 | + while i < len && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') { |
| 313 | + heredoc_label.push(bytes[i]); |
| 314 | + i += 1; |
| 315 | + } |
| 316 | + if !heredoc_label.is_empty() { |
| 317 | + if is_nowdoc { |
| 318 | + if i < len && bytes[i] == b'\'' { |
| 319 | + i += 1; |
| 320 | + } |
| 321 | + state = State::Nowdoc; |
| 322 | + } else { |
| 323 | + state = State::Heredoc; |
| 324 | + brace_depth = 0; |
| 325 | + } |
| 326 | + } |
| 327 | + } else { |
| 328 | + i += 1; |
| 329 | + } |
| 330 | + } |
| 331 | + State::LineComment => { |
| 332 | + if bytes[i] == b'\n' { |
| 333 | + state = State::Code; |
| 334 | + } |
| 335 | + i += 1; |
| 336 | + } |
| 337 | + State::BlockComment | State::Docblock => { |
| 338 | + if bytes[i] == b'*' && i + 1 < len && bytes[i + 1] == b'/' { |
| 339 | + state = State::Code; |
| 340 | + i += 2; |
| 341 | + } else { |
| 342 | + i += 1; |
| 343 | + } |
| 344 | + } |
| 345 | + State::SingleString => { |
| 346 | + if bytes[i] == b'\\' && i + 1 < len { |
| 347 | + i += 2; |
| 348 | + } else if bytes[i] == b'\'' { |
| 349 | + state = State::Code; |
| 350 | + i += 1; |
| 351 | + } else { |
| 352 | + i += 1; |
| 353 | + } |
| 354 | + } |
| 355 | + State::DoubleString => { |
| 356 | + if bytes[i] == b'\\' && i + 1 < len { |
| 357 | + i += 2; |
| 358 | + } else if bytes[i] == b'"' && brace_depth == 0 { |
| 359 | + state = State::Code; |
| 360 | + i += 1; |
| 361 | + } else if bytes[i] == b'{' |
| 362 | + && ((i + 1 < len && bytes[i + 1] == b'$') || brace_depth > 0) |
| 363 | + { |
| 364 | + brace_depth += 1; |
| 365 | + i += 1; |
| 366 | + } else if bytes[i] == b'}' && brace_depth > 0 { |
| 367 | + brace_depth -= 1; |
| 368 | + i += 1; |
| 369 | + } else { |
| 370 | + i += 1; |
| 371 | + } |
| 372 | + } |
| 373 | + State::Heredoc => { |
| 374 | + if bytes[i] == b'{' && ((i + 1 < len && bytes[i + 1] == b'$') || brace_depth > 0) { |
| 375 | + brace_depth += 1; |
| 376 | + i += 1; |
| 377 | + } else if bytes[i] == b'}' && brace_depth > 0 { |
| 378 | + brace_depth -= 1; |
| 379 | + i += 1; |
| 380 | + } else if bytes[i] == b'\n' { |
| 381 | + i += 1; |
| 382 | + let line_start = i; |
| 383 | + while i < len && (bytes[i] == b' ' || bytes[i] == b'\t') { |
| 384 | + i += 1; |
| 385 | + } |
| 386 | + if i + heredoc_label.len() <= len |
| 387 | + && &bytes[i..i + heredoc_label.len()] == heredoc_label.as_slice() |
| 388 | + { |
| 389 | + let after_label = i + heredoc_label.len(); |
| 390 | + if after_label >= len |
| 391 | + || bytes[after_label] == b';' |
| 392 | + || bytes[after_label] == b'\n' |
| 393 | + || bytes[after_label] == b'\r' |
| 394 | + { |
| 395 | + i = after_label; |
| 396 | + state = State::Code; |
| 397 | + brace_depth = 0; |
| 398 | + continue; |
| 399 | + } |
| 400 | + } |
| 401 | + let _ = line_start; |
| 402 | + } else { |
| 403 | + i += 1; |
| 404 | + } |
| 405 | + } |
| 406 | + State::Nowdoc => { |
| 407 | + if bytes[i] == b'\n' { |
| 408 | + i += 1; |
| 409 | + let line_start = i; |
| 410 | + while i < len && (bytes[i] == b' ' || bytes[i] == b'\t') { |
| 411 | + i += 1; |
| 412 | + } |
| 413 | + if i + heredoc_label.len() <= len |
| 414 | + && &bytes[i..i + heredoc_label.len()] == heredoc_label.as_slice() |
| 415 | + { |
| 416 | + let after_label = i + heredoc_label.len(); |
| 417 | + if after_label >= len |
| 418 | + || bytes[after_label] == b';' |
| 419 | + || bytes[after_label] == b'\n' |
| 420 | + || bytes[after_label] == b'\r' |
| 421 | + { |
| 422 | + i = after_label; |
| 423 | + state = State::Code; |
| 424 | + continue; |
| 425 | + } |
| 426 | + } |
| 427 | + let _ = line_start; |
| 428 | + } else { |
| 429 | + i += 1; |
| 430 | + } |
| 431 | + } |
| 432 | + } |
| 433 | + } |
| 434 | + |
| 435 | + // Cursor at or past end of file |
| 436 | + match state { |
| 437 | + State::SingleString | State::Nowdoc => StringContext::InStringLiteral, |
| 438 | + State::DoubleString | State::Heredoc => { |
| 439 | + if brace_depth > 0 { |
| 440 | + StringContext::BraceInterpolation |
| 441 | + } else if is_simple_interpolation_site(bytes, target) { |
| 442 | + StringContext::SimpleInterpolation |
| 443 | + } else { |
| 444 | + StringContext::InStringLiteral |
| 445 | + } |
| 446 | + } |
| 447 | + _ => StringContext::NotInString, |
| 448 | + } |
| 449 | +} |
| 450 | + |
/// Checks whether the bytes immediately before `target` look like a PHP
/// simple (brace-less) interpolation site inside a double-quoted string
/// or heredoc.
///
/// Recognised patterns (cursor shown as `|`):
/// - `$identifier->|` or `$identifier?->|` — member access
/// - `$identifier->partial|` — partially typed property name
/// - `$identifier|` — partially typed variable name
/// - `$|` — bare dollar, user is starting an interpolation
///
/// Not interpolation sites, and therefore rejected:
/// - `\$name` — the dollar is escaped (odd number of preceding backslashes)
/// - `$5` — a variable name cannot start with a digit
/// - `$a->b->|` — simple syntax only interpolates one property level
///
/// `target` is clamped to `bytes.len()`, so an offset past the end of the
/// buffer is treated as end-of-buffer rather than panicking.
fn is_simple_interpolation_site(bytes: &[u8], target: usize) -> bool {
    /// Bytes allowed inside a PHP identifier (`[a-zA-Z0-9_\x80-\xff]`).
    fn is_ident_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || b == b'_' || b >= 0x80
    }

    /// Splits `text` into `(prefix, trailing_identifier_bytes)`.
    fn split_trailing_ident(text: &[u8]) -> (&[u8], &[u8]) {
        let ident_len = text.iter().rev().take_while(|&&b| is_ident_byte(b)).count();
        text.split_at(text.len() - ident_len)
    }

    /// True when `ident` begins with an ASCII digit (not a valid name start).
    fn starts_with_digit(ident: &[u8]) -> bool {
        ident.first().map_or(false, u8::is_ascii_digit)
    }

    /// True when `text` ends with a `$` that is not escaped by a backslash.
    fn ends_with_live_dollar(text: &[u8]) -> bool {
        match text.split_last() {
            Some((&b'$', before)) => {
                // `\$` is a literal dollar, `\\$` is a backslash followed by
                // a real interpolation: only an even run leaves `$` live.
                let backslashes = before.iter().rev().take_while(|&&b| b == b'\\').count();
                backslashes % 2 == 0
            }
            _ => false,
        }
    }

    let head = &bytes[..target.min(bytes.len())];

    // The (possibly empty) identifier right before the cursor is either a
    // partially typed variable name or a partially typed property name.
    let (before_tail, tail) = split_trailing_ident(head);
    if starts_with_digit(tail) {
        return false;
    }

    // ── Case 1: bare `$` or `$partialName` (variable interpolation) ──
    if ends_with_live_dollar(before_tail) {
        return true;
    }

    // ── Case 2: member access `$identifier->` / `$identifier?->` ─────
    // Check `?->` first so the longer operator is not partially matched.
    let before_arrow = if before_tail.ends_with(b"?->") {
        &before_tail[..before_tail.len() - 3]
    } else if before_tail.ends_with(b"->") {
        &before_tail[..before_tail.len() - 2]
    } else {
        return false;
    };

    // The arrow must follow a non-empty variable name introduced by a
    // live `$`; anything else (e.g. a second `->` level) is plain text.
    let (before_object, object) = split_trailing_ident(before_arrow);
    !object.is_empty() && !starts_with_digit(object) && ends_with_live_dollar(before_object)
}
0 commit comments