Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ RUN apt-get update \
RUN groupadd --system --gid 999 semanticdog \
&& useradd --system --uid 999 --gid 999 --create-home semanticdog

# Prevent privilege escalation
RUN echo 'semanticdog ALL=(ALL) NOPASSWD: !ALL' >> /etc/sudoers.d/semanticdog \
# Prevent privilege escalation when sudo is present.
# - mkdir -p: make sure the sudoers drop-in directory exists before writing
#   into it (presumably it is absent when sudo is not installed -- confirm).
# - '>' (not '>>'): overwrite the drop-in so the file holds exactly one rule.
# - The rule negates ALL commands for the semanticdog user.
# - chmod 0440: read-only for owner and group, no access for others.
RUN mkdir -p /etc/sudoers.d \
&& echo 'semanticdog ALL=(ALL) NOPASSWD: !ALL' > /etc/sudoers.d/semanticdog \
&& chmod 0440 /etc/sudoers.d/semanticdog

WORKDIR /app
Expand Down
23 changes: 23 additions & 0 deletions semanticdog/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,29 @@ def get_format_counts(self) -> list[tuple[str, int]]:
finally:
conn.close()

def get_format_status_counts(self) -> list[dict[str, Any]]:
    """Return file counts per (extension, status) pair, largest groups first.

    Each entry is a dict with the keys ``ext``, ``status`` and ``count``.
    ``ext`` is the lower-cased text from the last dot of the path onwards
    (leading dot included), or ``'(no ext)'`` when the path contains no dot
    at all. Note that a dot anywhere in the path satisfies the ``LIKE``
    test, not only one in the final path component. Ties on count are
    ordered by extension, then by status.
    """
    # NOTE(review): reverse() is not one of SQLite's core scalar functions;
    # presumably _connect() registers it on the connection -- confirm.
    query = """
            SELECT
                CASE
                    WHEN path LIKE '%.%'
                    THEN lower(substr(path, length(path) - instr(reverse(path), '.') + 1))
                    ELSE '(no ext)'
                END AS ext,
                status,
                COUNT(*) AS cnt
            FROM files
            GROUP BY ext, status
            ORDER BY cnt DESC, ext ASC, status ASC
            """
    conn = self._connect()
    try:
        records = conn.execute(query).fetchall()
        return [
            {"ext": record["ext"], "status": record["status"], "count": record["cnt"]}
            for record in records
        ]
    finally:
        # Always release the connection, even if the query raises.
        conn.close()

def get_stale_count(self, days: int) -> int:
"""Return count of files not checked in the last `days` days."""
cutoff = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
Expand Down
3 changes: 2 additions & 1 deletion semanticdog/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,9 +455,10 @@ def scan(
raise
finally:
if not interrupted and not self._shutdown.is_set() and not failed:
files_examined = max(processed_count - stats.toctou_discards, 0)
self.db.finish_scan(
scan_id,
total=stats.total,
total=files_examined,
corrupt=stats.corrupt,
unreadable=stats.unreadable,
files_per_sec=stats.files_per_sec(),
Expand Down
106 changes: 106 additions & 0 deletions semanticdog/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,110 @@ def _dashboard_banner(status_payload: dict[str, Any], setup: dict[str, Any], run
}


def _file_type_breakdown(db: "Database | None", limit: int = 6) -> list[dict[str, Any]]:
    """Summarise indexed files by extension for the status payload.

    Keeps the first ``limit`` entries returned by ``db.get_format_counts()``
    and folds everything after them into one synthetic ``"other"`` bucket,
    which is only emitted when its total is positive.

    Assumes ``get_format_counts()`` yields ``(ext, count)`` pairs already
    ordered most-common first -- TODO confirm against the database layer.

    Args:
        db: Database handle, or ``None`` when no database is available.
        limit: Number of leading extensions to report individually.

    Returns:
        A list of ``{"label", "count", "percent"}`` dicts, where ``percent``
        is the share of all counted files rounded to one decimal place.
        Empty when ``db`` is ``None`` or there are no counts.
    """
    if db is None:
        return []

    counts = db.get_format_counts()
    if not counts:
        return []

    # Copy into a fresh list so appending the synthetic bucket never mutates
    # (or fails on) whatever sequence type the database layer returned.
    top_counts = list(counts[:limit])
    other_total = sum(count for _, count in counts[limit:])
    if other_total > 0:
        top_counts.append(("other", other_total))

    total = sum(count for _, count in top_counts)
    return [
        {
            # Single source of truth for display labels, so this payload and
            # the overview breakdown cannot drift apart.
            "label": _format_extension_label(ext),
            "count": int(count),
            "percent": round(count / total * 100, 1) if total else 0.0,
        }
        for ext, count in top_counts
    ]


def _format_extension_label(ext: str) -> str:
    """Return the display label for an extension key.

    ``"(no ext)"`` becomes ``"No ext"``, the synthetic ``"other"`` bucket
    becomes ``"Others"``, and anything else is upper-cased with a single
    leading dot removed (``".jpg"`` -> ``"JPG"``).
    """
    if ext == "(no ext)":
        return "No ext"
    if ext == "other":
        return "Others"
    return ext[1:].upper() if ext.startswith(".") else ext.upper()
Comment on lines +174 to +179
Copy link

Copilot AI Apr 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Labeling for the aggregated "other" bucket is inconsistent: _file_type_breakdown() uses "Other" while _format_extension_label() and _overview_breakdown() use "Others". This makes the API/UI harder to reason about and can lead to fragile tests. Consider standardizing on one label (singular vs plural) across both payloads/helpers.

Copilot uses AI. Check for mistakes.


def _overview_breakdown(db: "Database | None", limit: int = 6) -> list[dict[str, Any]]:
    """Build the per-extension, per-status segments for the overview chart.

    The ``limit`` extensions with the highest total file count (ties broken
    alphabetically) are reported one segment per (extension, status) row, in
    the order the database returned the rows. Rows for every other extension
    are summed into a single trailing ``"other:other"`` segment, emitted only
    when that sum is positive.

    Returns an empty list when ``db`` is ``None`` or there are no rows.
    """
    if db is None:
        return []

    rows = db.get_format_status_counts()
    if not rows:
        return []

    # Total files per extension across all statuses, used to rank extensions.
    totals_by_ext: dict[str, int] = {}
    for row in rows:
        name = str(row["ext"])
        totals_by_ext[name] = totals_by_ext.get(name, 0) + int(row["count"])

    ranked = sorted(totals_by_ext.items(), key=lambda pair: (-pair[1], pair[0]))
    primary_exts = {name for name, _ in ranked[:limit]}

    tone_by_status = {
        "ok": "healthy",
        "corrupt": "corrupt",
        "unreadable": "unreadable",
        "unsupported": "other",
        "error": "other",
    }
    prefix_by_status = {
        "ok": "Healthy",
        "corrupt": "Corrupt",
        "unreadable": "Unreadable",
        "unsupported": "Unsupported",
        "error": "Error",
    }

    segments: list[dict[str, Any]] = []
    other_total = 0
    for row in rows:
        ext = str(row["ext"])
        status = str(row["status"])
        count = int(row["count"])
        if ext in primary_exts:
            prefix = prefix_by_status.get(status)
            if prefix is None:
                # Unknown status: derive a readable prefix from its raw name.
                prefix = status.replace('_', ' ').title()
            segments.append(
                {
                    "key": f"{ext}:{status}",
                    "label": f"{prefix} {_format_extension_label(ext)}",
                    "ext": ext,
                    "status": status,
                    "count": count,
                    "tone": tone_by_status.get(status, "other"),
                }
            )
        else:
            other_total += count

    if other_total > 0:
        segments.append(
            {
                "key": "other:other",
                "label": "Others",
                "ext": "other",
                "status": "other",
                "count": other_total,
                "tone": "other",
            }
        )

    return segments


def _changed_restart_fields(payload: dict[str, Any], current_cfg: "Config | None") -> set[str]:
if current_cfg is None:
return set(payload) & RESTART_REQUIRED_CONFIG_FIELDS
Expand Down Expand Up @@ -388,6 +492,8 @@ async def status(request: Request) -> dict[str, Any]:
"files_indexed": stats.get("total", 0),
"by_status": stats.get("by_status", {}),
"last_scan": last_scan,
"file_types": _file_type_breakdown(db),
"overview_breakdown": _overview_breakdown(db),
"current_scan": None if current_scan is None else current_scan.__dict__,
"scheduler": None if scheduler is None else scheduler.as_dict(),
}
Expand Down
61 changes: 53 additions & 8 deletions semanticdog/services/scan_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from concurrent.futures import Future, ThreadPoolExecutor
from dataclasses import dataclass
from datetime import datetime
from typing import Any

from semanticdog.notify import Notifier, ScanSummary
from semanticdog.scanner import ScanProgressSnapshot, Scanner
Expand All @@ -29,10 +30,12 @@ def __init__(self, cfg, db) -> None:
self._lock = threading.Lock()
self._executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="sdog-scan")
self._active_future: Future | None = None
self._active_origin: str | None = None
self._current_snapshot: ScanProgressSnapshot | None = None
self._last_snapshot: ScanProgressSnapshot | None = None
self._last_error: str | None = None
self._last_notification_errors: list[str] = []
self._last_run_summaries: dict[str, dict[str, Any]] = {}

def is_running(self) -> bool:
with self._lock:
Expand All @@ -54,11 +57,28 @@ def last_notification_errors(self) -> list[str]:
with self._lock:
return list(self._last_notification_errors)

def start(self, scope: str | None = None) -> ScanStartResult:
return self._launch(scope=scope, resume_scan_id=None)
def active_origin(self) -> str | None:
    """Return the origin recorded for the active scan, or ``None`` if unset.

    The value is read while holding the manager lock.
    """
    with self._lock:
        origin = self._active_origin
    return origin

def resume(self, scan_id: str) -> ScanStartResult:
return self._launch(scope=None, resume_scan_id=scan_id)
def last_run_summary(self, origin: str | None = None) -> dict[str, Any] | None:
    """Return a copy of a stored run summary, or ``None`` if there is none.

    With ``origin`` given, the summary stored under that origin is returned.
    Without it, the summary with the greatest ``finished_at`` value is
    chosen, falling back to ``started_at`` and then to an empty string when
    those are missing or falsy. The result is a shallow copy, so callers
    cannot mutate the stored summary's top-level keys.
    """

    def recency(entry: dict[str, Any]) -> Any:
        return entry.get("finished_at") or entry.get("started_at") or ""

    with self._lock:
        summaries = self._last_run_summaries
        if origin is None:
            if not summaries:
                return None
            chosen = max(summaries.values(), key=recency)
        else:
            chosen = summaries.get(origin)
            if chosen is None:
                return None
        return dict(chosen)

def start(self, scope: str | None = None, *, origin: str = "manual") -> ScanStartResult:
    """Request a fresh scan (no resume id) and return the launch result.

    ``origin`` is passed through to ``_launch`` unchanged; it defaults to
    ``"manual"``.
    """
    return self._launch(scope, None, origin)

def resume(self, scan_id: str, *, origin: str = "manual") -> ScanStartResult:
    """Request resumption of scan ``scan_id`` and return the launch result.

    No scope is supplied when resuming; ``origin`` is passed through to
    ``_launch`` unchanged and defaults to ``"manual"``.
    """
    return self._launch(None, scan_id, origin)

def shutdown(self) -> None:
with self._lock:
Expand All @@ -69,7 +89,7 @@ def shutdown(self) -> None:
self._executor = ThreadPoolExecutor(max_workers=1, thread_name_prefix="sdog-scan")
executor.shutdown(wait=False)

def _launch(self, scope: str | None, resume_scan_id: str | None) -> ScanStartResult:
def _launch(self, scope: str | None, resume_scan_id: str | None, origin: str) -> ScanStartResult:
with self._lock:
if self._active_future is not None and not self._active_future.done():
active_scan_id = self._current_snapshot.scan_id if self._current_snapshot else None
Expand All @@ -82,12 +102,13 @@ def _launch(self, scope: str | None, resume_scan_id: str | None) -> ScanStartRes

self._current_snapshot = None
self._last_error = None
future = self._executor.submit(self._run_scan, scope, resume_scan_id)
self._active_origin = origin
future = self._executor.submit(self._run_scan, scope, resume_scan_id, origin)
self._active_future = future

return ScanStartResult(accepted=True)

def _run_scan(self, scope: str | None, resume_scan_id: str | None) -> None:
def _run_scan(self, scope: str | None, resume_scan_id: str | None, origin: str) -> None:
try:
scanner = Scanner(self._cfg, self._db)
if resume_scan_id:
Expand All @@ -99,6 +120,19 @@ def _run_scan(self, scope: str | None, resume_scan_id: str | None) -> None:
except Exception as e:
with self._lock:
self._last_error = str(e)
snapshot = self._last_snapshot
if self._active_origin == origin:
self._last_run_summaries[origin] = {
"state": "failed",
"scan_id": snapshot.scan_id if snapshot else None,
"started_at": snapshot.started_at if snapshot else None,
"finished_at": snapshot.finished_at if snapshot else None,
"processed": snapshot.processed if snapshot else 0,
"issues": (snapshot.corrupt + snapshot.unreadable) if snapshot else 0,
"last_error": str(e),
}
if self._active_origin == origin:
self._active_origin = None
raise

def _send_notifications(self, stats) -> None:
Expand Down Expand Up @@ -131,7 +165,7 @@ def _send_notifications(self, stats) -> None:
scan_id=stats.scan_id,
scope=scan.get("scope") or ",".join(self._cfg.paths),
duration_s=duration_s,
total_checked=stats.total,
total_checked=int(scan.get("total") or stats.total),
corrupt=corrupt,
unreadable=unreadable,
)
Expand All @@ -147,3 +181,14 @@ def _on_progress(self, snapshot: ScanProgressSnapshot) -> None:
self._last_snapshot = snapshot
if snapshot.state == "failed":
self._last_error = snapshot.last_error
if snapshot.state in {"completed", "failed", "interrupted"} and self._active_origin is not None:
self._last_run_summaries[self._active_origin] = {
"state": snapshot.state,
"scan_id": snapshot.scan_id,
"started_at": snapshot.started_at,
"finished_at": snapshot.finished_at,
"processed": snapshot.processed,
"issues": snapshot.corrupt + snapshot.unreadable,
"last_error": snapshot.last_error,
}
self._active_origin = None
14 changes: 12 additions & 2 deletions semanticdog/services/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def _trigger_due_run(self, now: datetime) -> None:
result_text = "started"
error_text = None
try:
result = self._scan_manager.start()
result = self._scan_manager.start(origin="scheduled")
if not result.accepted:
result_text = "skipped: scan already running"
except Exception as e:
Expand Down Expand Up @@ -130,11 +130,21 @@ def debug_set_next_run(self, value: datetime | None) -> None:

def as_dict(self) -> dict[str, Any]:
    """Serialise the scheduler state for the status payload.

    ``last_trigger_result`` starts from the scheduler's own trigger result
    and is replaced by the outcome of the last scheduled scan recorded by
    the scan manager, when that summary has a recognised state:

    * ``completed``   -> ``"completed"`` or ``"completed with N issue(s)"``
    * ``failed``      -> the recorded error text, or ``"failed"``
    * ``interrupted`` -> ``"interrupted"``

    Any other state leaves the trigger result untouched.
    """
    state = self.state()
    last_result = state.last_trigger_result

    completed = self._scan_manager.last_run_summary("scheduled")
    if completed is not None:
        outcome = completed.get("state")
        if outcome == "completed":
            issues = int(completed.get("issues") or 0)
            if issues == 0:
                last_result = "completed"
            else:
                last_result = f"completed with {issues} issue{'s' if issues != 1 else ''}"
        elif outcome == "failed":
            last_result = completed.get("last_error") or "failed"
        elif outcome == "interrupted":
            last_result = "interrupted"

    # Each key appears exactly once; a repeated key in a dict literal would
    # silently discard the earlier value.
    return {
        "enabled": state.enabled,
        "cron": state.cron,
        "next_run_at": state.next_run_at,
        "last_run_at": state.last_run_at,
        "last_trigger_result": last_result,
        "last_error": state.last_error,
    }
Loading
Loading