From 525b2bfc95df8029e003114940fd7dda0dbb8c77 Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Thu, 22 Jan 2026 01:42:53 -0600 Subject: [PATCH 01/15] Add Dispute Prediction Strategy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Strategy doc: exploit Polymarket resolution disputes via LLM analysis - Four paths: pre-dispute, post-proposal, active dispute, dispute initiation - Tiered LLM pipeline (fast screening → deep analysis with thinking models) - Kelly Criterion appendix with first-principles derivation - Strategy scaffold following existing TradingStrategy pattern --- docs/APPENDIX_KELLY_CRITERION.md | 85 +++++ docs/DISPUTE_PREDICTION_STRATEGY.md | 247 +++++++++++++ src/strategies/__init__.py | 4 +- src/strategies/dispute.py | 525 ++++++++++++++++++++++++++++ 4 files changed, 860 insertions(+), 1 deletion(-) create mode 100644 docs/APPENDIX_KELLY_CRITERION.md create mode 100644 docs/DISPUTE_PREDICTION_STRATEGY.md create mode 100644 src/strategies/dispute.py diff --git a/docs/APPENDIX_KELLY_CRITERION.md b/docs/APPENDIX_KELLY_CRITERION.md new file mode 100644 index 0000000..8beb9f3 --- /dev/null +++ b/docs/APPENDIX_KELLY_CRITERION.md @@ -0,0 +1,85 @@ +# Appendix: Kelly Criterion + +**Position Sizing From First Principles** + +--- + +## The Problem Kelly Was Solving + +In 1956, John Kelly was a physicist at Bell Labs working on information theory. He was interested in a practical question: + +> **If you have an edge in a repeated bet, how much should you wager each time to maximize long-term wealth?** + +The naive answer is "bet everything" — if you have an edge, more is better. But this leads to ruin: one loss and you're wiped out. + +The opposite extreme — betting a tiny fixed amount — is safe but leaves money on the table. 
+ +Kelly wanted the optimal middle ground: **the bet size that maximizes the expected growth rate of your bankroll over many repeated bets.** + +--- + +## The Insight + +Kelly realized this was an information theory problem. He framed it as: you have a noisy channel (your edge) transmitting information (the correct bet). How do you maximize the information rate? + +His key insight: **you should bet a fraction of your bankroll proportional to your edge.** + +--- + +## The Formula + +For a simple bet with probability `p` of winning and odds `b` (you get back `b` times your bet if you win): + +``` +f* = (bp - q) / b + +where: + f* = fraction of bankroll to bet + p = probability of winning + q = probability of losing (1 - p) + b = decimal odds - 1 (what you win per dollar risked) +``` + +**Example:** +- You have 60% chance to win (p = 0.6) +- Odds are even money (b = 1, you win $1 for every $1 risked) + +``` +f* = (1 × 0.6 - 0.4) / 1 = 0.2 +``` + +You should bet 20% of your bankroll. + +--- + +## Why It Works + +Kelly sizing has a special property: it maximizes the **geometric mean** of returns (equivalently, the expected log of wealth). This means: + +1. **You never go broke** — You're always betting a fraction, never everything +2. **You grow faster than any other strategy** — In the long run, Kelly beats all other fixed-fraction approaches +3. **Bet size scales with edge** — Bigger edge = bigger bet, no edge = no bet + +--- + +## Why We Use Fractional Kelly + +Full Kelly is aggressive. In practice, we use a fraction (like 25%) because: +- Our probability estimates have uncertainty +- Drawdowns with full Kelly can be 50%+ (psychologically brutal) +- Model errors compound; fractional Kelly provides a buffer + +For dispute trading specifically, we also discount for: +- Confidence in our dispute prediction +- Probability of INVALID resolution (both sides lose) + +--- + +## Further Reading + +- Kelly, J.L. (1956). 
"A New Interpretation of Information Rate" — The original paper +- Thorp, E.O. "The Kelly Criterion in Blackjack, Sports Betting, and the Stock Market" — Practical applications + +--- + +*PR3DICT Documentation* diff --git a/docs/DISPUTE_PREDICTION_STRATEGY.md b/docs/DISPUTE_PREDICTION_STRATEGY.md new file mode 100644 index 0000000..d32d7c1 --- /dev/null +++ b/docs/DISPUTE_PREDICTION_STRATEGY.md @@ -0,0 +1,247 @@ +# Dispute Prediction Strategy + +**PR3DICT: Polymarket Resolution Edge** + +--- + +## Core Thesis + +``` +┌─────────────────────────────────────────────────────────────────────────────┐ +│ THE DISPUTE EDGE │ +├─────────────────────────────────────────────────────────────────────────────┤ +│ │ +│ Traditional Trader: │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Predict │ ───► │ Trade │ ───► │ Wait for │ │ +│ │ Real Event │ │ YES/NO │ │ Resolution │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ │ +│ OUR APPROACH: │ +│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ +│ │ Predict │ ───► │ Position │ ───► │ Profit from │ │ +│ │ RESOLUTION │ │ Accordingly │ │ Resolution │ │ +│ │ PROBLEMS │ │ │ │ Chaos │ │ +│ └──────────────┘ └──────────────┘ └──────────────┘ │ +│ │ +│ WHY THIS WORKS: │ +│ • Resolution ambiguity creates price volatility │ +│ • UMA voting is predictable (whale patterns, historical bias) │ +│ • Most traders don't model this risk → mispricing │ +│ │ +└─────────────────────────────────────────────────────────────────────────────┘ +``` + +--- + +## How Polymarket Works + +### Market Creation + +Anyone can create a Polymarket by defining: + +1. **Question** — A yes/no question (e.g., "Will Bitcoin reach $100K by March 1, 2026?") +2. **Resolution Rules** — Detailed criteria for what constitutes YES vs NO +3. **Resolution Source** — The authoritative source for determining the outcome (e.g., CoinGecko price, AP news call) +4. 
**End Date** — When the market closes and resolution begins + +Once created, traders buy YES or NO shares. Prices reflect the market's probability estimate. If YES wins, YES shares pay $1 each, NO shares pay $0 (and vice versa). + +### Resolution Process (UMA Optimistic Oracle) + +When a market's end date passes: + +``` +┌─────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Market │────►│ PROPOSAL │────►│ SETTLEMENT │ +│ Closes │ │ (2-4 hr wait) │ │ (Payout) │ +└─────────────┘ └────────┬────────┘ └─────────────────┘ + │ + Anyone disputes? + │ + ┌────────┴────────┐ + │ │ + NO YES + │ │ + ▼ ▼ + ┌─────────────┐ ┌─────────────────────┐ + │ Proposal │ │ UMA DVM VOTE │ + │ Accepted │ │ (48-96 hours) │ + │ │ │ │ + │ Normal │ │ UMA token holders │ + │ payout │ │ vote on correct │ + └─────────────┘ │ outcome │ + │ │ + │ Result is FINAL │ + └─────────────────────┘ +``` + +**Step by step:** + +1. **Proposal** — Anyone can propose an outcome (YES, NO, or INVALID). Proposer posts a bond (~$1,500 USDC). + +2. **Challenge Window** — 2-4 hours where anyone can dispute by posting a counter-bond. + +3. **If No Dispute** — Proposal is accepted, market settles, traders get paid. + +4. **If Disputed** — Escalates to UMA's Data Verification Mechanism (DVM). UMA token holders vote over 48-96 hours. The majority vote determines the outcome. Winning side gets losing side's bond. + +### Why Disputes Happen + +- **Ambiguous contract language** — "Will X happen?" 
but X isn't precisely defined +- **Edge cases** — Something happened that the contract didn't anticipate +- **Resolution source issues** — Source is unavailable, contradictory, or changed +- **Timing ambiguity** — Timezone issues, exact moment unclear +- **Subjective criteria** — Contract requires interpretation + +--- + +## Four Ways to Capitalize + +| # | Strategy | Trigger | Our Edge | +|---|----------|---------|----------| +| 1 | **Pre-Dispute Detection** | Market approaching close | Identify dispute-prone markets before others | +| 2 | **Post-Proposal Detection** | Proposal submitted, window open | Predict incoming dispute, exit/hedge | +| 3 | **Active Dispute Trading** | DVM voting in progress | Predict how UMA voters will rule | +| 4 | **Dispute Initiation** | We identify incorrect proposal | File dispute ourselves, profit from correction | + +### Strategy 1: Pre-Dispute Detection + +Scan markets approaching close. If our analysis says P(dispute) > `DISPUTE_THRESHOLD` (likely ~30%) and we can predict the DVM outcome, take a position before liquidity disappears. + +### Strategy 2: Post-Proposal Detection + +Monitor proposals during challenge window. If signals suggest dispute is imminent, exit unfavorable positions or enter at distressed prices. + +### Strategy 3: Active Dispute Trading + +Once a dispute is filed, predict how UMA token holders will vote. If we have high confidence (likely ~70%+) and the market is mispriced, trade the expected DVM outcome. + +### Strategy 4: Dispute Initiation + +The highest-edge opportunity: when we identify that a proposed outcome is **wrong** per the contract terms. We position, file the dispute ourselves (posting bond), and profit when the DVM rules in our favor. Requires high confidence (likely ~75%+) since we're risking the bond. + +--- + +## The Analysis Engine + +### Approach: LLM-First + +We use modern LLMs with structured thinking to analyze markets, not traditional NLP or feature engineering. 
The workflow: + +``` +┌──────────────────────────────────────────────────────────────────────────┐ +│ ANALYSIS PIPELINE │ +│ │ +│ Every ~6 hours: │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ Fetch markets in target range: │ │ +│ │ • Liquidity > MIN_LIQUIDITY (likely ~$5K) │ │ +│ │ • Closes within DAYS_TO_CLOSE (likely ~14 days) │ │ +│ │ • Not already analyzed this cycle │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ TIER 1: Fast LLM Screening │ │ +│ │ ───────────────────────── │ │ +│ │ Quick pass with lighter model (Claude Haiku / GPT-4o-mini) │ │ +│ │ "Does this contract have obvious ambiguity or edge cases?" │ │ +│ │ │ │ +│ │ Output: PASS (low risk) or FLAG (needs deeper review) │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────┴─────────┐ │ +│ │ │ │ +│ PASS FLAG │ +│ │ │ │ +│ ▼ ▼ │ +│ Log & skip ┌───────────────────────────────────┐ │ +│ │ TIER 2: Deep Analysis │ │ +│ │ ──────────────────── │ │ +│ │ Extended thinking model │ │ +│ │ (Claude Opus/gemini 3 pro thinking max)│ +│ │ │ │ +│ │ Analyze: │ │ +│ │ • Contract ambiguities │ │ +│ │ • Edge cases not covered │ │ +│ │ • Resolution source reliability │ │ +│ │ • Historical similar disputes │ │ +│ │ • Likely DVM outcome if disputed │ │ +│ │ │ │ +│ │ Output: │ │ +│ │ • P(dispute) │ │ +│ │ • P(YES|DVM), P(NO|DVM) │ │ +│ │ • Confidence │ │ +│ │ • Reasoning │ │ +│ └───────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌───────────────────────────────────┐ │ +│ │ Signal Generation │ │ +│ │ IF P(dispute) > DISPUTE_THRESHOLD│ │ +│ │ AND confidence > MIN_CONFIDENCE │ │ +│ │ AND edge vs market price exists │ │ +│ │ → Generate trade signal │ │ +│ └───────────────────────────────────┘ │ +│ │ +└──────────────────────────────────────────────────────────────────────────┘ +``` + +### Why LLM-First Works Here + +Contract 
ambiguity analysis is fundamentally a **language understanding** problem. Modern LLMs excel at: +- Identifying vague or undefined terms +- Reasoning about edge cases +- Understanding intent vs literal wording +- Comparing to similar past situations (via RAG) + +Traditional NLP (keyword counting, sentiment scores) misses nuance. A contract might have zero "ambiguous" keywords but still have a critical undefined edge case that an LLM can reason about. + +### RAG Knowledge Store + +We maintain a vector database of: +- Past market analyses (what we predicted, what happened) +- Historical disputes (contract text, dispute arguments, DVM outcomes) +- Our learnings (why we were wrong, what patterns we've found) + +When analyzing a new market, we retrieve similar past cases to inform the LLM's analysis. This creates a learning loop. + +--- + +## Position Sizing + +We use **Kelly Criterion** — bet a fraction of bankroll proportional to your edge. This maximizes long-term growth while avoiding ruin. We apply fractional Kelly (~25%) with additional discounts for prediction confidence and P(INVALID). Capped at `MAX_POSITION_PCT` (likely ~5%) per trade. + +See [Appendix: Kelly Criterion](./APPENDIX_KELLY_CRITERION.md) for full derivation and theory. 
+ +--- + +## Parameters Summary + +| Parameter | Description | Likely Value | +|-----------|-------------|--------------| +| `SCAN_INTERVAL` | How often to fetch and screen markets | ~6 hours | +| `MIN_LIQUIDITY` | Minimum market liquidity to consider | ~$5,000 | +| `DAYS_TO_CLOSE` | How far out to scan for approaching markets | ~14 days | +| `DISPUTE_THRESHOLD` | Minimum P(dispute) to generate signal | ~30% | +| `MIN_CONFIDENCE` | Minimum model confidence to act | ~60% | +| `DVM_CONFIDENCE_THRESHOLD` | Confidence needed to trade DVM outcome | ~70% | +| `INITIATION_CONFIDENCE` | Confidence needed to file dispute ourselves | ~75% | +| `KELLY_FRACTION` | Fraction of full Kelly to use | ~25% | +| `MAX_POSITION_PCT` | Maximum position as % of bankroll | ~5% | + +--- + +## Next Steps + +1. **Build data pipeline** — Gamma API fetcher, store markets in Postgres +2. **Build Tier 1 screener** — Fast LLM pass to flag ambiguous contracts +3. **Build Tier 2 analyzer** — Extended thinking analysis with RAG +4. **Backtest on historical disputes** — How well would we have predicted? +5. 
**Paper trade** — Run live without real capital + +--- + +*Document Version: 0.3* +*Last Updated: 2026-01-22* diff --git a/src/strategies/__init__.py b/src/strategies/__init__.py index d7bb303..926d6c7 100644 --- a/src/strategies/__init__.py +++ b/src/strategies/__init__.py @@ -1,8 +1,10 @@ # Trading Strategies from .base import TradingStrategy, Signal from .arbitrage import ArbitrageStrategy, CrossPlatformArbitrage +from .dispute import DisputePredictionStrategy, DisputeAnalysis __all__ = [ "TradingStrategy", "Signal", - "ArbitrageStrategy", "CrossPlatformArbitrage" + "ArbitrageStrategy", "CrossPlatformArbitrage", + "DisputePredictionStrategy", "DisputeAnalysis" ] diff --git a/src/strategies/dispute.py b/src/strategies/dispute.py new file mode 100644 index 0000000..6f571d6 --- /dev/null +++ b/src/strategies/dispute.py @@ -0,0 +1,525 @@ +""" +PR3DICT: Dispute Prediction Strategy + +Exploits information asymmetry around Polymarket resolution: +1. Predict which markets will be disputed before other participants +2. Anticipate DVM voting outcomes via UMA token holder patterns +3. Trade resulting price dislocations during resolution uncertainty + +Key Insight: Most traders predict the underlying event. + We predict the resolution mechanism. +""" +from typing import List, Optional, Dict, Any +from decimal import Decimal +from datetime import datetime, timezone, timedelta +from dataclasses import dataclass +import logging + +from .base import TradingStrategy, Signal +from ..platforms.base import Market, Position, OrderSide + +logger = logging.getLogger(__name__) + + +@dataclass +class DisputeAnalysis: + """Analysis results for a single market's dispute potential.""" + market_id: str + + # Dispute probability components + ambiguity_score: float # 0-1, from LLM contract analysis + historical_dispute_rate: float # Category/creator history + market_dynamics_score: float # Volatility, concentration, etc. 
+ sentiment_controversy: float # Social signal strength + + # Combined probability + dispute_probability: float # P(market will be disputed) + + # DVM outcome predictions (conditional on dispute) + dvm_yes_probability: float # P(YES | disputed) + dvm_no_probability: float # P(NO | disputed) + dvm_invalid_probability: float # P(INVALID | disputed) + + # Confidence and metadata + confidence: float # Model confidence in predictions + analysis_timestamp: datetime + reasoning: str # Human-readable explanation + + # Edge cases identified + edge_cases: List[str] + ambiguous_terms: List[str] + + +class DisputePredictionStrategy(TradingStrategy): + """ + Scans for markets likely to face resolution disputes and trades + the expected DVM outcome. + + THREE PROFIT WINDOWS: + + 1. PRE-DISPUTE DETECTION (highest edge) + - Identify dispute-prone markets before close + - Position for expected volatility or DVM outcome + + 2. POST-PROPOSAL, PRE-DISPUTE + - Detect high-probability disputes in challenge window + - Exit or hedge before liquidity dries up + + 3. 
ACTIVE DISPUTE (DVM VOTING) + - Predict UMA voter behavior + - Take position aligned with projected DVM outcome + """ + + def __init__( + self, + # Dispute detection thresholds + min_dispute_probability: float = 0.30, + min_dvm_confidence: float = 0.65, + + # Market filters + min_liquidity: Decimal = Decimal("5000"), + max_days_to_close: int = 14, + min_days_to_close: int = 0, # 0 = include closing today + + # Risk parameters + max_position_pct: float = 0.05, # Max 5% of bankroll per trade + kelly_fraction: float = 0.25, # Quarter Kelly for safety + + # Analysis components (injected dependencies) + contract_analyzer: Optional[Any] = None, + uma_analyzer: Optional[Any] = None, + sentiment_analyzer: Optional[Any] = None, + ): + self.min_dispute_probability = min_dispute_probability + self.min_dvm_confidence = min_dvm_confidence + self.min_liquidity = min_liquidity + self.max_days_to_close = max_days_to_close + self.min_days_to_close = min_days_to_close + self.max_position_pct = max_position_pct + self.kelly_fraction = kelly_fraction + + # Analysis components - will be implemented in Phase 2-3 + self._contract_analyzer = contract_analyzer + self._uma_analyzer = uma_analyzer + self._sentiment_analyzer = sentiment_analyzer + + # Cache for analyzed markets (avoid re-analyzing) + self._analysis_cache: Dict[str, DisputeAnalysis] = {} + + @property + def name(self) -> str: + return "dispute_prediction" + + async def scan_markets(self, markets: List[Market]) -> List[Signal]: + """ + Scan markets approaching resolution for dispute potential. + + Pipeline: + 1. Filter to markets in target window (approaching close) + 2. Run dispute probability model + 3. For high P(dispute) markets, predict DVM outcome + 4. 
Generate signals where we have edge vs current price + """ + signals = [] + now = datetime.now(timezone.utc) + + for market in markets: + # === FILTER STAGE === + if not self._is_candidate(market, now): + continue + + # === ANALYSIS STAGE === + analysis = await self._analyze_market(market) + + if analysis is None: + continue + + # === SIGNAL GENERATION === + signal = self._generate_signal(market, analysis) + + if signal is not None: + signals.append(signal) + logger.info( + f"DISPUTE SIGNAL: {market.ticker} | " + f"P(dispute)={analysis.dispute_probability:.2%} | " + f"P(YES|DVM)={analysis.dvm_yes_probability:.2%} | " + f"Current YES={market.yes_price}" + ) + + return signals + + def _is_candidate(self, market: Market, now: datetime) -> bool: + """Filter markets to those worth analyzing.""" + # Skip resolved markets + if market.resolved: + return False + + # Skip low liquidity + if market.liquidity < self.min_liquidity: + return False + + # Check time window + time_to_close = market.close_time - now + days_to_close = time_to_close.total_seconds() / 86400 + + if days_to_close < self.min_days_to_close: + return False + if days_to_close > self.max_days_to_close: + return False + + # Only Polymarket (UMA oracle) + if market.platform != "polymarket": + return False + + return True + + async def _analyze_market(self, market: Market) -> Optional[DisputeAnalysis]: + """ + Run full dispute analysis on a market. + + Combines: + - Contract text analysis (LLM) + - Historical patterns + - Market dynamics + - Sentiment signals + """ + # Check cache first + cache_key = f"{market.id}:{market.close_time.isoformat()}" + if cache_key in self._analysis_cache: + cached = self._analysis_cache[cache_key] + # Refresh if older than 1 hour + if (datetime.now(timezone.utc) - cached.analysis_timestamp).seconds < 3600: + return cached + + # === PLACEHOLDER: Implement in Phase 2-3 === + # + # This is where the real magic happens. 
For now, return None + # to skip all markets until analysis components are built. + # + # Real implementation will: + # 1. Call contract_analyzer.analyze(market.description, market.title) + # 2. Query uma_analyzer.get_historical_patterns(market.category) + # 3. Calculate market_dynamics from price/volume data + # 4. Get sentiment from sentiment_analyzer + # 5. Combine into DisputeAnalysis + + if self._contract_analyzer is None: + logger.debug(f"Skipping {market.ticker}: No analyzer configured") + return None + + # TODO: Implement actual analysis pipeline + # analysis = DisputeAnalysis( + # market_id=market.id, + # ambiguity_score=await self._contract_analyzer.analyze(market), + # ... + # ) + # self._analysis_cache[cache_key] = analysis + # return analysis + + return None + + def _generate_signal( + self, + market: Market, + analysis: DisputeAnalysis + ) -> Optional[Signal]: + """ + Generate trading signal based on dispute analysis. + + Logic: + 1. If P(dispute) < threshold, skip (no edge) + 2. If DVM outcome uncertain, skip (can't position) + 3. Compare predicted DVM outcome to current market price + 4. 
Signal if significant mispricing exists + """ + # Check dispute probability threshold + if analysis.dispute_probability < self.min_dispute_probability: + return None + + # Check DVM prediction confidence + dvm_probs = [ + analysis.dvm_yes_probability, + analysis.dvm_no_probability, + analysis.dvm_invalid_probability + ] + max_dvm_prob = max(dvm_probs) + + if max_dvm_prob < self.min_dvm_confidence: + # DVM outcome too uncertain + return None + + # Determine expected outcome + if analysis.dvm_yes_probability == max_dvm_prob: + expected_outcome = OrderSide.YES + expected_prob = analysis.dvm_yes_probability + current_price = market.yes_price + elif analysis.dvm_no_probability == max_dvm_prob: + expected_outcome = OrderSide.NO + expected_prob = analysis.dvm_no_probability + current_price = market.no_price + else: + # INVALID most likely - special case + # Both YES and NO should trade toward 0.50 + # Could short the overpriced side + logger.info( + f"INVALID likely for {market.ticker}: " + f"P(INVALID)={analysis.dvm_invalid_probability:.2%}" + ) + # For now, skip INVALID predictions - complex to trade + return None + + # Calculate edge: our expected value vs current price + # Account for dispute probability in expected value + # + # EV = P(no_dispute) * P(outcome|no_dispute) * payout + # + P(dispute) * P(outcome|DVM) * payout + # + # Simplified: assume P(outcome|no_dispute) ≈ current market price + p_no_dispute = 1 - analysis.dispute_probability + p_dispute = analysis.dispute_probability + + # Our edge comes from the dispute scenario + # If market prices dispute as less likely than we think, + # and we know the DVM outcome, we have edge + + # Calculate implied probability from current price + implied_prob = float(current_price) + + # Our probability estimate incorporates dispute scenario + our_prob = ( + p_no_dispute * implied_prob + # Normal resolution + p_dispute * expected_prob # Dispute → DVM outcome + ) + + # Edge = our prob - market implied prob + edge = our_prob - 
implied_prob + + # Only signal if edge exceeds threshold (e.g., 5%) + min_edge = 0.05 + if edge < min_edge: + return None + + # Calculate signal strength (0-1) + # Higher dispute probability + higher DVM confidence = stronger + strength = ( + analysis.dispute_probability * + analysis.confidence * + edge + ) + + return Signal( + market_id=market.id, + market=market, + side=expected_outcome, + strength=min(1.0, strength), + reason=( + f"Dispute prediction: P(dispute)={analysis.dispute_probability:.1%}, " + f"P({expected_outcome.value}|DVM)={expected_prob:.1%}, " + f"Edge={edge:.1%}. {analysis.reasoning}" + ), + target_price=current_price, + ) + + async def check_exit( + self, + position: Position, + market: Market + ) -> Optional[Signal]: + """ + Check exit conditions for dispute-based positions. + + Exit when: + 1. Market resolved (dispute didn't happen or DVM complete) + 2. Our dispute prediction confidence dropped significantly + 3. Dispute happened but DVM trending against us + 4. Position in profit and want to lock gains + """ + # Re-analyze the market + analysis = await self._analyze_market(market) + + if analysis is None: + # Can't analyze - hold for now + return None + + # If dispute probability dropped significantly, consider exit + if analysis.dispute_probability < self.min_dispute_probability * 0.5: + return Signal( + market_id=market.id, + market=market, + side=OrderSide.NO if position.side == OrderSide.YES else OrderSide.YES, + strength=0.5, + reason="Dispute probability dropped - thesis invalidated" + ) + + # If DVM outcome shifted against our position + if position.side == OrderSide.YES: + if analysis.dvm_no_probability > analysis.dvm_yes_probability + 0.2: + return Signal( + market_id=market.id, + market=market, + side=OrderSide.NO, + strength=0.7, + reason="DVM prediction shifted to NO - exiting YES position" + ) + + # Hold otherwise - let it play out + return None + + def get_position_size( + self, + signal: Signal, + account_balance: Decimal, + 
risk_pct: float = None + ) -> int: + """ + Calculate position size using Kelly Criterion with adjustments. + + Adjustments for dispute strategy: + - Apply confidence discount + - Apply P(INVALID) discount + - Cap at max_position_pct + - Use fractional Kelly for safety + """ + # Get analysis for this market + cache_key = f"{signal.market_id}:{signal.market.close_time.isoformat()}" + analysis = self._analysis_cache.get(cache_key) + + if analysis is None: + # Fall back to base implementation + return super().get_position_size(signal, account_balance) + + # Entry price + if signal.target_price: + entry_price = float(signal.target_price) + elif signal.side == OrderSide.YES: + entry_price = float(signal.market.yes_price) + else: + entry_price = float(signal.market.no_price) + + if entry_price <= 0 or entry_price >= 1: + return 1 # Minimum size + + # Implied odds: b = (1/price) - 1 + b = (1 / entry_price) - 1 + + # Our probability estimate + if signal.side == OrderSide.YES: + p = analysis.dvm_yes_probability + else: + p = analysis.dvm_no_probability + + q = 1 - p + + # Standard Kelly: f* = (bp - q) / b + if b <= 0: + return 1 + + kelly = (b * p - q) / b + + if kelly <= 0: + return 0 # No bet - negative edge + + # Apply adjustments + # 1. Confidence discount + kelly *= analysis.confidence + + # 2. P(INVALID) discount - could lose everything + kelly *= (1 - analysis.dvm_invalid_probability) + + # 3. Fractional Kelly for safety + kelly *= self.kelly_fraction + + # 4. Cap at max position + kelly = min(kelly, self.max_position_pct) + + # Calculate contract count + position_value = float(account_balance) * kelly + contracts = int(position_value / entry_price) + + return max(1, contracts) if kelly > 0 else 0 + + +# === STUB CLASSES FOR PHASE 2-3 IMPLEMENTATION === + +class ContractAnalyzer: + """ + LLM-based contract ambiguity analysis. 
+ + Analyzes market question + description for: + - Ambiguous terms + - Edge cases not covered + - Resolution source reliability + - Subjective criteria + + TODO: Implement in Phase 2 + """ + + def __init__(self, llm_client=None): + self.llm_client = llm_client + + async def analyze(self, market: Market) -> Dict[str, Any]: + """ + Analyze contract for dispute potential. + + Returns dict with: + - ambiguity_score: 0-1 + - edge_cases: List[str] + - ambiguous_terms: List[str] + - resolution_source_risk: low/medium/high + - reasoning: str + """ + raise NotImplementedError("Implement in Phase 2") + + +class UMAVoterAnalyzer: + """ + Analyzes UMA token holder voting patterns. + + Tracks: + - Whale addresses and their historical votes + - Category-specific voting biases + - Vote momentum during active disputes + + TODO: Implement in Phase 3 + """ + + def __init__(self, subgraph_url: str = None): + self.subgraph_url = subgraph_url or "https://api.thegraph.com/subgraphs/name/umaprotocol/uma-voting" + + async def get_historical_dispute_rate(self, category: str) -> float: + """Get historical dispute rate for market category.""" + raise NotImplementedError("Implement in Phase 3") + + async def predict_dvm_outcome( + self, + market: Market, + dispute_context: Dict[str, Any] + ) -> Dict[str, float]: + """ + Predict DVM voting outcome. + + Returns: {"YES": p, "NO": p, "INVALID": p} + """ + raise NotImplementedError("Implement in Phase 3") + + +class SentimentAnalyzer: + """ + Monitors social channels for dispute signals. + + Sources: + - Polymarket Discord + - Twitter/X mentions + - UMA Discord + - Reddit r/Polymarket + + TODO: Implement in Phase 2-3 + """ + + async def get_controversy_score(self, market: Market) -> float: + """ + Calculate controversy score from social signals. 
+ + Higher score = more disagreement about outcome + """ + raise NotImplementedError("Implement in Phase 2-3") From a35b758687977586d2e055cd9c92f08bc11f5b4e Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Sun, 25 Jan 2026 20:27:25 -0600 Subject: [PATCH 02/15] Add Polymarket market scanner for dispute prediction strategy Verified & working: - src/data/scanner.py: Fetches markets from Gamma API (tested) - src/data/database.py: SQLite storage for markets & analyses (tested) - AGENTS.md: AI assistant documentation Updated docs: - README.md: Accurate current state, multi-strategy structure - docs/DISPUTE_PREDICTION_STRATEGY.md: Marked data pipeline complete - .gitignore: Added database, credentials, local partner files - requirements.txt: Added aiosqlite Removed: - src/strategies/dispute.py: Unverified stub code (will rebuild when ready) Usage: python -m src.data.scanner --show-unanalyzed --- .gitignore | 16 + AGENTS.md | 86 +++++ README.md | 119 ++++--- data/.gitkeep | 0 docs/DISPUTE_PREDICTION_STRATEGY.md | 10 +- requirements.txt | 3 + src/data/__init__.py | 4 + src/data/database.py | 300 ++++++++++++++++ src/data/scanner.py | 252 +++++++++++++ src/strategies/__init__.py | 4 +- src/strategies/dispute.py | 525 ---------------------------- 11 files changed, 749 insertions(+), 570 deletions(-) create mode 100644 AGENTS.md create mode 100644 data/.gitkeep create mode 100644 src/data/database.py create mode 100644 src/data/scanner.py delete mode 100644 src/strategies/dispute.py diff --git a/.gitignore b/.gitignore index 607cb51..ce90d5c 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,19 @@ logs/ *.csv *.json !config/*.json + +# Database +*.db +*.sqlite +*.sqlite3 + +# Local partner-specific files (each partner creates their own) +# Create a file named .gitignore.local with your personal ignores +.gitignore.local +local/ + +# Personal notes/scratch (convention: prefix with your initials) +# e.g., nate_notes.md, alex_scratch.py +*_notes.md +*_scratch.* +*_local.* diff 
--git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..d0c2ac8 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,86 @@ +# PR3DICT Agent Instructions + +This document helps AI coding assistants understand and work with this codebase. + +## Project Overview + +PR3DICT is a multi-strategy prediction market trading system. Current focus: **Dispute Prediction** on Polymarket. + +**Important Principles:** +1. This is a multi-strategy repo - only modify dispute strategy related code +2. Arbitrage strategy and trading engine are separate - don't touch them +3. Only build features that have been discussed and verified +4. All credentials must be gitignored + +## Architecture + +``` +src/ +├── data/ # Market data ingestion (DISPUTE FOCUS) +│ ├── scanner.py # Fetches markets from Polymarket Gamma API +│ └── database.py # SQLite storage for markets & analyses +├── strategies/ # Trading strategies (arbitrage is separate) +├── platforms/ # API wrappers (Polymarket, Kalshi) +├── engine/ # Core trading engine (not dispute-related) +└── risk/ # Position sizing (not dispute-related) +``` + +## What's Been Built & Verified (Dispute Strategy) + +### Market Scanner (`src/data/scanner.py`) +- Polls Polymarket Gamma API for markets in target liquidity range +- Stores markets in SQLite for tracking +- **Tested and working** - no auth needed for read-only access +- Run: `python -m src.data.scanner --show-unanalyzed` + +### Database (`src/data/database.py`) +- SQLite schema for `markets` and `analyses` tables +- Tracks which markets have been analyzed +- **Tested and working** + +### Strategy Documentation +- `docs/DISPUTE_PREDICTION_STRATEGY.md` - Strategy overview +- `docs/APPENDIX_KELLY_CRITERION.md` - Position sizing theory + +## What's NOT Built Yet (Dispute Strategy) +- LLM analysis pipeline (Tier 1 screening, Tier 2 deep analysis) +- Dispute probability scoring +- Trade execution for dispute strategy +- RAG/Vector database for learning + +## Setup & Commands + +```bash +# Create 
virtual environment +python3 -m venv venv +source venv/bin/activate + +# Install dependencies +pip install -r requirements.txt + +# Run market scanner +python -m src.data.scanner --show-unanalyzed + +# Query database +sqlite3 data/markets.db "SELECT question, liquidity FROM markets ORDER BY liquidity DESC LIMIT 10;" +``` + +## Polymarket API Notes + +- **Gamma API** (read-only, no auth): `https://gamma-api.polymarket.com` +- Requires `User-Agent` header or returns 403 +- Key fields: `question`, `description`, `resolutionSource`, `umaResolutionStatus`, `liquidityNum` + +## Credentials + +All credentials are gitignored. To set up: +```bash +cp config/example.env config/.env +# Edit with your credentials +``` + +## Development Guidelines + +- **Don't over-build** - only implement features that have been discussed +- **Test before committing** - verify new code works +- **Update this doc** - after adding verified features diff --git a/README.md b/README.md index e07d877..8be93e1 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,69 @@ # PR3DICT -**Automated Prediction Market Trading System** +**Multi-Strategy Prediction Market Trading System** --- -## Executive Summary +## Quick Start -PR3DICT applies the battle-tested ST0CK methodology to prediction markets. It leverages the unified trading engine architecture, systematic risk management, and multi-platform API integration to exploit inefficiencies in this rapidly growing $200B+ industry. +```bash +# 1. Clone the repo +git clone https://github.com/aerichmo/PR3DICT.git +cd PR3DICT -### Target Platforms -- **Kalshi** — CFTC-regulated, REST/WebSocket/FIX APIs, Market Maker Program -- **Polymarket** — Blockchain-native (Polygon/USDC), high liquidity on political/crypto events +# 2. Create virtual environment +python3 -m venv venv +source venv/bin/activate # Windows: venv\Scripts\activate -### Core Strategy Edges -1. **Arbitrage** — Binary complement, cross-platform, latency -2. 
**Market Making** — Bid-ask spread capture, inventory management -3. **Behavioral** — Longshot bias exploitation, overreaction reversion -4. **Informational** — AI-driven probability forecasting +# 3. Install dependencies +pip install -r requirements.txt -### Architecture (from ST0CK) -| Component | Application | -|-----------|-------------| -| Unified Engine | Strategy pattern for parallel signal testing | -| Redis Cache | Multi-TTL for orderbooks, probability trends, metadata | -| Risk Management | Kelly Criterion + Portfolio Heat + Daily Loss Limits | -| API Layer | Unified wrappers for cross-platform operations | +# 4. Configure credentials (for trading - not needed for scanning) +cp config/example.env config/.env +# Edit .env with your API keys +``` + +--- + +## Strategies + +| Strategy | Platform | Status | +|----------|----------|--------| +| **Arbitrage** | Kalshi, Polymarket | 🔵 Implemented | +| **Dispute Prediction** | Polymarket | 🔨 In Development | + +### Arbitrage Strategy +Exploits price inefficiencies: +- Binary complement (YES + NO < $1.00) +- Cross-platform differentials + +### Dispute Prediction Strategy (In Development) +Exploits Polymarket resolution mechanism: +- Identify markets likely to be disputed +- Position before resolution chaos +- See `docs/DISPUTE_PREDICTION_STRATEGY.md` + +--- + +## Dispute Strategy: Current Progress + +```bash +# Scan Polymarket for markets (no API key needed) +python -m src.data.scanner --show-unanalyzed + +# View stored markets +sqlite3 data/markets.db "SELECT question, liquidity FROM markets ORDER BY liquidity DESC LIMIT 10;" +``` + +**What's working:** +- [x] Market scanner (Polymarket Gamma API) +- [x] SQLite database for tracking markets +- [x] Strategy documentation + +**What's next:** +- [ ] LLM analysis pipeline +- [ ] Dispute probability scoring +- [ ] Trade execution --- @@ -33,39 +72,41 @@ PR3DICT applies the battle-tested ST0CK methodology to prediction markets. 
It le ``` PR3DICT/ ├── src/ +│ ├── data/ # Market scanner & database +│ ├── strategies/ # Trading strategies +│ ├── platforms/ # Kalshi, Polymarket APIs │ ├── engine/ # Core trading engine -│ ├── strategies/ # Arbitrage, market-making, behavioral -│ ├── platforms/ # Kalshi, Polymarket API wrappers -│ ├── data/ # Market data ingestion & caching -│ └── risk/ # Position sizing, kill-switches -├── config/ # Platform credentials, strategy params -├── tests/ # Unit + integration tests +│ └── risk/ # Position sizing +├── data/ # SQLite database (gitignored) +├── config/ # Environment config └── docs/ # Strategy documentation ``` --- -## Quick Start +## Documentation -```bash -# Clone and install -git clone -cd PR3DICT -pip install -r requirements.txt +| Document | Description | +|----------|-------------| +| `docs/DISPUTE_PREDICTION_STRATEGY.md` | Dispute strategy overview | +| `docs/APPENDIX_KELLY_CRITERION.md` | Position sizing theory | +| `AGENTS.md` | AI assistant instructions | -# Configure credentials -cp config/example.env config/.env -# Edit .env with Kalshi/Polymarket API keys +--- -# Run (paper mode) -python -m src.engine.main --mode paper -``` +## Collaboration ---- +Multi-contributor repo. Each partner can: +- Create `.gitignore.local` for personal ignores +- Use `local/` directory for scratch files +- Prefix personal files with initials (e.g., `nate_notes.md`) -## Status +### Security: Credentials Are Local Only -🚧 **Phase 1: Foundation** — Building core engine and platform integrations. 
+**Never commit credentials.** These are gitignored: +- `config/.env` — API keys, wallet private keys +- `*.env` — All environment files +- `data/*.db` — Local database --- diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/DISPUTE_PREDICTION_STRATEGY.md b/docs/DISPUTE_PREDICTION_STRATEGY.md index d32d7c1..d3af50d 100644 --- a/docs/DISPUTE_PREDICTION_STRATEGY.md +++ b/docs/DISPUTE_PREDICTION_STRATEGY.md @@ -235,13 +235,17 @@ See [Appendix: Kelly Criterion](./APPENDIX_KELLY_CRITERION.md) for full derivati ## Next Steps -1. **Build data pipeline** — Gamma API fetcher, store markets in Postgres -2. **Build Tier 1 screener** — Fast LLM pass to flag ambiguous contracts +1. ~~**Build data pipeline** — Gamma API fetcher, store markets~~ ✅ DONE + - `src/data/scanner.py` fetches from Gamma API + - `src/data/database.py` stores in SQLite + - Run: `python -m src.data.scanner --show-unanalyzed` + +2. **Build Tier 1 screener** — Fast LLM pass to flag ambiguous contracts ← NEXT 3. **Build Tier 2 analyzer** — Extended thinking analysis with RAG 4. **Backtest on historical disputes** — How well would we have predicted? 5. 
"""
PR3DICT: SQLite Database for Market Tracking

Stores Polymarket listings and their dispute analysis results.
"""
import json
import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

# Default database path (gitignored); resolves to <repo root>/data/markets.db
DEFAULT_DB_PATH = Path(__file__).parent.parent.parent / "data" / "markets.db"


class MarketDatabase:
    """
    SQLite database for tracking Polymarket listings and dispute analyses.

    Tables:
    - markets: Raw market data from Polymarket Gamma API
    - analyses: LLM analysis results with dispute probability scores

    All query methods are async (aiosqlite). Call ``connect()`` before any
    other operation and ``close()`` when finished.
    """

    def __init__(self, db_path: Optional[Path] = None):
        """Remember the database location; no I/O happens until connect()."""
        self.db_path = db_path or DEFAULT_DB_PATH
        self._connection = None  # becomes an aiosqlite.Connection in connect()

    async def connect(self) -> None:
        """Initialize database connection and create tables."""
        # Imported lazily so the module's pure helpers (e.g. _parse_price)
        # and constants remain usable when the async driver isn't installed.
        import aiosqlite

        # Ensure data directory exists
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        self._connection = await aiosqlite.connect(self.db_path)
        # Row factory gives dict-like rows so queries can return dict(row).
        self._connection.row_factory = aiosqlite.Row

        await self._create_tables()
        logger.info(f"Connected to database: {self.db_path}")

    async def close(self) -> None:
        """Close database connection (safe to call when never connected)."""
        if self._connection:
            await self._connection.close()
            self._connection = None

    async def _create_tables(self) -> None:
        """Create database schema (idempotent: IF NOT EXISTS throughout)."""
        await self._connection.executescript("""
            -- Raw market data from Polymarket
            CREATE TABLE IF NOT EXISTS markets (
                id TEXT PRIMARY KEY,              -- Polymarket condition_id
                question TEXT NOT NULL,
                description TEXT,
                resolution_source TEXT,
                slug TEXT,

                -- Pricing
                yes_price REAL,
                no_price REAL,

                -- Volume/Liquidity
                volume REAL,
                liquidity REAL,

                -- Dates
                end_date TEXT,
                created_at TEXT,

                -- UMA Resolution
                uma_resolution_status TEXT,       -- null, proposed, disputed, resolved
                uma_bond TEXT,
                uma_reward TEXT,

                -- Tracking
                first_seen_at TEXT NOT NULL,
                last_updated_at TEXT NOT NULL,
                raw_json TEXT                     -- Full API response for reference
            );

            -- Analysis results from LLM pipeline
            CREATE TABLE IF NOT EXISTS analyses (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                market_id TEXT NOT NULL,

                -- Dispute prediction
                dispute_probability REAL,         -- 0.0 to 1.0
                ambiguity_score REAL,             -- 0.0 to 1.0

                -- LLM outputs
                ambiguous_terms TEXT,             -- JSON array
                edge_cases TEXT,                  -- JSON array
                resolution_source_risk TEXT,      -- low, medium, high
                reasoning TEXT,

                -- Metadata
                model_used TEXT,                  -- Which LLM analyzed this
                analyzed_at TEXT NOT NULL,

                FOREIGN KEY (market_id) REFERENCES markets(id)
            );

            -- Indexes for common queries
            CREATE INDEX IF NOT EXISTS idx_markets_liquidity ON markets(liquidity);
            CREATE INDEX IF NOT EXISTS idx_markets_uma_status ON markets(uma_resolution_status);
            CREATE INDEX IF NOT EXISTS idx_markets_end_date ON markets(end_date);
            CREATE INDEX IF NOT EXISTS idx_analyses_market ON analyses(market_id);
            CREATE INDEX IF NOT EXISTS idx_analyses_dispute_prob ON analyses(dispute_probability);
        """)
        await self._connection.commit()

    # --- Market Operations ---

    async def upsert_market(self, market: Dict[str, Any]) -> None:
        """Insert or update a market from a Polymarket API response dict.

        On conflict (market already tracked) every mutable column is
        refreshed from the new payload, while first_seen_at is preserved.
        """
        now = datetime.now(timezone.utc).isoformat()

        await self._connection.execute("""
            INSERT INTO markets (
                id, question, description, resolution_source, slug,
                yes_price, no_price, volume, liquidity, end_date, created_at,
                uma_resolution_status, uma_bond, uma_reward,
                first_seen_at, last_updated_at, raw_json
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(id) DO UPDATE SET
                question = excluded.question,
                description = excluded.description,
                resolution_source = excluded.resolution_source,
                yes_price = excluded.yes_price,
                no_price = excluded.no_price,
                volume = excluded.volume,
                liquidity = excluded.liquidity,
                end_date = excluded.end_date,
                uma_resolution_status = excluded.uma_resolution_status,
                uma_bond = excluded.uma_bond,
                uma_reward = excluded.uma_reward,
                last_updated_at = excluded.last_updated_at,
                raw_json = excluded.raw_json
        """, (
            market.get("conditionId") or market.get("id"),
            market.get("question"),
            market.get("description"),
            market.get("resolutionSource"),
            market.get("slug"),
            self._parse_price(market.get("outcomePrices"), 0),
            self._parse_price(market.get("outcomePrices"), 1),
            float(market.get("volumeNum") or market.get("volume") or 0),
            float(market.get("liquidityNum") or market.get("liquidity") or 0),
            market.get("endDate"),
            market.get("createdAt"),
            market.get("umaResolutionStatus"),
            market.get("umaBond"),
            market.get("umaReward"),
            now,  # first_seen_at (ignored on conflict)
            now,  # last_updated_at
            json.dumps(market)
        ))
        await self._connection.commit()

    def _parse_price(self, outcome_prices: Any, index: int) -> Optional[float]:
        """Parse one outcome price from the API's outcomePrices field.

        The field may arrive as a JSON-encoded string (e.g. '["0.3","0.7"]')
        or as a list. Returns None for anything unparseable.
        """
        if not outcome_prices:
            return None
        try:
            if isinstance(outcome_prices, str):
                prices = json.loads(outcome_prices)
            else:
                prices = outcome_prices
            return float(prices[index]) if len(prices) > index else None
        # FIX: ValueError added — float("n/a") and similar non-numeric
        # strings previously escaped this handler and crashed upsert_market.
        except (json.JSONDecodeError, IndexError, TypeError, ValueError):
            return None

    async def get_market(self, market_id: str) -> Optional[Dict[str, Any]]:
        """Get a single market by ID, or None if unknown."""
        async with self._connection.execute(
            "SELECT * FROM markets WHERE id = ?", (market_id,)
        ) as cursor:
            row = await cursor.fetchone()
            return dict(row) if row else None

    async def get_unanalyzed_markets(self, limit: int = 100) -> List[Dict[str, Any]]:
        """Get markets that haven't been analyzed yet, highest liquidity first."""
        async with self._connection.execute("""
            SELECT m.* FROM markets m
            LEFT JOIN analyses a ON m.id = a.market_id
            WHERE a.id IS NULL
            ORDER BY m.liquidity DESC
            LIMIT ?
        """, (limit,)) as cursor:
            rows = await cursor.fetchall()
            return [dict(row) for row in rows]

    async def get_markets_by_liquidity(
        self,
        min_liquidity: float = 0,
        max_liquidity: float = float('inf'),
        limit: int = 100
    ) -> List[Dict[str, Any]]:
        """Get markets within a liquidity range (inclusive bounds)."""
        async with self._connection.execute("""
            SELECT * FROM markets
            WHERE liquidity >= ? AND liquidity <= ?
            ORDER BY liquidity DESC
            LIMIT ?
        """, (min_liquidity, max_liquidity, limit)) as cursor:
            rows = await cursor.fetchall()
            return [dict(row) for row in rows]

    # --- Analysis Operations ---

    async def save_analysis(
        self,
        market_id: str,
        dispute_probability: float,
        ambiguity_score: float,
        ambiguous_terms: List[str],
        edge_cases: List[str],
        resolution_source_risk: str,
        reasoning: str,
        model_used: str
    ) -> int:
        """Save LLM analysis results for a market; returns the new row id.

        List arguments are stored as JSON arrays (see get_analysis, which
        decodes them back to lists).
        """
        now = datetime.now(timezone.utc).isoformat()

        cursor = await self._connection.execute("""
            INSERT INTO analyses (
                market_id, dispute_probability, ambiguity_score,
                ambiguous_terms, edge_cases, resolution_source_risk,
                reasoning, model_used, analyzed_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            market_id,
            dispute_probability,
            ambiguity_score,
            json.dumps(ambiguous_terms),
            json.dumps(edge_cases),
            resolution_source_risk,
            reasoning,
            model_used,
            now
        ))
        await self._connection.commit()
        return cursor.lastrowid

    async def get_analysis(self, market_id: str) -> Optional[Dict[str, Any]]:
        """Get the most recent analysis for a market (JSON fields decoded)."""
        async with self._connection.execute("""
            SELECT * FROM analyses
            WHERE market_id = ?
            ORDER BY analyzed_at DESC
            LIMIT 1
        """, (market_id,)) as cursor:
            row = await cursor.fetchone()
            if row:
                result = dict(row)
                result["ambiguous_terms"] = json.loads(result.get("ambiguous_terms") or "[]")
                result["edge_cases"] = json.loads(result.get("edge_cases") or "[]")
                return result
            return None

    async def get_high_dispute_markets(
        self,
        min_probability: float = 0.5,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """Get analyzed markets with dispute probability >= min_probability."""
        async with self._connection.execute("""
            SELECT m.*, a.dispute_probability, a.ambiguity_score,
                   a.reasoning, a.analyzed_at
            FROM markets m
            JOIN analyses a ON m.id = a.market_id
            WHERE a.dispute_probability >= ?
            ORDER BY a.dispute_probability DESC
            LIMIT ?
        """, (min_probability, limit)) as cursor:
            rows = await cursor.fetchall()
            return [dict(row) for row in rows]

    # --- Stats ---

    async def get_stats(self) -> Dict[str, Any]:
        """Get database statistics: totals and count awaiting analysis."""
        stats = {}

        async with self._connection.execute("SELECT COUNT(*) FROM markets") as cursor:
            stats["total_markets"] = (await cursor.fetchone())[0]

        async with self._connection.execute("SELECT COUNT(*) FROM analyses") as cursor:
            stats["total_analyses"] = (await cursor.fetchone())[0]

        async with self._connection.execute("""
            SELECT COUNT(*) FROM markets m
            LEFT JOIN analyses a ON m.id = a.market_id
            WHERE a.id IS NULL
        """) as cursor:
            stats["unanalyzed_markets"] = (await cursor.fetchone())[0]

        return stats
+""" +import asyncio +import httpx +import logging +from datetime import datetime, timezone +from typing import Optional, List, Dict, Any + +from .database import MarketDatabase + +logger = logging.getLogger(__name__) + +GAMMA_API_URL = "https://gamma-api.polymarket.com" + + +class MarketScanner: + """ + Scans Polymarket for markets in target liquidity range. + + No authentication required for read-only access to Gamma API. + """ + + def __init__( + self, + db: MarketDatabase, + min_liquidity: float = 5000, # $5k minimum + max_liquidity: float = 500000, # $500k maximum + batch_size: int = 100 + ): + self.db = db + self.min_liquidity = min_liquidity + self.max_liquidity = max_liquidity + self.batch_size = batch_size + self._client: Optional[httpx.AsyncClient] = None + + async def __aenter__(self): + """Async context manager entry.""" + self._client = httpx.AsyncClient( + timeout=30.0, + headers={ + "User-Agent": "PR3DICT/1.0", + "Accept": "application/json" + } + ) + await self.db.connect() + return self + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """Async context manager exit.""" + if self._client: + await self._client.aclose() + await self.db.close() + + async def fetch_markets( + self, + closed: bool = False, + offset: int = 0 + ) -> List[Dict[str, Any]]: + """ + Fetch markets from Gamma API with liquidity filters. + + No authentication needed - this is public read-only data. 
+ """ + params = { + "liquidity_num_min": self.min_liquidity, + "liquidity_num_max": self.max_liquidity, + "closed": str(closed).lower(), + "limit": self.batch_size, + "offset": offset, + "order": "liquidityNum", + "ascending": "false" # Highest liquidity first + } + + try: + response = await self._client.get( + f"{GAMMA_API_URL}/markets", + params=params + ) + response.raise_for_status() + markets = response.json() + logger.info(f"Fetched {len(markets)} markets (offset={offset})") + return markets + + except httpx.HTTPError as e: + logger.error(f"Failed to fetch markets: {e}") + return [] + + async def fetch_all_markets(self, closed: bool = False) -> List[Dict[str, Any]]: + """Fetch all markets with pagination.""" + all_markets = [] + offset = 0 + + while True: + batch = await self.fetch_markets(closed=closed, offset=offset) + if not batch: + break + + all_markets.extend(batch) + + # Check if we got fewer than batch_size (last page) + if len(batch) < self.batch_size: + break + + offset += self.batch_size + + # Rate limiting - be nice to the API + await asyncio.sleep(0.5) + + return all_markets + + async def scan_and_store(self) -> Dict[str, Any]: + """ + Main scanning routine: fetch markets and store in database. + + Returns statistics about the scan. 
+ """ + start_time = datetime.now(timezone.utc) + logger.info(f"Starting market scan at {start_time.isoformat()}") + logger.info(f"Liquidity range: ${self.min_liquidity:,.0f} - ${self.max_liquidity:,.0f}") + + # Fetch active markets + markets = await self.fetch_all_markets(closed=False) + + # Store each market + new_count = 0 + updated_count = 0 + + for market in markets: + market_id = market.get("conditionId") or market.get("id") + + # Check if this is a new market + existing = await self.db.get_market(market_id) + if existing is None: + new_count += 1 + else: + updated_count += 1 + + await self.db.upsert_market(market) + + # Get final stats + stats = await self.db.get_stats() + + result = { + "scan_time": start_time.isoformat(), + "markets_fetched": len(markets), + "new_markets": new_count, + "updated_markets": updated_count, + "total_in_db": stats["total_markets"], + "unanalyzed": stats["unanalyzed_markets"] + } + + logger.info(f"Scan complete: {result}") + return result + + async def get_markets_for_analysis(self, limit: int = 10) -> List[Dict[str, Any]]: + """ + Get unanalyzed markets ready for LLM analysis. + + Returns markets with all relevant fields for dispute prediction. + """ + markets = await self.db.get_unanalyzed_markets(limit=limit) + + # Format for analysis + formatted = [] + for m in markets: + formatted.append({ + "id": m["id"], + "question": m["question"], + "description": m["description"], + "resolution_source": m["resolution_source"], + "liquidity": m["liquidity"], + "volume": m["volume"], + "end_date": m["end_date"], + "uma_status": m["uma_resolution_status"], + "slug": m["slug"] # For building Polymarket URL + }) + + return formatted + + +async def run_scanner( + min_liquidity: float = 5000, + max_liquidity: float = 500000 +) -> Dict[str, Any]: + """ + Convenience function to run a single scan. 
+ + Usage: + python -c "import asyncio; from src.data.scanner import run_scanner; print(asyncio.run(run_scanner()))" + """ + db = MarketDatabase() + scanner = MarketScanner( + db=db, + min_liquidity=min_liquidity, + max_liquidity=max_liquidity + ) + + async with scanner: + return await scanner.scan_and_store() + + +# CLI entry point +if __name__ == "__main__": + import argparse + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + + parser = argparse.ArgumentParser(description="Scan Polymarket for markets") + parser.add_argument("--min-liquidity", type=float, default=5000, + help="Minimum liquidity in USD (default: 5000)") + parser.add_argument("--max-liquidity", type=float, default=500000, + help="Maximum liquidity in USD (default: 500000)") + parser.add_argument("--show-unanalyzed", action="store_true", + help="Show unanalyzed markets after scan") + + args = parser.parse_args() + + async def main(): + result = await run_scanner( + min_liquidity=args.min_liquidity, + max_liquidity=args.max_liquidity + ) + + print("\n=== Scan Results ===") + print(f"Markets fetched: {result['markets_fetched']}") + print(f"New markets: {result['new_markets']}") + print(f"Updated markets: {result['updated_markets']}") + print(f"Total in database: {result['total_in_db']}") + print(f"Awaiting analysis: {result['unanalyzed']}") + + if args.show_unanalyzed: + db = MarketDatabase() + await db.connect() + + unanalyzed = await db.get_unanalyzed_markets(limit=10) + if unanalyzed: + print("\n=== Top Unanalyzed Markets ===") + for m in unanalyzed: + print(f"\n[{m['id'][:8]}...] 
${m['liquidity']:,.0f} liquidity") + print(f" Q: {m['question'][:80]}...") + print(f" URL: https://polymarket.com/event/{m['slug']}") + + await db.close() + + asyncio.run(main()) diff --git a/src/strategies/__init__.py b/src/strategies/__init__.py index 926d6c7..d7bb303 100644 --- a/src/strategies/__init__.py +++ b/src/strategies/__init__.py @@ -1,10 +1,8 @@ # Trading Strategies from .base import TradingStrategy, Signal from .arbitrage import ArbitrageStrategy, CrossPlatformArbitrage -from .dispute import DisputePredictionStrategy, DisputeAnalysis __all__ = [ "TradingStrategy", "Signal", - "ArbitrageStrategy", "CrossPlatformArbitrage", - "DisputePredictionStrategy", "DisputeAnalysis" + "ArbitrageStrategy", "CrossPlatformArbitrage" ] diff --git a/src/strategies/dispute.py b/src/strategies/dispute.py deleted file mode 100644 index 6f571d6..0000000 --- a/src/strategies/dispute.py +++ /dev/null @@ -1,525 +0,0 @@ -""" -PR3DICT: Dispute Prediction Strategy - -Exploits information asymmetry around Polymarket resolution: -1. Predict which markets will be disputed before other participants -2. Anticipate DVM voting outcomes via UMA token holder patterns -3. Trade resulting price dislocations during resolution uncertainty - -Key Insight: Most traders predict the underlying event. - We predict the resolution mechanism. -""" -from typing import List, Optional, Dict, Any -from decimal import Decimal -from datetime import datetime, timezone, timedelta -from dataclasses import dataclass -import logging - -from .base import TradingStrategy, Signal -from ..platforms.base import Market, Position, OrderSide - -logger = logging.getLogger(__name__) - - -@dataclass -class DisputeAnalysis: - """Analysis results for a single market's dispute potential.""" - market_id: str - - # Dispute probability components - ambiguity_score: float # 0-1, from LLM contract analysis - historical_dispute_rate: float # Category/creator history - market_dynamics_score: float # Volatility, concentration, etc. 
- sentiment_controversy: float # Social signal strength - - # Combined probability - dispute_probability: float # P(market will be disputed) - - # DVM outcome predictions (conditional on dispute) - dvm_yes_probability: float # P(YES | disputed) - dvm_no_probability: float # P(NO | disputed) - dvm_invalid_probability: float # P(INVALID | disputed) - - # Confidence and metadata - confidence: float # Model confidence in predictions - analysis_timestamp: datetime - reasoning: str # Human-readable explanation - - # Edge cases identified - edge_cases: List[str] - ambiguous_terms: List[str] - - -class DisputePredictionStrategy(TradingStrategy): - """ - Scans for markets likely to face resolution disputes and trades - the expected DVM outcome. - - THREE PROFIT WINDOWS: - - 1. PRE-DISPUTE DETECTION (highest edge) - - Identify dispute-prone markets before close - - Position for expected volatility or DVM outcome - - 2. POST-PROPOSAL, PRE-DISPUTE - - Detect high-probability disputes in challenge window - - Exit or hedge before liquidity dries up - - 3. 
ACTIVE DISPUTE (DVM VOTING) - - Predict UMA voter behavior - - Take position aligned with projected DVM outcome - """ - - def __init__( - self, - # Dispute detection thresholds - min_dispute_probability: float = 0.30, - min_dvm_confidence: float = 0.65, - - # Market filters - min_liquidity: Decimal = Decimal("5000"), - max_days_to_close: int = 14, - min_days_to_close: int = 0, # 0 = include closing today - - # Risk parameters - max_position_pct: float = 0.05, # Max 5% of bankroll per trade - kelly_fraction: float = 0.25, # Quarter Kelly for safety - - # Analysis components (injected dependencies) - contract_analyzer: Optional[Any] = None, - uma_analyzer: Optional[Any] = None, - sentiment_analyzer: Optional[Any] = None, - ): - self.min_dispute_probability = min_dispute_probability - self.min_dvm_confidence = min_dvm_confidence - self.min_liquidity = min_liquidity - self.max_days_to_close = max_days_to_close - self.min_days_to_close = min_days_to_close - self.max_position_pct = max_position_pct - self.kelly_fraction = kelly_fraction - - # Analysis components - will be implemented in Phase 2-3 - self._contract_analyzer = contract_analyzer - self._uma_analyzer = uma_analyzer - self._sentiment_analyzer = sentiment_analyzer - - # Cache for analyzed markets (avoid re-analyzing) - self._analysis_cache: Dict[str, DisputeAnalysis] = {} - - @property - def name(self) -> str: - return "dispute_prediction" - - async def scan_markets(self, markets: List[Market]) -> List[Signal]: - """ - Scan markets approaching resolution for dispute potential. - - Pipeline: - 1. Filter to markets in target window (approaching close) - 2. Run dispute probability model - 3. For high P(dispute) markets, predict DVM outcome - 4. 
Generate signals where we have edge vs current price - """ - signals = [] - now = datetime.now(timezone.utc) - - for market in markets: - # === FILTER STAGE === - if not self._is_candidate(market, now): - continue - - # === ANALYSIS STAGE === - analysis = await self._analyze_market(market) - - if analysis is None: - continue - - # === SIGNAL GENERATION === - signal = self._generate_signal(market, analysis) - - if signal is not None: - signals.append(signal) - logger.info( - f"DISPUTE SIGNAL: {market.ticker} | " - f"P(dispute)={analysis.dispute_probability:.2%} | " - f"P(YES|DVM)={analysis.dvm_yes_probability:.2%} | " - f"Current YES={market.yes_price}" - ) - - return signals - - def _is_candidate(self, market: Market, now: datetime) -> bool: - """Filter markets to those worth analyzing.""" - # Skip resolved markets - if market.resolved: - return False - - # Skip low liquidity - if market.liquidity < self.min_liquidity: - return False - - # Check time window - time_to_close = market.close_time - now - days_to_close = time_to_close.total_seconds() / 86400 - - if days_to_close < self.min_days_to_close: - return False - if days_to_close > self.max_days_to_close: - return False - - # Only Polymarket (UMA oracle) - if market.platform != "polymarket": - return False - - return True - - async def _analyze_market(self, market: Market) -> Optional[DisputeAnalysis]: - """ - Run full dispute analysis on a market. - - Combines: - - Contract text analysis (LLM) - - Historical patterns - - Market dynamics - - Sentiment signals - """ - # Check cache first - cache_key = f"{market.id}:{market.close_time.isoformat()}" - if cache_key in self._analysis_cache: - cached = self._analysis_cache[cache_key] - # Refresh if older than 1 hour - if (datetime.now(timezone.utc) - cached.analysis_timestamp).seconds < 3600: - return cached - - # === PLACEHOLDER: Implement in Phase 2-3 === - # - # This is where the real magic happens. 
For now, return None - # to skip all markets until analysis components are built. - # - # Real implementation will: - # 1. Call contract_analyzer.analyze(market.description, market.title) - # 2. Query uma_analyzer.get_historical_patterns(market.category) - # 3. Calculate market_dynamics from price/volume data - # 4. Get sentiment from sentiment_analyzer - # 5. Combine into DisputeAnalysis - - if self._contract_analyzer is None: - logger.debug(f"Skipping {market.ticker}: No analyzer configured") - return None - - # TODO: Implement actual analysis pipeline - # analysis = DisputeAnalysis( - # market_id=market.id, - # ambiguity_score=await self._contract_analyzer.analyze(market), - # ... - # ) - # self._analysis_cache[cache_key] = analysis - # return analysis - - return None - - def _generate_signal( - self, - market: Market, - analysis: DisputeAnalysis - ) -> Optional[Signal]: - """ - Generate trading signal based on dispute analysis. - - Logic: - 1. If P(dispute) < threshold, skip (no edge) - 2. If DVM outcome uncertain, skip (can't position) - 3. Compare predicted DVM outcome to current market price - 4. 
Signal if significant mispricing exists - """ - # Check dispute probability threshold - if analysis.dispute_probability < self.min_dispute_probability: - return None - - # Check DVM prediction confidence - dvm_probs = [ - analysis.dvm_yes_probability, - analysis.dvm_no_probability, - analysis.dvm_invalid_probability - ] - max_dvm_prob = max(dvm_probs) - - if max_dvm_prob < self.min_dvm_confidence: - # DVM outcome too uncertain - return None - - # Determine expected outcome - if analysis.dvm_yes_probability == max_dvm_prob: - expected_outcome = OrderSide.YES - expected_prob = analysis.dvm_yes_probability - current_price = market.yes_price - elif analysis.dvm_no_probability == max_dvm_prob: - expected_outcome = OrderSide.NO - expected_prob = analysis.dvm_no_probability - current_price = market.no_price - else: - # INVALID most likely - special case - # Both YES and NO should trade toward 0.50 - # Could short the overpriced side - logger.info( - f"INVALID likely for {market.ticker}: " - f"P(INVALID)={analysis.dvm_invalid_probability:.2%}" - ) - # For now, skip INVALID predictions - complex to trade - return None - - # Calculate edge: our expected value vs current price - # Account for dispute probability in expected value - # - # EV = P(no_dispute) * P(outcome|no_dispute) * payout - # + P(dispute) * P(outcome|DVM) * payout - # - # Simplified: assume P(outcome|no_dispute) ≈ current market price - p_no_dispute = 1 - analysis.dispute_probability - p_dispute = analysis.dispute_probability - - # Our edge comes from the dispute scenario - # If market prices dispute as less likely than we think, - # and we know the DVM outcome, we have edge - - # Calculate implied probability from current price - implied_prob = float(current_price) - - # Our probability estimate incorporates dispute scenario - our_prob = ( - p_no_dispute * implied_prob + # Normal resolution - p_dispute * expected_prob # Dispute → DVM outcome - ) - - # Edge = our prob - market implied prob - edge = our_prob - 
implied_prob - - # Only signal if edge exceeds threshold (e.g., 5%) - min_edge = 0.05 - if edge < min_edge: - return None - - # Calculate signal strength (0-1) - # Higher dispute probability + higher DVM confidence = stronger - strength = ( - analysis.dispute_probability * - analysis.confidence * - edge - ) - - return Signal( - market_id=market.id, - market=market, - side=expected_outcome, - strength=min(1.0, strength), - reason=( - f"Dispute prediction: P(dispute)={analysis.dispute_probability:.1%}, " - f"P({expected_outcome.value}|DVM)={expected_prob:.1%}, " - f"Edge={edge:.1%}. {analysis.reasoning}" - ), - target_price=current_price, - ) - - async def check_exit( - self, - position: Position, - market: Market - ) -> Optional[Signal]: - """ - Check exit conditions for dispute-based positions. - - Exit when: - 1. Market resolved (dispute didn't happen or DVM complete) - 2. Our dispute prediction confidence dropped significantly - 3. Dispute happened but DVM trending against us - 4. Position in profit and want to lock gains - """ - # Re-analyze the market - analysis = await self._analyze_market(market) - - if analysis is None: - # Can't analyze - hold for now - return None - - # If dispute probability dropped significantly, consider exit - if analysis.dispute_probability < self.min_dispute_probability * 0.5: - return Signal( - market_id=market.id, - market=market, - side=OrderSide.NO if position.side == OrderSide.YES else OrderSide.YES, - strength=0.5, - reason="Dispute probability dropped - thesis invalidated" - ) - - # If DVM outcome shifted against our position - if position.side == OrderSide.YES: - if analysis.dvm_no_probability > analysis.dvm_yes_probability + 0.2: - return Signal( - market_id=market.id, - market=market, - side=OrderSide.NO, - strength=0.7, - reason="DVM prediction shifted to NO - exiting YES position" - ) - - # Hold otherwise - let it play out - return None - - def get_position_size( - self, - signal: Signal, - account_balance: Decimal, - 
risk_pct: float = None - ) -> int: - """ - Calculate position size using Kelly Criterion with adjustments. - - Adjustments for dispute strategy: - - Apply confidence discount - - Apply P(INVALID) discount - - Cap at max_position_pct - - Use fractional Kelly for safety - """ - # Get analysis for this market - cache_key = f"{signal.market_id}:{signal.market.close_time.isoformat()}" - analysis = self._analysis_cache.get(cache_key) - - if analysis is None: - # Fall back to base implementation - return super().get_position_size(signal, account_balance) - - # Entry price - if signal.target_price: - entry_price = float(signal.target_price) - elif signal.side == OrderSide.YES: - entry_price = float(signal.market.yes_price) - else: - entry_price = float(signal.market.no_price) - - if entry_price <= 0 or entry_price >= 1: - return 1 # Minimum size - - # Implied odds: b = (1/price) - 1 - b = (1 / entry_price) - 1 - - # Our probability estimate - if signal.side == OrderSide.YES: - p = analysis.dvm_yes_probability - else: - p = analysis.dvm_no_probability - - q = 1 - p - - # Standard Kelly: f* = (bp - q) / b - if b <= 0: - return 1 - - kelly = (b * p - q) / b - - if kelly <= 0: - return 0 # No bet - negative edge - - # Apply adjustments - # 1. Confidence discount - kelly *= analysis.confidence - - # 2. P(INVALID) discount - could lose everything - kelly *= (1 - analysis.dvm_invalid_probability) - - # 3. Fractional Kelly for safety - kelly *= self.kelly_fraction - - # 4. Cap at max position - kelly = min(kelly, self.max_position_pct) - - # Calculate contract count - position_value = float(account_balance) * kelly - contracts = int(position_value / entry_price) - - return max(1, contracts) if kelly > 0 else 0 - - -# === STUB CLASSES FOR PHASE 2-3 IMPLEMENTATION === - -class ContractAnalyzer: - """ - LLM-based contract ambiguity analysis. 
- - Analyzes market question + description for: - - Ambiguous terms - - Edge cases not covered - - Resolution source reliability - - Subjective criteria - - TODO: Implement in Phase 2 - """ - - def __init__(self, llm_client=None): - self.llm_client = llm_client - - async def analyze(self, market: Market) -> Dict[str, Any]: - """ - Analyze contract for dispute potential. - - Returns dict with: - - ambiguity_score: 0-1 - - edge_cases: List[str] - - ambiguous_terms: List[str] - - resolution_source_risk: low/medium/high - - reasoning: str - """ - raise NotImplementedError("Implement in Phase 2") - - -class UMAVoterAnalyzer: - """ - Analyzes UMA token holder voting patterns. - - Tracks: - - Whale addresses and their historical votes - - Category-specific voting biases - - Vote momentum during active disputes - - TODO: Implement in Phase 3 - """ - - def __init__(self, subgraph_url: str = None): - self.subgraph_url = subgraph_url or "https://api.thegraph.com/subgraphs/name/umaprotocol/uma-voting" - - async def get_historical_dispute_rate(self, category: str) -> float: - """Get historical dispute rate for market category.""" - raise NotImplementedError("Implement in Phase 3") - - async def predict_dvm_outcome( - self, - market: Market, - dispute_context: Dict[str, Any] - ) -> Dict[str, float]: - """ - Predict DVM voting outcome. - - Returns: {"YES": p, "NO": p, "INVALID": p} - """ - raise NotImplementedError("Implement in Phase 3") - - -class SentimentAnalyzer: - """ - Monitors social channels for dispute signals. - - Sources: - - Polymarket Discord - - Twitter/X mentions - - UMA Discord - - Reddit r/Polymarket - - TODO: Implement in Phase 2-3 - """ - - async def get_controversy_score(self, market: Market) -> float: - """ - Calculate controversy score from social signals. 
- - Higher score = more disagreement about outcome - """ - raise NotImplementedError("Implement in Phase 2-3") From bc57217afd5c476df7e7ae9ba9b9afc4604e6ff0 Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Sun, 25 Jan 2026 20:49:33 -0600 Subject: [PATCH 03/15] Add git workflow documentation to AGENTS.md Documents the required process for all features: 1. Create branch on fork 2. Push and create PR to fork's main 3. User reviews and approves manually 4. Then PR from fork main to upstream This ensures proper review before changes reach the shared repo. --- AGENTS.md | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index d0c2ac8..25c1bcd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -2,6 +2,44 @@ This document helps AI coding assistants understand and work with this codebase. +--- + +## Git Workflow (IMPORTANT - Follow This Process) + +**Repository Structure:** +- **Your fork:** `https://github.com/Nate0-1999/pr3dict` (where you push) +- **Upstream:** `https://github.com/aerichmo/PR3DICT` (partner's repo, PRs go here) + +**For Every Feature:** + +``` +1. CREATE BRANCH on your fork + git checkout main + git pull origin main + git checkout -b feature-name + +2. WORK ON FEATURE + - Make changes + - Test thoroughly + - Commit with clear message + +3. PUSH BRANCH to your fork + git push -u origin feature-name + +4. CREATE PR to your fork's main + → PR: Nate0-1999/pr3dict feature-name → Nate0-1999/pr3dict main + → User manually reviews and approves in GitHub + → WAIT for approval before proceeding + +5. AFTER APPROVAL, PR to upstream + → PR: Nate0-1999/pr3dict main → aerichmo/PR3DICT main + → Partner reviews and merges +``` + +**Never push directly to main. Always use branches and PRs.** + +--- + ## Project Overview PR3DICT is a multi-strategy prediction market trading system. Current focus: **Dispute Prediction** on Polymarket. 
From d971b7b17e121bbfc593fc9e336b43b15bd83e2e Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Wed, 4 Feb 2026 01:47:16 -0600 Subject: [PATCH 04/15] Add resolution strategy data contracts and schema tests --- docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md | 178 ++++++++++++ docs/workstreams/WT-A_DATA_CONTRACTS.md | 17 ++ src/data/database.py | 303 +++++++++++++++++++++ test_resolution_schema.py | 90 ++++++ 4 files changed, 588 insertions(+) create mode 100644 docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md create mode 100644 docs/workstreams/WT-A_DATA_CONTRACTS.md create mode 100644 test_resolution_schema.py diff --git a/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md b/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md new file mode 100644 index 0000000..ccc240f --- /dev/null +++ b/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md @@ -0,0 +1,178 @@ +# Resolution Advantage Worktree Plan + +Execution-ready implementation plan for the dispute strategy using parallel git worktrees. + +## Scope + +This plan applies only to Polymarket dispute/resolution advantage components. 
+ +Out of scope: +- Arbitrage, market making, or other strategy rewrites +- Production real-money auto-execution + +## Workstream Layout + +## WT-A: Data Contracts + Schema + +Branch: `codex/resolution-data-contracts` + +Owns: +- `src/data/database.py` +- dispute schema migration helpers + +Delivers: +- New tables: + - `analysis_runs` + - `analysis_outputs_t1` + - `analysis_outputs_t2` + - `signals` + - `market_outcomes` + - `calibration_metrics` +- Backward-compatible reads for existing `markets`/`analyses` +- DB-level constraints for probability bounds where practical + +Definition of done: +- Can replay one signal deterministically from stored artifacts +- Migration path works on fresh DB and existing DB +- Unit tests for insert/retrieve/validation pass + +## WT-B: Tier 1 Screen + +Branch: `codex/resolution-tier1` + +Owns: +- `src/strategies/dispute/tier1.py` +- Tier 1 prompt contract + schema validation + +Consumes: +- WT-A tables (`analysis_runs`, `analysis_outputs_t1`) + +Delivers: +- `screen_decision` (`PASS|FLAG`) +- `ambiguity_score`, `dispute_prob_prior`, `top_risks[]` +- strict JSON validation + rejection logging + +Definition of done: +- Invalid model output never crashes pipeline +- Deterministic `prompt_version` and `run_id` persisted +- Tests cover PASS, FLAG, malformed output, retry fallback + +## WT-C: Tier 2 Deep Analysis + +Branch: `codex/resolution-tier2` + +Owns: +- `src/strategies/dispute/tier2.py` +- Decision-path + probability normalization rules + +Consumes: +- WT-A tables +- WT-B `FLAG` outputs + +Delivers: +- `p_dispute` +- `p_yes_final`, `p_no_final`, `p_invalid_final` +- `confidence`, `decision_path`, `no_trade_reason`, `assumptions[]` +- hard checks for probability bounds and sum tolerance + +Definition of done: +- Sum tolerance enforced (example: `abs(sum-1.0) <= 0.01`) +- `no_trade_reason` always populated for no-trade paths +- Contract tests validate all decision paths + +## WT-D: Signal + Sizing + +Branch: 
`codex/resolution-signals` + +Owns: +- `src/strategies/dispute/signal_engine.py` +- dispute-specific sizing module (no generic risk rewrite) + +Consumes: +- WT-C normalized probabilities +- market snapshot prices/liquidity/spread + +Delivers: +- Actions: `ENTER_YES`, `ENTER_NO`, `EXIT`, `HOLD`, `NO_TRADE` +- Edge math after fees/slippage haircuts +- Fractional Kelly sizing with confidence/invalid/liquidity discounts +- Cap enforcement: + - `MAX_POSITION_PCT` + - `MAX_MARKET_EXPOSURE_USD` + - `MAX_STRATEGY_EXPOSURE_USD` + +Definition of done: +- Every signal has reason code + snapshot context +- No emitted size exceeds caps +- Tests cover low-liquidity rejection and invalid-risk discount + +## WT-E: Evaluation + Paper Runner + +Branch: `codex/resolution-eval` + +Owns: +- `src/strategies/dispute/eval.py` +- report generators for daily paper-trading summaries + +Consumes: +- WT-A/B/C/D artifacts + +Delivers: +- Backtest metrics: + - calibration (Brier/log loss) + - hit rate and realized EV + - failure buckets by reason code +- Daily paper-trade summary output + +Definition of done: +- Can run on historical settled markets dataset +- Report generated from CLI without manual editing +- Includes per-model-version calibration slice + +## Interface Freeze Points + +Freeze these interfaces before dependent merges: + +1. WT-A schema names/columns/indexes +2. WT-B output JSON schema +3. WT-C output JSON schema and decision taxonomy +4. WT-D signal event schema + +If an interface changes after freeze, owning stream must: +- publish changelog note in PR description +- provide adapter or migration in same PR + +## Merge Order + +1. WT-A +2. WT-B and WT-C (parallel, then merge) +3. WT-D +4. 
WT-E + +Each stream merges via repo policy: +- feature branch -> fork `main` PR (approved) +- fork `main` -> upstream `main` PR + +## Test Matrix (Minimum) + +- Unit: + - probability validation + - schema persistence + - sizing cap enforcement +- Integration: + - scanner -> Tier 1 -> Tier 2 -> signal path +- Regression: + - existing scanner/database behavior unchanged for current CLI usage + +## Initial Parameter Defaults (for paper mode) + +- `MIN_LIQUIDITY=5000` +- `DAYS_TO_CLOSE=14` +- `DISPUTE_THRESHOLD=0.30` +- `MIN_CONFIDENCE=0.60` +- `DVM_CONFIDENCE_THRESHOLD=0.70` +- `INITIATION_CONFIDENCE=0.75` +- `KELLY_FRACTION=0.25` +- `MAX_POSITION_PCT=0.05` + +All values remain config-driven and easy to tune after backtesting. diff --git a/docs/workstreams/WT-A_DATA_CONTRACTS.md b/docs/workstreams/WT-A_DATA_CONTRACTS.md new file mode 100644 index 0000000..d5553a5 --- /dev/null +++ b/docs/workstreams/WT-A_DATA_CONTRACTS.md @@ -0,0 +1,17 @@ +# WT-A Data Contracts + Schema + +Branch: `codex/resolution-data-contracts` + +## Scope +- Extend SQLite schema for resolution advantage pipeline. +- Preserve backward compatibility for existing scanner workflows. + +## First Tasks +1. Add tables: `analysis_runs`, `analysis_outputs_t1`, `analysis_outputs_t2`, `signals`, `market_outcomes`, `calibration_metrics`. +2. Add DAO methods for inserts/selects used by Tier 1/Tier 2/signal engine. +3. Add tests for probability bounds and deterministic replay. + +## Exit Criteria +- Fresh DB init succeeds. +- Existing DB migration path succeeds. +- Replay of one signal from stored artifacts is deterministic. 
diff --git a/src/data/database.py b/src/data/database.py index 27c7580..c03016a 100644 --- a/src/data/database.py +++ b/src/data/database.py @@ -23,6 +23,12 @@ class MarketDatabase: Tables: - markets: Raw market data from Polymarket Gamma API - analyses: LLM analysis results with dispute probability scores + - analysis_runs: Metadata for deterministic analysis replay + - analysis_outputs_t1: Tier 1 structured outputs + - analysis_outputs_t2: Tier 2 structured outputs + - signals: Generated trade signals and edge context + - market_outcomes: Ground truth settlement outcomes + - calibration_metrics: Offline model calibration artifacts """ def __init__(self, db_path: Optional[Path] = None): @@ -101,6 +107,109 @@ async def _create_tables(self) -> None: FOREIGN KEY (market_id) REFERENCES markets(id) ); + + -- Deterministic analysis replay metadata + CREATE TABLE IF NOT EXISTS analysis_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + market_id TEXT NOT NULL, + stage TEXT NOT NULL, -- tier1, tier2 + run_id TEXT NOT NULL, + model TEXT NOT NULL, + prompt_version TEXT NOT NULL, + strategy_version TEXT NOT NULL, + latency_ms INTEGER, + token_cost_usd REAL, + status TEXT NOT NULL, -- success, invalid, failed + error_message TEXT, + created_at TEXT NOT NULL, + UNIQUE(run_id), + FOREIGN KEY (market_id) REFERENCES markets(id) + ); + + -- Tier 1 output contract + CREATE TABLE IF NOT EXISTS analysis_outputs_t1 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + analysis_run_id INTEGER NOT NULL, + market_id TEXT NOT NULL, + screen_decision TEXT NOT NULL, -- PASS, FLAG + ambiguity_score REAL NOT NULL CHECK (ambiguity_score >= 0 AND ambiguity_score <= 1), + dispute_prob_prior REAL NOT NULL CHECK (dispute_prob_prior >= 0 AND dispute_prob_prior <= 1), + top_risks TEXT NOT NULL, -- JSON array + rationale_short TEXT, + created_at TEXT NOT NULL, + FOREIGN KEY (analysis_run_id) REFERENCES analysis_runs(id), + FOREIGN KEY (market_id) REFERENCES markets(id) + ); + + -- Tier 2 output contract + CREATE 
TABLE IF NOT EXISTS analysis_outputs_t2 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + analysis_run_id INTEGER NOT NULL, + market_id TEXT NOT NULL, + p_dispute REAL NOT NULL CHECK (p_dispute >= 0 AND p_dispute <= 1), + p_yes_final REAL NOT NULL CHECK (p_yes_final >= 0 AND p_yes_final <= 1), + p_no_final REAL NOT NULL CHECK (p_no_final >= 0 AND p_no_final <= 1), + p_invalid_final REAL NOT NULL CHECK (p_invalid_final >= 0 AND p_invalid_final <= 1), + confidence REAL NOT NULL CHECK (confidence >= 0 AND confidence <= 1), + resolution_source_risk TEXT NOT NULL, -- low, medium, high + edge_cases TEXT NOT NULL, -- JSON array + decision_path TEXT NOT NULL, -- pre_dispute, post_proposal, active_dispute, initiate_dispute, no_trade + no_trade_reason TEXT, + assumptions TEXT NOT NULL, -- JSON array + created_at TEXT NOT NULL, + FOREIGN KEY (analysis_run_id) REFERENCES analysis_runs(id), + FOREIGN KEY (market_id) REFERENCES markets(id) + ); + + -- Signal artifacts for execution + replay + CREATE TABLE IF NOT EXISTS signals ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + market_id TEXT NOT NULL, + analysis_run_id INTEGER, + action TEXT NOT NULL, -- ENTER_YES, ENTER_NO, EXIT, HOLD, NO_TRADE + side TEXT, -- yes, no, null for HOLD/NO_TRADE + confidence REAL CHECK (confidence >= 0 AND confidence <= 1), + edge_yes REAL, + edge_no REAL, + edge_selected REAL, + yes_price_snapshot REAL, + no_price_snapshot REAL, + liquidity_snapshot REAL, + reason_code TEXT NOT NULL, + reason_detail TEXT, + strategy_version TEXT NOT NULL, + created_at TEXT NOT NULL, + FOREIGN KEY (market_id) REFERENCES markets(id), + FOREIGN KEY (analysis_run_id) REFERENCES analysis_runs(id) + ); + + -- Settled market labels for calibration and backtesting + CREATE TABLE IF NOT EXISTS market_outcomes ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + market_id TEXT NOT NULL UNIQUE, + disputed INTEGER NOT NULL, -- 0,1 + final_resolution TEXT NOT NULL, -- YES, NO, INVALID + time_to_resolution_hours REAL, + source_run_id TEXT, + 
settled_at TEXT NOT NULL, + created_at TEXT NOT NULL, + FOREIGN KEY (market_id) REFERENCES markets(id) + ); + + -- Model calibration snapshots + CREATE TABLE IF NOT EXISTS calibration_metrics ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + model TEXT NOT NULL, + prompt_version TEXT NOT NULL, + strategy_version TEXT NOT NULL, + time_bucket TEXT NOT NULL, -- e.g. 2026-W05 + sample_size INTEGER NOT NULL, + brier_score REAL, + log_loss REAL, + calibration_error REAL, + metadata_json TEXT, -- arbitrary JSON + created_at TEXT NOT NULL + ); -- Indexes for common queries CREATE INDEX IF NOT EXISTS idx_markets_liquidity ON markets(liquidity); @@ -108,6 +217,12 @@ async def _create_tables(self) -> None: CREATE INDEX IF NOT EXISTS idx_markets_end_date ON markets(end_date); CREATE INDEX IF NOT EXISTS idx_analyses_market ON analyses(market_id); CREATE INDEX IF NOT EXISTS idx_analyses_dispute_prob ON analyses(dispute_probability); + CREATE INDEX IF NOT EXISTS idx_analysis_runs_market_stage ON analysis_runs(market_id, stage); + CREATE INDEX IF NOT EXISTS idx_t1_market_created ON analysis_outputs_t1(market_id, created_at); + CREATE INDEX IF NOT EXISTS idx_t2_market_created ON analysis_outputs_t2(market_id, created_at); + CREATE INDEX IF NOT EXISTS idx_signals_market_created ON signals(market_id, created_at); + CREATE INDEX IF NOT EXISTS idx_market_outcomes_resolution ON market_outcomes(final_resolution); + CREATE INDEX IF NOT EXISTS idx_calibration_model_bucket ON calibration_metrics(model, prompt_version, time_bucket); """) await self._connection.commit() @@ -277,6 +392,194 @@ async def get_high_dispute_markets( """, (min_probability, limit)) as cursor: rows = await cursor.fetchall() return [dict(row) for row in rows] + + async def save_analysis_run( + self, + market_id: str, + stage: str, + run_id: str, + model: str, + prompt_version: str, + strategy_version: str, + status: str, + latency_ms: Optional[int] = None, + token_cost_usd: Optional[float] = None, + error_message: 
Optional[str] = None + ) -> int: + """Persist run metadata for deterministic replay.""" + now = datetime.now(timezone.utc).isoformat() + cursor = await self._connection.execute(""" + INSERT INTO analysis_runs ( + market_id, stage, run_id, model, prompt_version, strategy_version, + latency_ms, token_cost_usd, status, error_message, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + market_id, stage, run_id, model, prompt_version, strategy_version, + latency_ms, token_cost_usd, status, error_message, now + )) + await self._connection.commit() + return cursor.lastrowid + + async def save_tier1_output( + self, + analysis_run_id: int, + market_id: str, + screen_decision: str, + ambiguity_score: float, + dispute_prob_prior: float, + top_risks: List[str], + rationale_short: str + ) -> int: + """Persist validated Tier 1 output contract.""" + now = datetime.now(timezone.utc).isoformat() + cursor = await self._connection.execute(""" + INSERT INTO analysis_outputs_t1 ( + analysis_run_id, market_id, screen_decision, ambiguity_score, + dispute_prob_prior, top_risks, rationale_short, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+ """, ( + analysis_run_id, market_id, screen_decision, ambiguity_score, + dispute_prob_prior, json.dumps(top_risks), rationale_short, now + )) + await self._connection.commit() + return cursor.lastrowid + + async def save_tier2_output( + self, + analysis_run_id: int, + market_id: str, + p_dispute: float, + p_yes_final: float, + p_no_final: float, + p_invalid_final: float, + confidence: float, + resolution_source_risk: str, + edge_cases: List[str], + decision_path: str, + no_trade_reason: Optional[str], + assumptions: List[str] + ) -> int: + """Persist validated Tier 2 output contract.""" + now = datetime.now(timezone.utc).isoformat() + cursor = await self._connection.execute(""" + INSERT INTO analysis_outputs_t2 ( + analysis_run_id, market_id, p_dispute, p_yes_final, p_no_final, + p_invalid_final, confidence, resolution_source_risk, edge_cases, + decision_path, no_trade_reason, assumptions, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + analysis_run_id, market_id, p_dispute, p_yes_final, p_no_final, + p_invalid_final, confidence, resolution_source_risk, json.dumps(edge_cases), + decision_path, no_trade_reason, json.dumps(assumptions), now + )) + await self._connection.commit() + return cursor.lastrowid + + async def save_signal( + self, + market_id: str, + action: str, + reason_code: str, + strategy_version: str, + analysis_run_id: Optional[int] = None, + side: Optional[str] = None, + confidence: Optional[float] = None, + edge_yes: Optional[float] = None, + edge_no: Optional[float] = None, + edge_selected: Optional[float] = None, + yes_price_snapshot: Optional[float] = None, + no_price_snapshot: Optional[float] = None, + liquidity_snapshot: Optional[float] = None, + reason_detail: Optional[str] = None + ) -> int: + """Persist signal with edge context for replay.""" + now = datetime.now(timezone.utc).isoformat() + cursor = await self._connection.execute(""" + INSERT INTO signals ( + market_id, analysis_run_id, action, side, confidence, 
edge_yes, edge_no, + edge_selected, yes_price_snapshot, no_price_snapshot, liquidity_snapshot, + reason_code, reason_detail, strategy_version, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + market_id, analysis_run_id, action, side, confidence, edge_yes, edge_no, + edge_selected, yes_price_snapshot, no_price_snapshot, liquidity_snapshot, + reason_code, reason_detail, strategy_version, now + )) + await self._connection.commit() + return cursor.lastrowid + + async def save_market_outcome( + self, + market_id: str, + disputed: bool, + final_resolution: str, + settled_at: str, + time_to_resolution_hours: Optional[float] = None, + source_run_id: Optional[str] = None + ) -> int: + """Persist market settlement ground truth labels.""" + now = datetime.now(timezone.utc).isoformat() + cursor = await self._connection.execute(""" + INSERT INTO market_outcomes ( + market_id, disputed, final_resolution, time_to_resolution_hours, + source_run_id, settled_at, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(market_id) DO UPDATE SET + disputed = excluded.disputed, + final_resolution = excluded.final_resolution, + time_to_resolution_hours = excluded.time_to_resolution_hours, + source_run_id = excluded.source_run_id, + settled_at = excluded.settled_at + """, ( + market_id, int(disputed), final_resolution, time_to_resolution_hours, + source_run_id, settled_at, now + )) + await self._connection.commit() + return cursor.lastrowid + + async def save_calibration_metric( + self, + model: str, + prompt_version: str, + strategy_version: str, + time_bucket: str, + sample_size: int, + brier_score: Optional[float], + log_loss: Optional[float], + calibration_error: Optional[float], + metadata: Optional[Dict[str, Any]] = None + ) -> int: + """Persist calibration snapshots for model tracking.""" + now = datetime.now(timezone.utc).isoformat() + cursor = await self._connection.execute(""" + INSERT INTO calibration_metrics ( + model, prompt_version, strategy_version, time_bucket, sample_size, + brier_score, log_loss, calibration_error, metadata_json, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, ( + model, prompt_version, strategy_version, time_bucket, sample_size, + brier_score, log_loss, calibration_error, json.dumps(metadata or {}), now + )) + await self._connection.commit() + return cursor.lastrowid + + async def get_signal_replay(self, signal_id: int) -> Optional[Dict[str, Any]]: + """Fetch a signal with run and model artifacts for deterministic replay.""" + async with self._connection.execute(""" + SELECT + s.*, + ar.run_id, + ar.stage, + ar.model, + ar.prompt_version, + ar.strategy_version AS run_strategy_version, + ar.status AS run_status + FROM signals s + LEFT JOIN analysis_runs ar ON s.analysis_run_id = ar.id + WHERE s.id = ? 
+ """, (signal_id,)) as cursor: + row = await cursor.fetchone() + return dict(row) if row else None # --- Stats --- diff --git a/test_resolution_schema.py b/test_resolution_schema.py new file mode 100644 index 0000000..ace8463 --- /dev/null +++ b/test_resolution_schema.py @@ -0,0 +1,90 @@ +from pathlib import Path + +import pytest + +from src.data.database import MarketDatabase + + +@pytest.mark.asyncio +async def test_resolution_tables_created(tmp_path: Path): + db = MarketDatabase(db_path=tmp_path / "test_markets.db") + await db.connect() + + table_names = { + "analysis_runs", + "analysis_outputs_t1", + "analysis_outputs_t2", + "signals", + "market_outcomes", + "calibration_metrics", + } + + async with db._connection.execute( + "SELECT name FROM sqlite_master WHERE type='table'" + ) as cursor: + rows = await cursor.fetchall() + + existing = {row[0] for row in rows} + assert table_names.issubset(existing) + + await db.close() + + +@pytest.mark.asyncio +async def test_signal_replay_roundtrip(tmp_path: Path): + db = MarketDatabase(db_path=tmp_path / "test_markets.db") + await db.connect() + + await db.upsert_market( + { + "id": "mkt-1", + "conditionId": "mkt-1", + "question": "Will event X happen?", + "description": "test", + "resolutionSource": "https://example.com", + "slug": "event-x", + "outcomePrices": ["0.45", "0.55"], + "volumeNum": 1000, + "liquidityNum": 5000, + "endDate": "2026-12-31T00:00:00Z", + "createdAt": "2026-01-01T00:00:00Z", + "umaResolutionStatus": None, + "umaBond": None, + "umaReward": None, + } + ) + + run_id = await db.save_analysis_run( + market_id="mkt-1", + stage="tier2", + run_id="run-123", + model="test-model", + prompt_version="pv1", + strategy_version="sv1", + status="success", + ) + + signal_id = await db.save_signal( + market_id="mkt-1", + analysis_run_id=run_id, + action="ENTER_YES", + side="yes", + confidence=0.74, + edge_yes=0.08, + edge_no=-0.02, + edge_selected=0.08, + yes_price_snapshot=0.45, + no_price_snapshot=0.55, + 
liquidity_snapshot=5000, + reason_code="EDGE_OK", + reason_detail="test", + strategy_version="sv1", + ) + + replay = await db.get_signal_replay(signal_id) + assert replay is not None + assert replay["run_id"] == "run-123" + assert replay["model"] == "test-model" + assert replay["action"] == "ENTER_YES" + + await db.close() From 9aa61cce3bbce7204566124e2ce5fec3b835b719 Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Wed, 4 Feb 2026 01:47:21 -0600 Subject: [PATCH 05/15] Bootstrap Tier 1 contract module and tests --- docs/workstreams/WT-B_TIER1.md | 16 ++++++++++ src/strategies/dispute/__init__.py | 1 + src/strategies/dispute/tier1.py | 49 ++++++++++++++++++++++++++++++ test_tier1_contract.py | 31 +++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 docs/workstreams/WT-B_TIER1.md create mode 100644 src/strategies/dispute/__init__.py create mode 100644 src/strategies/dispute/tier1.py create mode 100644 test_tier1_contract.py diff --git a/docs/workstreams/WT-B_TIER1.md b/docs/workstreams/WT-B_TIER1.md new file mode 100644 index 0000000..605da28 --- /dev/null +++ b/docs/workstreams/WT-B_TIER1.md @@ -0,0 +1,16 @@ +# WT-B Tier 1 Screen + +Branch: `codex/resolution-tier1` + +## Scope +- Implement fast Tier 1 screen with strict JSON contract. + +## First Tasks +1. Create `src/strategies/dispute/tier1.py` runner interface. +2. Define schema validator for `PASS|FLAG`, scores, risks, and metadata. +3. Persist run metadata and Tier 1 output through WT-A contracts. + +## Exit Criteria +- Malformed model output does not crash pipeline. +- PASS/FLAG behavior tested. +- Prompt/model/version metadata always stored. 
diff --git a/src/strategies/dispute/__init__.py b/src/strategies/dispute/__init__.py new file mode 100644 index 0000000..6eddb09 --- /dev/null +++ b/src/strategies/dispute/__init__.py @@ -0,0 +1 @@ +"""Dispute strategy modules.""" diff --git a/src/strategies/dispute/tier1.py b/src/strategies/dispute/tier1.py new file mode 100644 index 0000000..58ff7ef --- /dev/null +++ b/src/strategies/dispute/tier1.py @@ -0,0 +1,49 @@ +"""Tier 1 fast screening contract for resolution-advantage markets.""" +from dataclasses import dataclass +from typing import Any, Dict, List + + +@dataclass +class Tier1Output: + screen_decision: str # PASS | FLAG + ambiguity_score: float + dispute_prob_prior: float + top_risks: List[str] + rationale_short: str + prompt_version: str + model: str + run_id: str + + +def tier1_from_dict(payload: Dict[str, Any]) -> Tier1Output: + """Parse a model payload into the Tier 1 contract object.""" + return Tier1Output( + screen_decision=str(payload["screen_decision"]), + ambiguity_score=float(payload["ambiguity_score"]), + dispute_prob_prior=float(payload["dispute_prob_prior"]), + top_risks=[str(v) for v in payload.get("top_risks", [])], + rationale_short=str(payload.get("rationale_short", "")), + prompt_version=str(payload["prompt_version"]), + model=str(payload["model"]), + run_id=str(payload["run_id"]), + ) + + +def validate_tier1_output(output: Tier1Output) -> None: + """Raise ValueError on contract violations.""" + if output.screen_decision not in {"PASS", "FLAG"}: + raise ValueError("screen_decision must be PASS or FLAG") + if not 0.0 <= output.ambiguity_score <= 1.0: + raise ValueError("ambiguity_score out of bounds") + if not 0.0 <= output.dispute_prob_prior <= 1.0: + raise ValueError("dispute_prob_prior out of bounds") + if len(output.top_risks) > 5: + raise ValueError("top_risks max length is 5") + if len(output.rationale_short) > 400: + raise ValueError("rationale_short max length is 400") + if not output.prompt_version: + raise 
ValueError("prompt_version is required") + if not output.model: + raise ValueError("model is required") + if not output.run_id: + raise ValueError("run_id is required") diff --git a/test_tier1_contract.py b/test_tier1_contract.py new file mode 100644 index 0000000..ce57b53 --- /dev/null +++ b/test_tier1_contract.py @@ -0,0 +1,31 @@ +import pytest + +from src.strategies.dispute.tier1 import Tier1Output, tier1_from_dict, validate_tier1_output + + +def test_tier1_valid_payload_parses_and_validates(): + payload = { + "screen_decision": "FLAG", + "ambiguity_score": 0.7, + "dispute_prob_prior": 0.55, + "top_risks": ["timing ambiguity", "source risk"], + "rationale_short": "Potential edge case in resolution wording.", + "prompt_version": "t1.v1", + "model": "gpt-4o-mini", + "run_id": "run-1", + } + out = tier1_from_dict(payload) + validate_tier1_output(out) + assert out.screen_decision == "FLAG" + + +def test_tier1_invalid_screen_decision_rejected(): + out = Tier1Output("MAYBE", 0.5, 0.5, [], "ok", "t1.v1", "model", "run-1") + with pytest.raises(ValueError, match="screen_decision"): + validate_tier1_output(out) + + +def test_tier1_too_many_risks_rejected(): + out = Tier1Output("PASS", 0.2, 0.1, ["a", "b", "c", "d", "e", "f"], "ok", "t1.v1", "model", "run-1") + with pytest.raises(ValueError, match="top_risks"): + validate_tier1_output(out) From 5381dcf553a7adffbdc2b8d8e0c15af0e04dabaa Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Wed, 4 Feb 2026 01:47:23 -0600 Subject: [PATCH 06/15] Bootstrap Tier 2 contract module and tests --- docs/workstreams/WT-C_TIER2.md | 16 ++++++ src/strategies/dispute/__init__.py | 1 + src/strategies/dispute/tier2.py | 87 ++++++++++++++++++++++++++++++ test_tier2_contract.py | 40 ++++++++++++++ 4 files changed, 144 insertions(+) create mode 100644 docs/workstreams/WT-C_TIER2.md create mode 100644 src/strategies/dispute/__init__.py create mode 100644 src/strategies/dispute/tier2.py create mode 100644 test_tier2_contract.py diff --git 
"""Tier 2 deep-analysis contract for resolution-advantage markets."""
from dataclasses import dataclass
from typing import Any, Dict, List, Optional


# Closed taxonomy of decision paths the Tier 2 analyzer may emit.
DECISION_PATHS = {
    "pre_dispute",
    "post_proposal",
    "active_dispute",
    "initiate_dispute",
    "no_trade",
}
# Allowed qualitative levels for resolution-source risk.
RISK_LEVELS = {"low", "medium", "high"}


@dataclass
class Tier2Output:
    """Structured result of the Tier 2 deep analysis."""

    p_dispute: float              # probability the resolution gets disputed
    p_yes_final: float            # final-outcome probability: YES
    p_no_final: float             # final-outcome probability: NO
    p_invalid_final: float        # final-outcome probability: INVALID
    confidence: float             # model self-reported confidence in [0, 1]
    resolution_source_risk: str   # one of RISK_LEVELS
    edge_cases: List[str]
    decision_path: str            # one of DECISION_PATHS
    no_trade_reason: Optional[str]
    assumptions: List[str]
    prompt_version: str
    model: str
    run_id: str


def tier2_from_dict(payload: Dict[str, Any]) -> Tier2Output:
    """Parse a model payload into the Tier 2 contract object.

    Raises KeyError when a required field is missing and ValueError when a
    numeric field cannot be coerced to float.
    """
    return Tier2Output(
        p_dispute=float(payload["p_dispute"]),
        p_yes_final=float(payload["p_yes_final"]),
        p_no_final=float(payload["p_no_final"]),
        p_invalid_final=float(payload["p_invalid_final"]),
        confidence=float(payload["confidence"]),
        # Lowercased so validation against RISK_LEVELS is case-insensitive.
        resolution_source_risk=str(payload["resolution_source_risk"]).lower(),
        edge_cases=[str(v) for v in payload.get("edge_cases", [])],
        decision_path=str(payload["decision_path"]),
        no_trade_reason=payload.get("no_trade_reason"),
        assumptions=[str(v) for v in payload.get("assumptions", [])],
        prompt_version=str(payload["prompt_version"]),
        model=str(payload["model"]),
        run_id=str(payload["run_id"]),
    )


def normalize_final_probabilities(output: Tier2Output) -> Tier2Output:
    """Rescale the YES/NO/INVALID probabilities to sum to exactly 1.

    No drift bound is enforced here: any positive-sum vector is rescaled.
    Callers that need a drift tolerance should run validate_tier2_output
    first.  Mutates *output* in place and returns it for convenience.

    Raises:
        ValueError: If the final probabilities sum to zero or less.
    """
    total = output.p_yes_final + output.p_no_final + output.p_invalid_final
    if total <= 0:
        raise ValueError("final probability sum must be positive")
    output.p_yes_final = output.p_yes_final / total
    output.p_no_final = output.p_no_final / total
    output.p_invalid_final = output.p_invalid_final / total
    return output


def validate_tier2_output(output: Tier2Output, tolerance: float = 0.01) -> None:
    """Raise ValueError on probability or taxonomy violations.

    Args:
        output: Parsed Tier 2 contract object.
        tolerance: Maximum allowed drift of the YES/NO/INVALID sum from 1.0.
    """
    probs = [output.p_dispute, output.p_yes_final, output.p_no_final, output.p_invalid_final, output.confidence]
    if any(p < 0.0 or p > 1.0 for p in probs):
        raise ValueError("probability value out of bounds")

    prob_sum = output.p_yes_final + output.p_no_final + output.p_invalid_final
    if abs(prob_sum - 1.0) > tolerance:
        raise ValueError("final outcome probabilities must sum to 1")

    if output.decision_path not in DECISION_PATHS:
        raise ValueError("invalid decision_path")

    # no_trade_reason is required exactly when decision_path is no_trade.
    if output.decision_path == "no_trade" and not output.no_trade_reason:
        raise ValueError("no_trade_reason required when decision_path=no_trade")
    if output.decision_path != "no_trade" and output.no_trade_reason:
        raise ValueError("no_trade_reason must be empty when decision_path is not no_trade")
    if output.resolution_source_risk not in RISK_LEVELS:
        raise ValueError("resolution_source_risk must be low, medium, or high")
    if not output.prompt_version:
        raise ValueError("prompt_version is required")
    if not output.model:
        raise ValueError("model is required")
    if not output.run_id:
        raise ValueError("run_id is required")
"""Signal generation and sizing primitives for resolution-advantage strategy."""
from dataclasses import dataclass
from typing import Optional


# Closed set of actions the signal engine may emit.
ACTIONS = {"ENTER_YES", "ENTER_NO", "EXIT", "HOLD", "NO_TRADE"}


@dataclass
class SignalDecision:
    """A trading action plus the context that produced it."""

    action: str
    reason_code: str
    confidence: Optional[float] = None
    edge_yes: Optional[float] = None
    edge_no: Optional[float] = None
    edge_selected: Optional[float] = None


@dataclass
class SizingConfig:
    """Kelly fraction and hard exposure caps used by position sizing."""

    kelly_fraction: float = 0.25
    max_position_pct: float = 0.05
    max_market_exposure_usd: float = 1500.0
    max_strategy_exposure_usd: float = 5000.0


def compute_edge(prob: float, price: float, fee_haircut: float, slippage_haircut: float) -> float:
    """Simple expected-value edge after execution haircuts."""
    return prob - price - fee_haircut - slippage_haircut


def validate_action(action: str) -> None:
    """Reject any action outside the supported set."""
    if action in ACTIONS:
        return
    raise ValueError(f"unsupported action: {action}")


def choose_action(
    edge_yes: float,
    edge_no: float,
    confidence: float,
    min_edge: float,
    min_confidence: float,
) -> SignalDecision:
    """Choose trading action based on edge and confidence gates."""
    context = dict(confidence=confidence, edge_yes=edge_yes, edge_no=edge_no)

    # Gate 1: never trade on low-confidence analysis, regardless of edge.
    if confidence < min_confidence:
        return SignalDecision(action="NO_TRADE", reason_code="LOW_CONFIDENCE", **context)

    # Gate 2: the better side must clear the minimum edge threshold.
    if max(edge_yes, edge_no) < min_edge:
        return SignalDecision(action="NO_TRADE", reason_code="INSUFFICIENT_EDGE", **context)

    # Ties go to YES (>= comparison).
    if edge_yes >= edge_no:
        return SignalDecision(action="ENTER_YES", reason_code="EDGE_YES", edge_selected=edge_yes, **context)
    return SignalDecision(action="ENTER_NO", reason_code="EDGE_NO", edge_selected=edge_no, **context)


def compute_position_size_usd(
    win_probability: float,
    payout_multiple: float,
    bankroll_usd: float,
    confidence_discount: float,
    invalid_discount: float,
    liquidity_discount: float,
    current_market_exposure_usd: float,
    current_strategy_exposure_usd: float,
    config: Optional[SizingConfig] = None,
) -> float:
    """
    Fractional Kelly with discounts and hard exposure caps.

    Returns a USD target size capped by per-market and per-strategy limits.
    """
    cfg = config if config is not None else SizingConfig()

    # Kelly fraction f* = (b*p - q) / b, guarded against zero/negative odds.
    q = 1.0 - win_probability
    f_star = ((payout_multiple * win_probability) - q) / max(payout_multiple, 1e-9)
    fraction = max(0.0, f_star) * cfg.kelly_fraction

    # Apply confidence / invalid-resolution / liquidity discounts, then clamp
    # the bankroll fraction into [0, max_position_pct].
    fraction = fraction * confidence_discount * invalid_discount * liquidity_discount
    fraction = max(0.0, min(fraction, cfg.max_position_pct))

    # Hard caps: never exceed remaining per-market or per-strategy headroom.
    headroom_market = max(0.0, cfg.max_market_exposure_usd - current_market_exposure_usd)
    headroom_strategy = max(0.0, cfg.max_strategy_exposure_usd - current_strategy_exposure_usd)
    return max(0.0, min(bankroll_usd * fraction, headroom_market, headroom_strategy))
"""Evaluation helpers for resolution-advantage backtests and paper runs."""
from dataclasses import dataclass
import math
from typing import Iterable, List, Sequence, Tuple


@dataclass
class CalibrationSummary:
    """Aggregate calibration metrics over a set of observations."""

    sample_size: int
    brier_score: float
    log_loss: float


def brier_score(predicted: float, observed: int) -> float:
    """Binary Brier score for one observation."""
    return (predicted - observed) ** 2


def log_loss(predicted: float, observed: int, eps: float = 1e-12) -> float:
    """Binary log loss for one observation.

    Args:
        predicted: Predicted probability of the positive class.
        observed: Realized outcome; must be 0 or 1.
        eps: Clamp bound keeping the probability away from exact 0 and 1.

    Raises:
        ValueError: If observed is not a binary outcome.
    """
    p = min(max(predicted, eps), 1.0 - eps)
    if observed not in (0, 1):
        raise ValueError("observed must be 0 or 1")
    return -(observed * math.log(p) + (1 - observed) * math.log(1 - p))


def summarize_calibration(observations: Sequence[Tuple[float, int]]) -> CalibrationSummary:
    """Aggregate Brier score and log loss across observations.

    Empty input yields a zeroed summary rather than raising.
    """
    if not observations:
        return CalibrationSummary(sample_size=0, brier_score=0.0, log_loss=0.0)

    brier_values = [brier_score(p, y) for p, y in observations]
    log_values = [log_loss(p, y) for p, y in observations]
    n = len(observations)
    return CalibrationSummary(
        sample_size=n,
        brier_score=sum(brier_values) / n,
        log_loss=sum(log_values) / n,
    )


def bucket_by_confidence(observations: Iterable[Tuple[float, int]], bucket_size: float = 0.1) -> List[Tuple[str, CalibrationSummary]]:
    """
    Group observations by probability bucket for calibration inspection.

    Returns tuples of (bucket_label, summary), sorted by label.

    Raises:
        ValueError: If bucket_size is outside (0, 1].
    """
    if bucket_size <= 0 or bucket_size > 1:
        raise ValueError("bucket_size must be in (0,1]")

    # Bucket count uses an epsilon-guarded ceiling rather than int(1.0 / bucket_size):
    # flooring under-counted buckets for sizes that do not divide 1.0 (e.g. 0.15
    # produced 6 buckets topping out at 0.9, so predictions in [0.9, 1.0] were
    # clamped into a mislabeled lower bucket), and float noise in the reciprocal
    # could drop a bucket even for "clean" sizes.  Ceiling guarantees the buckets
    # cover the whole [0, 1] interval; the epsilon absorbs representation error.
    num_buckets = max(1, math.ceil(1.0 / bucket_size - 1e-9))

    buckets: dict[str, list[Tuple[float, int]]] = {}
    for predicted, observed in observations:
        # Clamp into the last bucket so predicted == 1.0 does not overflow.
        idx = min(int(predicted / bucket_size), num_buckets - 1)
        low = idx * bucket_size
        high = low + bucket_size
        label = f"{low:.1f}-{high:.1f}"
        buckets.setdefault(label, []).append((predicted, observed))

    return [(label, summarize_calibration(values)) for label, values in sorted(buckets.items())]
docs/WORKTREE_COLLAB_PROTOCOL.md | 51 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 docs/WORKTREE_COLLAB_PROTOCOL.md diff --git a/AGENTS.md b/AGENTS.md index 25c1bcd..7ff9ff1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -79,6 +79,7 @@ src/ ### Strategy Documentation - `docs/DISPUTE_PREDICTION_STRATEGY.md` - Strategy overview - `docs/APPENDIX_KELLY_CRITERION.md` - Position sizing theory +- `docs/WORKTREE_COLLAB_PROTOCOL.md` - Parallel branch/worktree operating rules ## What's NOT Built Yet (Dispute Strategy) - LLM analysis pipeline (Tier 1 screening, Tier 2 deep analysis) diff --git a/docs/WORKTREE_COLLAB_PROTOCOL.md b/docs/WORKTREE_COLLAB_PROTOCOL.md new file mode 100644 index 0000000..88358eb --- /dev/null +++ b/docs/WORKTREE_COLLAB_PROTOCOL.md @@ -0,0 +1,51 @@ +# Worktree Collaboration Protocol + +Use this when multiple threads/agents work in parallel on the same repo. + +## Baseline Rules + +1. Keep root workspace as a control surface (status checks only). +2. Every stream gets: + - dedicated branch + - dedicated worktree +3. Do not mix unrelated changes in one stream. +4. Merge dependency-first. +5. Rebase each stream onto `main` after dependency merges. + +## Branch/Worktree Map + +- `codex/resolution-data-contracts` -> `.worktrees/wt-a` +- `codex/resolution-tier1` -> `.worktrees/wt-b` +- `codex/resolution-tier2` -> `.worktrees/wt-c` +- `codex/resolution-signals` -> `.worktrees/wt-d` +- `codex/resolution-eval` -> `.worktrees/wt-e` +- `codex/resolution-baseline` -> `.worktrees/wt-baseline` (clean integration lane) + +## Merge Order + +1. data contracts/schema +2. tier1 + tier2 +3. signal engine/sizing +4. eval/reporting + +## Copyable Status Snippet (for other thread) + +```text +Parallel execution setup is active. 
+ +We are preserving all existing work and isolating streams with dedicated branches/worktrees: +- wt-a: codex/resolution-data-contracts +- wt-b: codex/resolution-tier1 +- wt-c: codex/resolution-tier2 +- wt-d: codex/resolution-signals +- wt-e: codex/resolution-eval +- wt-baseline: codex/resolution-baseline (clean lane from origin/main) + +Dependency merge order: +1) data contracts +2) tier1 + tier2 +3) signals +4) eval + +Please do not work in the root workspace; use your assigned worktree/branch only. +``` From 147829b2b66bd190142cd91c3bf0cbc1a6a5a261 Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Thu, 5 Feb 2026 10:02:22 -0600 Subject: [PATCH 10/15] Add Polymarket arb v1 M1/M2 plumbing and lifecycle --- docs/specs/polymarket_arb_master.md | 76 ++++++ src/engine/main.py | 18 +- src/execution/__init__.py | 5 + src/execution/arb_v1_state_machine.py | 60 +++++ src/strategies/__init__.py | 23 ++ src/strategies/arb_v1_ports.py | 33 +++ src/strategies/arbitrage_v1_plumbing.py | 326 ++++++++++++++++++++++++ src/strategies/polymarket_arb_v1.py | 129 ++++++++++ tests/test_arb_v1_state_machine.py | 22 ++ tests/test_arbitrage_v1_plumbing.py | 110 ++++++++ 10 files changed, 798 insertions(+), 4 deletions(-) create mode 100644 docs/specs/polymarket_arb_master.md create mode 100644 src/execution/arb_v1_state_machine.py create mode 100644 src/strategies/arb_v1_ports.py create mode 100644 src/strategies/arbitrage_v1_plumbing.py create mode 100644 src/strategies/polymarket_arb_v1.py create mode 100644 tests/test_arb_v1_state_machine.py create mode 100644 tests/test_arbitrage_v1_plumbing.py diff --git a/docs/specs/polymarket_arb_master.md b/docs/specs/polymarket_arb_master.md new file mode 100644 index 0000000..5148cba --- /dev/null +++ b/docs/specs/polymarket_arb_master.md @@ -0,0 +1,76 @@ +# Polymarket Arbitrage Master Spec + +Status: Active +Owner: PR3DICT arbitrage planning thread +Last updated: 2026-02-04 + +## Implementation Status +- M1 in progress: executable pricing, 
staleness checks, and risk-gate plumbing added in Python. +- M2 in progress: opportunity engine ports and execution lifecycle state machine scaffolded. + +## Purpose +This is the single source of truth for Polymarket arbitrage planning and implementation decisions. + +## Scope Lock +- Current: Polymarket-only arbitrage, paper mode, backtest/replay required +- Future hook: cross-platform expansion after v1 validation + +## Locked Decisions +- Opportunity classes: binary complement + structural mispricing +- Net edge threshold: hard minimum 100 bps, preferred 120-150 bps +- Opportunity TTL: default 500 ms, adaptive by liquidity/volatility +- Slippage guardrails: 75 bps soft per leg, 100 bps hard per leg, 150 bps multi-leg hard +- Sizing: fractional Kelly (0.25x) with hard portfolio caps +- Partial fill policy: conservative immediate hedge/flatten +- Resolution thread dependency: consume `risk_multiplier` only in v1 +- Release gate: 14-day forward paper + backtest/replay criteria + +## Shared Contract Summary (v1) + +### Opportunity +- Required fields: `opportunity_id`, `opportunity_type`, `market_id`, `side`, `quoted_price`, + `executable_price`, `size_max_contracts`, `edge_bps_net`, `confidence`, `ttl_ms`, + `created_at_ms`, `expires_at_ms`, `risk_multiplier`, `reasons[]` +- Rules: net edge must be post-fee and post-slippage estimate; `confidence` in [0,1]. + +### RiskDecision +- Required fields: `opportunity_id`, `decision` (`allow|adjust|deny`), `size_adjusted_contracts`, + `reason_code`, `details` +- Minimum reason codes: `RISK_OK`, `RISK_DAILY_LOSS`, `RISK_EXPOSURE`, `RISK_SLIPPAGE`, `RISK_STALE`. + +### ExecutionResult +- Required fields: `opportunity_id`, `status` (`filled|partial_fill|failed|rejected`), `filled_qty`, + `avg_fill_price`, `slippage_bps`, `latency_ms`, `pnl_estimate`, `reject_reason`, `reason_code` +- Minimum reason codes: `EXEC_OK`, `EXEC_TIMEOUT`, `EXEC_PARTIAL`, `EXEC_STALE`, `EXEC_SLIPPAGE`, `EXEC_PLATFORM`. 
+ +## Milestones +- M0: Master spec freeze +- M1: Data and risk plumbing +- M2: Opportunity engine + execution state machine +- M3: Replay/backtest + paper telemetry +- M4: v1 gate review and launch-readiness memo + +## Change Control +Any change to locked decisions requires: +1. Add an entry to Decision History (below) +2. Update Shared Contract Summary if schema-affecting +3. Add a short implementation note in the relevant PR description + +## Non-Goals for v1 +- Live capital auto-trading +- Cross-platform smart order routing +- Full adjudication model ingestion + +## Decision History + +### 2026-02-04 | DEC-001 | Polymarket v1 scope +- Decision: Scope v1 to Polymarket-only, paper mode, with backtest/replay. +- Why: Reduce integration complexity and unblock parallel implementation. + +### 2026-02-04 | DEC-002 | Execution guardrails +- Decision: Hard edge floor 100 bps net, TTL 500 ms default, hard slippage caps. +- Why: Prevent quoted-edge mirage and reduce execution-driven drawdowns. + +### 2026-02-04 | DEC-003 | Resolution dependency boundary +- Decision: Consume only `risk_multiplier` from resolution/adjudication thread in v1. +- Why: Keep arb thread decoupled while allowing risk-aware sizing. 
diff --git a/src/engine/main.py b/src/engine/main.py index 2ecff7c..60aa866 100644 --- a/src/engine/main.py +++ b/src/engine/main.py @@ -14,7 +14,7 @@ from src.engine import TradingEngine, EngineConfig from src.platforms import KalshiPlatform -from src.strategies import ArbitrageStrategy +from src.strategies import ArbitrageStrategy, PolymarketArbitrageV1Strategy from src.risk import RiskManager, RiskConfig from src.notifications import NotificationManager, load_notification_config @@ -37,6 +37,7 @@ async def main(): parser = argparse.ArgumentParser(description="PR3DICT Trading Engine") parser.add_argument("--mode", choices=["paper", "live"], default="paper") parser.add_argument("--platform", choices=["kalshi", "polymarket", "all"], default="kalshi") + parser.add_argument("--strategy", choices=["arbitrage", "polymarket_arb_v1"], default="arbitrage") args = parser.parse_args() paper_mode = args.mode == "paper" @@ -61,9 +62,18 @@ async def main(): return # Initialize strategies - strategies = [ - ArbitrageStrategy(min_spread=0.025) - ] + if args.strategy == "polymarket_arb_v1": + polymarket_platform = next((p for p in platforms if p.name == "polymarket"), None) + if polymarket_platform is None: + logger.error("polymarket_arb_v1 strategy requires --platform polymarket or --platform all") + return + strategies = [ + PolymarketArbitrageV1Strategy(orderbook_provider=polymarket_platform.get_orderbook) + ] + else: + strategies = [ + ArbitrageStrategy(min_spread=0.025) + ] # Initialize risk manager risk_config = RiskConfig( diff --git a/src/execution/__init__.py b/src/execution/__init__.py index 5609841..91169c0 100644 --- a/src/execution/__init__.py +++ b/src/execution/__init__.py @@ -15,6 +15,7 @@ StrategyVWAPIntegration, get_vwap_gate, ) +from .arb_v1_state_machine import ArbV1State, ArbV1StateMachine, TransitionResult __all__ = [ # Execution @@ -27,4 +28,8 @@ "VWAPEnrichedSignal", "StrategyVWAPIntegration", "get_vwap_gate", + # Arb v1 lifecycle + "ArbV1State", + 
"""
Arbitrage v1 execution state machine.

Keeps lifecycle transitions explicit and testable.
"""

from __future__ import annotations

from dataclasses import dataclass
from enum import Enum
from typing import Dict, Set


class ArbV1State(str, Enum):
    """Lifecycle states for a v1 arbitrage opportunity."""

    DISCOVERED = "DISCOVERED"
    PRICED_EXECUTABLE = "PRICED_EXECUTABLE"
    RISK_APPROVED = "RISK_APPROVED"
    RISK_REJECTED = "RISK_REJECTED"
    EXECUTION_SUBMITTED = "EXECUTION_SUBMITTED"
    FILLED = "FILLED"
    PARTIAL_FILL = "PARTIAL_FILL"
    FAILED = "FAILED"
    HEDGED_OR_FLATTENED = "HEDGED_OR_FLATTENED"
    CLOSED = "CLOSED"


@dataclass(frozen=True)
class TransitionResult:
    """Outcome of a transition check; reason is populated only when invalid."""

    from_state: ArbV1State
    to_state: ArbV1State
    valid: bool
    reason: str = ""


class ArbV1StateMachine:
    """Strict transition checker for v1 lifecycle."""

    # Adjacency map: each state lists the states it may move to.  CLOSED is
    # terminal, and partial fills must pass through HEDGED_OR_FLATTENED first
    # (conservative immediate-hedge policy from the master spec).
    _allowed: Dict[ArbV1State, Set[ArbV1State]] = {
        ArbV1State.DISCOVERED: {ArbV1State.PRICED_EXECUTABLE},
        ArbV1State.PRICED_EXECUTABLE: {ArbV1State.RISK_APPROVED, ArbV1State.RISK_REJECTED},
        ArbV1State.RISK_APPROVED: {ArbV1State.EXECUTION_SUBMITTED},
        ArbV1State.RISK_REJECTED: {ArbV1State.CLOSED},
        ArbV1State.EXECUTION_SUBMITTED: {ArbV1State.FILLED, ArbV1State.PARTIAL_FILL, ArbV1State.FAILED},
        ArbV1State.FILLED: {ArbV1State.CLOSED},
        ArbV1State.PARTIAL_FILL: {ArbV1State.HEDGED_OR_FLATTENED},
        ArbV1State.HEDGED_OR_FLATTENED: {ArbV1State.CLOSED},
        ArbV1State.FAILED: {ArbV1State.CLOSED},
        ArbV1State.CLOSED: set(),
    }

    def transition(self, from_state: ArbV1State, to_state: ArbV1State) -> TransitionResult:
        """Check whether from_state -> to_state is a legal lifecycle move."""
        if to_state in self._allowed.get(from_state, set()):
            return TransitionResult(from_state=from_state, to_state=to_state, valid=True)
        return TransitionResult(
            from_state=from_state,
            to_state=to_state,
            valid=False,
            reason=f"invalid transition {from_state.value}->{to_state.value}",
        )
+""" + +from __future__ import annotations + +from typing import Protocol + +from ..platforms.base import OrderBook +from .arbitrage_v1_plumbing import ComplementPricing, OpportunityV1, RiskDecision + + +class ExecutablePricerPort(Protocol): + """Boundary for depth-based pricing.""" + + def estimate_complement(self, orderbook: OrderBook, quantity: int) -> ComplementPricing: + """Estimate executable YES+NO cost for a target quantity.""" + + +class RiskGatePort(Protocol): + """Boundary for risk decisioning.""" + + def evaluate( + self, + opportunity: OpportunityV1, + requested_size_contracts: int, + predicted_slippage_bps: int, + snapshot_age_ms_value: int, + ) -> RiskDecision: + """Evaluate opportunity and return normalized decision.""" diff --git a/src/strategies/arbitrage_v1_plumbing.py b/src/strategies/arbitrage_v1_plumbing.py new file mode 100644 index 0000000..dd5c50b --- /dev/null +++ b/src/strategies/arbitrage_v1_plumbing.py @@ -0,0 +1,326 @@ +""" +PR3DICT: Polymarket Arbitrage v1 Plumbing + +M1 scope: +- executable pricing from order book depth +- snapshot freshness checks +- lightweight risk gate with reason codes +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from datetime import datetime, timezone +from decimal import Decimal +from enum import Enum +from typing import List, Optional + +from ..platforms.base import OrderBook, OrderSide + + +class RiskAction(str, Enum): + """Risk decision action.""" + + ALLOW = "allow" + ADJUST = "adjust" + DENY = "deny" + + +class RiskReason(str, Enum): + """Normalized reason codes for arb v1 decisions.""" + + RISK_OK = "RISK_OK" + RISK_DAILY_LOSS = "RISK_DAILY_LOSS" + RISK_EXPOSURE = "RISK_EXPOSURE" + RISK_SLIPPAGE = "RISK_SLIPPAGE" + RISK_STALE = "RISK_STALE" + RISK_EDGE = "RISK_EDGE" + RISK_UNKNOWN = "RISK_UNKNOWN" + + +@dataclass(frozen=True) +class OpportunityV1: + """Minimal contract for opportunity evaluation in v1.""" + + opportunity_id: str + market_id: str + side: 
OrderSide + edge_bps_net: int + confidence: float + ttl_ms: int + created_at_ms: int + expires_at_ms: int + risk_multiplier: Decimal = Decimal("1.0") + reasons: List[str] = field(default_factory=list) + + +@dataclass(frozen=True) +class ExecutablePrice: + """Executable pricing estimate from depth.""" + + market_id: str + side: OrderSide + target_quantity: int + quoted_price: Decimal + executable_price: Decimal + slippage_bps: int + filled_quantity: int + depth_levels_used: int + snapshot_age_ms: int + is_stale: bool + liquidity_sufficient: bool + + +@dataclass(frozen=True) +class ComplementPricing: + """Executable pricing for binary complement validation.""" + + yes_buy: ExecutablePrice + no_buy: ExecutablePrice + total_cost: float + predicted_slippage_bps: int + + +@dataclass(frozen=True) +class RiskDecision: + """Decision returned by arb v1 risk gate.""" + + opportunity_id: str + action: RiskAction + size_adjusted_contracts: int + reason_code: RiskReason + details: str = "" + + +@dataclass +class ArbV1RiskConfig: + """Risk thresholds from master spec.""" + + min_edge_bps_net_hard: int = 100 + max_snapshot_age_ms: int = 750 + max_slippage_bps_hard_per_leg: int = 100 + max_position_contracts: int = 100 + + +def epoch_ms(now: Optional[datetime] = None) -> int: + """Return unix epoch in milliseconds.""" + moment = now or datetime.now(timezone.utc) + return int(moment.timestamp() * 1000) + + +def snapshot_age_ms(orderbook: OrderBook, now: Optional[datetime] = None) -> int: + """Compute snapshot age in milliseconds.""" + moment = now or datetime.now(timezone.utc) + return max(0, int((moment - orderbook.timestamp).total_seconds() * 1000)) + + +def is_snapshot_stale(orderbook: OrderBook, max_age_ms: int = 750, now: Optional[datetime] = None) -> bool: + """Snapshot staleness check.""" + return snapshot_age_ms(orderbook, now=now) > max_age_ms + + +def estimate_executable_price( + orderbook: OrderBook, + side: OrderSide, + quantity: int, + quoted_price: Optional[Decimal] = 
None, + stale_after_ms: int = 750, + now: Optional[datetime] = None, +) -> ExecutablePrice: + """ + Estimate executable average price for a requested quantity. + + Mapping follows current strategy conventions: + - YES consumes asks (buy side) + - NO consumes bids (sell side equivalent for NO exposure) + """ + if quantity <= 0: + raise ValueError("quantity must be > 0") + + age_ms = snapshot_age_ms(orderbook, now=now) + stale = age_ms > stale_after_ms + + raw_levels = orderbook.asks if side == OrderSide.YES else orderbook.bids + levels = sorted(raw_levels, key=lambda x: x[0], reverse=side == OrderSide.NO) + + if not levels: + return ExecutablePrice( + market_id=orderbook.market_id, + side=side, + target_quantity=quantity, + quoted_price=Decimal("0"), + executable_price=Decimal("0"), + slippage_bps=0, + filled_quantity=0, + depth_levels_used=0, + snapshot_age_ms=age_ms, + is_stale=stale, + liquidity_sufficient=False, + ) + + reference_price = quoted_price if quoted_price is not None else levels[0][0] + + remaining = quantity + total_cost = Decimal("0") + filled = 0 + depth_used = 0 + + for price, size in levels: + if remaining <= 0: + break + fill_qty = min(remaining, size) + total_cost += price * fill_qty + filled += fill_qty + remaining -= fill_qty + depth_used += 1 + + executable = total_cost / filled if filled > 0 else Decimal("0") + if reference_price > 0 and executable > 0: + slippage_bps = int(abs((executable - reference_price) / reference_price) * 10000) + else: + slippage_bps = 0 + + return ExecutablePrice( + market_id=orderbook.market_id, + side=side, + target_quantity=quantity, + quoted_price=reference_price, + executable_price=executable, + slippage_bps=slippage_bps, + filled_quantity=filled, + depth_levels_used=depth_used, + snapshot_age_ms=age_ms, + is_stale=stale, + liquidity_sufficient=(filled == quantity), + ) + + +class ArbV1RiskGate: + """Rule-based risk gate for v1 opportunities.""" + + def __init__(self, config: Optional[ArbV1RiskConfig] = None): 
+ self.config = config or ArbV1RiskConfig() + + def evaluate( + self, + opportunity: OpportunityV1, + requested_size_contracts: int, + predicted_slippage_bps: int, + snapshot_age_ms_value: int, + current_time_ms: Optional[int] = None, + ) -> RiskDecision: + """Evaluate opportunity against v1 hard/soft constraints.""" + now_ms = current_time_ms if current_time_ms is not None else epoch_ms() + + if snapshot_age_ms_value > self.config.max_snapshot_age_ms: + return RiskDecision( + opportunity_id=opportunity.opportunity_id, + action=RiskAction.DENY, + size_adjusted_contracts=0, + reason_code=RiskReason.RISK_STALE, + details=f"snapshot_age_ms={snapshot_age_ms_value}", + ) + + if now_ms > opportunity.expires_at_ms: + return RiskDecision( + opportunity_id=opportunity.opportunity_id, + action=RiskAction.DENY, + size_adjusted_contracts=0, + reason_code=RiskReason.RISK_STALE, + details="opportunity expired", + ) + + if opportunity.edge_bps_net < self.config.min_edge_bps_net_hard: + return RiskDecision( + opportunity_id=opportunity.opportunity_id, + action=RiskAction.DENY, + size_adjusted_contracts=0, + reason_code=RiskReason.RISK_EDGE, + details=f"edge_bps_net={opportunity.edge_bps_net}", + ) + + if predicted_slippage_bps > self.config.max_slippage_bps_hard_per_leg: + return RiskDecision( + opportunity_id=opportunity.opportunity_id, + action=RiskAction.DENY, + size_adjusted_contracts=0, + reason_code=RiskReason.RISK_SLIPPAGE, + details=f"predicted_slippage_bps={predicted_slippage_bps}", + ) + + scaled_size = int(Decimal(str(requested_size_contracts)) * opportunity.risk_multiplier) + scaled_size = max(1, scaled_size) + + if scaled_size > self.config.max_position_contracts: + return RiskDecision( + opportunity_id=opportunity.opportunity_id, + action=RiskAction.ADJUST, + size_adjusted_contracts=self.config.max_position_contracts, + reason_code=RiskReason.RISK_EXPOSURE, + details=f"scaled_size={scaled_size}", + ) + + return RiskDecision( + 
opportunity_id=opportunity.opportunity_id, + action=RiskAction.ALLOW, + size_adjusted_contracts=scaled_size, + reason_code=RiskReason.RISK_OK, + details="all checks passed", + ) + + +class OrderbookExecutablePricer: + """ + Default Python implementation of executable pricing boundary. + + This class is intentionally small and deterministic so it can be replaced + by a Rust implementation behind the same method signature later. + """ + + def __init__(self, stale_after_ms: int = 750): + self.stale_after_ms = stale_after_ms + + def estimate_complement(self, orderbook: OrderBook, quantity: int) -> ComplementPricing: + """ + Estimate binary complement executable cost. + + YES buy cost uses ask-side depth. + NO buy cost is derived from YES sell executable price: NO = 1 - YES_sell. + """ + yes_buy = estimate_executable_price( + orderbook=orderbook, + side=OrderSide.YES, + quantity=quantity, + stale_after_ms=self.stale_after_ms, + ) + yes_sell = estimate_executable_price( + orderbook=orderbook, + side=OrderSide.NO, + quantity=quantity, + stale_after_ms=self.stale_after_ms, + ) + + no_buy_price = Decimal("1.0") - yes_sell.executable_price if yes_sell.executable_price > 0 else Decimal("0") + no_buy = ExecutablePrice( + market_id=yes_sell.market_id, + side=OrderSide.NO, + target_quantity=yes_sell.target_quantity, + quoted_price=(Decimal("1.0") - yes_sell.quoted_price) if yes_sell.quoted_price > 0 else Decimal("0"), + executable_price=no_buy_price, + slippage_bps=yes_sell.slippage_bps, + filled_quantity=yes_sell.filled_quantity, + depth_levels_used=yes_sell.depth_levels_used, + snapshot_age_ms=yes_sell.snapshot_age_ms, + is_stale=yes_sell.is_stale, + liquidity_sufficient=yes_sell.liquidity_sufficient, + ) + + total_cost = float(yes_buy.executable_price + no_buy.executable_price) + predicted_slippage = max(yes_buy.slippage_bps, no_buy.slippage_bps) + + return ComplementPricing( + yes_buy=yes_buy, + no_buy=no_buy, + total_cost=total_cost, + 
predicted_slippage_bps=predicted_slippage, + ) diff --git a/src/strategies/polymarket_arb_v1.py b/src/strategies/polymarket_arb_v1.py new file mode 100644 index 0000000..eb073dd --- /dev/null +++ b/src/strategies/polymarket_arb_v1.py @@ -0,0 +1,129 @@ +""" +Polymarket Arbitrage v1 opportunity engine. + +Focus: binary complement opportunities using executable depth pricing and +hard risk gates. Designed with ports so pricing/risk internals can be replaced +without changing orchestration code. +""" + +from __future__ import annotations + +import time +from dataclasses import dataclass +from datetime import datetime +from decimal import Decimal +from typing import Awaitable, Callable, List, Optional + +from .arb_v1_ports import ExecutablePricerPort, RiskGatePort +from .arbitrage_v1_plumbing import ( + ArbV1RiskConfig, + ArbV1RiskGate, + OpportunityV1, + OrderbookExecutablePricer, + RiskAction, +) +from .base import Signal, TradingStrategy +from ..platforms.base import Market, OrderBook, OrderSide, Position + + +@dataclass +class PolymarketArbV1Config: + """Config for v1 opportunity generation.""" + + min_liquidity: Decimal = Decimal("1000") + max_time_to_resolution_hours: int = 24 * 30 + probe_quantity_contracts: int = 25 + fee_buffer_bps: int = 10 + + +class PolymarketArbitrageV1Strategy(TradingStrategy): + """Depth-aware Polymarket arb strategy (v1 opportunity engine).""" + + def __init__( + self, + orderbook_provider: Callable[[str], Awaitable[OrderBook]], + config: Optional[PolymarketArbV1Config] = None, + pricer: Optional[ExecutablePricerPort] = None, + risk_gate: Optional[RiskGatePort] = None, + ): + self.orderbook_provider = orderbook_provider + self.config = config or PolymarketArbV1Config() + self.pricer = pricer or OrderbookExecutablePricer(stale_after_ms=750) + self.risk_gate = risk_gate or ArbV1RiskGate(ArbV1RiskConfig()) + + @property + def name(self) -> str: + return "polymarket_arb_v1" + + async def scan_markets(self, markets: List[Market]) -> 
List[Signal]: + signals: List[Signal] = [] + now = int(time.time() * 1000) + + for market in markets: + if market.platform != "polymarket": + continue + if market.resolved or market.liquidity < self.config.min_liquidity: + continue + + time_to_close_s = (market.close_time - datetime.now(market.close_time.tzinfo)).total_seconds() + if time_to_close_s <= 0 or time_to_close_s > self.config.max_time_to_resolution_hours * 3600: + continue + + try: + orderbook = await self.orderbook_provider(market.id) + except Exception: + continue + + complement = self.pricer.estimate_complement(orderbook, self.config.probe_quantity_contracts) + if ( + complement.yes_buy.is_stale + or complement.no_buy.is_stale + or not complement.yes_buy.liquidity_sufficient + or not complement.no_buy.liquidity_sufficient + ): + continue + + gross_edge_bps = int((1.0 - complement.total_cost) * 10000) + net_edge_bps = gross_edge_bps - self.config.fee_buffer_bps + + opportunity = OpportunityV1( + opportunity_id=f"pm_{market.id}_{now}", + market_id=market.id, + side=OrderSide.YES, + edge_bps_net=net_edge_bps, + confidence=1.0, + ttl_ms=500, + created_at_ms=now, + expires_at_ms=now + 500, + reasons=["binary_complement_executable"], + ) + decision = self.risk_gate.evaluate( + opportunity=opportunity, + requested_size_contracts=self.config.probe_quantity_contracts, + predicted_slippage_bps=complement.predicted_slippage_bps, + snapshot_age_ms_value=max(complement.yes_buy.snapshot_age_ms, complement.no_buy.snapshot_age_ms), + ) + if decision.action == RiskAction.DENY: + continue + + # Engine currently consumes single-leg signals; we annotate paired leg requirements in reason. 
+ signals.append( + Signal( + market_id=market.id, + market=market, + side=OrderSide.YES, + strength=min(1.0, max(0.0, net_edge_bps / 300.0)), + reason=( + f"arb_v1 executable complement edge={net_edge_bps}bps " + f"qty={decision.size_adjusted_contracts} action={decision.action.value} " + f"(paired NO leg required)" + ), + target_price=complement.yes_buy.executable_price, + ) + ) + + return signals + + async def check_exit(self, position: Position, market: Market) -> Optional[Signal]: + # v1 focuses on entry plumbing; existing exit behavior remains unchanged. + return None diff --git a/tests/test_arb_v1_state_machine.py b/tests/test_arb_v1_state_machine.py new file mode 100644 index 0000000..ffbb225 --- /dev/null +++ b/tests/test_arb_v1_state_machine.py @@ -0,0 +1,22 @@ +from src.execution.arb_v1_state_machine import ArbV1State, ArbV1StateMachine + + +def test_valid_state_transition(): + sm = ArbV1StateMachine() + result = sm.transition(ArbV1State.DISCOVERED, ArbV1State.PRICED_EXECUTABLE) + assert result.valid is True + + +def test_invalid_state_transition(): + sm = ArbV1StateMachine() + result = sm.transition(ArbV1State.DISCOVERED, ArbV1State.FILLED) + assert result.valid is False + assert "invalid transition" in result.reason + + +def test_partial_fill_must_flatten_before_close(): + sm = ArbV1StateMachine() + bad = sm.transition(ArbV1State.PARTIAL_FILL, ArbV1State.CLOSED) + good = sm.transition(ArbV1State.PARTIAL_FILL, ArbV1State.HEDGED_OR_FLATTENED) + assert bad.valid is False + assert good.valid is True diff --git a/tests/test_arbitrage_v1_plumbing.py b/tests/test_arbitrage_v1_plumbing.py new file mode 100644 index 0000000..5040039 --- /dev/null +++ b/tests/test_arbitrage_v1_plumbing.py @@ -0,0 +1,110 @@ +from datetime import datetime, timedelta, timezone +from decimal import Decimal + +from src.platforms.base import OrderBook, OrderSide +from src.strategies.arbitrage_v1_plumbing import ( + ArbV1RiskConfig, + ArbV1RiskGate, + OpportunityV1, + 
OrderbookExecutablePricer, + RiskAction, + RiskReason, + estimate_executable_price, + snapshot_age_ms, +) + + +def _book(age_ms: int = 0) -> OrderBook: + ts = datetime.now(timezone.utc) - timedelta(milliseconds=age_ms) + return OrderBook( + market_id="m1", + bids=[(Decimal("0.48"), 100), (Decimal("0.47"), 200)], + asks=[(Decimal("0.52"), 100), (Decimal("0.54"), 200)], + timestamp=ts, + ) + + +def test_estimate_executable_price_yes_side_depth(): + est = estimate_executable_price(_book(), OrderSide.YES, quantity=150) + # 100 @ 0.52 + 50 @ 0.54 => 0.5266... + assert est.liquidity_sufficient is True + assert est.filled_quantity == 150 + assert est.depth_levels_used == 2 + assert est.executable_price > Decimal("0.526") + assert est.executable_price < Decimal("0.527") + assert est.slippage_bps > 100 + + +def test_estimate_executable_price_stale_and_insufficient(): + est = estimate_executable_price(_book(age_ms=1500), OrderSide.YES, quantity=500, stale_after_ms=750) + assert est.is_stale is True + assert est.liquidity_sufficient is False + assert est.filled_quantity == 300 + + +def test_snapshot_age_ms_non_negative(): + age = snapshot_age_ms(_book(age_ms=250)) + assert age >= 200 + + +def test_risk_gate_deny_edge(): + gate = ArbV1RiskGate(ArbV1RiskConfig(min_edge_bps_net_hard=100)) + opp = OpportunityV1( + opportunity_id="o1", + market_id="m1", + side=OrderSide.YES, + edge_bps_net=90, + confidence=0.7, + ttl_ms=500, + created_at_ms=1, + expires_at_ms=9999999999999, + ) + decision = gate.evaluate(opp, requested_size_contracts=10, predicted_slippage_bps=20, snapshot_age_ms_value=100) + assert decision.action == RiskAction.DENY + assert decision.reason_code == RiskReason.RISK_EDGE + + +def test_risk_gate_adjust_exposure(): + gate = ArbV1RiskGate(ArbV1RiskConfig(max_position_contracts=50)) + opp = OpportunityV1( + opportunity_id="o2", + market_id="m1", + side=OrderSide.YES, + edge_bps_net=130, + confidence=0.9, + ttl_ms=500, + created_at_ms=1, + expires_at_ms=9999999999999, + 
risk_multiplier=Decimal("1.5"), + ) + decision = gate.evaluate(opp, requested_size_contracts=40, predicted_slippage_bps=50, snapshot_age_ms_value=100) + assert decision.action == RiskAction.ADJUST + assert decision.reason_code == RiskReason.RISK_EXPOSURE + assert decision.size_adjusted_contracts == 50 + + +def test_risk_gate_allow(): + gate = ArbV1RiskGate() + opp = OpportunityV1( + opportunity_id="o3", + market_id="m1", + side=OrderSide.NO, + edge_bps_net=150, + confidence=0.8, + ttl_ms=500, + created_at_ms=1, + expires_at_ms=9999999999999, + ) + decision = gate.evaluate(opp, requested_size_contracts=20, predicted_slippage_bps=30, snapshot_age_ms_value=100) + assert decision.action == RiskAction.ALLOW + assert decision.reason_code == RiskReason.RISK_OK + assert decision.size_adjusted_contracts == 20 + + +def test_orderbook_executable_pricer_complement_cost(): + pricer = OrderbookExecutablePricer(stale_after_ms=750) + result = pricer.estimate_complement(_book(), quantity=50) + assert result.yes_buy.liquidity_sufficient is True + assert result.no_buy.liquidity_sufficient is True + assert result.total_cost > 0 + assert result.total_cost < 1.1 From 369abdd5b3856e4e0fa6827758748dce2a528725 Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Thu, 5 Feb 2026 10:04:57 -0600 Subject: [PATCH 11/15] Wire Tier1/Tier2 persistence to analysis run contracts --- docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md | 15 ++ docs/workstreams/WT-B_TIER1.md | 6 + docs/workstreams/WT-C_TIER2.md | 6 + src/strategies/dispute/__init__.py | 4 + src/strategies/dispute/pipeline.py | 158 +++++++++++++++++++++ test_dispute_pipeline_persistence.py | 141 ++++++++++++++++++ 6 files changed, 330 insertions(+) create mode 100644 src/strategies/dispute/pipeline.py create mode 100644 test_dispute_pipeline_persistence.py diff --git a/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md b/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md index ccc240f..2c788e2 100644 --- a/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md +++ 
b/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md @@ -10,6 +10,21 @@ Out of scope: - Arbitrage, market making, or other strategy rewrites - Production real-money auto-execution +## Current Status (2026-02-05) + +- Completed and merged: + - WT-A schema + data contracts + - WT-B Tier 1 contract scaffold + - WT-C Tier 2 contract scaffold + - WT-D signal/sizing primitives + - WT-E evaluation helpers +- New baseline integration step added: + - Tier 1/Tier 2 persistence wiring to WT-A contracts via `src/strategies/dispute/pipeline.py` + - Persistence integration tests in `test_dispute_pipeline_persistence.py` +- Next focus: + - connect live Tier 1/Tier 2 model runners to `persist_tier1_result` / `persist_tier2_result` + - add signal emission persistence from `signal_engine` into `signals` + ## Workstream Layout ## WT-A: Data Contracts + Schema diff --git a/docs/workstreams/WT-B_TIER1.md b/docs/workstreams/WT-B_TIER1.md index 605da28..79ac55b 100644 --- a/docs/workstreams/WT-B_TIER1.md +++ b/docs/workstreams/WT-B_TIER1.md @@ -14,3 +14,9 @@ Branch: `codex/resolution-tier1` - Malformed model output does not crash pipeline. - PASS/FLAG behavior tested. - Prompt/model/version metadata always stored. + +## Progress + +- Contract parser/validator implemented and merged. +- Contract tests implemented and passing. +- Baseline integration now persists Tier 1 outputs through `persist_tier1_result`. diff --git a/docs/workstreams/WT-C_TIER2.md b/docs/workstreams/WT-C_TIER2.md index b122a11..da604bd 100644 --- a/docs/workstreams/WT-C_TIER2.md +++ b/docs/workstreams/WT-C_TIER2.md @@ -14,3 +14,9 @@ Branch: `codex/resolution-tier2` - Valid outputs persist. - Invalid outputs are rejected with reason logging. - Decision path taxonomy covered by tests. + +## Progress + +- Contract parser/validator implemented and merged. +- Contract tests implemented and passing. +- Baseline integration now persists Tier 2 outputs through `persist_tier2_result`. 
diff --git a/src/strategies/dispute/__init__.py b/src/strategies/dispute/__init__.py index 6eddb09..26b1610 100644 --- a/src/strategies/dispute/__init__.py +++ b/src/strategies/dispute/__init__.py @@ -1 +1,5 @@ """Dispute strategy modules.""" + +from .pipeline import persist_tier1_result, persist_tier2_result + +__all__ = ["persist_tier1_result", "persist_tier2_result"] diff --git a/src/strategies/dispute/pipeline.py b/src/strategies/dispute/pipeline.py new file mode 100644 index 0000000..1cb5268 --- /dev/null +++ b/src/strategies/dispute/pipeline.py @@ -0,0 +1,158 @@ +"""Persistence wiring for Tier 1/Tier 2 contract outputs.""" +from dataclasses import dataclass +from typing import Any, Dict, Optional +from uuid import uuid4 + +from ...data.database import MarketDatabase +from .tier1 import tier1_from_dict, validate_tier1_output +from .tier2 import tier2_from_dict, validate_tier2_output + + +@dataclass +class PersistResult: + stage: str + status: str + run_db_id: int + output_db_id: Optional[int] + run_id: str + error_message: Optional[str] = None + + +def _derive_run_fields(payload: Dict[str, Any], stage: str) -> tuple[str, str, str]: + run_id = str(payload.get("run_id") or f"{stage}-{uuid4().hex[:12]}") + model = str(payload.get("model") or "unknown") + prompt_version = str(payload.get("prompt_version") or "unknown") + return run_id, model, prompt_version + + +async def persist_tier1_result( + db: MarketDatabase, + market_id: str, + payload: Dict[str, Any], + strategy_version: str, + latency_ms: Optional[int] = None, + token_cost_usd: Optional[float] = None, +) -> PersistResult: + """Validate and persist Tier 1 output with deterministic run metadata.""" + run_id, model, prompt_version = _derive_run_fields(payload, "tier1") + try: + output = tier1_from_dict(payload) + validate_tier1_output(output) + except (KeyError, TypeError, ValueError) as exc: + run_db_id = await db.save_analysis_run( + market_id=market_id, + stage="tier1", + run_id=run_id, + model=model, + 
prompt_version=prompt_version, + strategy_version=strategy_version, + status="invalid", + latency_ms=latency_ms, + token_cost_usd=token_cost_usd, + error_message=str(exc), + ) + return PersistResult( + stage="tier1", + status="invalid", + run_db_id=run_db_id, + output_db_id=None, + run_id=run_id, + error_message=str(exc), + ) + + run_db_id = await db.save_analysis_run( + market_id=market_id, + stage="tier1", + run_id=output.run_id, + model=output.model, + prompt_version=output.prompt_version, + strategy_version=strategy_version, + status="success", + latency_ms=latency_ms, + token_cost_usd=token_cost_usd, + ) + output_db_id = await db.save_tier1_output( + analysis_run_id=run_db_id, + market_id=market_id, + screen_decision=output.screen_decision, + ambiguity_score=output.ambiguity_score, + dispute_prob_prior=output.dispute_prob_prior, + top_risks=output.top_risks, + rationale_short=output.rationale_short, + ) + return PersistResult( + stage="tier1", + status="success", + run_db_id=run_db_id, + output_db_id=output_db_id, + run_id=output.run_id, + ) + + +async def persist_tier2_result( + db: MarketDatabase, + market_id: str, + payload: Dict[str, Any], + strategy_version: str, + latency_ms: Optional[int] = None, + token_cost_usd: Optional[float] = None, +) -> PersistResult: + """Validate and persist Tier 2 output with deterministic run metadata.""" + run_id, model, prompt_version = _derive_run_fields(payload, "tier2") + try: + output = tier2_from_dict(payload) + validate_tier2_output(output) + except (KeyError, TypeError, ValueError) as exc: + run_db_id = await db.save_analysis_run( + market_id=market_id, + stage="tier2", + run_id=run_id, + model=model, + prompt_version=prompt_version, + strategy_version=strategy_version, + status="invalid", + latency_ms=latency_ms, + token_cost_usd=token_cost_usd, + error_message=str(exc), + ) + return PersistResult( + stage="tier2", + status="invalid", + run_db_id=run_db_id, + output_db_id=None, + run_id=run_id, + 
error_message=str(exc), + ) + + run_db_id = await db.save_analysis_run( + market_id=market_id, + stage="tier2", + run_id=output.run_id, + model=output.model, + prompt_version=output.prompt_version, + strategy_version=strategy_version, + status="success", + latency_ms=latency_ms, + token_cost_usd=token_cost_usd, + ) + output_db_id = await db.save_tier2_output( + analysis_run_id=run_db_id, + market_id=market_id, + p_dispute=output.p_dispute, + p_yes_final=output.p_yes_final, + p_no_final=output.p_no_final, + p_invalid_final=output.p_invalid_final, + confidence=output.confidence, + resolution_source_risk=output.resolution_source_risk, + edge_cases=output.edge_cases, + decision_path=output.decision_path, + no_trade_reason=output.no_trade_reason, + assumptions=output.assumptions, + ) + return PersistResult( + stage="tier2", + status="success", + run_db_id=run_db_id, + output_db_id=output_db_id, + run_id=output.run_id, + ) diff --git a/test_dispute_pipeline_persistence.py b/test_dispute_pipeline_persistence.py new file mode 100644 index 0000000..ec8125e --- /dev/null +++ b/test_dispute_pipeline_persistence.py @@ -0,0 +1,141 @@ +from pathlib import Path + +import pytest + +from src.data.database import MarketDatabase +from src.strategies.dispute.pipeline import persist_tier1_result, persist_tier2_result + + +async def _seed_market(db: MarketDatabase, market_id: str = "mkt-pipeline") -> None: + await db.upsert_market( + { + "id": market_id, + "conditionId": market_id, + "question": "Will event X happen?", + "description": "test", + "resolutionSource": "https://example.com", + "slug": "event-x", + "outcomePrices": ["0.45", "0.55"], + "volumeNum": 1000, + "liquidityNum": 5000, + "endDate": "2026-12-31T00:00:00Z", + "createdAt": "2026-01-01T00:00:00Z", + "umaResolutionStatus": None, + "umaBond": None, + "umaReward": None, + } + ) + + +@pytest.mark.asyncio +async def test_persist_tier1_success(tmp_path: Path): + db = MarketDatabase(db_path=tmp_path / "pipeline.db") + await 
db.connect() + await _seed_market(db) + + payload = { + "screen_decision": "FLAG", + "ambiguity_score": 0.62, + "dispute_prob_prior": 0.41, + "top_risks": ["source inconsistency"], + "rationale_short": "Potential interpretation ambiguity.", + "prompt_version": "t1.v1", + "model": "gpt-4o-mini", + "run_id": "tier1-run-1", + } + result = await persist_tier1_result(db, "mkt-pipeline", payload, "sv1") + assert result.status == "success" + assert result.output_db_id is not None + + async with db._connection.execute("SELECT COUNT(*) FROM analysis_outputs_t1") as cursor: + count = (await cursor.fetchone())[0] + assert count == 1 + await db.close() + + +@pytest.mark.asyncio +async def test_persist_tier1_invalid_payload(tmp_path: Path): + db = MarketDatabase(db_path=tmp_path / "pipeline.db") + await db.connect() + await _seed_market(db) + + payload = { + "screen_decision": "MAYBE", + "ambiguity_score": 0.62, + "dispute_prob_prior": 0.41, + "top_risks": [], + "rationale_short": "bad decision", + "prompt_version": "t1.v1", + "model": "gpt-4o-mini", + "run_id": "tier1-run-invalid", + } + result = await persist_tier1_result(db, "mkt-pipeline", payload, "sv1") + assert result.status == "invalid" + assert result.output_db_id is None + + async with db._connection.execute("SELECT COUNT(*) FROM analysis_outputs_t1") as cursor: + count = (await cursor.fetchone())[0] + assert count == 0 + await db.close() + + +@pytest.mark.asyncio +async def test_persist_tier2_success(tmp_path: Path): + db = MarketDatabase(db_path=tmp_path / "pipeline.db") + await db.connect() + await _seed_market(db) + + payload = { + "p_dispute": 0.35, + "p_yes_final": 0.50, + "p_no_final": 0.45, + "p_invalid_final": 0.05, + "confidence": 0.73, + "resolution_source_risk": "medium", + "edge_cases": ["timezone boundary"], + "decision_path": "pre_dispute", + "no_trade_reason": None, + "assumptions": ["source remains online"], + "prompt_version": "t2.v1", + "model": "gpt-5", + "run_id": "tier2-run-1", + } + result = 
await persist_tier2_result(db, "mkt-pipeline", payload, "sv1") + assert result.status == "success" + assert result.output_db_id is not None + + async with db._connection.execute("SELECT COUNT(*) FROM analysis_outputs_t2") as cursor: + count = (await cursor.fetchone())[0] + assert count == 1 + await db.close() + + +@pytest.mark.asyncio +async def test_persist_tier2_invalid_payload(tmp_path: Path): + db = MarketDatabase(db_path=tmp_path / "pipeline.db") + await db.connect() + await _seed_market(db) + + payload = { + "p_dispute": 0.35, + "p_yes_final": 0.50, + "p_no_final": 0.45, + "p_invalid_final": 0.05, + "confidence": 0.73, + "resolution_source_risk": "medium", + "edge_cases": [], + "decision_path": "no_trade", + "no_trade_reason": None, + "assumptions": [], + "prompt_version": "t2.v1", + "model": "gpt-5", + "run_id": "tier2-run-invalid", + } + result = await persist_tier2_result(db, "mkt-pipeline", payload, "sv1") + assert result.status == "invalid" + assert result.output_db_id is None + + async with db._connection.execute("SELECT COUNT(*) FROM analysis_outputs_t2") as cursor: + count = (await cursor.fetchone())[0] + assert count == 0 + await db.close() From f389a692844ccdaa071fba1ec9038bdf1a9cd371 Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Thu, 5 Feb 2026 10:27:51 -0600 Subject: [PATCH 12/15] Normalize small Tier2 probability drift before persistence --- docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md | 3 + src/strategies/dispute/pipeline.py | 17 +++++- test_dispute_pipeline_persistence.py | 68 ++++++++++++++++++++++ 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md b/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md index 2c788e2..838bcb5 100644 --- a/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md +++ b/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md @@ -21,6 +21,9 @@ Out of scope: - New baseline integration step added: - Tier 1/Tier 2 persistence wiring to WT-A contracts via `src/strategies/dispute/pipeline.py` - 
Persistence integration tests in `test_dispute_pipeline_persistence.py` + - Tier 2 probability simplex policy: + - normalize when `abs(sum-1.0) <= 0.01` + - reject as invalid when drift exceeds `0.01` - Next focus: - connect live Tier 1/Tier 2 model runners to `persist_tier1_result` / `persist_tier2_result` - add signal emission persistence from `signal_engine` into `signals` diff --git a/src/strategies/dispute/pipeline.py b/src/strategies/dispute/pipeline.py index 1cb5268..9faa29b 100644 --- a/src/strategies/dispute/pipeline.py +++ b/src/strategies/dispute/pipeline.py @@ -5,7 +5,9 @@ from ...data.database import MarketDatabase from .tier1 import tier1_from_dict, validate_tier1_output -from .tier2 import tier2_from_dict, validate_tier2_output +from .tier2 import normalize_final_probabilities, tier2_from_dict, validate_tier2_output + +PROBABILITY_SUM_EPSILON = 0.01 @dataclass @@ -16,6 +18,7 @@ class PersistResult: output_db_id: Optional[int] run_id: str error_message: Optional[str] = None + normalization_applied: bool = False def _derive_run_fields(payload: Dict[str, Any], stage: str) -> tuple[str, str, str]: @@ -99,9 +102,17 @@ async def persist_tier2_result( ) -> PersistResult: """Validate and persist Tier 2 output with deterministic run metadata.""" run_id, model, prompt_version = _derive_run_fields(payload, "tier2") + normalization_applied = False try: output = tier2_from_dict(payload) - validate_tier2_output(output) + final_sum = output.p_yes_final + output.p_no_final + output.p_invalid_final + drift = abs(final_sum - 1.0) + if drift > PROBABILITY_SUM_EPSILON: + raise ValueError(f"final probability sum drift too large: {drift:.6f}") + if drift > 0.0: + output = normalize_final_probabilities(output) + normalization_applied = True + validate_tier2_output(output, tolerance=1e-9) except (KeyError, TypeError, ValueError) as exc: run_db_id = await db.save_analysis_run( market_id=market_id, @@ -122,6 +133,7 @@ async def persist_tier2_result( output_db_id=None, 
run_id=run_id, error_message=str(exc), + normalization_applied=False, ) run_db_id = await db.save_analysis_run( @@ -155,4 +167,5 @@ async def persist_tier2_result( run_db_id=run_db_id, output_db_id=output_db_id, run_id=output.run_id, + normalization_applied=normalization_applied, ) diff --git a/test_dispute_pipeline_persistence.py b/test_dispute_pipeline_persistence.py index ec8125e..9a613ad 100644 --- a/test_dispute_pipeline_persistence.py +++ b/test_dispute_pipeline_persistence.py @@ -103,6 +103,7 @@ async def test_persist_tier2_success(tmp_path: Path): result = await persist_tier2_result(db, "mkt-pipeline", payload, "sv1") assert result.status == "success" assert result.output_db_id is not None + assert result.normalization_applied is False async with db._connection.execute("SELECT COUNT(*) FROM analysis_outputs_t2") as cursor: count = (await cursor.fetchone())[0] @@ -110,6 +111,73 @@ async def test_persist_tier2_success(tmp_path: Path): await db.close() +@pytest.mark.asyncio +async def test_persist_tier2_small_drift_is_normalized(tmp_path: Path): + db = MarketDatabase(db_path=tmp_path / "pipeline.db") + await db.connect() + await _seed_market(db) + + payload = { + "p_dispute": 0.35, + "p_yes_final": 0.499, + "p_no_final": 0.451, + "p_invalid_final": 0.049, # sum=0.999 -> normalize + "confidence": 0.73, + "resolution_source_risk": "medium", + "edge_cases": ["timezone boundary"], + "decision_path": "pre_dispute", + "no_trade_reason": None, + "assumptions": ["source remains online"], + "prompt_version": "t2.v1", + "model": "gpt-5", + "run_id": "tier2-run-normalized", + } + result = await persist_tier2_result(db, "mkt-pipeline", payload, "sv1") + assert result.status == "success" + assert result.output_db_id is not None + assert result.normalization_applied is True + + async with db._connection.execute( + "SELECT p_yes_final, p_no_final, p_invalid_final FROM analysis_outputs_t2 WHERE id = ?", + (result.output_db_id,), + ) as cursor: + row = await cursor.fetchone() 
+ assert row is not None + assert pytest.approx(sum(row), abs=1e-9) == 1.0 + await db.close() + + +@pytest.mark.asyncio +async def test_persist_tier2_large_drift_is_invalid(tmp_path: Path): + db = MarketDatabase(db_path=tmp_path / "pipeline.db") + await db.connect() + await _seed_market(db) + + payload = { + "p_dispute": 0.35, + "p_yes_final": 0.50, + "p_no_final": 0.45, + "p_invalid_final": 0.10, # sum=1.05 -> invalid + "confidence": 0.73, + "resolution_source_risk": "medium", + "edge_cases": [], + "decision_path": "pre_dispute", + "no_trade_reason": None, + "assumptions": [], + "prompt_version": "t2.v1", + "model": "gpt-5", + "run_id": "tier2-run-large-drift", + } + result = await persist_tier2_result(db, "mkt-pipeline", payload, "sv1") + assert result.status == "invalid" + assert result.output_db_id is None + + async with db._connection.execute("SELECT COUNT(*) FROM analysis_outputs_t2") as cursor: + count = (await cursor.fetchone())[0] + assert count == 0 + await db.close() + + @pytest.mark.asyncio async def test_persist_tier2_invalid_payload(tmp_path: Path): db = MarketDatabase(db_path=tmp_path / "pipeline.db") From 0f2b49187e3d92fc7a109f0697d5c8913b4ad80d Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Thu, 5 Feb 2026 21:34:07 -0600 Subject: [PATCH 13/15] Persist normalization metadata and add stop-loss signal params --- docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md | 2 + docs/workstreams/WT-D_SIGNALS.md | 6 +++ src/data/database.py | 31 +++++++++-- src/strategies/dispute/pipeline.py | 1 + src/strategies/dispute/signal_engine.py | 61 ++++++++++++++++++++-- test_dispute_pipeline_persistence.py | 14 +++-- test_resolution_schema.py | 4 ++ test_signal_engine.py | 20 ++++++- 8 files changed, 124 insertions(+), 15 deletions(-) diff --git a/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md b/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md index 838bcb5..eb220ad 100644 --- a/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md +++ b/docs/RESOLUTION_ADVANTAGE_WORKTREE_PLAN.md @@ -113,6 
+113,7 @@ Consumes: Delivers: - Actions: `ENTER_YES`, `ENTER_NO`, `EXIT`, `HOLD`, `NO_TRADE` - Edge math after fees/slippage haircuts +- Stop-loss parameterized trigger (`STOP_LOSS_PCT`) attached to entry signals - Fractional Kelly sizing with confidence/invalid/liquidity discounts - Cap enforcement: - `MAX_POSITION_PCT` @@ -192,5 +193,6 @@ Each stream merges via repo policy: - `INITIATION_CONFIDENCE=0.75` - `KELLY_FRACTION=0.25` - `MAX_POSITION_PCT=0.05` +- `STOP_LOSS_PCT=0.15` All values remain config-driven and easy to tune after backtesting. diff --git a/docs/workstreams/WT-D_SIGNALS.md b/docs/workstreams/WT-D_SIGNALS.md index 5792be6..c48b4ef 100644 --- a/docs/workstreams/WT-D_SIGNALS.md +++ b/docs/workstreams/WT-D_SIGNALS.md @@ -15,3 +15,9 @@ Branch: `codex/resolution-signals` - Emits `ENTER_YES`, `ENTER_NO`, `EXIT`, `HOLD`, `NO_TRADE`. - Every signal has reason code + snapshot context. - Cap enforcement tests pass. + +## Progress + +- Signal action contract and sizing scaffolding implemented and merged. +- Stop-loss parameter support added (`stop_loss_pct`, `stop_loss_price` on signal decisions). +- Signal persistence contract extended with stop-loss fields. 
diff --git a/src/data/database.py b/src/data/database.py index c03016a..c51aa28 100644 --- a/src/data/database.py +++ b/src/data/database.py @@ -156,6 +156,7 @@ async def _create_tables(self) -> None: decision_path TEXT NOT NULL, -- pre_dispute, post_proposal, active_dispute, initiate_dispute, no_trade no_trade_reason TEXT, assumptions TEXT NOT NULL, -- JSON array + normalization_applied INTEGER NOT NULL DEFAULT 0, created_at TEXT NOT NULL, FOREIGN KEY (analysis_run_id) REFERENCES analysis_runs(id), FOREIGN KEY (market_id) REFERENCES markets(id) @@ -175,6 +176,8 @@ async def _create_tables(self) -> None: yes_price_snapshot REAL, no_price_snapshot REAL, liquidity_snapshot REAL, + stop_loss_price_snapshot REAL, + stop_loss_pct REAL, reason_code TEXT NOT NULL, reason_detail TEXT, strategy_version TEXT NOT NULL, @@ -224,7 +227,20 @@ async def _create_tables(self) -> None: CREATE INDEX IF NOT EXISTS idx_market_outcomes_resolution ON market_outcomes(final_resolution); CREATE INDEX IF NOT EXISTS idx_calibration_model_bucket ON calibration_metrics(model, prompt_version, time_bucket); """) + # Backward-compatible additive migrations for already-created local DBs. 
+ await self._ensure_column("analysis_outputs_t2", "normalization_applied", "INTEGER NOT NULL DEFAULT 0") + await self._ensure_column("signals", "stop_loss_price_snapshot", "REAL") + await self._ensure_column("signals", "stop_loss_pct", "REAL") await self._connection.commit() + + async def _ensure_column(self, table: str, column: str, declaration: str) -> None: + """Add a column if missing; no-op when already present.""" + async with self._connection.execute(f"PRAGMA table_info({table})") as cursor: + rows = await cursor.fetchall() + existing = {row[1] for row in rows} + if column in existing: + return + await self._connection.execute(f"ALTER TABLE {table} ADD COLUMN {column} {declaration}") # --- Market Operations --- @@ -457,7 +473,8 @@ async def save_tier2_output( edge_cases: List[str], decision_path: str, no_trade_reason: Optional[str], - assumptions: List[str] + assumptions: List[str], + normalization_applied: bool = False ) -> int: """Persist validated Tier 2 output contract.""" now = datetime.now(timezone.utc).isoformat() @@ -465,12 +482,12 @@ async def save_tier2_output( INSERT INTO analysis_outputs_t2 ( analysis_run_id, market_id, p_dispute, p_yes_final, p_no_final, p_invalid_final, confidence, resolution_source_risk, edge_cases, - decision_path, no_trade_reason, assumptions, created_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + decision_path, no_trade_reason, assumptions, normalization_applied, created_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( analysis_run_id, market_id, p_dispute, p_yes_final, p_no_final, p_invalid_final, confidence, resolution_source_risk, json.dumps(edge_cases), - decision_path, no_trade_reason, json.dumps(assumptions), now + decision_path, no_trade_reason, json.dumps(assumptions), int(normalization_applied), now )) await self._connection.commit() return cursor.lastrowid @@ -490,6 +507,8 @@ async def save_signal( yes_price_snapshot: Optional[float] = None, no_price_snapshot: Optional[float] = None, liquidity_snapshot: Optional[float] = None, + stop_loss_price_snapshot: Optional[float] = None, + stop_loss_pct: Optional[float] = None, reason_detail: Optional[str] = None ) -> int: """Persist signal with edge context for replay.""" @@ -498,11 +517,13 @@ async def save_signal( INSERT INTO signals ( market_id, analysis_run_id, action, side, confidence, edge_yes, edge_no, edge_selected, yes_price_snapshot, no_price_snapshot, liquidity_snapshot, + stop_loss_price_snapshot, stop_loss_pct, reason_code, reason_detail, strategy_version, created_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
""", ( market_id, analysis_run_id, action, side, confidence, edge_yes, edge_no, edge_selected, yes_price_snapshot, no_price_snapshot, liquidity_snapshot, + stop_loss_price_snapshot, stop_loss_pct, reason_code, reason_detail, strategy_version, now )) await self._connection.commit() diff --git a/src/strategies/dispute/pipeline.py b/src/strategies/dispute/pipeline.py index 9faa29b..793aed7 100644 --- a/src/strategies/dispute/pipeline.py +++ b/src/strategies/dispute/pipeline.py @@ -160,6 +160,7 @@ async def persist_tier2_result( decision_path=output.decision_path, no_trade_reason=output.no_trade_reason, assumptions=output.assumptions, + normalization_applied=normalization_applied, ) return PersistResult( stage="tier2", diff --git a/src/strategies/dispute/signal_engine.py b/src/strategies/dispute/signal_engine.py index 9f80719..6e1c7d7 100644 --- a/src/strategies/dispute/signal_engine.py +++ b/src/strategies/dispute/signal_engine.py @@ -14,6 +14,8 @@ class SignalDecision: edge_yes: Optional[float] = None edge_no: Optional[float] = None edge_selected: Optional[float] = None + stop_loss_price: Optional[float] = None + stop_loss_pct: Optional[float] = None @dataclass @@ -22,6 +24,7 @@ class SizingConfig: max_position_pct: float = 0.05 max_market_exposure_usd: float = 1500.0 max_strategy_exposure_usd: float = 5000.0 + stop_loss_pct: float = 0.15 def compute_edge(prob: float, price: float, fee_haircut: float, slippage_haircut: float) -> float: @@ -34,24 +37,74 @@ def validate_action(action: str) -> None: raise ValueError(f"unsupported action: {action}") +def compute_stop_loss_price(entry_price: float, stop_loss_pct: float) -> float: + """Compute stop-loss trigger from entry price and fractional drawdown.""" + if entry_price <= 0: + raise ValueError("entry_price must be > 0") + if stop_loss_pct <= 0 or stop_loss_pct >= 1: + raise ValueError("stop_loss_pct must be in (0,1)") + return max(0.0, entry_price * (1.0 - stop_loss_pct)) + + def choose_action( edge_yes: float, edge_no: 
float, confidence: float, min_edge: float, min_confidence: float, + entry_yes_price: Optional[float] = None, + entry_no_price: Optional[float] = None, + stop_loss_pct: Optional[float] = None, ) -> SignalDecision: """Choose trading action based on edge and confidence gates.""" if confidence < min_confidence: - return SignalDecision(action="NO_TRADE", reason_code="LOW_CONFIDENCE", confidence=confidence, edge_yes=edge_yes, edge_no=edge_no) + return SignalDecision( + action="NO_TRADE", + reason_code="LOW_CONFIDENCE", + confidence=confidence, + edge_yes=edge_yes, + edge_no=edge_no, + stop_loss_pct=stop_loss_pct, + ) best_edge = max(edge_yes, edge_no) if best_edge < min_edge: - return SignalDecision(action="NO_TRADE", reason_code="INSUFFICIENT_EDGE", confidence=confidence, edge_yes=edge_yes, edge_no=edge_no) + return SignalDecision( + action="NO_TRADE", + reason_code="INSUFFICIENT_EDGE", + confidence=confidence, + edge_yes=edge_yes, + edge_no=edge_no, + stop_loss_pct=stop_loss_pct, + ) if edge_yes >= edge_no: - return SignalDecision(action="ENTER_YES", reason_code="EDGE_YES", confidence=confidence, edge_yes=edge_yes, edge_no=edge_no, edge_selected=edge_yes) - return SignalDecision(action="ENTER_NO", reason_code="EDGE_NO", confidence=confidence, edge_yes=edge_yes, edge_no=edge_no, edge_selected=edge_no) + stop_loss_price = None + if stop_loss_pct is not None and entry_yes_price is not None: + stop_loss_price = compute_stop_loss_price(entry_yes_price, stop_loss_pct) + return SignalDecision( + action="ENTER_YES", + reason_code="EDGE_YES", + confidence=confidence, + edge_yes=edge_yes, + edge_no=edge_no, + edge_selected=edge_yes, + stop_loss_price=stop_loss_price, + stop_loss_pct=stop_loss_pct, + ) + stop_loss_price = None + if stop_loss_pct is not None and entry_no_price is not None: + stop_loss_price = compute_stop_loss_price(entry_no_price, stop_loss_pct) + return SignalDecision( + action="ENTER_NO", + reason_code="EDGE_NO", + confidence=confidence, + edge_yes=edge_yes, + 
edge_no=edge_no, + edge_selected=edge_no, + stop_loss_price=stop_loss_price, + stop_loss_pct=stop_loss_pct, + ) def compute_position_size_usd( diff --git a/test_dispute_pipeline_persistence.py b/test_dispute_pipeline_persistence.py index 9a613ad..6adb251 100644 --- a/test_dispute_pipeline_persistence.py +++ b/test_dispute_pipeline_persistence.py @@ -105,9 +105,12 @@ async def test_persist_tier2_success(tmp_path: Path): assert result.output_db_id is not None assert result.normalization_applied is False - async with db._connection.execute("SELECT COUNT(*) FROM analysis_outputs_t2") as cursor: - count = (await cursor.fetchone())[0] - assert count == 1 + async with db._connection.execute( + "SELECT COUNT(*), normalization_applied FROM analysis_outputs_t2" + ) as cursor: + row = await cursor.fetchone() + assert row[0] == 1 + assert row[1] == 0 await db.close() @@ -138,12 +141,13 @@ async def test_persist_tier2_small_drift_is_normalized(tmp_path: Path): assert result.normalization_applied is True async with db._connection.execute( - "SELECT p_yes_final, p_no_final, p_invalid_final FROM analysis_outputs_t2 WHERE id = ?", + "SELECT p_yes_final, p_no_final, p_invalid_final, normalization_applied FROM analysis_outputs_t2 WHERE id = ?", (result.output_db_id,), ) as cursor: row = await cursor.fetchone() assert row is not None - assert pytest.approx(sum(row), abs=1e-9) == 1.0 + assert pytest.approx(row[0] + row[1] + row[2], abs=1e-9) == 1.0 + assert row[3] == 1 await db.close() diff --git a/test_resolution_schema.py b/test_resolution_schema.py index ace8463..983a6ab 100644 --- a/test_resolution_schema.py +++ b/test_resolution_schema.py @@ -76,6 +76,8 @@ async def test_signal_replay_roundtrip(tmp_path: Path): yes_price_snapshot=0.45, no_price_snapshot=0.55, liquidity_snapshot=5000, + stop_loss_price_snapshot=0.38, + stop_loss_pct=0.15, reason_code="EDGE_OK", reason_detail="test", strategy_version="sv1", @@ -86,5 +88,7 @@ async def test_signal_replay_roundtrip(tmp_path: Path): 
assert replay["run_id"] == "run-123" assert replay["model"] == "test-model" assert replay["action"] == "ENTER_YES" + assert replay["stop_loss_price_snapshot"] == pytest.approx(0.38) + assert replay["stop_loss_pct"] == pytest.approx(0.15) await db.close() diff --git a/test_signal_engine.py b/test_signal_engine.py index 7136fdf..5c5fe7b 100644 --- a/test_signal_engine.py +++ b/test_signal_engine.py @@ -6,6 +6,7 @@ choose_action, compute_edge, compute_position_size_usd, + compute_stop_loss_price, ) @@ -14,9 +15,19 @@ def test_actions_include_expected_values(): def test_choose_action_prefers_best_edge_when_confident(): - signal = choose_action(edge_yes=0.08, edge_no=0.02, confidence=0.75, min_edge=0.03, min_confidence=0.6) + signal = choose_action( + edge_yes=0.08, + edge_no=0.02, + confidence=0.75, + min_edge=0.03, + min_confidence=0.6, + entry_yes_price=0.42, + stop_loss_pct=0.15, + ) assert signal.action == "ENTER_YES" assert signal.reason_code == "EDGE_YES" + assert signal.stop_loss_pct == 0.15 + assert signal.stop_loss_price == pytest.approx(0.357) def test_choose_action_blocks_low_confidence(): @@ -43,3 +54,10 @@ def test_position_size_respects_caps(): def test_compute_edge_applies_haircuts(): edge = compute_edge(prob=0.62, price=0.55, fee_haircut=0.01, slippage_haircut=0.02) assert pytest.approx(edge, abs=1e-9) == 0.04 + + +def test_compute_stop_loss_price_validation(): + with pytest.raises(ValueError, match="entry_price"): + compute_stop_loss_price(0.0, 0.15) + with pytest.raises(ValueError, match="stop_loss_pct"): + compute_stop_loss_price(0.5, 1.0) From 81c1886c0c10a8800d0f27915875991ef01cf5ee Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Thu, 5 Feb 2026 21:48:48 -0600 Subject: [PATCH 14/15] Wire paired-leg arb execution path and signal contract --- docs/specs/polymarket_arb_master.md | 3 +- src/engine/core.py | 139 +++++++++++++++++++++++ src/strategies/base.py | 5 +- src/strategies/polymarket_arb_v1.py | 22 ++++ tests/test_polymarket_arb_v1_strategy.py | 
60 ++++++++++ 5 files changed, 226 insertions(+), 3 deletions(-) create mode 100644 tests/test_polymarket_arb_v1_strategy.py diff --git a/docs/specs/polymarket_arb_master.md b/docs/specs/polymarket_arb_master.md index 5148cba..2582b94 100644 --- a/docs/specs/polymarket_arb_master.md +++ b/docs/specs/polymarket_arb_master.md @@ -5,8 +5,9 @@ Owner: PR3DICT arbitrage planning thread Last updated: 2026-02-04 ## Implementation Status -- M1 in progress: executable pricing, staleness checks, and risk-gate plumbing added in Python. +- M1 complete in branch lane: executable pricing, staleness checks, and risk-gate plumbing added in Python. - M2 in progress: opportunity engine ports and execution lifecycle state machine scaffolded. +- M2 update: paired-leg entry contract now wired through engine execution path (paper + live fallback handling). ## Purpose This is the single source of truth for Polymarket arbitrage planning and implementation decisions. diff --git a/src/engine/core.py b/src/engine/core.py index 8098616..2424f17 100644 --- a/src/engine/core.py +++ b/src/engine/core.py @@ -13,6 +13,7 @@ from ..platforms.base import PlatformInterface, Market, Position, OrderSide, OrderType from ..strategies.base import TradingStrategy, Signal +from ..execution import ArbV1State, ArbV1StateMachine from ..risk.manager import RiskManager from ..notifications.manager import NotificationManager @@ -63,6 +64,7 @@ def __init__(self, self.notifications = notifications self.config = config or EngineConfig() self.state = EngineState() + self._arb_v1_state_machine = ArbV1StateMachine() self._task: Optional[asyncio.Task] = None self._start_time: Optional[datetime] = None @@ -274,6 +276,11 @@ async def _execute_entry(self, signal: Signal, size: int, strategy_name: str = N if not platform: logger.error(f"Platform {signal.market.platform} not connected") return + + paired_leg = signal.metadata.get("paired_leg") + if strategy_name == "polymarket_arb_v1" and isinstance(paired_leg, dict): + await 
self._execute_paired_entry(signal, size, platform) + return logger.info(f"ENTRY: {signal.market.ticker} {signal.side.value.upper()} " f"x{size} @ {signal.target_price or 'MKT'} - {signal.reason}") @@ -311,6 +318,138 @@ async def _execute_entry(self, signal: Signal, size: int, strategy_name: str = N error_msg=f"Order placement failed: {e}", context=f"Market: {signal.market.ticker}, Side: {signal.side.value}" ) + + async def _execute_paired_entry(self, signal: Signal, size: int, platform: PlatformInterface) -> None: + """ + Execute paired YES/NO legs for combinatorial arb. + + Conservative policy: + - if both legs are not cleanly filled, treat as partial/failure + - attempt immediate flatten for any filled exposure + """ + state = ArbV1State.DISCOVERED + opportunity_id = signal.metadata.get("opportunity_id", "unknown") + paired_leg = signal.metadata.get("paired_leg", {}) + + state = self._transition_state(state, ArbV1State.PRICED_EXECUTABLE, opportunity_id) + state = self._transition_state(state, ArbV1State.RISK_APPROVED, opportunity_id) + state = self._transition_state(state, ArbV1State.EXECUTION_SUBMITTED, opportunity_id) + + paired_side_raw = str(paired_leg.get("side", "")).lower() + if paired_side_raw not in (OrderSide.YES.value, OrderSide.NO.value): + logger.error(f"Invalid paired leg side for opportunity {opportunity_id}: {paired_side_raw}") + state = self._transition_state(state, ArbV1State.FAILED, opportunity_id) + self._transition_state(state, ArbV1State.CLOSED, opportunity_id) + return + + paired_side = OrderSide.YES if paired_side_raw == OrderSide.YES.value else OrderSide.NO + paired_price_raw = paired_leg.get("target_price") + paired_price = None + if paired_price_raw is not None: + paired_price = Decimal(str(paired_price_raw)) + + if self.config.paper_mode: + logger.info( + f"[PAPER] PAIRED ENTRY: {signal.market.ticker} " + f"{signal.side.value.upper()}+{paired_side.value.upper()} x{size} " + f"opp={opportunity_id}" + ) + state = 
self._transition_state(state, ArbV1State.FILLED, opportunity_id) + self._transition_state(state, ArbV1State.CLOSED, opportunity_id) + return + + logger.info( + f"PAIRED ENTRY: {signal.market.ticker} {signal.side.value.upper()}+{paired_side.value.upper()} " + f"x{size} opp={opportunity_id}" + ) + + primary_order_type = OrderType.LIMIT if signal.target_price else OrderType.MARKET + paired_order_type = OrderType.LIMIT if paired_price is not None else OrderType.MARKET + + results = await asyncio.gather( + platform.place_order( + market_id=signal.market_id, + side=signal.side, + order_type=primary_order_type, + quantity=size, + price=signal.target_price, + ), + platform.place_order( + market_id=signal.market_id, + side=paired_side, + order_type=paired_order_type, + quantity=size, + price=paired_price, + ), + return_exceptions=True, + ) + + orders = [] + had_exception = False + for result in results: + if isinstance(result, Exception): + had_exception = True + logger.error(f"Paired leg placement error for {opportunity_id}: {result}") + else: + orders.append(result) + + if had_exception or len(orders) != 2: + state = self._transition_state(state, ArbV1State.FAILED, opportunity_id) + await self._flatten_residual_exposure(platform, signal.market_id, orders, size, opportunity_id) + self._transition_state(state, ArbV1State.CLOSED, opportunity_id) + return + + fully_filled = all(o.filled_quantity >= size for o in orders) + if fully_filled: + state = self._transition_state(state, ArbV1State.FILLED, opportunity_id) + self._transition_state(state, ArbV1State.CLOSED, opportunity_id) + self.state.trades_today += 1 + return + + state = self._transition_state(state, ArbV1State.PARTIAL_FILL, opportunity_id) + await self._flatten_residual_exposure(platform, signal.market_id, orders, size, opportunity_id) + state = self._transition_state(state, ArbV1State.HEDGED_OR_FLATTENED, opportunity_id) + self._transition_state(state, ArbV1State.CLOSED, opportunity_id) + + def 
_transition_state(self, state: ArbV1State, target: ArbV1State, opportunity_id: str) -> ArbV1State: + transition = self._arb_v1_state_machine.transition(state, target) + if not transition.valid: + logger.warning(f"Invalid arb v1 state transition for {opportunity_id}: {transition.reason}") + return state + return target + + async def _flatten_residual_exposure( + self, + platform: PlatformInterface, + market_id: str, + orders: List, + expected_qty: int, + opportunity_id: str, + ) -> None: + """Flatten any partial exposure from paired execution failures.""" + for order in orders: + if order is None: + continue + filled = max(0, int(getattr(order, "filled_quantity", 0))) + if filled <= 0: + continue + + # Flatten only the filled exposure. + flatten_qty = min(filled, expected_qty) + flatten_side = OrderSide.NO if order.side == OrderSide.YES else OrderSide.YES + try: + await platform.place_order( + market_id=market_id, + side=flatten_side, + order_type=OrderType.MARKET, + quantity=flatten_qty, + ) + logger.warning( + f"Flattened residual exposure for {opportunity_id}: " + f"{flatten_side.value.upper()} x{flatten_qty}" + ) + except Exception as e: + logger.error(f"Failed flatten order for {opportunity_id}: {e}") async def _execute_exit(self, position: Position, signal: Signal) -> None: """Execute an exit order.""" diff --git a/src/strategies/base.py b/src/strategies/base.py index f725dcd..648d291 100644 --- a/src/strategies/base.py +++ b/src/strategies/base.py @@ -5,8 +5,8 @@ Follows ST0CK's Strategy Pattern for decoupled signal/execution logic. 
""" from abc import ABC, abstractmethod -from dataclasses import dataclass -from typing import Optional, List +from dataclasses import dataclass, field +from typing import Optional, List, Dict, Any from decimal import Decimal from datetime import datetime @@ -24,6 +24,7 @@ class Signal: target_price: Optional[Decimal] = None stop_price: Optional[Decimal] = None timestamp: datetime = None + metadata: Dict[str, Any] = field(default_factory=dict) def __post_init__(self): if self.timestamp is None: diff --git a/src/strategies/polymarket_arb_v1.py b/src/strategies/polymarket_arb_v1.py index eb073dd..a30c48e 100644 --- a/src/strategies/polymarket_arb_v1.py +++ b/src/strategies/polymarket_arb_v1.py @@ -34,6 +34,7 @@ class PolymarketArbV1Config: max_time_to_resolution_hours: int = 24 * 30 probe_quantity_contracts: int = 25 fee_buffer_bps: int = 10 + max_signal_contracts: int = 200 class PolymarketArbitrageV1Strategy(TradingStrategy): @@ -119,6 +120,16 @@ async def scan_markets(self, markets: List[Market]) -> List[Signal]: f"(paired NO leg required)" ), target_price=complement.yes_buy.executable_price, + metadata={ + "opportunity_id": opportunity.opportunity_id, + "ttl_ms": opportunity.ttl_ms, + "expected_edge_bps": net_edge_bps, + "suggested_size": int(min(self.config.max_signal_contracts, decision.size_adjusted_contracts)), + "paired_leg": { + "side": OrderSide.NO.value, + "target_price": str(complement.no_buy.executable_price), + }, + }, ) ) @@ -127,3 +138,14 @@ async def scan_markets(self, markets: List[Market]) -> List[Signal]: async def check_exit(self, position: Position, market: Market) -> Optional[Signal]: # v1 focuses on entry plumbing; existing exit behavior remains unchanged. 
return None + + def get_position_size( + self, + signal: Signal, + account_balance: Decimal, + risk_pct: float = 0.02, + ) -> int: + planned = signal.metadata.get("suggested_size") + if isinstance(planned, int) and planned > 0: + return planned + return super().get_position_size(signal, account_balance, risk_pct=risk_pct) diff --git a/tests/test_polymarket_arb_v1_strategy.py b/tests/test_polymarket_arb_v1_strategy.py new file mode 100644 index 0000000..1142ea9 --- /dev/null +++ b/tests/test_polymarket_arb_v1_strategy.py @@ -0,0 +1,60 @@ +from datetime import datetime, timedelta, timezone +from decimal import Decimal + +import pytest + +from src.platforms.base import Market, OrderBook, OrderSide +from src.strategies.polymarket_arb_v1 import PolymarketArbitrageV1Strategy + + +def _market(platform: str = "polymarket") -> Market: + return Market( + id="m1", + ticker="TEST-1", + title="Will test pass?", + description="test", + yes_price=Decimal("0.50"), + no_price=Decimal("0.50"), + volume=Decimal("10000"), + liquidity=Decimal("5000"), + close_time=datetime.now(timezone.utc) + timedelta(days=1), + resolved=False, + platform=platform, + ) + + +async def _provider(_: str) -> OrderBook: + # Asks for YES-buy, bids for YES-sell (used to derive NO-buy). 
+ return OrderBook( + market_id="m1", + asks=[(Decimal("0.50"), 200), (Decimal("0.51"), 200)], + bids=[(Decimal("0.53"), 200), (Decimal("0.52"), 200)], + timestamp=datetime.now(timezone.utc), + ) + + +@pytest.mark.asyncio +async def test_scan_markets_emits_paired_signal_metadata(): + strategy = PolymarketArbitrageV1Strategy(orderbook_provider=_provider) + signals = await strategy.scan_markets([_market()]) + assert len(signals) == 1 + + signal = signals[0] + assert signal.metadata["paired_leg"]["side"] == OrderSide.NO.value + assert "target_price" in signal.metadata["paired_leg"] + assert signal.metadata["suggested_size"] > 0 + assert signal.metadata["expected_edge_bps"] > 0 + + +@pytest.mark.asyncio +async def test_scan_markets_skips_non_polymarket(): + strategy = PolymarketArbitrageV1Strategy(orderbook_provider=_provider) + signals = await strategy.scan_markets([_market(platform="kalshi")]) + assert signals == [] + + +def test_get_position_size_prefers_planned_size(): + strategy = PolymarketArbitrageV1Strategy(orderbook_provider=_provider) + signal = type("SignalLike", (), {"metadata": {"suggested_size": 17}})() + size = strategy.get_position_size(signal=signal, account_balance=Decimal("1000")) + assert size == 17 From 049d628599f0b266523329d5192c768033f34947 Mon Sep 17 00:00:00 2001 From: Nate Oswalt Date: Thu, 5 Feb 2026 21:54:22 -0600 Subject: [PATCH 15/15] Add combinatorial dependency detector with verifier hook --- docs/specs/polymarket_arb_master.md | 5 + src/strategies/__init__.py | 7 + src/strategies/dependency_detector.py | 256 +++++++++++++++++++++++ src/strategies/polymarket_arb_v1.py | 35 ++++ tests/test_dependency_detector.py | 76 +++++++ tests/test_polymarket_arb_v1_strategy.py | 27 ++- 6 files changed, 400 insertions(+), 6 deletions(-) create mode 100644 src/strategies/dependency_detector.py create mode 100644 tests/test_dependency_detector.py diff --git a/docs/specs/polymarket_arb_master.md b/docs/specs/polymarket_arb_master.md index 
2582b94..1e8293e 100644 --- a/docs/specs/polymarket_arb_master.md +++ b/docs/specs/polymarket_arb_master.md @@ -8,6 +8,7 @@ Last updated: 2026-02-04 - M1 complete in branch lane: executable pricing, staleness checks, and risk-gate plumbing added in Python. - M2 in progress: opportunity engine ports and execution lifecycle state machine scaffolded. - M2 update: paired-leg entry contract now wired through engine execution path (paper + live fallback handling). +- M2 update: dependency detection added as two-stage flow (deterministic narrowing + optional verifier hook). ## Purpose This is the single source of truth for Polymarket arbitrage planning and implementation decisions. @@ -75,3 +76,7 @@ Any change to locked decisions requires: ### 2026-02-04 | DEC-003 | Resolution dependency boundary - Decision: Consume only `risk_multiplier` from resolution/adjudication thread in v1. - Why: Keep arb thread decoupled while allowing risk-aware sizing. + +### 2026-02-06 | DEC-004 | Combinatorial dependency detection approach +- Decision: Use deterministic candidate narrowing first, then optional verifier hook for ambiguous relations. +- Why: Deterministic-only is too brittle; LLM-only is too expensive/noisy for pair explosion. 
diff --git a/src/strategies/__init__.py b/src/strategies/__init__.py index f66cc17..ec92f3d 100644 --- a/src/strategies/__init__.py +++ b/src/strategies/__init__.py @@ -16,6 +16,12 @@ snapshot_age_ms, ) from .polymarket_arb_v1 import PolymarketArbV1Config, PolymarketArbitrageV1Strategy +from .dependency_detector import ( + DependencyAssessment, + DependencyDetector, + DependencyRelation, + DependencyVerifierPort, +) from .market_making import MarketMakingStrategy, MarketMakingConfig, InventoryTracker from .market_rebalancing import MarketRebalancingStrategy, RebalancingConfig, RebalancingOpportunity @@ -23,6 +29,7 @@ "TradingStrategy", "Signal", "ArbitrageStrategy", "CrossPlatformArbitrage", "PolymarketArbV1Config", "PolymarketArbitrageV1Strategy", + "DependencyAssessment", "DependencyDetector", "DependencyRelation", "DependencyVerifierPort", "ExecutablePricerPort", "RiskGatePort", "ComplementPricing", "ArbV1RiskConfig", "ArbV1RiskGate", diff --git a/src/strategies/dependency_detector.py b/src/strategies/dependency_detector.py new file mode 100644 index 0000000..0165968 --- /dev/null +++ b/src/strategies/dependency_detector.py @@ -0,0 +1,256 @@ +""" +Combinatorial dependency detection for prediction markets. 
+ +Design: +- Stage 1 deterministic narrowing/assessment +- Stage 2 optional external verifier (e.g., LLM) for ambiguous pairs +""" + +from __future__ import annotations + +import re +from dataclasses import dataclass +from datetime import datetime +from enum import Enum +from typing import List, Optional, Protocol, Sequence, Tuple + +from ..platforms.base import Market + + +class DependencyRelation(str, Enum): + EQUIVALENT = "equivalent" + MUTUALLY_EXCLUSIVE = "mutually_exclusive" + IMPLIES = "implies" + INDEPENDENT = "independent" + UNKNOWN = "unknown" + + +@dataclass(frozen=True) +class DependencyAssessment: + market_a_id: str + market_b_id: str + relation: DependencyRelation + confidence: float + reason: str + source: str = "deterministic" + + +class DependencyVerifierPort(Protocol): + """Optional second-stage verifier for ambiguous dependency pairs.""" + + def verify( + self, + market_a: Market, + market_b: Market, + deterministic: DependencyAssessment, + ) -> Optional[DependencyAssessment]: + """Return refined assessment, or None to keep deterministic result.""" + + +class DependencyDetector: + """ + Detect dependencies between markets for combinatorial arb constraints. 
+ + The detector is intentionally conservative: + - deterministic stage only outputs high-confidence relations + - ambiguous cases remain UNKNOWN until verified + """ + + _stopwords = { + "will", + "the", + "a", + "an", + "be", + "is", + "are", + "to", + "of", + "in", + "on", + "for", + "by", + "and", + "or", + "at", + "with", + "from", + } + + _identity_markers = {"win", "wins", "elected", "president", "nominee", "nomination", "primary"} + + _year_re = re.compile(r"\b(20[2-4]\d)\b") + _word_re = re.compile(r"[a-z0-9]+") + + def detect( + self, + markets: Sequence[Market], + verifier: Optional[DependencyVerifierPort] = None, + ) -> List[DependencyAssessment]: + """Run full dependency detection flow on all candidate pairs.""" + assessments: List[DependencyAssessment] = [] + for a, b in self.generate_candidates(markets): + deterministic = self.assess_pair(a, b) + refined = verifier.verify(a, b, deterministic) if verifier is not None else None + assessments.append(refined or deterministic) + return assessments + + def generate_candidates(self, markets: Sequence[Market]) -> List[Tuple[Market, Market]]: + """ + Deterministically narrow pair space. 
+ + Candidate rules: + - same platform and unresolved + - close times within 45 days + - matching year token when present + - at least 2 shared event signature tokens + """ + candidates: List[Tuple[Market, Market]] = [] + n = len(markets) + for i in range(n): + for j in range(i + 1, n): + a = markets[i] + b = markets[j] + if a.platform != b.platform: + continue + if a.resolved or b.resolved: + continue + if abs((a.close_time - b.close_time).total_seconds()) > 45 * 24 * 3600: + continue + + year_a = self._extract_year(a.title) + year_b = self._extract_year(b.title) + if year_a is not None and year_b is not None and year_a != year_b: + continue + + sig_a = self._event_signature_tokens(a.title) + sig_b = self._event_signature_tokens(b.title) + if len(sig_a.intersection(sig_b)) < 2: + continue + + candidates.append((a, b)) + return candidates + + def assess_pair(self, market_a: Market, market_b: Market) -> DependencyAssessment: + """Deterministic relation assessment for one market pair.""" + text_a = self._normalize(market_a.title) + text_b = self._normalize(market_b.title) + + if text_a == text_b: + return DependencyAssessment( + market_a_id=market_a.id, + market_b_id=market_b.id, + relation=DependencyRelation.EQUIVALENT, + confidence=0.99, + reason="identical normalized title", + ) + + focus_a = self._focus_tokens(market_a.title) + focus_b = self._focus_tokens(market_b.title) + sig_a = self._event_signature_tokens(market_a.title) + sig_b = self._event_signature_tokens(market_b.title) + overlap = sig_a.intersection(sig_b) + + if focus_a and focus_b and focus_a == focus_b and len(overlap) >= 3: + return DependencyAssessment( + market_a_id=market_a.id, + market_b_id=market_b.id, + relation=DependencyRelation.EQUIVALENT, + confidence=0.8, + reason="same focus entity and event signature overlap", + ) + + if focus_a and focus_b and focus_a != focus_b and len(overlap) >= 3: + return DependencyAssessment( + market_a_id=market_a.id, + market_b_id=market_b.id, + 
relation=DependencyRelation.MUTUALLY_EXCLUSIVE, + confidence=0.72, + reason="different focus entities on same event signature", + ) + + a_nom = {"nominee", "nomination", "primary"}.intersection(sig_a) + b_nom = {"nominee", "nomination", "primary"}.intersection(sig_b) + a_win = {"win", "wins", "elected", "president"}.intersection(sig_a) + b_win = {"win", "wins", "elected", "president"}.intersection(sig_b) + + if focus_a and focus_b and focus_a == focus_b: + if a_nom and b_win: + return DependencyAssessment( + market_a_id=market_a.id, + market_b_id=market_b.id, + relation=DependencyRelation.IMPLIES, + confidence=0.66, + reason="nomination/primary phrasing implies election-win path", + ) + if b_nom and a_win: + return DependencyAssessment( + market_a_id=market_a.id, + market_b_id=market_b.id, + relation=DependencyRelation.IMPLIES, + confidence=0.66, + reason="nomination/primary phrasing implies election-win path", + ) + + return DependencyAssessment( + market_a_id=market_a.id, + market_b_id=market_b.id, + relation=DependencyRelation.UNKNOWN, + confidence=0.35, + reason="insufficient deterministic evidence", + ) + + def _extract_year(self, text: str) -> Optional[int]: + m = self._year_re.search(text.lower()) + return int(m.group(1)) if m else None + + def _normalize(self, text: str) -> str: + tokens = self._word_re.findall(text.lower()) + return " ".join(tokens) + + def _focus_tokens(self, text: str) -> Tuple[str, ...]: + """ + Extract probable focus entity from leading clause. + + Example: + - "Will Donald Trump win..." -> ("donald", "trump") + - "Will Biden be..." 
-> ("biden",) + """ + normalized = self._normalize(text) + tokens = normalized.split() + if tokens and tokens[0] == "will": + tokens = tokens[1:] + verbs = {"win", "wins", "be", "become", "get", "receive"} + + focus: List[str] = [] + for tok in tokens: + if tok in verbs: + break + if tok in self._stopwords: + continue + if tok.isdigit(): + continue + focus.append(tok) + if len(focus) >= 3: + break + return tuple(focus) + + def _event_signature_tokens(self, text: str) -> set[str]: + tokens = self._word_re.findall(text.lower()) + focus = set(self._focus_tokens(text)) + signature = set() + for tok in tokens: + if tok in self._stopwords: + continue + if tok in focus: + continue + if tok.isdigit(): + signature.add(tok) + continue + if tok in self._identity_markers: + signature.add(tok) + continue + # Keep content tokens with at least 4 chars to reduce noise. + if len(tok) >= 4: + signature.add(tok) + return signature diff --git a/src/strategies/polymarket_arb_v1.py b/src/strategies/polymarket_arb_v1.py index a30c48e..0d2e92f 100644 --- a/src/strategies/polymarket_arb_v1.py +++ b/src/strategies/polymarket_arb_v1.py @@ -22,6 +22,7 @@ OrderbookExecutablePricer, RiskAction, ) +from .dependency_detector import DependencyDetector, DependencyRelation, DependencyVerifierPort from .base import Signal, TradingStrategy from ..platforms.base import Market, OrderBook, OrderSide, Position @@ -35,6 +36,7 @@ class PolymarketArbV1Config: probe_quantity_contracts: int = 25 fee_buffer_bps: int = 10 max_signal_contracts: int = 200 + enable_dependency_discovery: bool = True class PolymarketArbitrageV1Strategy(TradingStrategy): @@ -46,11 +48,15 @@ def __init__( config: Optional[PolymarketArbV1Config] = None, pricer: Optional[ExecutablePricerPort] = None, risk_gate: Optional[RiskGatePort] = None, + dependency_detector: Optional[DependencyDetector] = None, + dependency_verifier: Optional[DependencyVerifierPort] = None, ): self.orderbook_provider = orderbook_provider self.config = config or 
PolymarketArbV1Config() self.pricer = pricer or OrderbookExecutablePricer(stale_after_ms=750) self.risk_gate = risk_gate or ArbV1RiskGate(ArbV1RiskConfig()) + self.dependency_detector = dependency_detector or DependencyDetector() + self.dependency_verifier = dependency_verifier @property def name(self) -> str: @@ -59,6 +65,10 @@ def name(self) -> str: async def scan_markets(self, markets: List[Market]) -> List[Signal]: signals: List[Signal] = [] now = int(time.time() * 1000) + dependency_map = {} + + if self.config.enable_dependency_discovery: + dependency_map = self._dependency_map(markets) for market in markets: if market.platform != "polymarket": @@ -125,6 +135,7 @@ async def scan_markets(self, markets: List[Market]) -> List[Signal]: "ttl_ms": opportunity.ttl_ms, "expected_edge_bps": net_edge_bps, "suggested_size": int(min(self.config.max_signal_contracts, decision.size_adjusted_contracts)), + "dependencies": dependency_map.get(market.id, []), "paired_leg": { "side": OrderSide.NO.value, "target_price": str(complement.no_buy.executable_price), @@ -149,3 +160,27 @@ def get_position_size( if isinstance(planned, int) and planned > 0: return planned return super().get_position_size(signal, account_balance, risk_pct=risk_pct) + + def _dependency_map(self, markets: List[Market]) -> dict: + assessments = self.dependency_detector.detect(markets, verifier=self.dependency_verifier) + by_market = {} + for a in assessments: + if a.relation == DependencyRelation.UNKNOWN: + continue + by_market.setdefault(a.market_a_id, []).append( + { + "other_market_id": a.market_b_id, + "relation": a.relation.value, + "confidence": round(a.confidence, 3), + "source": a.source, + } + ) + by_market.setdefault(a.market_b_id, []).append( + { + "other_market_id": a.market_a_id, + "relation": a.relation.value, + "confidence": round(a.confidence, 3), + "source": a.source, + } + ) + return by_market diff --git a/tests/test_dependency_detector.py b/tests/test_dependency_detector.py new file mode 
100644 index 0000000..a7ddd5d --- /dev/null +++ b/tests/test_dependency_detector.py @@ -0,0 +1,76 @@ +from datetime import datetime, timedelta, timezone +from decimal import Decimal + +from src.platforms.base import Market +from src.strategies.dependency_detector import ( + DependencyAssessment, + DependencyDetector, + DependencyRelation, + DependencyVerifierPort, +) + + +def _market(mid: str, title: str) -> Market: + return Market( + id=mid, + ticker=mid.upper(), + title=title, + description="", + yes_price=Decimal("0.50"), + no_price=Decimal("0.50"), + volume=Decimal("1000"), + liquidity=Decimal("1000"), + close_time=datetime.now(timezone.utc) + timedelta(days=20), + resolved=False, + platform="polymarket", + ) + + +def test_detect_mutually_exclusive_candidates(): + detector = DependencyDetector() + a = _market("a", "Will Donald Trump win the 2028 US presidential election?") + b = _market("b", "Will Kamala Harris win the 2028 US presidential election?") + assessments = detector.detect([a, b]) + assert len(assessments) == 1 + assert assessments[0].relation == DependencyRelation.MUTUALLY_EXCLUSIVE + assert assessments[0].confidence >= 0.7 + + +def test_detect_equivalent_on_same_normalized_title(): + detector = DependencyDetector() + a = _market("a", "Will BTC be above 120k by Dec 31 2026?") + b = _market("b", "Will BTC be above 120k by Dec 31, 2026?") + assessments = detector.detect([a, b]) + assert len(assessments) == 1 + assert assessments[0].relation == DependencyRelation.EQUIVALENT + + +def test_detect_implies_for_nominee_to_win(): + detector = DependencyDetector() + a = _market("a", "Will Donald Trump win the Republican nomination in 2028?") + b = _market("b", "Will Donald Trump win the 2028 US presidential election?") + assessments = detector.detect([a, b]) + assert len(assessments) == 1 + assert assessments[0].relation == DependencyRelation.IMPLIES + + +class _Verifier(DependencyVerifierPort): + def verify(self, market_a: Market, market_b: Market, 
deterministic: DependencyAssessment): + return DependencyAssessment( + market_a_id=market_a.id, + market_b_id=market_b.id, + relation=DependencyRelation.INDEPENDENT, + confidence=0.91, + reason="verifier override", + source="verifier", + ) + + +def test_verifier_can_override_deterministic_assessment(): + detector = DependencyDetector() + a = _market("a", "Will Donald Trump win the 2028 US presidential election?") + b = _market("b", "Will Kamala Harris win the 2028 US presidential election?") + assessments = detector.detect([a, b], verifier=_Verifier()) + assert len(assessments) == 1 + assert assessments[0].relation == DependencyRelation.INDEPENDENT + assert assessments[0].source == "verifier" diff --git a/tests/test_polymarket_arb_v1_strategy.py b/tests/test_polymarket_arb_v1_strategy.py index 1142ea9..8070d73 100644 --- a/tests/test_polymarket_arb_v1_strategy.py +++ b/tests/test_polymarket_arb_v1_strategy.py @@ -7,11 +7,11 @@ from src.strategies.polymarket_arb_v1 import PolymarketArbitrageV1Strategy -def _market(platform: str = "polymarket") -> Market: +def _market(mid: str = "m1", title: str = "Will test pass?", platform: str = "polymarket") -> Market: return Market( - id="m1", - ticker="TEST-1", - title="Will test pass?", + id=mid, + ticker=mid.upper(), + title=title, description="test", yes_price=Decimal("0.50"), no_price=Decimal("0.50"), @@ -23,10 +23,10 @@ def _market(platform: str = "polymarket") -> Market: ) -async def _provider(_: str) -> OrderBook: +async def _provider(mid: str) -> OrderBook: # Asks for YES-buy, bids for YES-sell (used to derive NO-buy). 
return OrderBook( - market_id="m1", + market_id=mid, asks=[(Decimal("0.50"), 200), (Decimal("0.51"), 200)], bids=[(Decimal("0.53"), 200), (Decimal("0.52"), 200)], timestamp=datetime.now(timezone.utc), @@ -44,6 +44,7 @@ async def test_scan_markets_emits_paired_signal_metadata(): assert "target_price" in signal.metadata["paired_leg"] assert signal.metadata["suggested_size"] > 0 assert signal.metadata["expected_edge_bps"] > 0 + assert "dependencies" in signal.metadata @pytest.mark.asyncio @@ -53,6 +54,20 @@ async def test_scan_markets_skips_non_polymarket(): assert signals == [] +@pytest.mark.asyncio +async def test_scan_markets_includes_dependency_assessment_metadata(): + strategy = PolymarketArbitrageV1Strategy(orderbook_provider=_provider) + markets = [ + _market("m1", "Will Donald Trump win the 2028 US presidential election?"), + _market("m2", "Will Kamala Harris win the 2028 US presidential election?"), + ] + signals = await strategy.scan_markets(markets) + assert len(signals) == 2 + by_id = {s.market_id: s for s in signals} + assert by_id["m1"].metadata["dependencies"][0]["other_market_id"] == "m2" + assert by_id["m1"].metadata["dependencies"][0]["relation"] == "mutually_exclusive" + + def test_get_position_size_prefers_planned_size(): strategy = PolymarketArbitrageV1Strategy(orderbook_provider=_provider) signal = type("SignalLike", (), {"metadata": {"suggested_size": 17}})()