Merged
35 changes: 17 additions & 18 deletions crawler/crawl.py
@@ -1,36 +1,34 @@
-from dataclasses import dataclass
-import os
 import datetime
-from typing import List
 import logging
+import os
+from dataclasses import dataclass
 from pathlib import Path
 from time import time
+from typing import List
 
-
+from crawler.store.boso import BosoCrawler
+from crawler.store.brodokomerc import BrodokomercCrawler
+from crawler.store.dm import DmCrawler
+from crawler.store.eurospin import EurospinCrawler
+from crawler.store.jadranka_trgovina import JadrankaTrgovinaCrawler
+from crawler.store.kaufland import KauflandCrawler
 from crawler.store.konzum import KonzumCrawler
+from crawler.store.ktc import KtcCrawler
 from crawler.store.lidl import LidlCrawler
+from crawler.store.lorenco import LorencoCrawler
+from crawler.store.metro import MetroCrawler
+from crawler.store.ntl import NtlCrawler
+from crawler.store.output import copy_archive_info, create_archive, save_chain
 from crawler.store.plodine import PlodineCrawler
 from crawler.store.ribola import RibolaCrawler
 from crawler.store.roto import RotoCrawler
 from crawler.store.spar import SparCrawler
 from crawler.store.studenac import StudenacCrawler
 from crawler.store.tommy import TommyCrawler
-from crawler.store.kaufland import KauflandCrawler
-from crawler.store.eurospin import EurospinCrawler
-from crawler.store.dm import DmCrawler
-from crawler.store.ktc import KtcCrawler
-from crawler.store.metro import MetroCrawler
 from crawler.store.trgocentar import TrgocentarCrawler
-from crawler.store.zabac import ZabacCrawler
-from crawler.store.vrutak import VrutakCrawler
-from crawler.store.ntl import NtlCrawler
 from crawler.store.trgovina_krk import TrgovinaKrkCrawler
-from crawler.store.brodokomerc import BrodokomercCrawler
-from crawler.store.lorenco import LorencoCrawler
-from crawler.store.boso import BosoCrawler
-
-
-from crawler.store.output import save_chain, copy_archive_info, create_archive
+from crawler.store.vrutak import VrutakCrawler
+from crawler.store.zabac import ZabacCrawler
 
 logger = logging.getLogger(__name__)

@@ -56,6 +54,7 @@
     BrodokomercCrawler.CHAIN: BrodokomercCrawler,
     LorencoCrawler.CHAIN: LorencoCrawler,
     BosoCrawler.CHAIN: BosoCrawler,
+    JadrankaTrgovinaCrawler.CHAIN: JadrankaTrgovinaCrawler,
 }


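For context, the hunk above registers the new crawler in the chain-to-class mapping whose surrounding definition is collapsed out of the diff. A minimal sketch of how such a registry is typically used for dispatch (the dict name CRAWLERS is an assumption for illustration, since the actual name is not visible in this hunk):

    import datetime

    from crawler.store.jadranka_trgovina import JadrankaTrgovinaCrawler

    # Assumed registry shape, mirroring the visible entries; the real dict's
    # name and full contents are collapsed out of the hunk above.
    CRAWLERS = {JadrankaTrgovinaCrawler.CHAIN: JadrankaTrgovinaCrawler}

    crawler_cls = CRAWLERS["jadranka_trgovina"]  # CHAIN is "jadranka_trgovina"
    stores = crawler_cls().get_all_products(datetime.date.today())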
12 changes: 6 additions & 6 deletions crawler/store/base.py
@@ -1,16 +1,16 @@
+import datetime
+import unicodedata
 from csv import DictReader
-from decimal import Decimal, InvalidOperation, ROUND_HALF_UP
+from decimal import ROUND_HALF_UP, Decimal, InvalidOperation
 from logging import getLogger
+from re import Pattern
 from tempfile import NamedTemporaryFile
-from typing import Any, BinaryIO, Generator
 from time import time
+from typing import Any, BinaryIO, Generator
 from zipfile import ZipFile
-import datetime
-from bs4 import BeautifulSoup
-from re import Pattern
-import unicodedata
 
 import httpx
+from bs4 import BeautifulSoup
 
 from .models import Product, Store

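Most of the remaining hunks in this PR apply the same mechanical cleanup seen here: imports alphabetized and regrouped in what looks like an isort-style pass, stdlib first, then third-party, then first-party, with a blank line between groups. Sketched on the base.py imports above:

    import datetime  # stdlib group

    import httpx  # third-party group

    from .models import Product, Store  # first-party group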
1 change: 1 addition & 0 deletions crawler/store/boso.py
@@ -8,6 +8,7 @@
 from bs4 import BeautifulSoup
 
 from crawler.store.models import Store
+
 from .base import BaseCrawler
 
 logger = logging.getLogger(__name__)
5 changes: 3 additions & 2 deletions crawler/store/brodokomerc.py
@@ -1,14 +1,15 @@
 import datetime
 import logging
 import re
-from typing import List, Dict, Any, Optional
+from typing import Any, Dict, List, Optional
 from urllib.parse import urljoin
 
 from bs4 import BeautifulSoup
 
-from .base import BaseCrawler
 from crawler.store.models import Store
+
+from .base import BaseCrawler
 
 logger = logging.getLogger(__name__)


1 change: 1 addition & 0 deletions crawler/store/dm.py
@@ -7,6 +7,7 @@
 from typing import Any, List
 
 import openpyxl
+
 from crawler.store.models import Product, Store
 
 from .base import BaseCrawler
1 change: 1 addition & 0 deletions crawler/store/eurospin.py
@@ -4,6 +4,7 @@
 from typing import List
 
 from bs4 import BeautifulSoup
+
 from crawler.store.models import Product, Store
 
 from .base import BaseCrawler
200 changes: 200 additions & 0 deletions crawler/store/jadranka_trgovina.py
@@ -0,0 +1,200 @@
+import datetime
+import logging
+import re
+
+from bs4 import BeautifulSoup
+from crawler.store.models import Product, Store
+
+from .base import BaseCrawler
+
+logger = logging.getLogger(__name__)
+
+
+class JadrankaTrgovinaCrawler(BaseCrawler):
+    """
+    Crawler for Jadranka Trgovina store prices.
+
+    Jadranka Trgovina publishes daily CSV price lists for a single store location
+    (Market Maxi Dražica 5, Mali Lošinj). Files follow the pattern:
+    MARKET_MAXI_DRAZICA5_MALILOSINJ_607_DDMMYYYY_0800.csv
+    """
+
+    CHAIN = "jadranka_trgovina"
+    BASE_URL = "https://jadranka-trgovina.com"
+    INDEX_URL = "https://jadranka-trgovina.com/cjenici/"
+
+    # Regex to match CSV filenames and extract date
+    # Format: MARKET_MAXI_DRAZICA5_MALILOSINJ_607_DDMMYYYY_0800.csv
+    CSV_FILENAME_PATTERN = re.compile(
+        r"MARKET_MAXI_DRAZICA5_MALILOSINJ_607_(\d{2})(\d{2})(\d{4})_0800\.csv"
+    )
+
+    # Mapping for price fields from CSV columns
+    # CSV columns in Croatian:
+    # NAZIV PROIZVODA, ŠIFRA PROIZVODA, MARKA PROIZVODA, NETO KOLIČINA,
+    # JEDINICA MJERE, MALOPRODAJNA CIJENA, CIJENA ZA JEDINICU MJERE,
+    # MPC ZA VRIJEME POSEBNOG OBLIKA PRODAJE, NAJNIŽA CIJENA U POSLJEDNIH 30 DANA,
+    # SIDRENA CIJENA NA 2.5.2025, BARKOD, KATEGORIJA PROIZVODA
+    PRICE_MAP = {
+        # field: (column_name, is_required)
+        # Note: Many products have empty retail price but filled special price
+        # Some products also have empty unit_price
+        "price": ("MALOPRODAJNA CIJENA", False),
+        "unit_price": ("CIJENA ZA JEDINICU MJERE", False),
+        "special_price": ("MPC ZA VRIJEME POSEBNOG OBLIKA PRODAJE", False),
+        "best_price_30": ("NAJNIŽA CIJENA U POSLJEDNIH 30 DANA", False),
+        "anchor_price": ("SIDRENA CIJENA NA 2.5.2025", False),
+    }
+
+    # Mapping for other product fields from CSV columns
+    FIELD_MAP = {
+        "product_id": ("ŠIFRA PROIZVODA", True),
+        "product": ("NAZIV PROIZVODA", True),
+        "brand": ("MARKA PROIZVODA", False),
+        "barcode": ("BARKOD", False),
+        "category": ("KATEGORIJA PROIZVODA", False),
+        "quantity": ("NETO KOLIČINA", False),
+        "unit": ("JEDINICA MJERE", False),
+    }
+
+    def parse_index(self, content: str) -> list[str]:
+        """
+        Parse the Jadranka Trgovina index page to extract CSV links.
+
+        Args:
+            content: HTML content of the index page
+
+        Returns:
+            List of absolute CSV URLs found on the page
+        """
+        soup = BeautifulSoup(content, "html.parser")
+        urls = []
+
+        # Find all links ending with .csv
+        for link_tag in soup.select('a[href$=".csv"]'):
+            href = str(link_tag.get("href"))
+            # Make absolute URL if needed
+            if not href.startswith("http"):
+                href = (
+                    f"{self.BASE_URL}{href}"
+                    if href.startswith("/")
+                    else f"{self.BASE_URL}/{href}"
+                )
+            urls.append(href)
+
+        return urls
+
+    def get_index(self, date: datetime.date) -> str | None:
+        """
+        Fetch the index page and find the CSV URL for the specified date.
+
+        Args:
+            date: The date for which to fetch the price list
+
+        Returns:
+            CSV URL for the specified date, or None if not found
+        """
+        content = self.fetch_text(self.INDEX_URL)
+        if not content:
+            logger.warning(
+                f"No content found at Jadranka Trgovina index URL: {self.INDEX_URL}"
+            )
+            return None
+
+        urls = self.parse_index(content)
+
+        # Format date as DDMMYYYY to match filename pattern
+        date_str = f"{date.day:02d}{date.month:02d}{date.year}"
+
+        # Find URL matching the requested date
+        for url in urls:
+            if date_str in url:
+                logger.info(f"Found Jadranka Trgovina CSV for {date}: {url}")
+                return url
+
+        logger.warning(f"No Jadranka Trgovina CSV found for date {date}")
+        return None
+
+    def parse_store_info(self) -> Store:
+        """
+        Create store information for the single Jadranka Trgovina location.
+
+        Jadranka Trgovina only has one location that publishes prices:
+        Market Maxi Dražica 5, Mali Lošinj (Store ID: 607)
+
+        Returns:
+            Store object with the fixed store information
+        """
+        return Store(
+            chain=self.CHAIN,
+            store_id="607",
+            name="Jadranka Trgovina Market Maxi",
+            store_type="market",
+            city="Mali Lošinj",
+            street_address="Dražica 5",
+            zipcode="",
+            items=[],
+        )
+
+    def get_store_prices(self, csv_url: str) -> list[Product]:
+        """
+        Fetch and parse store prices from a CSV URL.
+
+        Args:
+            csv_url: URL to the CSV file containing prices
+
+        Returns:
+            List of Product objects parsed from the CSV
+        """
+        try:
+            content = self.fetch_text(csv_url, encodings=["windows-1250", "utf-8"])
+            return self.parse_csv(content, delimiter=";")
+        except Exception as e:
+            logger.error(
+                f"Failed to get Jadranka Trgovina prices from {csv_url}: {e}",
+                exc_info=True,
+            )
+            return []
+
+    def get_all_products(self, date: datetime.date) -> list[Store]:
+        """
+        Main method to fetch and parse Jadranka Trgovina store and price data.
+
+        Args:
+            date: The date for which to fetch price data
+
+        Returns:
+            List containing a single Store object with products, or empty list if unavailable
+        """
+        csv_url = self.get_index(date)
+
+        if not csv_url:
+            logger.warning(f"No Jadranka Trgovina data available for {date}")
+            return []
+
+        try:
+            store = self.parse_store_info()
+            products = self.get_store_prices(csv_url)
+        except Exception as e:
+            logger.error(f"Error processing Jadranka Trgovina: {e}", exc_info=True)
+            return []
+
+        if not products:
+            logger.warning("No products found for Jadranka Trgovina")
+            return []
+
+        store.items = products
+        logger.info(f"Jadranka Trgovina: {len(products)} products found")
+        return [store]
+
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.DEBUG)
+    crawler = JadrankaTrgovinaCrawler()
+    stores = crawler.crawl(datetime.date.today())
+    if stores:
+        print(stores[0])
+        if stores[0].items:
+            print(stores[0].items[0])
+    else:
+        print("No stores found")
3 changes: 2 additions & 1 deletion crawler/store/konzum.py
@@ -1,10 +1,11 @@
 import datetime
 import logging
-import urllib.parse
 import re
+import urllib.parse
 from typing import List
 
 from bs4 import BeautifulSoup
+
 from crawler.store.models import Product, Store
 
 from .base import BaseCrawler
4 changes: 2 additions & 2 deletions crawler/store/lidl.py
@@ -1,11 +1,11 @@
 import datetime
 import logging
-from typing import Optional
 import re
+from typing import Optional
 
+from crawler.store.models import Product, Store
 
 from .base import BaseCrawler
-from crawler.store.models import Store, Product
 
 logger = logging.getLogger(__name__)

3 changes: 2 additions & 1 deletion crawler/store/ntl.py
@@ -2,9 +2,10 @@
 import logging
 import os
 import re
-from urllib.parse import unquote, quote_plus
+from urllib.parse import quote_plus, unquote
 
 from bs4 import BeautifulSoup
+
 from crawler.store.models import Product, Store
 
 from .base import BaseCrawler
3 changes: 1 addition & 2 deletions crawler/store/spar.py
@@ -1,9 +1,8 @@
 import datetime
 import logging
 import re
-from typing import Optional
 from json import loads
-
+from typing import Optional
 
 from crawler.store.models import Store

2 changes: 1 addition & 1 deletion crawler/store/studenac.py
@@ -1,8 +1,8 @@
 import datetime
 import logging
-from pathlib import Path
 import re
 import subprocess
+from pathlib import Path
 from tempfile import TemporaryDirectory
 from typing import Generator, Optional, Tuple
