diff --git a/data/scrape-sharing-excess-calendar/.env.example b/data/scrape-sharing-excess-calendar/.env.example
new file mode 100644
index 00000000..32563484
--- /dev/null
+++ b/data/scrape-sharing-excess-calendar/.env.example
@@ -0,0 +1,7 @@
+SUPABASE_URL=
+SUPABASE_SERVICE_ROLE_KEY=
+
+# Decoded from the base64 `src=` param in the Google Calendar embed URL
+GOOGLE_CALENDAR_ID=c_d43974649dbbaa8699b3583c8aa847737aecda4539202c423471282eedd44bbc@group.calendar.google.com
+
+LOOK_FORWARD_DAYS=30
diff --git a/data/scrape-sharing-excess-calendar/README.md b/data/scrape-sharing-excess-calendar/README.md
new file mode 100644
index 00000000..72f49ab8
--- /dev/null
+++ b/data/scrape-sharing-excess-calendar/README.md
@@ -0,0 +1,126 @@
+# Sharing Excess Calendar Sync Script
+
+---
+
+## How It Works
+
+Fetches upcoming events from the public Google Calendar embedded at https://www.sharingexcess.com/find-food (via its iCal feed), expands recurring events within a configurable look-forward window, geocodes each event's location with Nominatim, and upserts the normalized rows into the Supabase `resources` table.
+
+---
+
+## Requirements
+
+- Python 3.10+
+- Phlask Supabase credentials:
+  - `SUPABASE_URL`
+  - `SUPABASE_SERVICE_ROLE_KEY` (server-side only)
+- For any Google Calendar, the base64-decoded `src=` value from the embed URL (see instructions below)
+
+Install dependencies:
+
+```bash
+pip install requests icalendar recurring_ical_events supabase
+# OR
+pip install -r requirements.txt
+```
+
+---
+
+## Getting a calendar ID from a Google Calendar Embed URL
+
+> ⚠️ The `src=` parameter in a Google Calendar embed URL is **base64-encoded**. Do not copy it directly — decode it first.
+
+Given an embed URL like:
+
+```
+src=Y19kNDM5NzQ2NDlkYmJhYTg2OTliMzU4M2M4YWE4NDc3MzdhZWNkYTQ1MzkyMDJjNDIzNDcxMjgyZWVkZDQ0YmJjQGdyb3VwLmNhbGVuZGFyLmdvb2dsZS5jb20
+```
+
+Decode it with:
+
+```bash
+python3 -c "
+import base64, sys
+s = sys.argv[1]
+s += '=' * (-len(s) % 4)  # fix base64 padding
+print(base64.b64decode(s).decode())
+" "Y19kNDM5NzQ2NDlkYmJhYTg2OTliMzU4M2M4YWE4NDc3MzdhZWNkYTQ1MzkyMDJjNDIzNDcxMjgyZWVkZDQ0YmJjQGdyb3VwLmNhbGVuZGFyLmdvb2dsZS5jb20"
+```
+
+This yields the correct Calendar ID:
+
+```
+c_d43974649dbbaa8699b3583c8aa847737aecda4539202c423471282eedd44bbc@group.calendar.google.com
+```
+
+Use this decoded value in your `.env` — never the raw base64 string.
+
+---
+
+## Supabase Setup
+
+### Create the table
+
+Run `migration.sql` in the **Supabase SQL Editor** (Dashboard → SQL Editor → New query). It creates the table, a date index, and RLS policies.
+
+### Get credentials
+
+Get these from the Phlask Data team in Slack.
+
+> ⚠️ Use the **service role** key in the sync script (writes bypass RLS).
+> Use the **anon** key in your frontend app to read data safely.
+
+---
+
+## Configuration
+
+Copy `.env.example` to `.env` and fill in your values:

+```bash
+cp .env.example .env
+```
+
+```ini
+SUPABASE_URL=
+SUPABASE_SERVICE_ROLE_KEY=
+GOOGLE_CALENDAR_ID=c_d43974649dbbaa8699b3583c8aa847737aecda4539202c423471282eedd44bbc@group.calendar.google.com
+LOOK_FORWARD_DAYS=30
+```
+
+---
+
+## Usage
+
+### Supabase mode (default)
+
+```bash
+export $(cat .env | xargs) && python calendar_to_supabase.py
+```
+
+### CSV mode (debugging)
+
+Writes normalized rows to a local CSV instead of Supabase, which is useful for debugging. Defaults to `events.csv` if no filename is given.
+
+```bash
+# Default output → events.csv
+python calendar_to_supabase.py --csv
+
+# Custom filename
+python calendar_to_supabase.py --csv debug_output.csv
+```
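+
+### Parsing event times downstream
+
+Each synced row's `description` begins with a machine-readable header of the form `[[ start: <start ISO> | end: <end ISO> ]]`; the `end` part is omitted when an event has no end time. A minimal sketch of extracting it downstream, with a made-up description value:
+
+```python
+import re
+
+description = "[[ start: 2025-06-07T14:00:00+00:00 | end: 2025-06-07T16:00:00+00:00 ]]\nFree produce distribution"
+
+# The end part is optional, so its capture group is optional too
+m = re.search(r"\[\[ start: (.+?)(?: \| end: (.+?))? \]\]", description)
+if m:
+    start_iso, end_iso = m.group(1), m.group(2)  # end_iso may be None
+```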
+
+---
+
+## Table Schema
+
+| Column | Type | Description |
+|---|---|---|
+| `id` | `bigint` | Auto-incrementing primary key |
+| `uid` | `text` | Google Calendar's stable event UID (upsert key) |
+| `summary` | `text` | Event title |
+| `start_at` | `timestamptz` | Start date/time (ISO 8601) |
+| `end_at` | `timestamptz` | End date/time, nullable |
+| `description` | `text` | Event description, nullable |
+| `location` | `text` | Event location, nullable |
+| `all_day` | `boolean` | `true` for all-day events |
+| `updated_at` | `timestamptz` | Timestamp of last sync |
+
+---
\ No newline at end of file
diff --git a/data/scrape-sharing-excess-calendar/calendar_to_supabase.py b/data/scrape-sharing-excess-calendar/calendar_to_supabase.py
new file mode 100644
index 00000000..a98906ba
--- /dev/null
+++ b/data/scrape-sharing-excess-calendar/calendar_to_supabase.py
@@ -0,0 +1,414 @@
+# Sharing Excess Calendar Sync Script
+import argparse
+import csv
+import json
+import os
+import re
+import time
+import requests
+from datetime import date, datetime, timedelta, timezone
+from icalendar import Calendar
+import recurring_ical_events
+from supabase import create_client, Client
+
+# config
+# NOTE: use the base64-decoded value (see README)
+CALENDAR_ID = os.environ.get(
+    "GOOGLE_CALENDAR_ID",
+    "c_d43974649dbbaa8699b3583c8aa847737aecda4539202c423471282eedd44bbc@group.calendar.google.com"
+)
+LOOK_FORWARD_DAYS = int(os.environ.get("LOOK_FORWARD_DAYS", 30))
+
+SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
+SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY", "")  # service role for writes
+TABLE_NAME = "resources"
+
+CREATOR = "phlask-sharing-excess-sync"
+SOURCE_URL = "https://www.sharingexcess.com/find-food"
+NOMINATIM_ENDPOINT = "https://nominatim.openstreetmap.org/search"
+NOMINATIM_UA = "phlask-map/2.0 (https://phlask.me)"
+
+
+# Address parsing
+
+def parse_location(location: str) -> dict:
+    """
+    Split a US address string into address / city / state / zip_code.
+
+    Handles prefixed venue names, e.g.:
+        "ASPIRA Charter School, 6301 N 2nd St, Philadelphia, PA 19126, USA"
+    """
+    if not location:
+        return {}
+
+    # Match trailing: CITY, STATE ZIP[, USA]
+    pattern = r'^(.+),\s*([^,]+),\s*([A-Z]{2})\s+(\d{5})(?:-\d{4})?(?:,\s*USA)?$'
+    m = re.match(pattern, location.strip())
+    if not m:
+        return {"address": location}
+
+    street_part, city, state, zip_code = m.groups()
+
+    # If the street part has multiple comma-separated pieces (venue name + street),
+    # prefer the last piece that starts with a digit.
+    parts = [p.strip() for p in street_part.split(',')]
+    address = parts[-1]
+    for part in reversed(parts):
+        if part and part[0].isdigit():
+            address = part
+            break
+
+    return {
+        "address": address,
+        "city": city.strip(),
+        "state": state.strip(),
+        "zip_code": zip_code.strip(),
+    }
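+
+# Example (illustrative input → output, same address as the docstring):
+#   parse_location("ASPIRA Charter School, 6301 N 2nd St, Philadelphia, PA 19126, USA")
+#   → {"address": "6301 N 2nd St", "city": "Philadelphia", "state": "PA", "zip_code": "19126"}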
+ """ + if not location: + return None + try: + resp = requests.get( + NOMINATIM_ENDPOINT, + params={"q": location, "format": "json", "limit": 1}, + headers={"User-Agent": NOMINATIM_UA}, + timeout=10, + ) + resp.raise_for_status() + results = resp.json() + if results: + return float(results[0]["lat"]), float(results[0]["lon"]) + except Exception as exc: + print(f" Geocoding failed for '{location}': {exc}") + return None + + +# Use ical for getting recurring events outside the window, +# and to avoid timezone issues with Google's API which returns +# all-day events as date-only + +def fetch_events(calendar_id: str, look_forward_days: int) -> list[dict]: + """Fetch upcoming events from a public Google Calendar and return raw dicts.""" + ical_url = ( + f"https://calendar.google.com/calendar/ical/" + f"{requests.utils.quote(calendar_id, safe='')}/public/basic.ics" + ) + + response = requests.get(ical_url, timeout=10) + response.raise_for_status() + + cal = Calendar.from_ical(response.text) + + now = datetime.now(tz=timezone.utc) + cutoff = now + timedelta(days=look_forward_days) + + # recurring_ical_events expands RRULE/recurring events into concrete occurrences + # within the window, so long-running series (e.g. weekly Saturday events whose + # DTSTART is far in the past) are captured correctly. + components = recurring_ical_events.of(cal).between(now, cutoff) + + events = [] + for component in components: + if component.name != "VEVENT": + continue + + dtstart = component.get("DTSTART").dt + dtend = component.get("DTEND").dt if component.get("DTEND") else None + + events.append({ + "uid": str(component.get("UID", "")), + "summary": str(component.get("SUMMARY", "")), + "start_at": dtstart.isoformat(), + "end_at": dtend.isoformat() if dtend else None, + "description": str(component.get("DESCRIPTION", "")) or None, + "location": str(component.get("LOCATION", "")) or None, + "all_day": not isinstance(dtstart, datetime), + }) + + events.sort(key=lambda e: e["start_at"]) + return events + + +# Normalize events for the resources table + +def _parse_event_dt(iso_str: str) -> datetime: + """Parse an ISO string that may be a date-only string (all-day events).""" + try: + return datetime.fromisoformat(iso_str) + except ValueError: + d = date.fromisoformat(iso_str) + return datetime(d.year, d.month, d.day, tzinfo=timezone.utc) + + +def build_hours(start_dt: datetime, end_dt: datetime | None) -> list[dict]: + """ + Return a list with one GooglePlacesPeriod-compatible object representing + the event's open/close window. + + Google's day numbering: 0 = Sunday … 6 = Saturday. + Python's weekday(): 0 = Monday … 6 = Sunday. + Conversion: google_day = (python_weekday + 1) % 7 + """ + def time_point(dt: datetime, truncated: bool = False) -> dict: + return { + "date": dt.date().isoformat(), + "truncated": truncated, + "day": (dt.weekday() + 1) % 7, + "hour": dt.hour, + "minute": dt.minute, + } + + open_point = time_point(start_dt) + close_point = time_point(end_dt) if end_dt else time_point(start_dt, truncated=True) + + return [{"open": open_point, "close": close_point}] + + +def build_description(original: str | None, start_iso: str, end_iso: str | None) -> str: + """ + Prepend structured date metadata with clear delimiters so a downstream + parser can extract event times with a simple regex, e.g.: + + re.search(r'\[\[ start: (.+?) \| end: (.+?) 
+
+
+def build_description(original: str | None, start_iso: str, end_iso: str | None) -> str:
+    """
+    Prepend structured date metadata with clear delimiters so a downstream
+    parser can extract event times with a simple regex, e.g.:
+
+        re.search(r'\[\[ start: (.+?) \| end: (.+?) \]\]', description)
+
+    Format:
+        [[ start: <start ISO> | end: <end ISO> ]]
+    """
+    end_part = f" | end: {end_iso}" if end_iso else ""
+    header = f"[[ start: {start_iso}{end_part} ]]"
+    return f"{header}\n{original}" if original else header
+
+
+def event_to_resource(event: dict) -> dict | None:
+    """
+    Convert a raw calendar event dict to a normalized `resources` table row.
+    Returns None and logs a warning when geocoding fails (lat/lon are NOT NULL).
+    """
+    location = event.get("location")
+    parsed = parse_location(location or "")
+
+    coords = geocode(location) if location else None
+    if coords is None and parsed.get("address"):
+        # Fall back to the stripped street address (respecting the 1 req/s limit)
+        time.sleep(1)
+        fallback = ", ".join(filter(None, [
+            parsed.get("address"),
+            parsed.get("city"),
+            parsed.get("state"),
+            parsed.get("zip_code"),
+        ]))
+        coords = geocode(fallback)
+    if coords is None:
+        print(f"  Skipping '{event['summary']}' — could not geocode: {location!r}")
+        return None
+
+    lat, lon = coords
+    time.sleep(1)  # Nominatim rate limit: 1 req/s
+    now_iso = datetime.now(tz=timezone.utc).isoformat()
+    start_dt = _parse_event_dt(event["start_at"])
+    end_dt = _parse_event_dt(event["end_at"]) if event.get("end_at") else None
+
+    return {
+        "version": 1,
+        "creator": CREATOR,
+        "last_modifier": CREATOR,
+        "date_created": now_iso,
+        "last_modified": now_iso,
+        "source": {
+            "type": "WEB_SCRAPE",
+            "url": SOURCE_URL,
+        },
+        "verification": {
+            "verified": False,
+            "last_modified": now_iso,
+            "last_modifier": CREATOR,
+        },
+        "resource_type": "FOOD",
+        "status": "OPERATIONAL",
+        "entry_type": "UNSURE",
+        "name": event["summary"],
+        "description": build_description(event.get("description"), event["start_at"], event.get("end_at")),
+        "address": parsed.get("address"),
+        "city": parsed.get("city"),
+        "state": parsed.get("state"),
+        "zip_code": parsed.get("zip_code"),
+        "latitude": lat,
+        "longitude": lon,
+        "gp_id": event["uid"],
+        "food": {
+            "food_type": [],
+            "distribution_type": ["PICKUP"],
+            "organization_type": "NON_PROFIT",
+            "organization_name": "Sharing Excess",
+            "organization_url": SOURCE_URL,
+            "tags": [],
+        },
+        "hours": build_hours(start_dt, end_dt),
+        "images": None,
+        "guidelines": None,
+        "water": None,
+        "forage": None,
+        "bathroom": None,
+    }
+
+
+def normalize_events(events: list[dict]) -> list[dict]:
+    """Convert all raw events to resources rows, dropping those that fail geocoding."""
+    resources = []
+    for event in events:
+        print(f"  Processing: {event['summary']}")
+        resource = event_to_resource(event)
+        if resource:
+            resources.append(resource)
+    return resources
+
+
+# Supabase helpers
+
+def get_supabase_client() -> Client:
+    if not SUPABASE_URL or not SUPABASE_KEY:
+        raise EnvironmentError(
+            "SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY must be set. "
+            "Use --csv to output locally instead."
+        )
+    return create_client(SUPABASE_URL, SUPABASE_KEY)
+
+
+def upsert_resources(client: Client, resources: list[dict]) -> None:
+    """
+    Insert new resources and update existing ones matched by gp_id.
+    Uses a manual select-then-insert/update pattern because gp_id does not
+    have the database-level unique constraint required for ON CONFLICT upserts.
+    """
+    if not resources:
+        print("No resources to upsert.")
+        return
+
+    gp_ids = [r["gp_id"] for r in resources]
+
+    existing = (
+        client.table(TABLE_NAME)
+        .select("id, gp_id, date_created")
+        .in_("gp_id", gp_ids)
+        .execute()
+    )
+    existing_map = {row["gp_id"]: row for row in existing.data}
+
+    to_insert = []
+    to_update = []  # list of (id, row)
+
+    for r in resources:
+        if r["gp_id"] in existing_map:
+            existing_row = existing_map[r["gp_id"]]
+            # Preserve the original creation timestamp on updates
+            r = {**r, "date_created": existing_row["date_created"]}
+            to_update.append((existing_row["id"], r))
+        else:
+            to_insert.append(r)
+
+    if to_insert:
+        client.table(TABLE_NAME).insert(to_insert).execute()
+        print(f"Inserted {len(to_insert)} new resource(s).")
+
+    for row_id, row in to_update:
+        client.table(TABLE_NAME).update(row).eq("id", row_id).execute()
+
+    if to_update:
+        print(f"Updated {len(to_update)} existing resource(s).")
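+
+# Sketch (hypothetical): if gp_id later gains a unique index, e.g.
+#   CREATE UNIQUE INDEX resources_gp_id_key ON resources (gp_id);
+# the select-then-write pattern above could collapse to one native upsert:
+#   client.table(TABLE_NAME).upsert(resources, on_conflict="gp_id").execute()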
+ """ + if not resources: + print("No resources to upsert.") + return + + gp_ids = [r["gp_id"] for r in resources] + + existing = ( + client.table(TABLE_NAME) + .select("id, gp_id, date_created") + .in_("gp_id", gp_ids) + .execute() + ) + existing_map = {row["gp_id"]: row for row in existing.data} + + to_insert = [] + to_update = [] # list of (id, row) + + for r in resources: + if r["gp_id"] in existing_map: + existing_row = existing_map[r["gp_id"]] + # Preserve the original creation timestamp on updates + r = {**r, "date_created": existing_row["date_created"]} + to_update.append((existing_row["id"], r)) + else: + to_insert.append(r) + + if to_insert: + client.table(TABLE_NAME).insert(to_insert).execute() + print(f"Inserted {len(to_insert)} new resource(s).") + + for row_id, row in to_update: + client.table(TABLE_NAME).update(row).eq("id", row_id).execute() + + if to_update: + print(f"Updated {len(to_update)} existing resource(s).") + + +def delete_stale_resources(client: Client, current_gp_ids: list[str]) -> None: + """ + Remove resources sourced from Sharing Excess whose UIDs are no longer + in the current look-forward window. + """ + if not current_gp_ids: + return + ( + client.table(TABLE_NAME) + .delete() + .filter("creator", "eq", CREATOR) + .execute() + ) + print("Removed stale Sharing Excess resources outside the current window.") + + +# CSV for debugging/local + +def save_csv(resources: list[dict], path: str) -> None: + """Write normalized resources to a CSV, serializing jsonb fields as JSON strings.""" + if not resources: + print("No resources found — CSV not written.") + return + + JSONB_FIELDS = {"source", "verification", "food", "hours"} + ARRAY_FIELDS = {"images"} + + fieldnames = [ + "gp_id", "name", "resource_type", "status", "entry_type", + "address", "city", "state", "zip_code", "latitude", "longitude", + "description", "guidelines", + "source", "verification", "food", "hours", + "images", "water", "forage", "bathroom", + "creator", "last_modifier", "version", + ] + + def serialize(key, val): + if val is None: + return "" + if key in JSONB_FIELDS: + return json.dumps(val) + if key in ARRAY_FIELDS: + return json.dumps(val) + return val + + with open(path, "w", newline="", encoding="utf-8") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore") + writer.writeheader() + for r in resources: + writer.writerow({k: serialize(k, r.get(k)) for k in fieldnames}) + + print(f"Saved {len(resources)} resource(s) → {path}") + + +# CLI + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Sync Sharing Excess Google Calendar to Supabase resources table." + ) + parser.add_argument( + "--csv", + nargs="?", + const="events.csv", + default=None, + metavar="FILE", + help="Output to CSV instead of Supabase (default filename: events.csv).", + ) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + print(f"Fetching events for the next {LOOK_FORWARD_DAYS} day(s)...") + events = fetch_events(CALENDAR_ID, LOOK_FORWARD_DAYS) + print(f"Found {len(events)} event(s) in window. 
diff --git a/data/scrape-sharing-excess-calendar/requirements.txt b/data/scrape-sharing-excess-calendar/requirements.txt
new file mode 100644
index 00000000..9c6848fd
--- /dev/null
+++ b/data/scrape-sharing-excess-calendar/requirements.txt
@@ -0,0 +1,4 @@
+requests
+icalendar
+recurring_ical_events
+supabase
\ No newline at end of file
diff --git a/supabase/functions/sync-sharing-excess/index.ts b/supabase/functions/sync-sharing-excess/index.ts
new file mode 100644
index 00000000..d8a5fa96
--- /dev/null
+++ b/supabase/functions/sync-sharing-excess/index.ts
@@ -0,0 +1,425 @@
+// Sharing Excess Calendar Sync — Supabase Edge Function
+// Fetches upcoming events from the Sharing Excess public Google Calendar,
+// geocodes locations, and upserts them into the `resources` table.
+
+import { createClient } from "https://esm.sh/@supabase/supabase-js@2";
+import ICAL from "https://esm.sh/ical.js@2.1.0";
+
+// --- Config ---
+
+const CALENDAR_ID =
+  Deno.env.get("GOOGLE_CALENDAR_ID") ??
+  "c_d43974649dbbaa8699b3583c8aa847737aecda4539202c423471282eedd44bbc@group.calendar.google.com";
+
+const LOOK_FORWARD_DAYS = Number(Deno.env.get("LOOK_FORWARD_DAYS") ?? "30");
+const TABLE_NAME = "resources";
+const CREATOR = "phlask-sharing-excess-sync";
+const SOURCE_URL = "https://www.sharingexcess.com/find-food";
+const NOMINATIM_ENDPOINT = "https://nominatim.openstreetmap.org/search";
+const NOMINATIM_UA = "phlask-map/2.0 (https://phlask.me)";
+
+// --- Address parsing ---
+
+interface ParsedLocation {
+  address?: string;
+  city?: string;
+  state?: string;
+  zip_code?: string;
+}
+
+function parseLocation(location: string): ParsedLocation {
+  if (!location) return {};
+
+  const pattern =
+    /^(.+),\s*([^,]+),\s*([A-Z]{2})\s+(\d{5})(?:-\d{4})?(?:,\s*USA)?$/;
+  const m = location.trim().match(pattern);
+  if (!m) return { address: location };
+
+  const [, streetPart, city, state, zipCode] = m;
+  const parts = streetPart.split(",").map((p) => p.trim());
+  let address = parts[parts.length - 1];
+  for (let i = parts.length - 1; i >= 0; i--) {
+    if (parts[i] && /^\d/.test(parts[i])) {
+      address = parts[i];
+      break;
+    }
+  }
+
+  return {
+    address,
+    city: city.trim(),
+    state: state.trim(),
+    zip_code: zipCode.trim(),
+  };
+}
+
+function cleanAddress(address: string): string {
+  // Simplify street-number ranges ("6301-11" → "6301"), drop "#..." tokens,
+  // and strip suite/apt/floor designators that confuse Nominatim.
+  let cleaned = address.replace(/(\d+)-\d+\b/g, "$1");
+  cleaned = cleaned.replace(/\s*[#][\w-]+/g, "");
+  cleaned = cleaned.replace(
+    /,?\s*\b(?:ste|suite|apt|unit|room|rm|fl|floor)\b\.?\s*\S+/gi,
+    ""
+  );
+  return cleaned.trim().replace(/,$/, "");
+}
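+
+// Example (illustrative): cleanAddress("6301-11 N 2nd St, Ste 4")
+// → "6301 N 2nd St" (street-number range simplified, suite designator dropped)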
+
+// --- Geocoding ---
+
+async function geocodeQuery(
+  query: string
+): Promise<[number, number] | null> {
+  try {
+    const url = new URL(NOMINATIM_ENDPOINT);
+    url.searchParams.set("q", query);
+    url.searchParams.set("format", "json");
+    url.searchParams.set("limit", "1");
+    const resp = await fetch(url.toString(), {
+      headers: { "User-Agent": NOMINATIM_UA },
+    });
+    if (!resp.ok) return null;
+    const results = await resp.json();
+    if (results.length > 0) {
+      return [parseFloat(results[0].lat), parseFloat(results[0].lon)];
+    }
+  } catch (e) {
+    console.error(`  Geocoding failed for '${query}':`, e);
+  }
+  return null;
+}
+
+function sleep(ms: number): Promise<void> {
+  return new Promise((r) => setTimeout(r, ms));
+}
+
+function joinParts(parts: (string | undefined)[]): string {
+  return parts.filter(Boolean).join(", ");
+}
+
+async function geocodeLocation(
+  location: string | null,
+  parsed: ParsedLocation
+): Promise<[number, number] | null> {
+  if (!location) return null;
+
+  // Attempt 1: full location string
+  let coords = await geocodeQuery(location);
+
+  // Fallback 1: parsed street address + city/state/zip
+  if (!coords && parsed.address) {
+    await sleep(1000);
+    coords = await geocodeQuery(
+      joinParts([parsed.address, parsed.city, parsed.state, parsed.zip_code])
+    );
+  }
+
+  // Fallback 2: cleaned address (simplify ranges, drop suite numbers)
+  if (!coords && parsed.address) {
+    const cleaned = cleanAddress(parsed.address);
+    if (cleaned !== parsed.address) {
+      await sleep(1000);
+      coords = await geocodeQuery(
+        joinParts([cleaned, parsed.city, parsed.state, parsed.zip_code])
+      );
+    }
+  }
+
+  // Fallback 3: venue/landmark name + city/state
+  if (!coords && parsed.city && parsed.state) {
+    const parts = location.split(",").map((p) => p.trim());
+    const venue = parts[0];
+    if (venue && !/^\d/.test(venue)) {
+      await sleep(1000);
+      coords = await geocodeQuery(`${venue}, ${parsed.city}, ${parsed.state}`);
+    }
+  }
+
+  return coords;
+}
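+
+// Illustrative query ladder for a hypothetical location string
+// "ASPIRA Charter School, 6301-11 N 2nd St, Ste 2, Philadelphia, PA 19126, USA":
+//   1) the full string
+//   2) "6301-11 N 2nd St, Philadelphia, PA, 19126"         (parsed)
+//   3) "6301 N 2nd St, Philadelphia, PA, 19126"            (cleaned)
+//   4) "ASPIRA Charter School, Philadelphia, PA"           (venue + city/state)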
+
+// --- iCal fetch & recurring event expansion ---
+
+interface RawEvent {
+  uid: string;
+  summary: string;
+  start_at: string;
+  end_at: string | null;
+  description: string | null;
+  location: string | null;
+  all_day: boolean;
+}
+
+function fetchAndExpandEvents(
+  icalText: string,
+  lookForwardDays: number
+): RawEvent[] {
+  const jcal = ICAL.parse(icalText);
+  const comp = new ICAL.Component(jcal);
+  const vevents = comp.getAllSubcomponents("vevent");
+
+  const now = new Date();
+  const cutoff = new Date(now.getTime() + lookForwardDays * 86400_000);
+
+  const events: RawEvent[] = [];
+
+  for (const vevent of vevents) {
+    const event = new ICAL.Event(vevent);
+
+    if (event.isRecurring()) {
+      const iter = event.iterator();
+      let next: ICAL.Time | null;
+      while ((next = iter.next())) {
+        const jsDate = next.toJSDate();
+        if (jsDate > cutoff) break;
+        if (jsDate < now) continue;
+
+        const duration = event.duration;
+        const endTime = next.clone();
+        endTime.addDuration(duration);
+
+        events.push({
+          uid: event.uid + "_" + next.toString(),
+          summary: event.summary ?? "",
+          start_at: jsDate.toISOString(),
+          end_at: endTime.toJSDate().toISOString(),
+          description: event.description ?? null,
+          location: event.location ?? null,
+          all_day: next.isDate,
+        });
+      }
+    } else {
+      const start = event.startDate;
+      const end = event.endDate;
+      const jsStart = start.toJSDate();
+
+      if (jsStart > cutoff || jsStart < now) continue;
+
+      events.push({
+        uid: event.uid ?? "",
+        summary: event.summary ?? "",
+        start_at: jsStart.toISOString(),
+        end_at: end ? end.toJSDate().toISOString() : null,
+        description: event.description ?? null,
+        location: event.location ?? null,
+        all_day: start.isDate,
+      });
+    }
+  }
+
+  events.sort((a, b) => a.start_at.localeCompare(b.start_at));
+  return events;
+}
+
+// --- Normalization ---
+
+function parseEventDt(isoStr: string): Date {
+  return new Date(isoStr);
+}
+
+function buildHours(startDt: Date, endDt: Date | null) {
+  function timePoint(dt: Date, truncated = false) {
+    // Google day numbering: 0=Sun…6=Sat. JS getUTCDay() uses the same convention.
+    return {
+      date: dt.toISOString().slice(0, 10),
+      truncated,
+      day: dt.getUTCDay(),
+      hour: dt.getUTCHours(),
+      minute: dt.getUTCMinutes(),
+    };
+  }
+  return [
+    {
+      open: timePoint(startDt),
+      close: endDt ? timePoint(endDt) : timePoint(startDt, true),
+    },
+  ];
+}
+
+function buildDescription(
+  original: string | null,
+  startIso: string,
+  endIso: string | null
+): string {
+  const endPart = endIso ? ` | end: ${endIso}` : "";
+  const header = `[[ start: ${startIso}${endPart} ]]`;
+  return original ? `${header}\n${original}` : header;
+}
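+
+// Example (illustrative values):
+//   buildDescription("Bring your own bags.", "2025-06-07T14:00:00.000Z", "2025-06-07T16:00:00.000Z")
+//   → "[[ start: 2025-06-07T14:00:00.000Z | end: 2025-06-07T16:00:00.000Z ]]\nBring your own bags."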
+
+// deno-lint-ignore no-explicit-any
+async function eventToResource(event: RawEvent): Promise<Record<string, any> | null> {
+  const parsed = parseLocation(event.location ?? "");
+  const coords = await geocodeLocation(event.location, parsed);
+
+  if (!coords) {
+    console.warn(
+      `  Skipping '${event.summary}' — could not geocode: '${event.location}'`
+    );
+    return null;
+  }
+
+  const [lat, lon] = coords;
+  await sleep(1000); // Nominatim rate limit
+  const nowIso = new Date().toISOString();
+  const startDt = parseEventDt(event.start_at);
+  const endDt = event.end_at ? parseEventDt(event.end_at) : null;
+
+  return {
+    version: 1,
+    creator: CREATOR,
+    last_modifier: CREATOR,
+    date_created: nowIso,
+    last_modified: nowIso,
+    source: { type: "WEB_SCRAPE", url: SOURCE_URL },
+    verification: {
+      verified: false,
+      last_modified: nowIso,
+      last_modifier: CREATOR,
+    },
+    resource_type: "FOOD",
+    status: "OPERATIONAL",
+    entry_type: "UNSURE",
+    name: event.summary,
+    description: buildDescription(
+      event.description,
+      event.start_at,
+      event.end_at
+    ),
+    address: parsed.address ?? null,
+    city: parsed.city ?? null,
+    state: parsed.state ?? null,
+    zip_code: parsed.zip_code ?? null,
+    latitude: lat,
+    longitude: lon,
+    gp_id: event.uid,
+    food: {
+      food_type: [],
+      distribution_type: ["PICKUP"],
+      organization_type: "NON_PROFIT",
+      organization_name: "Sharing Excess",
+      organization_url: SOURCE_URL,
+      tags: [],
+    },
+    hours: buildHours(startDt, endDt),
+    images: null,
+    guidelines: null,
+    water: null,
+    forage: null,
+    bathroom: null,
+  };
+}
+
+// --- Supabase helpers ---
+
+// deno-lint-ignore no-explicit-any
+async function upsertResources(supabase: any, resources: Record<string, any>[]) {
+  if (!resources.length) {
+    console.log("No resources to upsert.");
+    return;
+  }
+
+  const gpIds = resources.map((r) => r.gp_id);
+  const { data: existing } = await supabase
+    .from(TABLE_NAME)
+    .select("id, gp_id, date_created")
+    .in("gp_id", gpIds);
+
+  // deno-lint-ignore no-explicit-any
+  const existingMap = new Map((existing ?? []).map((r: any) => [r.gp_id, r]));
+
+  const toInsert = [];
+  const toUpdate = [];
+
+  for (const r of resources) {
+    // deno-lint-ignore no-explicit-any
+    const prev = existingMap.get(r.gp_id) as any;
+    if (prev) {
+      // Preserve the original creation timestamp on updates
+      toUpdate.push({ id: prev.id, row: { ...r, date_created: prev.date_created } });
+    } else {
+      toInsert.push(r);
+    }
+  }
+
+  if (toInsert.length) {
+    await supabase.from(TABLE_NAME).insert(toInsert);
+    console.log(`Inserted ${toInsert.length} new resource(s).`);
+  }
+
+  for (const { id, row } of toUpdate) {
+    await supabase.from(TABLE_NAME).update(row).eq("id", id);
+  }
+  if (toUpdate.length) {
+    console.log(`Updated ${toUpdate.length} existing resource(s).`);
+  }
+}
+
+// deno-lint-ignore no-explicit-any
+async function deleteStaleResources(supabase: any, currentGpIds: string[]) {
+  if (!currentGpIds.length) return;
+  // Quote each UID: PostgREST `in` lists need double quotes around values
+  // containing reserved characters (these UIDs include ':' and '@').
+  const quoted = currentGpIds.map((id) => `"${id}"`).join(",");
+  await supabase
+    .from(TABLE_NAME)
+    .delete()
+    .filter("source->>url", "eq", SOURCE_URL)
+    .not("gp_id", "in", `(${quoted})`);
+  console.log("Removed stale Sharing Excess resources outside the current window.");
+}
+
+// --- Handler ---
+
+Deno.serve(async (req) => {
+  // Optional: protect with a shared secret
+  const authHeader = req.headers.get("Authorization");
+  const expectedToken = Deno.env.get("SYNC_SECRET");
+  if (expectedToken && authHeader !== `Bearer ${expectedToken}`) {
+    return new Response("Unauthorized", { status: 401 });
+  }
+
+  try {
+    const supabase = createClient(
+      Deno.env.get("SUPABASE_URL")!,
+      Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")!
+    );
+
+    // Fetch iCal
+    const icalUrl = `https://calendar.google.com/calendar/ical/${encodeURIComponent(CALENDAR_ID)}/public/basic.ics`;
+    console.log(`Fetching calendar from ${icalUrl}...`);
+    const icalResp = await fetch(icalUrl);
+    if (!icalResp.ok) {
+      throw new Error(`Failed to fetch calendar: ${icalResp.status}`);
+    }
+    const icalText = await icalResp.text();
+
+    // Parse & expand recurring events
+    const events = fetchAndExpandEvents(icalText, LOOK_FORWARD_DAYS);
+    console.log(`Found ${events.length} event(s) in window. Geocoding...`);
+
+    // Normalize
+    const resources = [];
+    for (const event of events) {
+      console.log(`  Processing: ${event.summary}`);
+      const resource = await eventToResource(event);
+      if (resource) resources.push(resource);
+    }
+    console.log(`Normalized ${resources.length} resource(s).`);
+
+    // Upsert & clean up
+    await upsertResources(supabase, resources);
+    await deleteStaleResources(
+      supabase,
+      resources.map((r) => r.gp_id)
+    );
+
+    return new Response(
+      JSON.stringify({
+        ok: true,
+        fetched: events.length,
+        synced: resources.length,
+      }),
+      { headers: { "Content-Type": "application/json" } }
+    );
+  } catch (err) {
+    console.error("Sync failed:", err);
+    return new Response(
+      JSON.stringify({ ok: false, error: String(err) }),
+      { status: 500, headers: { "Content-Type": "application/json" } }
+    );
+  }
+});
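+
+// Invocation sketch (hypothetical values; assumes the function is deployed under
+// this directory's name and SYNC_SECRET is configured):
+//   curl -X POST "https://<project-ref>.supabase.co/functions/v1/sync-sharing-excess" \
+//     -H "Authorization: Bearer $SYNC_SECRET"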