-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
118 lines (99 loc) · 4.35 KB
/
main.py
File metadata and controls
118 lines (99 loc) · 4.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""Apartments.com Listings Scraper — multifamily-market research demo.
Use case: scrape every rental listing on the first two pages of the Brooklyn,
NY apartments.com search, then pull full property details (90+ fields each)
for the first 15 hits — rent ranges, walk score, amenities, pet policy.
Run:
pip install -r requirements.txt
cp .env.example .env
python main.py
Actor: https://apify.com/pro100chok/apartments-scraper-usage?utm_source=github_pro100&utm_medium=readme&utm_campaign=apartments-com-listings-scraper-python
"""
from __future__ import annotations
import csv
import json
import os
import sys
from pathlib import Path
from apify_client import ApifyClient
from apify_client.errors import ApifyApiError
from dotenv import load_dotenv
# Search results page handed to the actor as the single start URL of the search run.
SEARCH_URL: str = "https://www.apartments.com/apartments/brooklyn-ny/"
# Sent as "maxPages" in the search run input.
MAX_PAGES: int = 2
# Cap on the listing URLs forwarded to the details run; also sent as "maxItems".
MAX_DETAILS: int = 15
# Apify actor invoked for both the search step and the details step.
ACTOR_ID: str = "pro100chok/apartments-scraper-usage"
def _proxy_config() -> dict:
    """Return the proxy settings shared by both actor runs (Apify residential, US)."""
    return {
        "useApifyProxy": True,
        "apifyProxyGroups": ["RESIDENTIAL"],
        "apifyProxyCountry": "US",
    }


def _run_actor(client, step: str, run_input: dict) -> dict:
    """Call the actor once and return its run record, exiting on failure.

    Args:
        client: The ``ApifyClient`` used to reach the actor.
        step: Human-readable step name used in the error message
            ("Search" or "Details").
        run_input: Input payload passed to the actor unchanged.

    Raises:
        SystemExit: if the API call raises ``ApifyApiError`` or no run record
            comes back.
    """
    try:
        run = client.actor(ACTOR_ID).call(run_input=run_input, timeout_secs=900)
    except ApifyApiError as exc:
        sys.exit(f"{step} step failed: {exc}")
    # The client types call() as possibly returning None; fail with a clear
    # message instead of a TypeError on the later ["defaultDatasetId"] lookup.
    if run is None:
        sys.exit(f"{step} step failed: actor run returned no result.")
    return run


def _nested(record: dict, key: str, field: str) -> object:
    """Return ``record[key][field]`` when ``record[key]`` is a dict, else None."""
    value = record.get(key)
    return value.get(field) if isinstance(value, dict) else None


def _scalar_or_min(record: dict, *keys: str) -> object:
    """Return the first usable value among *keys*, unwrapping ``{"min": ...}``.

    A key is skipped when it is absent, None, or an empty dict, so a later key
    (e.g. "bedrooms" after "beds") can act as a real fallback. A value of 0 is
    kept as a legitimate result.
    """
    for key in keys:
        value = record.get(key)
        if value is None or value == {}:
            continue
        return value.get("min") if isinstance(value, dict) else value
    return None


def _address(record: dict) -> object:
    """Return the listing address, preferring ``address["fullAddress"]``.

    The field is handled both as a dict and as a plain value; the actor's
    output schema is not visible here, so neither shape is assumed.
    """
    address = record.get("address")
    if isinstance(address, dict):
        return address.get("fullAddress") or address
    return address


def _build_row(item: dict) -> dict:
    """Flatten one detail record into the columns written to output.csv."""
    return {
        "name": item.get("name"),
        "address": _address(item),
        "min_rent": _nested(item, "rent", "min"),
        "max_rent": _nested(item, "rent", "max"),
        "beds": _scalar_or_min(item, "beds", "bedrooms"),
        "baths": item.get("baths"),
        "sqft_min": _scalar_or_min(item, "sqft"),
        "walk_score": _nested(item, "scores", "walk"),
        "transit_score": _nested(item, "scores", "transit"),
        "pet_friendly": item.get("petsAllowed"),
        "url": item.get("url"),
    }


def _format_rent(low: object, high: object) -> str:
    """Render a rent range for the console table, tolerating missing bounds."""
    if not low:
        return "—"
    if not high:
        # Avoid printing "$1200–$None" when only the lower bound is known.
        return f"${low}"
    return f"${low}–${high}"


def main() -> int:
    """Run the two-step scrape and save the results next to this script.

    Step 1 runs the actor with ``action="search"`` on SEARCH_URL for up to
    MAX_PAGES pages. Step 2 runs it with ``action="details"`` on the first
    MAX_DETAILS listing URLs found. The detail records are written in full to
    ``output.json``, a flattened subset to ``output.csv`` (only when there is
    at least one record), and a short rent / walk-score table is printed.

    Returns:
        0 on success.

    Raises:
        SystemExit: with an explanatory message when the API token is missing,
            an actor call fails, or the search yields no usable listings.
    """
    load_dotenv()
    token = os.environ.get("APIFY_API_TOKEN")
    if not token:
        sys.exit("APIFY_API_TOKEN missing — copy .env.example to .env first.")
    client = ApifyClient(token)

    print(f"Step 1/2 — searching apartments.com pages 1–{MAX_PAGES} for {SEARCH_URL}")
    search_run = _run_actor(
        client,
        "Search",
        {
            "action": "search",
            "startUrls": [{"url": SEARCH_URL}],
            "maxPages": MAX_PAGES,
            "proxyConfiguration": _proxy_config(),
        },
    )
    listings = list(client.dataset(search_run["defaultDatasetId"]).iterate_items())
    print(f" found {len(listings)} listings")
    if not listings:
        sys.exit("No listings — search returned empty (try a different startUrl).")

    # Only listings that actually carry a URL can be looked up in step 2.
    listing_urls = [
        {"url": listing["url"]} for listing in listings if listing.get("url")
    ][:MAX_DETAILS]
    if not listing_urls:
        # Don't start a second actor run with nothing to fetch.
        sys.exit("No listing URLs — search results carried no 'url' field.")

    print(f"\nStep 2/2 — fetching full details for first {len(listing_urls)} listings")
    details_run = _run_actor(
        client,
        "Details",
        {
            "action": "details",
            "listingUrls": listing_urls,
            "maxItems": MAX_DETAILS,
            "proxyConfiguration": _proxy_config(),
        },
    )
    items = list(client.dataset(details_run["defaultDatasetId"]).iterate_items())

    out = Path(__file__).parent
    (out / "output.json").write_text(
        json.dumps(items, indent=2, ensure_ascii=False), encoding="utf-8"
    )

    rows = [_build_row(item) for item in items]
    if rows:
        with (out / "output.csv").open("w", newline="", encoding="utf-8") as fh:
            writer = csv.DictWriter(fh, fieldnames=list(rows[0].keys()))
            writer.writeheader()
            writer.writerows(rows)

    # Report only the files that were really written on this run.
    saved = "output.json + output.csv" if rows else "output.json"
    print(f"\nSaved {len(items)} property records → {saved}\n")

    print(f"{'Property':<35}{'Rent':>16}{'Walk':>6}")
    print("-" * 60)
    for row in rows:
        name = str(row["name"] or "?")[:33]
        rent = _format_rent(row["min_rent"], row["max_rent"])
        # A walk score of 0 is a real value, so test for None rather than falsiness.
        walk = "—" if row["walk_score"] is None else str(row["walk_score"])
        print(f"{name:<35}{rent:>16}{walk:>6}")
    return 0
if __name__ == "__main__":
    # Script entry point: terminate the process with main()'s return value as exit status.
    sys.exit(main())