-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.py
More file actions
68 lines (61 loc) · 2.27 KB
/
Copy pathconfig.py
File metadata and controls
68 lines (61 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""Configuration settings for Marktplaats scraper."""
import os
# --- ScrapingAnt API ---
# The API key is read from the SCRAPINGANT_API_KEY environment variable and
# falls back to an empty string when the variable is not set.
SCRAPINGANT_API_KEY = os.getenv("SCRAPINGANT_API_KEY", "")
SCRAPINGANT_BASE_URL = "https://api.scrapingant.com/v2/general"

# --- Marktplaats ---
# Root URL of the Marktplaats site.
MARKTPLAATS_BASE_URL = "https://www.marktplaats.nl"
# Request configuration: timeout and retry values.
REQUEST_CONFIG = {
    # Timeout is expressed in milliseconds: 2 min * 60 s * 1000 ms.
    "timeout": 2 * 60 * 1000,
    "retry_count": 3,
    # Retry delay is expressed in seconds.
    "retry_delay": 5,
}
# Categories that can be scraped.
# Each entry maps a category slug to its display name; a slug of the form
# "parent/child" denotes a sub-category.
CATEGORIES = {
    # Computers and software
    "computers-en-software": "Computers en Software",
    "computers-en-software/windows-laptops": "Windows Laptops",
    "computers-en-software/apple-macbook": "Apple MacBook",
    "computers-en-software/monitoren": "Monitoren",
    "computers-en-software/tablets": "Tablets",

    # Audio, TV and photo
    "audio-tv-en-foto": "Audio, TV en Foto",
    "audio-tv-en-foto/televisies": "Televisies",
    "audio-tv-en-foto/luidsprekers": "Luidsprekers",
    "audio-tv-en-foto/koptelefoons": "Koptelefoons",

    # Mobile phones
    "telecommunicatie/mobiele-telefoons": "Mobiele Telefoons",
    "telecommunicatie/mobiele-telefoons-apple-iphone": "Apple iPhone",
    "telecommunicatie/mobiele-telefoons-samsung": "Samsung Phones",

    # Game consoles and games
    "spelcomputers-en-games": "Spelcomputers en Games",
    "spelcomputers-en-games/playstation": "PlayStation",
    "spelcomputers-en-games/xbox": "Xbox",
    "spelcomputers-en-games/nintendo": "Nintendo",

    # Home and garden
    "huis-en-inrichting": "Huis en Inrichting",
    "huis-en-inrichting/meubels": "Meubels",
    "tuin-en-terras": "Tuin en Terras",

    # Bicycles and mopeds
    "fietsen-en-brommers": "Fietsen en Brommers",
    "fietsen-en-brommers/fietsen-heren": "Heren Fietsen",
    "fietsen-en-brommers/fietsen-dames": "Dames Fietsen",

    # Fashion and accessories
    "kleding-dames": "Kleding Dames",
    "kleding-heren": "Kleding Heren",
    "sieraden-tassen-en-uiterlijk": "Sieraden, Tassen en Uiterlijk",
}
# Category slugs scraped by default when no categories are specified.
DEFAULT_CATEGORIES = [
    "computers-en-software/windows-laptops",
    "telecommunicatie/mobiele-telefoons-apple-iphone",
]
# CSS selectors, keyed by the role each selector plays.
SELECTORS = {
    "listing_container": ".hz-Listing",
    "listing_link": 'a[href*="/v/"]',
    "title": "h3",
    "wait_for_content": ".hz-Listing",
}