Best Practices
Rate limiting and error handling best practices for using proxies effectively.
On this page
Rate Limiting and Request Management
Proper rate limiting is crucial for maintaining good relationships with target websites and avoiding blocks.
Understanding Rate Limits
Different websites have different tolerance levels:
- E-commerce sites: 1-5 requests per second
- Social media platforms: 0.5-2 requests per second
- News sites: 2-10 requests per second
- APIs: Varies widely, check documentation
Implementation Strategies
python
import time
from datetime import datetime, timedelta
class RateLimiter:
    """Sliding-window rate limiter.

    Allows at most `max_requests` calls per `time_window` seconds and
    sleeps just long enough when the window is full.
    """

    def __init__(self, max_requests, time_window):
        # max_requests: maximum number of requests allowed per window
        # time_window: window length in seconds
        self.max_requests = max_requests
        self.time_window = time_window
        self.requests = []  # timestamps of requests inside the current window

    def wait_if_needed(self):
        """Block until a request may be issued, then record its timestamp."""
        now = datetime.now()
        window = timedelta(seconds=self.time_window)

        # Remove old requests outside the time window.
        self.requests = [req_time for req_time in self.requests
                         if now - req_time < window]

        if len(self.requests) >= self.max_requests:
            # Sleep until the oldest request in the window expires.
            sleep_time = self.time_window - (now - self.requests[0]).total_seconds()
            if sleep_time > 0:
                time.sleep(sleep_time)
            # Re-read the clock and re-prune after sleeping: the original
            # recorded the stale pre-sleep timestamp, which let bursts
            # slightly exceed the configured limit.
            now = datetime.now()
            self.requests = [req_time for req_time in self.requests
                             if now - req_time < window]

        self.requests.append(now)
# Usage: 5 requests per 10 seconds
rate_limiter = RateLimiter(max_requests=5, time_window=10)

for url in urls:
    rate_limiter.wait_if_needed()
    response = requests.get(url, proxies=proxies)
    # Process response
python
import time
import random
from collections import defaultdict
from datetime import datetime, timedelta
class AdvancedRateLimiter:
    """Rate limiter over a pool of proxies.

    Each proxy may serve at most `requests_per_proxy_per_minute` requests
    per rolling minute; `get_proxy_with_rate_limit` hands out the first
    proxy that still has capacity.
    """

    def __init__(self, proxies, requests_per_proxy_per_minute=30):
        # proxies: list of proxy configurations (e.g. requests-style dicts)
        self.proxies = proxies
        self.max_requests = requests_per_proxy_per_minute
        # History is keyed by proxy *index*, not the proxy object itself:
        # requests-style proxy dicts are unhashable and would raise
        # TypeError when used as defaultdict keys (bug in the original).
        self.request_history = defaultdict(list)

    def get_proxy_with_rate_limit(self):
        """Return a proxy under its per-minute limit, blocking until one frees up."""
        # A loop replaces the original's unbounded recursion, which could
        # hit the interpreter recursion limit under sustained saturation.
        while True:
            now = datetime.now()
            minute_ago = now - timedelta(minutes=1)

            # Find a proxy that hasn't exceeded its rate limit.
            for index, proxy in enumerate(self.proxies):
                # Clean requests older than one minute.
                self.request_history[index] = [
                    req_time for req_time in self.request_history[index]
                    if req_time > minute_ago
                ]
                # Check if this proxy can be used.
                if len(self.request_history[index]) < self.max_requests:
                    self.request_history[index].append(now)
                    return proxy

            # All proxies at limit, wait a bit and retry.
            time.sleep(2)
# Usage
proxies_list = [
    {'http': 'http://user1:pass1@proxy1:port1'},
    {'http': 'http://user2:pass2@proxy2:port2'},
    {'http': 'http://user3:pass3@proxy3:port3'},
]
rate_limiter = AdvancedRateLimiter(proxies_list)

for url in urls:
    proxy = rate_limiter.get_proxy_with_rate_limit()
    response = requests.get(url, proxies=proxy, timeout=10)

Best Practices for Rate Limiting
- Start Slow: Begin with conservative limits and gradually increase
- Add Randomization: Use random delays between requests (0.5-2 seconds)
- Monitor Response Codes: 429 means you're going too fast
- Respect robots.txt: Always check and honor crawl delays
- Use Multiple Proxies: Distribute load across different IPs
- Implement Backoff: If blocked, wait progressively longer before retrying
Robust Error Handling
Proper error handling is essential for building reliable proxy-based applications.
Common Proxy Errors
Comprehensive Error Handling
python
import requests
import time
from requests.exceptions import (
ProxyError, SSLError, ConnectTimeout,
ReadTimeout, ConnectionError, RequestException
)
class ProxyErrorHandler:
    """Wraps requests.get with retries, backoff and per-error logging
    for proxied requests."""

    def __init__(self, max_retries=3, retry_delay=5):
        # max_retries: number of attempts before giving up
        # retry_delay: base delay in seconds, scaled linearly per attempt
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.logger = self.setup_logger()

    def setup_logger(self):
        """Return a logger with basic INFO-level configuration."""
        import logging
        logging.basicConfig(level=logging.INFO)
        return logging.getLogger(__name__)

    def make_request_with_retry(self, url, proxies):
        """GET `url` through `proxies`, retrying up to `max_retries` times.

        Returns the Response on success (or any unhandled non-error
        status); returns None on a 4xx client error or when all retries
        are exhausted.
        """
        last_exception = None

        for attempt in range(self.max_retries):
            try:
                response = requests.get(
                    url,
                    proxies=proxies,
                    timeout=(10, 30),  # (connection timeout, read timeout)
                    verify=True,
                    allow_redirects=True
                )

                # Check status code and handle accordingly
                if response.status_code == 200:
                    return response

                if response.status_code == 429:
                    # Rate limited: back off progressively before retrying.
                    self.logger.warning("Rate limited (429) on %s", url)
                    time.sleep(self.retry_delay * (attempt + 1))
                    continue
                elif response.status_code == 403:
                    self.logger.warning("Forbidden (403) on %s", url)
                    # Switch to different proxy here
                    continue
                elif response.status_code in [502, 503, 504]:
                    self.logger.warning("Server error (%s) on %s",
                                        response.status_code, url)
                    time.sleep(self.retry_delay)
                    continue
                elif response.status_code >= 400:
                    self.logger.error("Client error (%s) on %s",
                                      response.status_code, url)
                    return None

                # Other statuses (e.g. informational) pass through unchanged.
                return response

            except ConnectTimeout:
                self.logger.warning("Connection timeout on %s", url)
                last_exception = "Connection timeout"
            except ReadTimeout:
                self.logger.warning("Read timeout on %s", url)
                last_exception = "Read timeout"
            except ProxyError as e:
                self.logger.warning("Proxy error on %s: %s", url, e)
                last_exception = f"Proxy error: {e}"
                # Switch proxy here
            except SSLError as e:
                self.logger.warning("SSL error on %s: %s", url, e)
                last_exception = f"SSL error: {e}"
            except RequestException as e:
                self.logger.warning("Request error on %s: %s", url, e)
                last_exception = f"Request error: {e}"

            # Wait before retry
            if attempt < self.max_retries - 1:
                time.sleep(self.retry_delay * (attempt + 1))

        self.logger.error("All %s attempts failed for %s (last error: %s)",
                          self.max_retries, url, last_exception)
        return None

# Usage
error_handler = ProxyErrorHandler(max_retries=5, retry_delay=3)
def safe_request(url, proxies):
    return error_handler.make_request_with_retry(url, proxies)

Found an issue? Let us know on Discord
Go to Dashboard