Examples¶
The following complete examples demonstrate some of the features of aiohttp-client-cache. They can also be found in the examples/ folder on GitHub.
Expiration based on URL patterns¶
An example of setting expiration based on URL patterns.
Example code
import asyncio
from datetime import timedelta

from aiohttp_client_cache import CachedSession, SQLiteBackend

default_expire_after = 60 * 60  # By default, cached responses expire in an hour
urls_expire_after = {
    'httpbin.org/image': timedelta(days=7),  # Requests for this base URL will expire in a week
    '*.fillmurray.com': -1,  # Requests matching this pattern will never expire
}
urls = [
    'https://httpbin.org/get',  # Will expire in an hour
    'https://httpbin.org/image/jpeg',  # Will expire in a week
    'http://www.fillmurray.com/460/300',  # Will never expire
]


async def main():
    cache = SQLiteBackend(
        cache_name='~/.cache/aiohttp-requests.db',
        expire_after=default_expire_after,
        urls_expire_after=urls_expire_after,
    )
    async with CachedSession(cache=cache) as session:
        tasks = [asyncio.create_task(session.get(url)) for url in urls]
        return await asyncio.gather(*tasks)


if __name__ == "__main__":
    original_responses = asyncio.run(main())
    cached_responses = asyncio.run(main())
    for response in cached_responses:
        expires = response.expires.isoformat() if response.expires else 'Never'
        print(f'{response.url}: {expires}')
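To confirm that the patterns behaved as intended, you can also check whether a given response was served from the cache. Below is a minimal sketch, assuming cached responses come back as CachedResponse instances (the same type check used in the logging example further down); the check_cache_hit function name is just for illustration:

import asyncio

from aiohttp_client_cache import CachedResponse, CachedSession, SQLiteBackend


async def check_cache_hit(url):
    """Request a URL twice and report whether the second response was a cache hit"""
    async with CachedSession(cache=SQLiteBackend('~/.cache/aiohttp-requests.db')) as session:
        await session.get(url)  # First request is fetched over the network and cached
        response = await session.get(url)  # Second request should be served from the cache
        print(f'{url} cached: {isinstance(response, CachedResponse)}')


asyncio.run(check_cache_hit('https://httpbin.org/get'))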
Precaching site links¶
An example that fetches and caches the content of a given web page, and all links found on that page.
Usage: ./precache.py <url>
Example:
$ # Run twice and note stats before and after
$ ./precache.py https://www.nytimes.com
Found 102 links
Completed run in 6.195 seconds and cached 53.570 MB
$ ./precache.py https://www.nytimes.com
Found 102 links
Completed run in 0.436 seconds and cached 0.000 MB
Example code
import asyncio
import re
import sys
import time
import urllib.parse
from contextlib import contextmanager
from os.path import getsize

from aiohttp_client_cache import CachedSession, SQLiteBackend

CACHE_NAME = 'precache'
DEFAULT_URL = 'https://www.nytimes.com'
HREF_PATTERN = re.compile(r'href="(.*?)"')


async def precache_page_links(parent_url):
    """Fetch and cache the content of a given web page and all links found on that page"""
    # Pass CACHE_NAME so the cache file matches the one measured in get_cache_bytes()
    async with CachedSession(cache=SQLiteBackend(CACHE_NAME)) as session:
        urls = await get_page_links(session, parent_url)
        tasks = [asyncio.create_task(cache_url(session, url)) for url in urls]
        responses = await asyncio.gather(*tasks)
    return responses


async def get_page_links(session, url):
    """Get all links found in the HTML of the given web page"""
    print(f'Finding all links on page: {url}')
    links = set()
    response = await session.get(url)
    response.raise_for_status()
    html = await response.text()

    for link in HREF_PATTERN.findall(html):
        try:
            links.add(urllib.parse.urljoin(url, link))
        except Exception as e:
            print(f'Failed to add link: {link}')
            print(e)

    print(f'Found {len(links)} links')
    return links


async def cache_url(session, url):
    try:
        return await session.get(url)
    except Exception as e:
        print(e)
        return None


def get_cache_bytes():
    """Get the current size of the cache, in bytes"""
    try:
        return getsize(f'{CACHE_NAME}.sqlite')
    except Exception:
        return 0


@contextmanager
def measure_cache():
    """Measure time elapsed and size of added cache content"""
    start_time = time.perf_counter()
    start_bytes = get_cache_bytes()
    yield
    elapsed_time = time.perf_counter() - start_time
    cached_bytes = (get_cache_bytes() - start_bytes) / 1024 / 1024
    print(f'Completed run in {elapsed_time:0.3f} seconds and cached {cached_bytes:0.3f} MB')


if __name__ == '__main__':
    parent_url = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_URL
    with measure_cache():
        asyncio.run(precache_page_links(parent_url))
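Repeated runs will keep adding pages to precache.sqlite. If you give the backend an expiration time, stale entries can be pruned between runs. Below is a minimal sketch, assuming the backend's delete_expired_responses() method is available; the one-hour expire_after value is arbitrary:

import asyncio

from aiohttp_client_cache import CachedSession, SQLiteBackend


async def prune_cache():
    """Remove expired responses to keep the cache file from growing without bound"""
    # expire_after=3600 is an assumption; precache.py above doesn't set an expiration
    async with CachedSession(cache=SQLiteBackend('precache', expire_after=3600)) as session:
        await session.cache.delete_expired_responses()


asyncio.run(prune_cache())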
Logging requests¶
An example of testing the cache to verify that it isn't making more requests than expected.
Example code
import asyncio
from contextlib import asynccontextmanager
from logging import basicConfig, getLogger
from unittest.mock import patch

from aiohttp import ClientSession
from aiohttp_client_cache import CachedResponse, CachedSession, SQLiteBackend

basicConfig(level='INFO')
logger = getLogger('aiohttp_client_cache.examples')
# Uncomment for more verbose debug output
# getLogger('aiohttp_client_cache').setLevel('DEBUG')


@asynccontextmanager
async def log_requests():
    """Context manager that mocks and logs all non-cached requests"""

    async def mock_response(*args, **kwargs):
        return CachedResponse(method='GET', reason='OK', status=200, url='url', version='1.1')

    with patch.object(ClientSession, '_request', side_effect=mock_response) as mock_request:
        async with CachedSession(cache=SQLiteBackend('cache-test.sqlite')) as session:
            await session.cache.clear()
            yield session
            cached_responses = [v async for v in session.cache.responses.values()]

    logger.debug('All calls to ClientSession._request():')
    logger.debug(mock_request.mock_calls)
    logger.info(f'Responses cached: {len(cached_responses)}')
    logger.info(f'Requests sent: {mock_request.call_count}')


async def main():
    """Example usage; replace with any other requests you want to test"""
    async with log_requests() as session:
        for i in range(10):
            response = await session.get('http://httpbin.org/get')
            logger.debug(f'Response {i}: {type(response).__name__}')


if __name__ == '__main__':
    asyncio.run(main())
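If some requests in a test run should bypass the cache entirely, the session can be temporarily disabled. Below is a minimal sketch, assuming CachedSession.disabled() is available as described in the library's user guide:

import asyncio

from aiohttp_client_cache import CachedSession, SQLiteBackend


async def main():
    async with CachedSession(cache=SQLiteBackend('cache-test.sqlite')) as session:
        await session.get('http://httpbin.org/get')  # Cached (or served from the cache)
        async with session.disabled():
            await session.get('http://httpbin.org/get')  # Always sent over the network


asyncio.run(main())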