Examples
The following are complete examples demonstrating some of the features of aiohttp-client-cache. They can also be found in the examples/ folder on GitHub.
Expiration based on URL patterns
An example of setting expiration based on URL patterns
Example code
#!/usr/bin/env python3
"""
import asyncio
from datetime import timedelta
from aiohttp_client_cache import CachedSession, SQLiteBackend
default_expire_after = 60 * 60  # By default, cached responses expire in an hour
urls_expire_after = {
    'httpbin.org/image': timedelta(days=7),  # Requests for this base URL will expire in a week
    '*.fillmurray.com': -1,  # Requests matching this pattern will never expire
}
urls = [
    'https://httpbin.org/get',  # Will expire in an hour
    'https://httpbin.org/image/jpeg',  # Will expire in a week
    'http://www.fillmurray.com/460/300',  # Will never expire
]


async def main():
    cache = SQLiteBackend(
        cache_name='~/.cache/aiohttp-requests.db',
        expire_after=default_expire_after,
        urls_expire_after=urls_expire_after,
    )
    async with CachedSession(cache=cache) as session:
        tasks = [asyncio.create_task(session.get(url)) for url in urls]
        return await asyncio.gather(*tasks)


if __name__ == "__main__":
    original_responses = asyncio.run(main())
    cached_responses = asyncio.run(main())
    for response in cached_responses:
        expires = response.expires.isoformat() if response.expires else 'Never'
        print(f'{response.url}: {expires}')
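To check afterwards which responses were served from the cache rather than the network, you can inspect the response type: cache hits are returned as CachedResponse objects. A minimal sketch building on the example above (the check_cache_hits helper is our own, not part of the library):

import asyncio

from aiohttp_client_cache import CachedResponse, CachedSession, SQLiteBackend


async def check_cache_hits(urls):
    """Report whether each URL is served from the cache created by the example above"""
    cache = SQLiteBackend(cache_name='~/.cache/aiohttp-requests.db')
    async with CachedSession(cache=cache) as session:
        for url in urls:
            response = await session.get(url)
            # Cache hits come back as CachedResponse; fresh requests do not
            print(f'{url} from cache: {isinstance(response, CachedResponse)}')


asyncio.run(check_cache_hits(['https://httpbin.org/get', 'https://httpbin.org/image/jpeg']))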
Precaching site links
An example that fetches and caches the content of a given web page, and all links found on that page
Usage: ./precache.py <url>
Example:
$ # Run twice and note stats before and after
$ ./precache.py https://www.nytimes.com
Found 102 links
Completed run in 6.195 seconds and cached 53.570 MB
$ ./precache.py https://www.nytimes.com
Found 102 links
Completed run in 0.436 seconds and cached 0.000 MB
Example code
#!/usr/bin/env python3
import asyncio
import re
import sys
import time
import urllib.parse
from contextlib import contextmanager
from os.path import getsize
from aiohttp_client_cache import CachedSession, SQLiteBackend
CACHE_NAME = 'precache'
DEFAULT_URL = 'https://www.nytimes.com'
HREF_PATTERN = re.compile(r'href="(.*?)"')
async def precache_page_links(parent_url):
    """Fetch and cache the content of a given web page and all links found on that page"""
    # Pass CACHE_NAME so get_cache_bytes() below measures the correct file
    async with CachedSession(cache=SQLiteBackend(CACHE_NAME)) as session:
        urls = await get_page_links(session, parent_url)
        tasks = [asyncio.create_task(cache_url(session, url)) for url in urls]
        responses = await asyncio.gather(*tasks)
        return responses


async def get_page_links(session, url):
    """Get all links found in the HTML of the given web page"""
    print(f'Finding all links on page: {url}')
    links = set()
    response = await session.get(url)
    response.raise_for_status()
    html = await response.text()

    for link in HREF_PATTERN.findall(html):
        try:
            links.add(urllib.parse.urljoin(url, link))
        except Exception as e:
            print(f'Failed to add link: {link}')
            print(e)

    print(f'Found {len(links)} links')
    return links


async def cache_url(session, url):
    try:
        return await session.get(url)
    except Exception as e:
        print(e)
        return None


def get_cache_bytes():
    """Get the current size of the cache, in bytes"""
    try:
        return getsize(f'{CACHE_NAME}.sqlite')
    except Exception:
        return 0


@contextmanager
def measure_cache():
    """Measure time elapsed and size of added cache content"""
    start_time = time.perf_counter()
    start_bytes = get_cache_bytes()

    yield

    elapsed_time = time.perf_counter() - start_time
    cached_bytes = (get_cache_bytes() - start_bytes) / 1024 / 1024
    print(f'Completed run in {elapsed_time:0.3f} seconds and cached {cached_bytes:0.3f} MB')


if __name__ == "__main__":
    parent_url = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_URL
    with measure_cache():
        asyncio.run(precache_page_links(parent_url))
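Fetching every link on a page at once can open a large number of simultaneous connections. If that is a concern, the tasks can be throttled with a semaphore; a sketch under that assumption (bounded_cache_url is our own helper, not part of the library):

import asyncio


async def bounded_cache_url(session, semaphore, url):
    """Like cache_url() above, but allows at most N concurrent requests"""
    async with semaphore:
        try:
            return await session.get(url)
        except Exception as e:
            print(e)
            return None


# Usage inside precache_page_links(), in place of the create_task() line:
#   semaphore = asyncio.Semaphore(10)
#   tasks = [asyncio.create_task(bounded_cache_url(session, semaphore, url)) for url in urls]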
Logging requests
An example of testing the cache to prove that it’s not making more requests than expected.
Example code
#!/usr/bin/env python3
import asyncio
from contextlib import asynccontextmanager
from logging import basicConfig, getLogger
from unittest.mock import patch
from aiohttp import ClientSession
from aiohttp_client_cache import CachedResponse, CachedSession, SQLiteBackend
basicConfig(level='INFO')
logger = getLogger('aiohttp_client_cache.examples')
# Uncomment for more verbose debug output
# getLogger('aiohttp_client_cache').setLevel('DEBUG')
@asynccontextmanager
async def log_requests():
    """Context manager that mocks and logs all non-cached requests"""

    async def mock_response(*args, **kwargs):
        return CachedResponse('GET', 'url', 200, 'url', None)

    with patch.object(ClientSession, '_request', side_effect=mock_response) as mock_request:
        async with CachedSession(cache=SQLiteBackend('cache-test.sqlite')) as session:
            await session.cache.clear()
            yield session
            cached_responses = [v async for v in session.cache.responses.values()]

    logger.debug('All calls to ClientSession._request():')
    logger.debug(mock_request.mock_calls)
    logger.info(f'Responses cached: {len(cached_responses)}')
    logger.info(f'Requests sent: {mock_request.call_count}')


async def main():
    """Example usage; replace with any other requests you want to test"""
    async with log_requests() as session:
        for i in range(10):
            response = await session.get('http://httpbin.org/get')
            logger.debug(f'Response {i}: {type(response).__name__}')


if __name__ == '__main__':
    asyncio.run(main())
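The same pattern can be turned into an automated test. A sketch assuming pytest and pytest-asyncio are installed; the test name and the exact expected count are our own, based on the behavior the example above is meant to demonstrate:

import pytest
from unittest.mock import patch

from aiohttp import ClientSession
from aiohttp_client_cache import CachedResponse, CachedSession, SQLiteBackend


@pytest.mark.asyncio
async def test_repeated_requests_are_cached():
    async def mock_response(*args, **kwargs):
        return CachedResponse('GET', 'url', 200, 'url', None)

    with patch.object(ClientSession, '_request', side_effect=mock_response) as mock_request:
        async with CachedSession(cache=SQLiteBackend('cache-test.sqlite')) as session:
            await session.cache.clear()
            for _ in range(10):
                await session.get('http://httpbin.org/get')

    # 10 requests to the same URL should send only a single real request
    assert mock_request.call_count == 1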