Examples

The following are complete examples that demonstrate some of the features of aiohttp-client-cache. These can also be found in the examples/ folder on GitHub.

Expiration based on URL patterns

An example of setting expiration based on URL patterns.

Example code

#!/usr/bin/env python3
"""
import asyncio
from datetime import timedelta

from aiohttp_client_cache import CachedSession, SQLiteBackend

default_expire_after = 60 * 60               # By default, cached responses expire in an hour
urls_expire_after = {
    'httpbin.org/image': timedelta(days=7),  # Requests for this base URL will expire in a week
    '*.fillmurray.com': -1,                  # Requests matching this pattern will never expire
}
urls = [
    'https://httpbin.org/get',               # Will expire in an hour
    'https://httpbin.org/image/jpeg',        # Will expire in a week
    'http://www.fillmurray.com/460/300',     # Will never expire
]


async def main():
    cache = SQLiteBackend(
        cache_name='~/.cache/aiohttp-requests.db',
        expire_after=default_expire_after,
        urls_expire_after=urls_expire_after,
    )

    async with CachedSession(cache=cache) as session:
        tasks = [asyncio.create_task(session.get(url)) for url in urls]
        return await asyncio.gather(*tasks)


if __name__ == "__main__":
    original_responses = asyncio.run(main())
    cached_responses = asyncio.run(main())
    for response in cached_responses:
        expires = response.expires.isoformat() if response.expires else 'Never'
        print(f'{response.url}: {expires}')
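
To see which of these responses were served from the cache on the second run, one option is to check the response type: responses read back from the cache are CachedResponse instances, which is the same distinction the logging example further down relies on. Below is a minimal sketch, not part of the original example; check_cache_hits is just an illustrative name.

import asyncio

from aiohttp_client_cache import CachedResponse, CachedSession, SQLiteBackend


async def check_cache_hits(urls):
    """Print whether each response came from the cache or the network"""
    # Assumes the same cache file as the example above
    async with CachedSession(cache=SQLiteBackend('~/.cache/aiohttp-requests.db')) as session:
        for url in urls:
            response = await session.get(url)
            source = 'cache' if isinstance(response, CachedResponse) else 'network'
            print(f'{response.url}: served from {source}')


if __name__ == '__main__':
    asyncio.run(check_cache_hits(['https://httpbin.org/get']))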

Precaching site links

An example that fetches and caches the content of a given web page, and all links found on that page.

Usage: ./precache.py <url>

Example:

$ # Run twice and note stats before and after
$ ./precache.py https://www.nytimes.com
Found 102 links
Completed run in 6.195 seconds and cached 53.570 MB
$ ./precache.py https://www.nytimes.com
Found 102 links
Completed run in 0.436 seconds and cached 0.000 MB

Example code

#!/usr/bin/env python3
import asyncio
import re
import sys
import time
import urllib.parse
from contextlib import contextmanager
from os.path import getsize

from aiohttp_client_cache import CachedSession, SQLiteBackend

CACHE_NAME = 'precache'
DEFAULT_URL = 'https://www.nytimes.com'
HREF_PATTERN = re.compile(r'href="(.*?)"')


async def precache_page_links(parent_url):
    """Fetch and cache the content of a given web page and all links found on that page"""
    async with CachedSession(cache=SQLiteBackend(CACHE_NAME)) as session:
        urls = await get_page_links(session, parent_url)

        tasks = [asyncio.create_task(cache_url(session, url)) for url in urls]
        responses = await asyncio.gather(*tasks)

    return responses


async def get_page_links(session, url):
    """Get all links found in the HTML of the given web page"""
    print(f'Finding all links on page: {url}')
    links = set()
    response = await session.get(url)
    response.raise_for_status()
    html = await response.text()

    for link in HREF_PATTERN.findall(html):
        try:
            links.add(urllib.parse.urljoin(url, link))
        except Exception as e:
            print(f'Failed to add link: {link}')
            print(e)

    print(f'Found {len(links)} links')
    return links


async def cache_url(session, url):
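    """Make a request so the response gets cached, ignoring any errors"""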
    try:
        return await session.get(url)
    except Exception as e:
        print(e)
        return None


def get_cache_bytes():
    """Get the current size of the cache, in bytes"""
    try:
        return getsize(f'{CACHE_NAME}.sqlite')
    except Exception:
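        # The cache file may not exist yet (e.g., on the first run)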
        return 0


@contextmanager
def measure_cache():
    """Measure time elapsed and size of added cache content"""
    start_time = time.perf_counter()
    start_bytes = get_cache_bytes()
    yield

    elapsed_time = time.perf_counter() - start_time
    cached_bytes = (get_cache_bytes() - start_bytes) / 1024 / 1024
    print(f'Completed run in {elapsed_time:0.3f} seconds and cached {cached_bytes:0.3f} MB')


if __name__ == "__main__":
    parent_url = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_URL
    with measure_cache():
        asyncio.run(precache_page_links(parent_url))
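
After a run, you can also inspect what ended up in the cache. Here is a minimal sketch, not part of the original script, that reuses the same cache name and iterates over cache.responses.values(), the same iterator used in the logging example below; list_cached_urls is just an illustrative name.

import asyncio

from aiohttp_client_cache import CachedSession, SQLiteBackend


async def list_cached_urls(cache_name='precache'):
    """Print the URL of every response currently stored in the cache"""
    async with CachedSession(cache=SQLiteBackend(cache_name)) as session:
        responses = [r async for r in session.cache.responses.values()]
        print(f'{len(responses)} responses cached')
        for response in responses:
            print(response.url)


if __name__ == '__main__':
    asyncio.run(list_cached_urls())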

Logging requests

An example of testing the cache to verify that it’s not making more requests than expected.

Example code

#!/usr/bin/env python3
import asyncio
from contextlib import asynccontextmanager
from logging import basicConfig, getLogger
from unittest.mock import patch

from aiohttp import ClientSession

from aiohttp_client_cache import CachedResponse, CachedSession, SQLiteBackend

basicConfig(level='INFO')
logger = getLogger('aiohttp_client_cache.examples')
# Uncomment for more verbose debug output
# getLogger('aiohttp_client_cache').setLevel('DEBUG')


@asynccontextmanager
async def log_requests():
    """Context manager that mocks and logs all non-cached requests"""

    async def mock_response(*args, **kwargs):
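        # Return a stub response instead of sending a real request; only cache misses reach this mock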
        return CachedResponse('GET', 'url', 200, 'url', None)

    with patch.object(ClientSession, '_request', side_effect=mock_response) as mock_request:
        async with CachedSession(cache=SQLiteBackend('cache-test.sqlite')) as session:
            await session.cache.clear()
            yield session
            cached_responses = [v async for v in session.cache.responses.values()]

    logger.debug('All calls to ClientSession._request():')
    logger.debug(mock_request.mock_calls)

    logger.info(f'Responses cached: {len(cached_responses)}')
    logger.info(f'Requests sent: {mock_request.call_count}')


async def main():
    """Example usage; replace with any other requests you want to test"""
    async with log_requests() as session:
        for i in range(10):
            response = await session.get('http://httpbin.org/get')
            logger.debug(f'Response {i}: {type(response).__name__}')


if __name__ == '__main__':
    asyncio.run(main())
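
This works because CachedSession only falls back to ClientSession._request() when a response isn’t already cached, so the patched method intercepts (and counts) only the requests that would actually be sent over the network; cached responses are returned without ever reaching the mock.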