Examples¶
The following complete examples demonstrate some of the features of aiohttp-client-cache. They can also be found in the examples/ folder on GitHub.
Expiration based on URL patterns¶
An example of setting expiration based on URL patterns.
Example code
import asyncio
from datetime import timedelta

from aiohttp_client_cache import CachedSession, SQLiteBackend

default_expire_after = 60 * 60  # By default, cached responses expire in an hour
urls_expire_after = {
    'httpbin.org/image': timedelta(days=7),  # Requests for this base URL will expire in a week
    '*.fillmurray.com': -1,  # Requests matching this pattern will never expire
}
urls = [
    'https://httpbin.org/get',  # Will expire in an hour
    'https://httpbin.org/image/jpeg',  # Will expire in a week
    'http://www.fillmurray.com/460/300',  # Will never expire
]


async def main():
    cache = SQLiteBackend(
        cache_name='~/.cache/aiohttp-requests.db',
        expire_after=default_expire_after,
        urls_expire_after=urls_expire_after,
    )
    async with CachedSession(cache=cache) as session:
        tasks = [asyncio.create_task(session.get(url)) for url in urls]
        return await asyncio.gather(*tasks)


if __name__ == "__main__":
    original_responses = asyncio.run(main())
    cached_responses = asyncio.run(main())
    for response in cached_responses:
        expires = response.expires.isoformat() if response.expires else 'Never'
        print(f'{response.url}: {expires}')
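To confirm that the patterns behaved as intended, you can also check whether a given response was served from the cache. Below is a minimal sketch, assuming cached responses come back as CachedResponse instances (the same type check used in the logging example further down); the check_cache_hit function name is just for illustration:

import asyncio

from aiohttp_client_cache import CachedResponse, CachedSession, SQLiteBackend


async def check_cache_hit(url):
    """Request a URL twice and report whether the second response was a cache hit"""
    async with CachedSession(cache=SQLiteBackend('~/.cache/aiohttp-requests.db')) as session:
        await session.get(url)  # First request is fetched over the network and cached
        response = await session.get(url)  # Second request should be served from the cache
        print(f'{url} cached: {isinstance(response, CachedResponse)}')


asyncio.run(check_cache_hit('https://httpbin.org/get'))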
Precaching site links¶
An example that fetches and caches the content of a given web page, and all links found on that page.
Usage: ./precache.py <url>
Example:
$ # Run twice and note stats before and after
$ ./precache.py https://www.nytimes.com
Found 102 links
Completed run in 6.195 seconds and cached 53.570 MB
$ ./precache.py https://www.nytimes.com
Found 102 links
Completed run in 0.436 seconds and cached 0.000 MB
Example code
import asyncio
import re
import sys
import time
import urllib.parse
from contextlib import contextmanager
from os.path import getsize

from aiohttp_client_cache import CachedSession, SQLiteBackend

CACHE_NAME = 'precache'
DEFAULT_URL = 'https://www.nytimes.com'
HREF_PATTERN = re.compile(r'href="(.*?)"')


async def precache_page_links(parent_url):
    """Fetch and cache the content of a given web page and all links found on that page"""
    # Pass CACHE_NAME so the cache file matches the one measured in get_cache_bytes()
    async with CachedSession(cache=SQLiteBackend(CACHE_NAME)) as session:
        urls = await get_page_links(session, parent_url)
        tasks = [asyncio.create_task(cache_url(session, url)) for url in urls]
        responses = await asyncio.gather(*tasks)
    return responses


async def get_page_links(session, url):
    """Get all links found in the HTML of the given web page"""
    print(f'Finding all links on page: {url}')
    links = set()
    response = await session.get(url)
    response.raise_for_status()
    html = await response.text()

    for link in HREF_PATTERN.findall(html):
        try:
            links.add(urllib.parse.urljoin(url, link))
        except Exception as e:
            print(f'Failed to add link: {link}')
            print(e)

    print(f'Found {len(links)} links')
    return links


async def cache_url(session, url):
    try:
        return await session.get(url)
    except Exception as e:
        print(e)
        return None


def get_cache_bytes():
    """Get the current size of the cache, in bytes"""
    try:
        return getsize(f'{CACHE_NAME}.sqlite')
    except Exception:
        return 0


@contextmanager
def measure_cache():
    """Measure time elapsed and size of added cache content"""
    start_time = time.perf_counter()
    start_bytes = get_cache_bytes()
    yield
    elapsed_time = time.perf_counter() - start_time
    cached_bytes = (get_cache_bytes() - start_bytes) / 1024 / 1024
    print(f'Completed run in {elapsed_time:0.3f} seconds and cached {cached_bytes:0.3f} MB')


if __name__ == '__main__':
    parent_url = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_URL
    with measure_cache():
        asyncio.run(precache_page_links(parent_url))
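Repeated runs will keep adding pages to precache.sqlite. If you give the backend an expiration time, stale entries can be pruned between runs. Below is a minimal sketch, assuming the backend's delete_expired_responses() method is available; the one-hour expire_after value is arbitrary:

import asyncio

from aiohttp_client_cache import CachedSession, SQLiteBackend


async def prune_cache():
    """Remove expired responses to keep the cache file from growing without bound"""
    # expire_after=3600 is an assumption; precache.py above doesn't set an expiration
    async with CachedSession(cache=SQLiteBackend('precache', expire_after=3600)) as session:
        await session.cache.delete_expired_responses()


asyncio.run(prune_cache())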
Logging requests¶
An example of testing the cache to verify that it isn't making more requests than expected.
Example code
import asyncio
from contextlib import asynccontextmanager
from logging import basicConfig, getLogger
from unittest.mock import patch

from aiohttp import ClientSession
from aiohttp_client_cache import CachedResponse, CachedSession, SQLiteBackend

basicConfig(level='INFO')
logger = getLogger('aiohttp_client_cache.examples')
# Uncomment for more verbose debug output
# getLogger('aiohttp_client_cache').setLevel('DEBUG')


@asynccontextmanager
async def log_requests():
    """Context manager that mocks and logs all non-cached requests"""

    async def mock_response(*args, **kwargs):
        return CachedResponse(method='GET', reason='OK', status=200, url='url', version='1.1')

    with patch.object(ClientSession, '_request', side_effect=mock_response) as mock_request:
        async with CachedSession(cache=SQLiteBackend('cache-test.sqlite')) as session:
            await session.cache.clear()
            yield session
            cached_responses = [v async for v in session.cache.responses.values()]

    logger.debug('All calls to ClientSession._request():')
    logger.debug(mock_request.mock_calls)
    logger.info(f'Responses cached: {len(cached_responses)}')
    logger.info(f'Requests sent: {mock_request.call_count}')


async def main():
    """Example usage; replace with any other requests you want to test"""
    async with log_requests() as session:
        for i in range(10):
            response = await session.get('http://httpbin.org/get')
            logger.debug(f'Response {i}: {type(response).__name__}')


if __name__ == '__main__':
    asyncio.run(main())
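If some requests in a test run should bypass the cache entirely, the session can be temporarily disabled. Below is a minimal sketch, assuming CachedSession.disabled() is available as described in the library's user guide:

import asyncio

from aiohttp_client_cache import CachedSession, SQLiteBackend


async def main():
    async with CachedSession(cache=SQLiteBackend('cache-test.sqlite')) as session:
        await session.get('http://httpbin.org/get')  # Cached (or served from the cache)
        async with session.disabled():
            await session.get('http://httpbin.org/get')  # Always sent over the network


asyncio.run(main())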