2023-11-16

SQLAlchemy relationship loading techniques TLDR - v2

The SQLAlchemy docs for relationship loading techniques are great and have some useful examples.

This is a TLDR for those in a hurry who can never remember the difference between a subqueryload and a selectinload .


Users have many Baskets, simple! Here's our set up code.
from __future__ import annotations

from sqlalchemy import (
    ForeignKey,
    create_engine,
    select,
)
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    Session,
    mapped_column,
    relationship,
    lazyload,
    joinedload,
    subqueryload,
    selectinload,
    raiseload,
    noload,
)


class Base(DeclarativeBase):
    pass


class User(Base):
    __tablename__ = "user"
    user_id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str]
    baskets: Mapped[list[Basket]] = relationship(lazy="raise")


class Basket(Base):
    __tablename__ = "basket"
    basket_id: Mapped[int] = mapped_column(primary_key=True)
    user_id: Mapped[int] = mapped_column(ForeignKey(User.user_id))
    price: Mapped[int]


engine = create_engine("sqlite://", echo=True)
Base.metadata.create_all(engine)

with Session(engine) as session:
    users = [
        User(
            name="ben",
            baskets=[Basket(price=1), Basket(price=2)],
        ),
        User(
            name="si",
            baskets=[Basket(price=3), Basket(price=4)],
        ),
        User(
            name="si",
            baskets=[Basket(price=5)],
        ),
    ]
    session.add_all(users)
    session.commit()


Note from the above:
relationship(lazy="raise")
This should probably be the default as it forces you to pick a loading technique at query time.

The N+1 (where N is the number of Users) looking bit of code we're going to write at is:

qry = (
    select(User)
    .where(User.name == "si")
    .options(someloadingtechnique(User.baskets))
)

for user in session.execute(qry).unique().scalars():  # Point A
    print(f"User: {user.user_id}")

    for basket in user.baskets:  # Point B
        print(f"Basket {basket.basket_id}")

We're going to swap out someloadingtechnique with each of SQLAlchemy's and inspect the SQL.


.options(lazyload(User.baskets))  # SQLAlchemy's default on a relationship
-- At point A
SELECT ...
FROM user
WHERE user.name = ?
-- Each time at point B - classic N+1!
SELECT ...
FROM basket
WHERE ? = basket.user_id

.options(joinedload(User.baskets))  # I'm informed this is similar to Django's select_related
-- At point A, nothing at point B
SELECT ...
FROM user
LEFT OUTER JOIN basket ON user.user_id = basket.user_id
WHERE user.name = ?

.options(subqueryload(User.baskets))
-- At point A, nothing at point B
SELECT ...
FROM (
    SELECT user.user_id AS user_user_id
    FROM user
    WHERE user.name = ?
) AS anon_1
JOIN basket ON anon_1.user_user_id = basket.user_id

.options(selectinload(User.baskets))  # I'm informed this is similar to Django's prefetch_related
-- Both at point A!
SELECT ...
FROM user
WHERE user.name = ?

SELECT ...
FROM user
JOIN basket ON user.user_id = basket.user_id
WHERE user.user_id IN (?, ?) -- The ids we SELECT-ed above earlier

-- SQLAlchemy "joins" the results together in memory

.options(raiseload(User.baskets))

# At point B, raises an error, no second query performed

.options(noload(User.baskets))

# At point B, no second query performed
# user.baskets is just an empty list