2023-11-16
The SQLAlchemy docs for relationship loading techniques are great and have some useful examples.
This is a TLDR for those in a hurry who can never remember the difference between a subqueryload
and a selectinload
.
from __future__ import annotations
from sqlalchemy import (
ForeignKey,
create_engine,
select,
)
from sqlalchemy.orm import (
DeclarativeBase,
Mapped,
Session,
mapped_column,
relationship,
lazyload,
joinedload,
subqueryload,
selectinload,
raiseload,
noload,
)
class Base(DeclarativeBase):
pass
class User(Base):
__tablename__ = "user"
user_id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str]
baskets: Mapped[list[Basket]] = relationship(lazy="raise")
class Basket(Base):
__tablename__ = "basket"
basket_id: Mapped[int] = mapped_column(primary_key=True)
user_id: Mapped[int] = mapped_column(ForeignKey(User.user_id))
price: Mapped[int]
engine = create_engine("sqlite://", echo=True)
Base.metadata.create_all(engine)
with Session(engine) as session:
users = [
User(
name="ben",
baskets=[Basket(price=1), Basket(price=2)],
),
User(
name="si",
baskets=[Basket(price=3), Basket(price=4)],
),
User(
name="si",
baskets=[Basket(price=5)],
),
]
session.add_all(users)
session.commit()
relationship(lazy="raise")
This should probably be the default as it forces you to pick a loading technique at query time.
The N+1 (where N is the number of Users) looking bit of code we're going to write at is:
qry = (
select(User)
.where(User.name == "si")
.options(someloadingtechnique(User.baskets))
)
for user in session.execute(qry).unique().scalars(): # Point A
print(f"User: {user.user_id}")
for basket in user.baskets: # Point B
print(f"Basket {basket.basket_id}")
We're going to swap out someloadingtechnique
with each of SQLAlchemy's and inspect the SQL.
.options(lazyload(User.baskets)) # SQLAlchemy's default on a relationship
-- At point A
SELECT ...
FROM user
WHERE user.name = ?
-- Each time at point B - classic N+1!
SELECT ...
FROM basket
WHERE ? = basket.user_id
.options(joinedload(User.baskets)) # I'm informed this is similar to Django's select_related
-- At point A, nothing at point B
SELECT ...
FROM user
LEFT OUTER JOIN basket ON user.user_id = basket.user_id
WHERE user.name = ?
.options(subqueryload(User.baskets))
-- At point A, nothing at point B
SELECT ...
FROM (
SELECT user.user_id AS user_user_id
FROM user
WHERE user.name = ?
) AS anon_1
JOIN basket ON anon_1.user_user_id = basket.user_id
.options(selectinload(User.baskets)) # I'm informed this is similar to Django's prefetch_related
-- Both at point A!
SELECT ...
FROM user
WHERE user.name = ?
SELECT ...
FROM user
JOIN basket ON user.user_id = basket.user_id
WHERE user.user_id IN (?, ?) -- The ids we SELECT-ed above earlier
-- SQLAlchemy "joins" the results together in memory
.options(raiseload(User.baskets))
# At point B, raises an error, no second query performed
.options(noload(User.baskets))
# At point B, no second query performed
# user.baskets is just an empty list