Skip to content

Commit

Permalink
Add migration to backfill user.pubid column
Browse files Browse the repository at this point in the history
  • Loading branch information
mtomilov committed Jan 14, 2025
1 parent 766f4d5 commit cdd4c59
Showing 1 changed file with 76 additions and 0 deletions.
76 changes: 76 additions & 0 deletions h/migrations/versions/f32200e2e496_backfill_user_pubid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""Backfill user.pubid with unique values."""

import logging
import random

import sqlalchemy as sa
from alembic import op
from sqlalchemy.orm import declarative_base, sessionmaker

# Alembic revision identifiers: this migration's id and the id of the one it follows.
revision = "f32200e2e496"
down_revision = "3bae642f2b36"

logger = logging.getLogger(__name__)

# Number of characters in each generated pubid.
USER_PUBID_LENGTH = 12
# Maximum number of users fetched and updated per batch in upgrade().
USER_BATCH_LIMIT = 1000
# Maximum attempts at generating a duplicate-free set of pubids for one batch.
USER_PUBID_RETRIES = 5


# Declarative base for the migration-local User model.
Base = declarative_base()


class User(Base):
    """Migration-local mapping of the "user" table.

    Declares just the primary key and the pubid column.
    """

    __tablename__ = "user"

    # Primary key; lets the ORM identify each row it updates.
    id = sa.Column(sa.Integer, primary_key=True)

    # The column populated by upgrade() and reset to NULL by downgrade().
    pubid = sa.Column(sa.String())


def generate(length):
    """Generate a random string of the specified length.

    Based on the generate() function from h/pubid.py.

    :param length: number of characters to produce; zero or a negative
        value yields an empty string
    :return: a string of `length` characters, each drawn from a fixed
        alphabet of digits and letters
    """
    alphabet = "123456789ABDEGJKLMNPQRVWXYZabdegijkmnopqrvwxyz"
    # Create the SystemRandom instance once instead of once per character.
    rng = random.SystemRandom()
    return "".join(rng.choice(alphabet) for _ in range(length))


def get_unique_pubids(count, length):
    """Return a set built from `count` freshly generated pubids.

    Duplicate values collapse inside the set, so the result may hold fewer
    than `count` items; callers are expected to check its size.

    :param count: how many pubids to generate
    :param length: number of characters in each pubid
    """
    pubids = set()
    for _ in range(count):
        pubids.add(generate(length))
    return pubids


def set_user_pubids(users, pubids):
    """Assign pubids to users pairwise, in iteration order.

    Pairing stops as soon as either iterable runs out; any leftover users
    are left untouched.

    :param users: iterable of objects with a writable `pubid` attribute
    :param pubids: iterable of values to assign
    """
    pairs = zip(users, pubids)
    for account, new_pubid in pairs:
        account.pubid = new_pubid


def upgrade():
    """Give a pubid to every user row whose pubid is currently NULL.

    Users are handled in batches of at most USER_BATCH_LIMIT rows, and each
    batch is committed before the next one is fetched.

    :raises RuntimeError: if USER_PUBID_RETRIES attempts in a row all fail to
        produce one distinct pubid per user in a batch
    """
    db = sessionmaker()(bind=op.get_bind())

    users_without_pubid = db.query(User).filter(User.pubid.is_(None))
    next_batch = users_without_pubid.limit(USER_BATCH_LIMIT)

    total = 0
    while True:
        # Rows updated in earlier batches no longer match the NULL filter,
        # so re-running the same query yields the next batch.
        users = next_batch.all()
        if not users:
            break

        batch_count = len(users)
        for attempt in range(1, USER_PUBID_RETRIES + 1):
            pubids = get_unique_pubids(batch_count, USER_PUBID_LENGTH)
            if len(pubids) == batch_count:
                break
            logger.warning(
                "Failed to generate %d unique pubids, retrying %d/%d",
                batch_count,
                attempt,
                USER_PUBID_RETRIES,
            )
        else:
            # for/else: only reached when no attempt hit the `break` above.
            raise RuntimeError(f"Failed to generate {batch_count} unique pubids")

        set_user_pubids(users, pubids)
        db.commit()
        total += batch_count
        logger.info("Back-filled %d user.pubid's", total)


def downgrade():
    """Undo the backfill by setting pubid to NULL on every user row."""
    connection = op.get_bind()
    db = sessionmaker()(bind=connection)

    cleared = {User.pubid: None}
    db.query(User).update(cleared)
    db.commit()

0 comments on commit cdd4c59

Please sign in to comment.