Skip to content

Commit 024e807

Browse files
committed
Initial commit of migration qdrant
0 parents  commit 024e807

File tree

4 files changed

+259
-0
lines changed

4 files changed

+259
-0
lines changed

Diff for: README.md

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
Qdrant Config Migration Script-
2+
3+
1. In the migrations folder, create a file named in the format qdrant_migrator/migrations/<index>_<message>.py, for example qdrant_migrator/migrations/001_initial_migration.py. For the next version, create a file prefixed with 002.
4+
2. Now these file executions are version controlled and you can see the versions in GUI ->
5+
a. Go to collections there you will find migrations with a single point, with a metadata version. As of now it is in version 1 both dev and prod.
6+
b. Once you run the next version 002, it will change.
7+
c. A version that has already been applied is not re-run by migrate. To make further changes, create a new file with the next index and a new script.
8+
3. There is a rollback feature also, (backward)-
9+
a. Your task is to implement the backward function in each file. For example, if v2 creates a collection, its rollback is to delete that collection. Every file should include a rollback as well.
10+
b. Some migrations may have no real rollback. For example, I was not able to find a method to remove payload_indexes in v1, so that rollback function is effectively empty. You can come back and rewrite the rollback method at any time before rolling back.
11+
c. Reference code for the file structure of qdrant_migrator/migrations/<index>_<message>.py is given below.
12+
4. How to run the code? There are 2 clis -
13+
a. python cli.py migrate --url http://your-qdrant-url --api-key your-api-key --migration-folder migrations
14+
b. python cli.py rollback --url http://your-qdrant-url --api-key your-api-key --migration-folder migrations --target-version <target_version>
15+
16+
Summing it up, steps to go forward ->
17+
18+
1. Create a version file (ref pt.1).
19+
2. Write the code for migration according to 3.c.
20+
3. Execute 4.a.
21+
4. Your Migration will be successfully completed.
22+
23+
For Backward/Rollback ->
24+
25+
1. Check its backward() function is written as expected.
26+
2. Execute 4.b.
27+
3. Your Rollback will be successfully completed.
28+
4. You can track the current version from migration collections
29+
30+
Eg- code for point 3.c.-
31+
32+
```python
33+
# Every migration file must define both `forward` and `backward`.
def forward(client):
    """Apply the migration (your forward logic goes here)."""
    client.create_collection('new_collection')


def backward(client):
    """Undo whatever `forward` did, if it needs undoing."""
    client.delete_collection('new_collection')
40+
```
41+
42+
Why to use this?
43+
44+
With this script we can manage the versions and executions of the scripts we run against Qdrant. We don't have a GUI to change Qdrant configs, so we had to do it from a notebook, and we could not keep track of who updated what in the config. This script makes that easy: we no longer need to create notebooks, and because you need the API key and URL to execute any change on a collection, it also adds a layer of access control. In addition, we can commit the config changes to the org repo, so the versions are under our control too.

Diff for: cli.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import click
2+
from migrator import migrate, rollback
3+
4+
5+
@click.group()
def cli():
    """Apply or roll back versioned Qdrant configuration migrations."""
    # The group has no logic of its own; it only hosts the sub-commands.
    # click uses the docstring above as the text shown by `--help`.
8+
9+
10+
@click.command()
@click.option("--url", prompt="Qdrant URL", help="The URL of the Qdrant instance.")
@click.option(
    "--api-key",
    prompt="API Key",
    # Do not echo the secret when it is typed at the interactive prompt.
    hide_input=True,
    help="The API key for the Qdrant instance.",
)
@click.option(
    "--migration-folder",
    prompt="Migration Folder",
    # Reject a missing or non-directory path up front, before any request is
    # sent to Qdrant, instead of failing later while listing the folder.
    type=click.Path(exists=True, file_okay=False),
    help="The folder containing migration scripts.",
)
def migrate_cmd(url, api_key, migration_folder):
    """Run the forward migrations found in the migration folder.

    Every option that is not supplied on the command line is asked for
    interactively. The collected values are handed to ``migrate`` unchanged.
    """
    migrate(url, api_key, migration_folder)
22+
23+
24+
@click.command()
@click.option("--url", prompt="Qdrant URL", help="The URL of the Qdrant instance.")
@click.option(
    "--api-key",
    prompt="API Key",
    # Do not echo the secret when it is typed at the interactive prompt.
    hide_input=True,
    help="The API key for the Qdrant instance.",
)
@click.option(
    "--migration-folder",
    prompt="Migration Folder",
    # Reject a missing or non-directory path up front, before any request is
    # sent to Qdrant, instead of failing later while listing the folder.
    type=click.Path(exists=True, file_okay=False),
    help="The folder containing migration scripts.",
)
@click.option(
    "--target-version",
    prompt="Target Version",
    type=int,
    help="The target version to rollback to.",
)
def rollback_cmd(url, api_key, migration_folder, target_version):
    """Roll migrations back down to the given target version.

    Every option that is not supplied on the command line is asked for
    interactively. The collected values are handed to ``rollback`` unchanged.
    """
    rollback(url, api_key, migration_folder, target_version)
42+
43+
44+
# Attach the sub-commands to the group under their public CLI names.
cli.add_command(migrate_cmd, name="migrate")
cli.add_command(rollback_cmd, name="rollback")

# Dispatch to click only when this file is executed as a script.
if __name__ == "__main__":
    cli()

Diff for: migrations/001_initial_migration.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from qdrant_client import models
2+
3+
4+
def forward(client):
    """Apply migration 001: retune the collection and add keyword indexes.

    Updates the HNSW and quantization settings of ``collection-name`` and
    then creates one KEYWORD payload index per field listed below.

    Args:
        client: Qdrant client the migration runner passes in.
    """
    target_collection = "collection-name"

    # vectors_config is deliberately not sent; an earlier draft (left disabled)
    # would have set size=3072, distance=COSINE, on_disk=True.
    hnsw_settings = models.HnswConfigDiff(
        payload_m=16,
        m=0,  # NOTE(review): presumably turns off the global graph - confirm
        on_disk=True,
    )
    quantization_settings = models.BinaryQuantization(
        binary=models.BinaryQuantizationConfig(
            # This mode allows to achieve the smallest memory footprint, but
            # at the cost of the search speed.
            always_ram=False,
        ),
    )
    client.update_collection(
        collection_name=target_collection,
        hnsw_config=hnsw_settings,
        quantization_config=quantization_settings,
    )

    keyword_fields = (
        "organisation_id",
        "document_id",
        "ref_doc_id",
        "doc_id",
        "provider",
        "category",
    )
    for field in keyword_fields:
        client.create_payload_index(
            collection_name=target_collection,
            field_name=field,
            field_schema=models.PayloadSchemaType.KEYWORD,
        )
37+
38+
39+
def backward(client):
    """Placeholder rollback for migration 001.

    Nothing is undone: the function only prints a reminder that the real
    rollback logic still has to be written. ``client`` is not used.
    """
    # Shape of a real rollback, kept from the template for reference:
    #   client.delete_collection("new_collection")
    reminder = "Your code to rollback the migration here"
    print(reminder)

Diff for: migrator.py

+126
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import os
2+
import importlib.util
3+
from qdrant_client import QdrantClient, models
4+
import logging
5+
6+
# Root logging is configured at import time so migration progress is printed.
logging.basicConfig(level=logging.INFO)
# Only warnings and above from "httpx" are let through.
# NOTE(review): presumably the HTTP layer under QdrantClient - confirm.
logging.getLogger("httpx").setLevel(logging.WARNING)
# Module-level logger used by the migration helpers below.
logger = logging.getLogger(__name__)
9+
10+
11+
def initialize_qdrant_client(url, api_key):
12+
client = QdrantClient(url=url, api_key=api_key)
13+
return client
14+
15+
16+
def check_and_create_migrations_collection(client):
17+
collections_response = client.get_collections()
18+
collections = collections_response.collections
19+
collection_names = [collection.name for collection in collections]
20+
if "migrations" not in collection_names:
21+
client.create_collection(
22+
collection_name="migrations",
23+
vectors_config=models.VectorParams(size=2, distance=models.Distance.COSINE),
24+
)
25+
client.upsert(
26+
collection_name="migrations",
27+
points=[
28+
models.PointStruct(
29+
id="5c56c793-69f3-4fbf-87e6-c4bf54c28c26",
30+
payload={
31+
"version": 0,
32+
},
33+
vector=[0.0, 0.1],
34+
),
35+
],
36+
)
37+
38+
39+
def get_current_version(client):
40+
points = client.scroll(
41+
"migrations",
42+
with_payload=True,
43+
)
44+
if points:
45+
version = points[0][0].payload["version"]
46+
return version
47+
return 0
48+
49+
50+
def set_current_version(client, version):
51+
client.upsert(
52+
collection_name="migrations",
53+
points=[
54+
models.PointStruct(
55+
id="5c56c793-69f3-4fbf-87e6-c4bf54c28c26",
56+
payload={
57+
"version": version,
58+
},
59+
vector=[0.0, 0.1],
60+
),
61+
],
62+
)
63+
64+
65+
def get_migration_files(migration_folder):
66+
files = []
67+
for file in os.listdir(migration_folder):
68+
if file.endswith(".py"):
69+
index = int(file.split("_")[0])
70+
files.append((index, file))
71+
files.sort(key=lambda x: x[0])
72+
return files
73+
74+
75+
def run_migrations(client, migration_folder, current_version, target_version=None):
76+
migration_files = get_migration_files(migration_folder)
77+
for index, file in migration_files:
78+
if index > current_version and (
79+
target_version is None or index <= target_version
80+
):
81+
module_name = file.replace(".py", "")
82+
spec = importlib.util.spec_from_file_location(
83+
module_name, os.path.join(migration_folder, file)
84+
)
85+
module = importlib.util.module_from_spec(spec)
86+
spec.loader.exec_module(module)
87+
module.forward(client)
88+
set_current_version(client, index)
89+
logger.info(
90+
f"Migration completed successfully for {file}! Enjoy your migration :D"
91+
)
92+
else:
93+
logger.info(f"Skipping migration {file}")
94+
95+
96+
def rollback_migrations(client, migration_folder, current_version, target_version):
97+
migration_files = get_migration_files(migration_folder)
98+
for index, file in reversed(migration_files):
99+
if index <= current_version and index > target_version:
100+
module_name = file.replace(".py", "")
101+
spec = importlib.util.spec_from_file_location(
102+
module_name, os.path.join(migration_folder, file)
103+
)
104+
module = importlib.util.module_from_spec(spec)
105+
spec.loader.exec_module(module)
106+
module.backward(client)
107+
set_current_version(client, index - 1)
108+
logger.info(
109+
f"Rollback completed successfully for {file}! Enjoy your migration :D"
110+
)
111+
else:
112+
logger.info(f"Skipping rollback {file}")
113+
114+
115+
def migrate(url, api_key, migration_folder):
116+
client = initialize_qdrant_client(url, api_key)
117+
check_and_create_migrations_collection(client)
118+
current_version = get_current_version(client)
119+
run_migrations(client, migration_folder, current_version)
120+
121+
122+
def rollback(url, api_key, migration_folder, target_version):
123+
client = initialize_qdrant_client(url, api_key)
124+
check_and_create_migrations_collection(client)
125+
current_version = get_current_version(client)
126+
rollback_migrations(client, migration_folder, current_version, target_version)

0 commit comments

Comments
 (0)