Fix dataset version lookup (map X.Y.Z to X.0.0 for S3 keys) and bump version to 1.0.1

vadyushkins · vadyushkins · commit 39073f3a9c9d · 2021-05-29T14:37:01.000+03:00
diff --git a/cfpq_data/__init__.py b/cfpq_data/__init__.py
@@ -7,7 +7,7 @@
 experimental analysis of context-free path querying algorithms
 """
 
-__version__ = "1.0.1-dev"
+__version__ = "1.0.1"
 
 import cfpq_data.config
 from cfpq_data.config import *
diff --git a/cfpq_data/graphs/readwrite/rdf.py b/cfpq_data/graphs/readwrite/rdf.py
@@ -1,6 +1,7 @@
 """Read (and write) a graph
 from (and to) RDF file.
 """
+import re
 from os import path, remove
 from pathlib import Path
 from shutil import unpack_archive
@@ -26,9 +27,6 @@
     "graph_to_rdf",
 ]
 
-if "dev" in VERSION:
-    VERSION = "dev"
-
 
 def graph_from_dataset(graph_name: str, verbose: bool = True) -> MultiDiGraph:
     """Returns a graph from
@@ -65,6 +63,17 @@ def graph_from_dataset(graph_name: str, verbose: bool = True) -> MultiDiGraph:
             graph_file_path = str(dst / graph_file)
 
             if not path.isfile(graph_file_path):
+
+                DATASET_VERSION = VERSION
+
+                if re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION) is not None:
+                    DATASET_VERSION = (
+                        str(
+                            re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION).group(1)
+                        )
+                        + ".0.0"
+                    )
+
                 graph_archive = (
                     graph_file + DATASET[graph_class][graph_name]["ArchiveExtension"]
                 )
@@ -86,7 +95,7 @@ def _inner(bytes_amount):
 
                     file_size_in_bytes = s3.head_object(
                         Bucket=BUCKET_NAME,
-                        Key=f"{VERSION}/{graph_class}/{graph_archive}",
+                        Key=f"{DATASET_VERSION}/{graph_class}/{graph_archive}",
                     )["ContentLength"]
 
                     with tqdm(
@@ -97,14 +106,14 @@ def _inner(bytes_amount):
                     ) as t:
                         s3.download_file(
                             Bucket=BUCKET_NAME,
-                            Key=f"{VERSION}/{graph_class}/{graph_archive}",
+                            Key=f"{DATASET_VERSION}/{graph_class}/{graph_archive}",
                             Filename=graph_archive_path,
                             Callback=_hook(t),
                         )
                 else:
                     s3.download_file(
                         Bucket=BUCKET_NAME,
-                        Key=f"{VERSION}/{graph_class}/{graph_archive}",
+                        Key=f"{DATASET_VERSION}/{graph_class}/{graph_archive}",
                         Filename=graph_archive_path,
                     )
 
diff --git a/utils/fetch_dataset.py b/utils/fetch_dataset.py
@@ -1,3 +1,4 @@
+import re
 from collections import defaultdict
 from json import dumps
 
@@ -7,11 +8,15 @@
 from cfpq_data.config import AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, BUCKET_NAME
 from config import MAIN_FOLDER
 
-if "dev" in VERSION:
-    VERSION = "dev"
-
 
 def fetch_dataset():
+    DATASET_VERSION = VERSION
+
+    if re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION) is not None:
+        DATASET_VERSION = (
+            str(re.match(r"^(\d+)\.(\d+)\.(\d+)$", DATASET_VERSION).group(1)) + ".0.0"
+        )
+
     s3 = client(
         "s3",
         aws_access_key_id=AWS_ACCESS_KEY_ID,
@@ -20,7 +25,9 @@ def fetch_dataset():
 
     dataset = defaultdict(dict)
 
-    for graph in s3.list_objects(Bucket="cfpq-data", Prefix=VERSION)["Contents"]:
+    for graph in s3.list_objects(Bucket="cfpq-data", Prefix=DATASET_VERSION)[
+        "Contents"
+    ]:
         graph_key = graph["Key"]
         graph_class, graph_full_name = graph_key.split("/")[1:]
         graph_name = graph_full_name.split(".")[0]