|
48 | 48 | "import shutil\n",
|
49 | 49 | "import tempfile\n",
|
50 | 50 | "from collections import Counter\n",
|
| 51 | + "from datetime import datetime, timezone\n", |
| 52 | + "from pathlib import Path\n", |
51 | 53 | "\n",
|
52 |
| - "from datetime import datetime\n", |
53 |
| - "from dateutil.relativedelta import relativedelta\n", |
54 | 54 | "import numpy as np\n",
|
55 | 55 | "import pandas as pd\n",
|
| 56 | + "from dateutil.relativedelta import relativedelta\n", |
56 | 57 | "from google.colab.data_table import DataTable\n",
|
57 | 58 | "from google.colab.files import download\n",
|
58 | 59 | "from ipywidgets import widgets\n",
|
|
861 | 862 | " response = requests.get(download_url, timeout=10)\n",
|
862 | 863 | " with tempfile.NamedTemporaryFile() as gz_file:\n",
|
863 | 864 | " gz_file.write(response.content)\n",
|
864 |
| - " with gzip.open(gz_file.name) as i, open(file_name, \"wb\") as o:\n", |
| 865 | + " with gzip.open(gz_file.name) as i, Path(file_name).open(\"wb\") as o:\n", |
865 | 866 | " shutil.copyfileobj(i, o)\n",
|
866 | 867 | " return file_name"
|
867 | 868 | ]
|
|
910 | 911 | "outputs": [],
|
911 | 912 | "source": [
|
912 | 913 | "non_frozen_publications = list(filter(lambda item: not item[\"frozen\"] and item[\"date_to\"], publications))\n",
|
913 |
| - "past_year = datetime.now() - relativedelta(years=1)\n", |
| 914 | + "today = datetime.now(tz=timezone.utc)\n", |
| 915 | + "past_year = today - relativedelta(years=1)\n", |
914 | 916 | "lapsed_publications = list(\n",
|
915 |
| - " filter(lambda item: datetime.strptime(item[\"date_to\"], \"%Y-%m-%d\") < past_year, non_frozen_publications)\n", |
| 917 | + " filter(\n", |
| 918 | + " lambda item: datetime.strptime(item[\"date_to\"], \"%Y-%m-%d\").astimezone(timezone.utc) < past_year,\n", |
| 919 | + " non_frozen_publications,\n", |
| 920 | + " )\n", |
916 | 921 | ")\n",
|
917 | 922 | "lapsed_publications_table = pd.DataFrame(lapsed_publications)\n",
|
918 | 923 | "lapsed_publications_table"
|
|
940 | 945 | "active_publications = [item for item in non_frozen_publications if item not in lapsed_publications]\n",
|
941 | 946 | "for publication in active_publications:\n",
|
942 | 947 | " year = publication[\"date_to\"][:4]\n",
|
943 |
| - " if int(year) > datetime.now().year:\n", |
944 |
| - " year = datetime.now().year\n", |
| 948 | + " if int(year) > today.year:\n", |
| 949 | + " year = today.year\n", |
945 | 950 | " file_name = download_file(publication, year)\n",
|
946 | 951 | " field_table = cardinal_calculate_coverage(file_name)\n",
|
947 | 952 | " fields_list = field_table.iloc[:, 0].tolist()\n",
|
|
0 commit comments