From 9d98a6b5045b25ee51e39428231060752dc960a1 Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Mon, 10 May 2021 10:54:46 +0200 Subject: [PATCH 1/7] ripe-import: optionally import RIPE route data The ripe importer can now optionally import RIPE route data. In the context of IntelMQ, this information is useful to e.g. map IP addresses to ASNs which in turn can be mapped to contact information for notifications. This commit contains the changes for main part of this: * The download script also downloads the ripe.db.route.gz and ripe.db.route6.gz files * The database schema has a new table and index for the route information * The import code optionally loads these files and saves it into the database Notably missing is the database upgrade. Related to #15 --- bin/ripe_download | 2 +- intelmq_certbund_contact/ripe/ripe_data.py | 22 ++++++++++++-- intelmq_certbund_contact/ripe/ripe_import.py | 30 ++++++++++++++++++-- sql/initdb.sql | 17 +++++++++++ 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/bin/ripe_download b/bin/ripe_download index 5536379..d43a551 100755 --- a/bin/ripe_download +++ b/bin/ripe_download @@ -5,7 +5,7 @@ set -e d=`date +%F` mkdir $d cd $d -for db in ripe.db.organisation.gz ripe.db.role.gz ripe.db.aut-num.gz ripe.db.inet6num.gz ripe.db.inetnum.gz +for db in ripe.db.organisation.gz ripe.db.role.gz ripe.db.aut-num.gz ripe.db.inet6num.gz ripe.db.inetnum.gz ripe.db.route.gz ripe.db.route6.gz do echo "Downloading: " $db curl -O "https://ftp.ripe.net/ripe/dbase/split/$db" diff --git a/intelmq_certbund_contact/ripe/ripe_data.py b/intelmq_certbund_contact/ripe/ripe_data.py index 41f1683..1324f4b 100644 --- a/intelmq_certbund_contact/ripe/ripe_data.py +++ b/intelmq_certbund_contact/ripe/ripe_data.py @@ -55,6 +55,14 @@ def add_common_args(parser): parser.add_argument("--inet6num-file", default='ripe.db.inet6num.gz', help=("Specify the inet6num data file.")) + parser.add_argument("--route-file", + default='ripe.db.route.gz', + help=("Specify 
the route data file.")) + parser.add_argument("--route6-file", + default='ripe.db.route6.gz', + help=("Specify the route6 data file.")) + parser.add_argument("--import-route-data", action='store_true', + help=("Whether to import/diff the route data.")) parser.add_argument("--ripe-delegated-file", default='delegated-ripencc-latest', help=("Name of the delegated-ripencc-latest file to" @@ -81,7 +89,7 @@ def load_ripe_files(options) -> tuple: Returns: tuple of (asn_list, organisation_list, role_list, abusec_to_org, - inetnum_list, inet6num_list) + inetnum_list, inet6num_list, route_list, route6_list) """ # Step 1: read all files @@ -123,6 +131,16 @@ def restrict_country(record): verbose=options.verbose) role_index = build_index(role_list, 'nic-hdl') + route_list = [] + route6_list = [] + if options.import_route_data: + route_list = parse_file(options.route_file, + ('route', 'origin'), + verbose=options.verbose) + route6_list = parse_file(options.route6_file, + ('route6', 'origin'), + verbose=options.verbose) + # Step 2: Prepare new data for insertion (asn_list, asn_list_u, organisation_list, organisation_index) \ @@ -166,7 +184,7 @@ def restrict_country(record): return (asn_list, organisation_list, role_list, abusec_to_org, - inetnum_list, inet6num_list) + inetnum_list, inet6num_list, route_list, route6_list) def read_delegated_file(filename, country, verbose=False): diff --git a/intelmq_certbund_contact/ripe/ripe_import.py b/intelmq_certbund_contact/ripe/ripe_import.py index c232b31..dc1a1e1 100755 --- a/intelmq_certbund_contact/ripe/ripe_import.py +++ b/intelmq_certbund_contact/ripe/ripe_import.py @@ -37,7 +37,7 @@ SOURCE_NAME = 'ripe' -def remove_old_entries(cur, verbose): +def remove_old_entries(cur, verbose, delete_route_data=False): """Remove the entries imported by previous runs.""" if verbose: print('** Removing old entries from database...') @@ -51,6 +51,8 @@ def remove_old_entries(cur, verbose): (SOURCE_NAME,)) cur.execute("DELETE FROM organisation_automatic 
WHERE import_source = %s;", (SOURCE_NAME,)) + cur.execute("DELETE FROM route_automatic WHERE import_source = %s;", + (SOURCE_NAME,)) def insert_new_network_entries(cur, network_list, key, verbose): @@ -186,6 +188,21 @@ def insert_new_contact_entries(cur, role_list, abusec_to_org, mapping, verbose): (email, mapping[orh]['org_id'], SOURCE_NAME)) +def insert_new_routes(cur, route_list, key, verbose): + if verbose: + print('** Saving {} data to database...'.format(key)) + + for entry in route_list: + # 'origin' is the ASN. Some values contain what appears to be + # comments (e.g. "origin: # AS1234 # FOO") them which we need to + # strip. + asn = entry['origin'][0].split()[0][2:] + cur.execute("""INSERT INTO route_automatic + (address, asn, import_source, import_time) + VALUES (%s, %s, %s, CURRENT_TIMESTAMP)""", + (entry[key][0], asn, SOURCE_NAME)) + + def main(): parser = argparse.ArgumentParser( description="" @@ -204,14 +221,14 @@ def main(): print('------------------------') (asn_list, organisation_list, role_list, abusec_to_org, inetnum_list, - inet6num_list) = ripe_data.load_ripe_files(args) + inet6num_list, route_list, route6_list) = ripe_data.load_ripe_files(args) con = None try: con = psycopg2.connect(dsn=args.conninfo) cur = con.cursor() - remove_old_entries(cur, args.verbose) + remove_old_entries(cur, args.verbose, args.import_route_data) # network addresses org_inet6_mapping = insert_new_network_entries( @@ -235,6 +252,13 @@ def main(): insert_new_contact_entries(cur, role_list, abusec_to_org, mapping, args.verbose) + # + # Routing + # + if args.import_route_data: + insert_new_routes(cur, route_list, 'route', args.verbose) + insert_new_routes(cur, route6_list, 'route6', args.verbose) + # Commit all data con.commit() except psycopg2.DatabaseError as e: diff --git a/sql/initdb.sql b/sql/initdb.sql index c8d7018..34e4145 100644 --- a/sql/initdb.sql +++ b/sql/initdb.sql @@ -246,6 +246,23 @@ CREATE INDEX fqdn_annotation_fqdn_idx ON fqdn_annotation (fqdn_id); +-- 
Routing information, useful as a mapping from network addresses to +-- ASNs. +CREATE TABLE route_automatic ( + route_automatic_id SERIAL PRIMARY KEY, + address CIDR NOT NULL, + asn BIGINT NOT NULL, + LIKE automatic_templ INCLUDING ALL, + + -- The data from ripe.db.route.gz and ripe.db.route6.gz has cases + -- where the same network address is associated with multiple ASNs, + -- so we cannot have a constraint on just (address, import_source). + UNIQUE (address, asn, import_source) +); + +CREATE INDEX route_automatic_cidr_gist_idx ON route_automatic + USING gist (address inet_ops); + -- Information about national CERTs From 8d4c4f6d023401dade2fcd312147642dd481f144 Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Thu, 20 May 2021 18:41:44 +0200 Subject: [PATCH 2/7] ripe-import: Add DB update script for route_automatic This adds an update script for the new route_automatic table. The name of the script file does not include a version number like the other update scripts because it's not clear yet which version this file will be included in. Related to #15 --- sql/update-route.sql | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 sql/update-route.sql diff --git a/sql/update-route.sql b/sql/update-route.sql new file mode 100644 index 0000000..ce064b6 --- /dev/null +++ b/sql/update-route.sql @@ -0,0 +1,18 @@ +-- Update script for the route_automatic table. + +CREATE TABLE route_automatic ( + route_automatic_id SERIAL PRIMARY KEY, + address CIDR NOT NULL, + asn BIGINT NOT NULL, + import_source VARCHAR(500) NOT NULL, + import_time TIMESTAMP NOT NULL, + + -- explicitly name the constraint to make sure it has the same name + -- as the constraint created by initdb.sql.
+ CONSTRAINT automatic_templ_import_source_check CHECK (import_source <> ''), + + UNIQUE (address, asn, import_source) +); + +CREATE INDEX route_automatic_cidr_gist_idx ON route_automatic + USING gist (address inet_ops); From 4f5b6650f4b15fa21aeaf81abfc992e1bb15b1f0 Mon Sep 17 00:00:00 2001 From: Bernhard Herzog Date: Mon, 14 Jun 2021 10:43:25 +0200 Subject: [PATCH 3/7] ripe-import: Add --before-commit-command option The RIPE importer has a new command line option, --before-commit-command, whose argument is an SQL statement that is executed by the importer before the transaction that updates the RIPE data is committed. This command can be used to e.g. cleanup data that depends on the RIPE data, but which cannot be automatically updated with 'ON DELETE CASCADE' for foreign keys or similar mechanisms. --- intelmq_certbund_contact/ripe/ripe_import.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/intelmq_certbund_contact/ripe/ripe_import.py b/intelmq_certbund_contact/ripe/ripe_import.py index dc1a1e1..e82845b 100755 --- a/intelmq_certbund_contact/ripe/ripe_import.py +++ b/intelmq_certbund_contact/ripe/ripe_import.py @@ -214,6 +214,12 @@ def main(): ripe_data.add_db_args(parser) ripe_data.add_common_args(parser) + parser.add_argument("--before-commit-command", + help=("SQL statement that is executed before committing" + " the changes. This can be used to e.g. 
cleanup" + " data that refers to the potentially changed" + " RIPE data.")) + args = parser.parse_args() if args.verbose: @@ -259,6 +265,13 @@ def main(): insert_new_routes(cur, route_list, 'route', args.verbose) insert_new_routes(cur, route6_list, 'route6', args.verbose) + # run "before commit command" + if args.before_commit_command: + if args.verbose: + print('Running before commit command...') + print('------------------------') + cur.execute(args.before_commit_command) + # Commit all data con.commit() except psycopg2.DatabaseError as e: From 457106440e01ddfbff67a010f85cd5c32193c4a2 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 27 Feb 2024 09:09:09 +0100 Subject: [PATCH 4/7] Bulk insert RIPE data --- README.md | 3 + intelmq_certbund_contact/ripe/ripe_import.py | 79 +++++++++++++------- 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 2a14f59..0292e52 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +This is a fork to support uploading RIPE data into [Tuency](https://gitlab.com/intevation/tuency/tuency) +___ + # Two expert bots to lookup contact information in a database and apply notification rules Part of the [intelmq-cb-mailgen solution](https://github.com/Intevation/intelmq-mailgen-release). 
diff --git a/intelmq_certbund_contact/ripe/ripe_import.py b/intelmq_certbund_contact/ripe/ripe_import.py index e82845b..d7707c2 100755 --- a/intelmq_certbund_contact/ripe/ripe_import.py +++ b/intelmq_certbund_contact/ripe/ripe_import.py @@ -28,13 +28,16 @@ import sys import psycopg2 +import psycopg2.extras import argparse import collections +from datetime import datetime, UTC import intelmq_certbund_contact.ripe.ripe_data as ripe_data SOURCE_NAME = 'ripe' +BULK_PAGE_SIZE = 500 def remove_old_entries(cur, verbose, delete_route_data=False): @@ -127,25 +130,30 @@ def insert_new_organisations(cur, organisation_list, verbose): return mapping - -def insert_new_asn_org_entries(cur, asn_list, mapping): - # many-to-many table organisation <-> as number +def _generate_asn_entries(asn_list, mapping): + insert_time = datetime.now(tz=UTC) for entry in asn_list: org_id = mapping[entry["org"][0]].get("org_id") if org_id is None: print("org_id None for AS organisation handle {!r}" .format(entry["org"][0])) continue + yield (org_id, entry['aut-num'][0][2:], SOURCE_NAME, insert_time) - cur.execute("""INSERT INTO organisation_to_asn_automatic - (organisation_automatic_id, asn, - import_source, import_time) - VALUES (%s, %s, %s, CURRENT_TIMESTAMP);""", - (org_id, entry['aut-num'][0][2:], SOURCE_NAME)) - - -def insert_new_network_org_entries(cur, org_net_mapping, mapping): - # many-to-many table organisation <-> network number +def insert_new_asn_org_entries(cur, asn_list, mapping): + # many-to-many table organisation <-> as number + psycopg2.extras.execute_values( + cur, + """INSERT INTO organisation_to_asn_automatic + (organisation_automatic_id, asn, + import_source, import_time) + VALUES %s;""", + _generate_asn_entries(asn_list, mapping), + page_size=BULK_PAGE_SIZE, + ) + +def _generate_network_entries(org_net_mapping, mapping): + insert_time = datetime.now(tz=UTC) for org, networks in org_net_mapping.items(): org_id = mapping[org].get("org_id") if org_id is None: @@ -153,12 +161,20 
@@ def insert_new_network_org_entries(cur, org_net_mapping, mapping): continue for network_id in networks: - cur.execute("""INSERT INTO organisation_to_network_automatic - (organisation_automatic_id, - network_automatic_id, - import_source, import_time) - VALUES (%s, %s, %s, CURRENT_TIMESTAMP);""", - (org_id, network_id, SOURCE_NAME)) + yield (org_id, network_id, SOURCE_NAME, insert_time) + +def insert_new_network_org_entries(cur, org_net_mapping, mapping): + # many-to-many table organisation <-> network number + psycopg2.extras.execute_values( + cur, + """INSERT INTO organisation_to_network_automatic + (organisation_automatic_id, + network_automatic_id, + import_source, import_time) + VALUES %s;""", + _generate_network_entries(org_net_mapping, mapping), + page_size=BULK_PAGE_SIZE, + ) def insert_new_contact_entries(cur, role_list, abusec_to_org, mapping, verbose): @@ -192,15 +208,24 @@ def insert_new_routes(cur, route_list, key, verbose): if verbose: print('** Saving {} data to database...'.format(key)) - for entry in route_list: - # 'origin' is the ASN. Some values contain what appears to be - # comments (e.g. "origin: # AS1234 # FOO") them which we need to - # strip. - asn = entry['origin'][0].split()[0][2:] - cur.execute("""INSERT INTO route_automatic - (address, asn, import_source, import_time) - VALUES (%s, %s, %s, CURRENT_TIMESTAMP)""", - (entry[key][0], asn, SOURCE_NAME)) + insert_time = datetime.now(tz=UTC) + + def _gen(): + for entry in route_list: + # 'origin' is the ASN. Some values contain what appears to be + # comments (e.g. "origin: # AS1234 # FOO") them which we need to + # strip. 
+ asn = entry['origin'][0].split()[0][2:] + yield (entry[key][0], asn, SOURCE_NAME, insert_time) + + psycopg2.extras.execute_values( + cur, + """INSERT INTO route_automatic + address, asn, import_source, import_time) + VALUES %s;""", + _gen(), + page_size=BULK_PAGE_SIZE, + ) def main(): From 4c1d0b489049c739daa8cf60597718fd31e73235 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Tue, 27 Feb 2024 09:13:58 +0100 Subject: [PATCH 5/7] Compatibility with Python 3.9 --- intelmq_certbund_contact/ripe/ripe_import.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intelmq_certbund_contact/ripe/ripe_import.py b/intelmq_certbund_contact/ripe/ripe_import.py index d7707c2..3f09bab 100755 --- a/intelmq_certbund_contact/ripe/ripe_import.py +++ b/intelmq_certbund_contact/ripe/ripe_import.py @@ -31,7 +31,7 @@ import psycopg2.extras import argparse import collections -from datetime import datetime, UTC +from datetime import datetime, timezone import intelmq_certbund_contact.ripe.ripe_data as ripe_data @@ -131,7 +131,7 @@ def insert_new_organisations(cur, organisation_list, verbose): return mapping def _generate_asn_entries(asn_list, mapping): - insert_time = datetime.now(tz=UTC) + insert_time = datetime.now(tz=timezone.utc) for entry in asn_list: org_id = mapping[entry["org"][0]].get("org_id") if org_id is None: @@ -153,7 +153,7 @@ def insert_new_asn_org_entries(cur, asn_list, mapping): ) def _generate_network_entries(org_net_mapping, mapping): - insert_time = datetime.now(tz=UTC) + insert_time = datetime.now(tz=timezone.utc) for org, networks in org_net_mapping.items(): org_id = mapping[org].get("org_id") if org_id is None: @@ -208,7 +208,7 @@ def insert_new_routes(cur, route_list, key, verbose): if verbose: print('** Saving {} data to database...'.format(key)) - insert_time = datetime.now(tz=UTC) + insert_time = datetime.now(tz=timezone.utc) def _gen(): for entry in route_list: From a76ce294f08d81e24dc3aeeec078de890487db0d Mon Sep 17 00:00:00 2001 
From: Kamil Mankowski Date: Tue, 27 Feb 2024 09:31:39 +0100 Subject: [PATCH 6/7] Fix syntax --- intelmq_certbund_contact/ripe/ripe_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intelmq_certbund_contact/ripe/ripe_import.py b/intelmq_certbund_contact/ripe/ripe_import.py index 3f09bab..c5671f8 100755 --- a/intelmq_certbund_contact/ripe/ripe_import.py +++ b/intelmq_certbund_contact/ripe/ripe_import.py @@ -221,7 +221,7 @@ def _gen(): psycopg2.extras.execute_values( cur, """INSERT INTO route_automatic - address, asn, import_source, import_time) + (address, asn, import_source, import_time) VALUES %s;""", _gen(), page_size=BULK_PAGE_SIZE, From 1a2a6f2d3d74703157af2c97bcb72fa966ff2116 Mon Sep 17 00:00:00 2001 From: Kamil Mankowski Date: Thu, 7 Mar 2024 13:41:18 +0100 Subject: [PATCH 7/7] Remove not needed header --- README.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.md b/README.md index 0292e52..2a14f59 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ -This is a fork to support uploading RIPE data into [Tuency](https://gitlab.com/intevation/tuency/tuency) -___ - # Two expert bots to lookup contact information in a database and apply notification rules Part of the [intelmq-cb-mailgen solution](https://github.com/Intevation/intelmq-mailgen-release).