diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 7a7d7f1b29..7898f04fa6 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -125,6 +125,7 @@ steps: - ./scion.sh topology -c topology/default.topo - ./scion.sh run - tools/await-connectivity + - sleep 5 - ./bin/scion_integration || ( echo "^^^ +++" && false ) - ./bin/end2end_integration || ( echo "^^^ +++" && false ) plugins: &scion-run-hooks @@ -141,7 +142,7 @@ steps: pre-exit: .buildkite/cleanup-leftovers.sh artifact_paths: &scion-run-artifact-paths - test-out.tar.gz - timeout_in_minutes: 15 + timeout_in_minutes: 20 key: e2e_integration_tests_v2 retry: *automatic-retry - label: "E2E: failing links :man_in_business_suit_levitating:" @@ -152,6 +153,7 @@ steps: - ./scion.sh topology -c topology/default-no-peers.topo - ./scion.sh run - tools/await-connectivity + - sleep 5 - ./bin/end2end_integration || ( echo "^^^ +++" && false ) - ./tools/integration/revocation_test.sh plugins: *scion-run-hooks @@ -167,6 +169,7 @@ steps: - ./scion.sh topology -d - ./scion.sh run - tools/await-connectivity + - sleep 5 - echo "--- run tests" - ./bin/end2end_integration -d || ( echo "^^^ +++" && false ) plugins: *scion-run-hooks diff --git a/.golangci.yml b/.golangci.yml index d3a2fc784f..8574907cd0 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -41,24 +41,29 @@ linters: msg: spell trust root certificate as trc / TRC goheader: values: - regexp: - copyright-lines: |- - (Copyright 20[0-9][0-9] .*)( - Copyright 20[0-9][0-9] .*)* - template: |- - {{copyright-lines}} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at + const: + LICENSE_TEXT: |- + Licensed under the Apache License, Version 2.0 \(the "License"\); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + SPDX_LINE: |- + SPDX-License-Identifier: Apache-2.0 + regexp: + copyright_lines: |- + (Copyright 20[0-9][0-9] .*) + (Copyright 20[0-9][0-9] .*)* + license: |- + ({{SPDX_LINE}})|({{LICENSE_TEXT}}) + template: |- + {{copyright_lines}}{{license}} lll: line-length: 100 tab-width: 4 diff --git a/MODULE.bazel.lock b/MODULE.bazel.lock index aa77e411a6..f53807ed73 100644 --- a/MODULE.bazel.lock +++ b/MODULE.bazel.lock @@ -241,7 +241,7 @@ "moduleExtensions": { "//:antlr.bzl%antlr": { "general": { - "bzlTransitiveDigest": "RES7NpV12lXIUZ53wkYAoJNKF9F09XklX2+arTCQ4nI=", + "bzlTransitiveDigest": "5XvXGtK4xIbJd9TfgGbSNCdMpU+eMrPzz404GgDb/D4=", "usagesDigest": "MYi7gCA2Rcv9xbxcCUXgWRLbup3cc3pAjZrw7P3G8M4=", "recordedFileInputs": {}, "recordedDirentsInputs": {}, diff --git a/Makefile b/Makefile index 34b9bb750b..962de9b9cb 100644 --- a/Makefile +++ b/Makefile @@ -6,11 +6,13 @@ build-dev: tar -kxf bazel-bin/scion.tar -C bin tar -kxf bazel-bin/scion-ci.tar -C bin tar -kxf bazel-bin/scion-topo.tar -C bin + sudo setcap "cap_bpf=ep cap_net_admin=ep cap_net_raw=ep" bin/router build: rm -f bin/* bazel build //:scion tar -kxf bazel-bin/scion.tar -C bin + sudo setcap "cap_bpf=ep cap_net_admin=ep cap_net_raw=ep" bin/router # BFLAGS is optional. It may contain additional command line flags for CI builds. Currently this is: # "--file_name_version=$(tools/git-version)" to include the git version in the artifacts names. diff --git a/acceptance/common/raw.bzl b/acceptance/common/raw.bzl index 7eb6718a53..d07a6dc7da 100644 --- a/acceptance/common/raw.bzl +++ b/acceptance/common/raw.bzl @@ -51,7 +51,7 @@ def raw_test( py_binary( name = "%s_teardown" % name, srcs = [src], - args = ["teardown"], + args = ["teardown"] + args, main = src, deps = [":%s_lib" % name], imports = imports, diff --git a/acceptance/common/topogen.bzl b/acceptance/common/topogen.bzl index bebb575a2a..d90e127f00 100644 --- a/acceptance/common/topogen.bzl +++ b/acceptance/common/topogen.bzl @@ -108,7 +108,7 @@ def topogen_test( py_test( name = name, - size = "large", + size = "medium", srcs = [src], main = src, args = args + common_args, diff --git a/acceptance/hidden_paths/test.py b/acceptance/hidden_paths/test.py index 3056096671..1932900461 100755 --- a/acceptance/hidden_paths/test.py +++ b/acceptance/hidden_paths/test.py @@ -4,6 +4,7 @@ import http.server import threading +import time from acceptance.common import base from acceptance.common import scion @@ -110,11 +111,11 @@ def setup_start(self): server_thread = threading.Thread(target=configuration_server, args=[server]) server_thread.start() self._server = server - super().setup_start() - self.await_connectivity() - self._server.shutdown() # by now configuration must have been downloaded everywhere + self.await_connectivity() # <- not very reliable + time.sleep(10) # <- ...so + self._server.shutdown() # by now configuration must have been downloaded everywhere def _run(self): # Group 3 @@ -141,7 +142,7 @@ def _showpaths_bidirectional(self, source: str, destination: str): def _showpaths_run(self, source_as: str, destination_as: str): print(self.execute_tester(ISD_AS(self._ases[source_as]), - "scion", "sp", self._ases[destination_as], "--timeout", "2s")) + "scion", "sp", self._ases[destination_as], "--timeout", "3s")) def configuration_server(server): diff --git a/acceptance/router_benchmark/BUILD.bazel b/acceptance/router_benchmark/BUILD.bazel index 21f3f853da..b44e0b9a5a 100644 --- a/acceptance/router_benchmark/BUILD.bazel +++ b/acceptance/router_benchmark/BUILD.bazel @@ -48,8 +48,6 @@ py_library( srcs = ["benchmarklib.py"], ) -# To ensure that the linter runs over this. Cannot actually be run with -# bazel run; it is meant to be executed from the command line. py_binary( name = "benchmark", srcs = ["benchmark.py"], @@ -57,9 +55,7 @@ py_binary( "--brload", "$(location //acceptance/router_benchmark/brload:brload)", ], - data = [ - "//acceptance/router_benchmark/brload", - ], + data = data, imports = ["."], visibility = ["//visibility:public"], deps = [ diff --git a/acceptance/router_benchmark/benchmark.py b/acceptance/router_benchmark/benchmark.py index 04e77807b5..1859c97e8f 100755 --- a/acceptance/router_benchmark/benchmark.py +++ b/acceptance/router_benchmark/benchmark.py @@ -25,16 +25,21 @@ from benchmarklib import Intf, RouterBM from collections import namedtuple +from plumbum import BG from plumbum import cli from plumbum import cmd from plumbum import local from plumbum.cmd import docker from plumbum.machines import LocalCommand -from random import randint + from urllib.request import urlopen logger = logging.getLogger(__name__) +# Router profiling ON or OFF? +PROFILING_TRACE = False +PROFILING_CPU = False + TEST_CASES = [ "in", "out", @@ -82,8 +87,7 @@ class RouterBMTool(cli.Application, RouterBM): mx_interface: str = None to_flush: list[str] = [] scrape_addr: str = None - - log_level = cli.SwitchAttr(["l", "loglevel"], str, default='warning', help="Logging level") + log_level: str = cli.SwitchAttr(["l", "loglevel"], str, default='warning', help="Logging level") doit = cli.Flag(["r", "run"], help="Run the benchmark, as opposed to seeing the instructions") @@ -95,23 +99,37 @@ class RouterBMTool(cli.Application, RouterBM): help="The coremark score of the subject machine") mmbm = cli.SwitchAttr(["m", "mmbm"], int, default=0, help="The mmbm score of the subject machine") - packet_size = cli.SwitchAttr(["s", "size"], int, default=172, + packet_size = cli.SwitchAttr(["s", "size"], int, default=1500, help="Test packet size (includes all headers - floored at 154)") brload_path = cli.SwitchAttr(["b", "brload"], str, default="bin/brload", help="Relative path to the brload tool") - + intern_overrides = cli.SwitchAttr(["i", "intern-addr-override"], str, list=True, default=[], + help="Override args") + public_overrides = cli.SwitchAttr(["p", "public-addr-override"], str, list=True, default=[], + help="Override args") + skip_ifconfig = cli.Flag(["n", "no-ifconfig"], + help="Skip configuring local interfaces (already configured).") intf_map: dict[str, Intf] = {} brload: LocalCommand = None brload_cpus: list[int] = [] artifacts = f"{os.getcwd()}/acceptance/router_benchmark" prom_address: str = "localhost:9090" + debug_run: bool = False + intern_over_args = [] + public_over_args = [] def host_interface(self, excl: bool): """Returns the next host interface that we should use for a brload links. If excl is true, we pick one and never pick that one again. Else, we pick one the first time it's needed and keep it for reuse. + + If skip_ifconfig is true, we forego multiplexing as it is likely not how things + have been configured. We assume one interface per address. """ + if self.skip_ifconfig: + return self.avail_interfaces.pop() + if excl: return self.avail_interfaces.pop() @@ -148,12 +166,14 @@ def config_interface(self, req: IntfReq): # the router's subnet that's not otherwise used. This must NOT be "PeerIP". # brload requires the internal interface to be "exclusive", that's our clue. if exclusive: - net = ipaddress.ip_network(f"{req.ip}/{req.prefix_len}", strict=False) - hostAddr = next(net.hosts()) + 126 self.scrape_addr = req.ip - sudo("ip", "addr", "add", f"{hostAddr}/{req.prefix_len}", - "broadcast", str(net.broadcast_address), "dev", host_intf) - self.to_flush.append(host_intf) + self.profiling_addr = req.ip + if not self.skip_ifconfig: + net = ipaddress.ip_network(f"{req.ip}/{req.prefix_len}", strict=False) + hostAddr = next(net.hosts()) + 126 + sudo("ip", "addr", "add", f"{hostAddr}/{req.prefix_len}", + "broadcast", str(net.broadcast_address), "dev", host_intf) + self.to_flush.append(host_intf) logger.debug(f"=> Configuring interface {host_intf} for: {req}...") @@ -163,19 +183,23 @@ def config_interface(self, req: IntfReq): if i.name == host_intf: break else: - sudo("ip", "link", "set", host_intf, "mtu", "9000") - - # Do not assign the host addresses but create one link-local addr. - # Brload needs some src IP to send arp requests. (This requires rp_filter - # to be off on the router side, else, brload's arp requests are discarded). - sudo("ip", "addr", "add", f"169.254.{randint(0, 255)}.{randint(0, 255)}/16", - "broadcast", "169.254.255.255", - "dev", host_intf, "scope", "link") - sudo("sysctl", "-qw", f"net.ipv6.conf.{host_intf}.disable_ipv6=1") - self.to_flush.append(host_intf) + # Do not assign the host addresses but some other addr in the same subnet. + # This is because brload needs some src IP to send arp requests but we do not want the + # linux kernel to handle our incoming packets and respond with icmp errors. + if not self.skip_ifconfig: + # We allow for packets as large as they get. + sudo("ip", "link", "set", host_intf, "mtu", "9000") + + net = ipaddress.ip_network(f"{req.ip}/{req.prefix_len}", strict=False) + hostAddr = next(net.hosts()) + 125 + sudo("ip", "addr", "add", f"{hostAddr}/{req.prefix_len}", + "broadcast", str(net.broadcast_address), "dev", host_intf) + sudo("sysctl", "-qw", f"net.ipv6.conf.{host_intf}.disable_ipv6=1") + self.to_flush.append(host_intf) # Fit for duty. - sudo("ip", "link", "set", host_intf, "up") + if not self.skip_ifconfig: + sudo("ip", "link", "set", host_intf, "up") # Ship it. Leave mac addresses alone. In this standalone test we use the real one. self.intf_map[req.label] = Intf(host_intf, None, None) @@ -198,16 +222,18 @@ def fetch_horsepower(self) -> tuple[int]: def setup(self, avail_interfaces: list[str]): logger.info("Preparing...") - # Check that the given interfaces are safe to use. We will wreck their config. - for intf in avail_interfaces: - output = sudo("ip", "addr", "show", "dev", intf) - if len(output.splitlines()) > 2: - logger.error(f"""\ - Interface {intf} appears to be in some kind of use. Cowardly refusing to modify it. - If you have a network manager, tell it to disable or ignore that interface. - Else, how about \"sudo ip addr flush dev {intf}\"? - """) - raise RuntimeError("Interface in use") + if not self.skip_ifconfig: + # Check that the given interfaces are safe to use. We will wreck their config. + for intf in avail_interfaces: + output = sudo("ip", "addr", "show", "dev", intf) + # The check below is too sloppy. Some systems yield false positives. + if False: # len(output.splitlines()) > 2: + logger.error(f"""\ + Interface {intf} appears to be in some kind of use. Cowardly refusing to modify + it. If you have a network manager, tell it to disable or ignore that interface. + Else, how about \"sudo ip addr flush dev {intf}\"? + """) + raise RuntimeError("Interface in use") # Looks safe. self.avail_interfaces = avail_interfaces @@ -216,7 +242,7 @@ def setup(self, avail_interfaces: list[str]): # We supply the label->host-side-name mapping to brload when we start it. logger.debug("==> Configuring host interfaces...") - output = self.brload("show-interfaces") + output = self.brload("show-interfaces", *self.intern_over_args, *self.public_over_args) lines = sorted(output.splitlines()) for line in lines: @@ -243,6 +269,14 @@ def setup(self, avail_interfaces: list[str]): # They'll be used to produce a performance index. self.fetch_horsepower() + # Optionally profile the router + if PROFILING_CPU: + cmd.curl[f"{self.profiling_addr}:30442/debug/pprof/cpu?seconds=70", + "-o", "router_cpu.pprof"] & BG + + if PROFILING_TRACE: + cmd.curl[f"{self.profiling_addr}:30442/debug/pprof/trace?seconds=70", + "-o", "router_trace.pprof"] & BG logger.info("Prepared") def cleanup(self, retcode: int): @@ -252,7 +286,7 @@ def cleanup(self, retcode: int): return retcode def instructions(self): - output = self.brload("show-interfaces") + output = self.brload("show-interfaces", *self.intern_over_args, *self.public_over_args) exclusives = [] multiplexed = [] @@ -268,6 +302,7 @@ def instructions(self): for line in lines: elems = line.split(",") if len(elems) != 5: + print("*******", line) continue req = IntfReq._make(elems) reqs.append(req) @@ -290,16 +325,17 @@ def instructions(self): print(f""" INSTRUCTIONS: -1 - Configure your subject router according to accept/router_benchmark/conf/router.toml") - If using openwrt, an easy way to do that is to install the bmtools.ipk package. In addition, - bmtools includes two microbenchmarks: scion-coremark and scion-mmbm. Those will run - automatically and the results will be used to improve the benchmark report. +1 - Configure your subject router according to "acceptance/router_benchmark/conf/*" (copy everything + to /etc/scion of the router). If using openwrt, an easy way to do that is to install + the bmtools.ipk package. In addition, bmtools includes two microbenchmarks: scion-coremark and + scion-mmbm. Those will run automatically and the results will be used to improve the benchmark + report. Optional: If you did not install bmtools.ipk, install and run those microbenchmarks and make a note of the results: (scion-coremark; scion-mmbm). 2 - Configure the following interfaces on your router (The procedure depends on your router - UI) - All interfaces should have the mtu set to 9000: + UI): - One physical interface with addresses: {", ".join(multiplexed)} {nl.join([' - One physical interface with address: ' + s for s in exclusives])} @@ -335,6 +371,14 @@ def instructions(self): """) def main(self, *interfaces: str): + for over in self.intern_overrides: + self.intern_over_args.append("--intern-addr-override") + self.intern_over_args.append(over) + + for over in self.public_overrides: + self.public_over_args.append("--public-addr-override") + self.public_over_args.append(over) + # brload cannot be set statically. It need the cli arguments to be # processed. self.brload = local[self.brload_path] diff --git a/acceptance/router_benchmark/benchmarklib.py b/acceptance/router_benchmark/benchmarklib.py index 4290de20bb..76da8a68ff 100644 --- a/acceptance/router_benchmark/benchmarklib.py +++ b/acceptance/router_benchmark/benchmarklib.py @@ -74,7 +74,7 @@ def perf_index(self, rate: int) -> float: def add_case(self, name: str, rate: int, droppage: int, raw_rate: int): dropRatio = round(float(droppage) / (rate + droppage), 2) - saturated = dropRatio > 0.03 + saturated = dropRatio >= 0.03 perf = 0.0 if self.cores == 3 and self.coremark and self.mmbm: perf = round(self.perf_index(rate), 1) @@ -166,11 +166,17 @@ def exec_br_load(self, case: str, map_args: list[str], duration: int) -> str: "run", "--artifacts", self.artifacts, *map_args, + *self.intern_over_args, + *self.public_over_args, "--case", case, "--duration", f"{duration}s", "--num-streams", "840", "--packet-size", f"{self.packet_size}", + "--log.console", "warn" if self.log_level == "warning" else f"{self.log_level}", ] + if self.debug_run: + brload_args.extend(["--num-packets", 1000]) + if self.brload_cpus: brload_args = [ "taskset", "-c", ",".join(map(str, self.brload_cpus)), @@ -319,13 +325,16 @@ def run_bm(self, test_cases: [str]) -> Results: # Run one test (30% size) as warm-up to trigger any frequency scaling, else the first test # can get much lower performance. - logger.debug("Warmup") - self.exec_br_load(test_cases[0], map_args, 5) - - # Fetch the core count once. It doesn't change while the router is running. - # We cannot get this until the router has been up for a few seconds. If you shorten - # the warmup for some reason, make sure to add a delay. - cores = self.core_count() + if self.debug_run: + cores = 3 + else: + logger.debug("Warmup") + self.exec_br_load(test_cases[0], map_args, 5) + + # Fetch the core count once. It doesn't change while the router is running. + # We cannot get this until the router has been up for a few seconds. If you shorten + # the warmup for some reason, make sure to add a delay. + cores = self.core_count() # At long last, run the tests. results = Results(cores, self.coremark, self.mmbm, self.packet_size) diff --git a/acceptance/router_benchmark/brload/main.go b/acceptance/router_benchmark/brload/main.go index 48cf770126..b537636b20 100644 --- a/acceptance/router_benchmark/brload/main.go +++ b/acceptance/router_benchmark/brload/main.go @@ -73,13 +73,16 @@ var ( "out_transit": cases.OutTransit, "br_transit": cases.BrTransit, } - logConsole string - dir string - testDuration time.Duration - packetSize int - numStreams uint16 - caseToRun caseChoice - interfaces []string + logConsole string + dir string + testDuration time.Duration + numPackets int + packetSize int + numStreams uint16 + caseToRun caseChoice + interfaces []string + internAddrOverrides []string + publicAddrOverrides []string ) func main() { @@ -102,6 +105,7 @@ func main() { }, } runCmd.Flags().DurationVar(&testDuration, "duration", time.Second*15, "Test duration") + runCmd.Flags().IntVar(&numPackets, "num-packets", -1, "Maximum number of packets") runCmd.Flags().IntVar(&packetSize, "packet-size", 172, "Total size of each packet sent") runCmd.Flags().Uint16Var(&numStreams, "num-streams", 4, "Number of independent streams (flowID) to use") @@ -113,9 +117,24 @@ func main() { `label=[,] where is the host device that matches the