From 7e1823428e627c12efe6314c60fdca6681c728fa Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Thu, 2 Jan 2025 18:42:48 +0000 Subject: [PATCH 1/5] improve reading from tar files --- navis/io/base.py | 261 ++++++++++++++++++++++++--------------------- navis/io/swc_io.py | 70 ++++++------ 2 files changed, 177 insertions(+), 154 deletions(-) diff --git a/navis/io/base.py b/navis/io/base.py index 627890bd..1d6dcf1f 100644 --- a/navis/io/base.py +++ b/navis/io/base.py @@ -100,7 +100,9 @@ def wrapper(*args, **kwargs): break if self.errors == "raise": - raise ReadError(f"Error reading {id}. See above traceback for details.") from e + raise ReadError( + f"Error reading {id}. See above traceback for details." + ) from e elif self.errors == "log": logger.exception(f"Failed to read {id}", exc_info=True) @@ -299,7 +301,7 @@ def __init__( read_binary: bool = False, attrs: Optional[Dict[str, Any]] = None, ignore_hidden=True, - errors="raise" + errors="raise", ): self.attrs = attrs self.fmt = fmt @@ -424,7 +426,6 @@ def read_from_zip( files: Union[str, List[str]], zippath: os.PathLike, attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", ) -> "core.NeuronList": """Read given files from a zip into a NeuronList. @@ -438,8 +439,6 @@ def read_from_zip( Path to zip file. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -459,7 +458,7 @@ def read_from_zip( n = self.read_bytes(zip.read(file), attrs=merge_dicts(props, attrs)) neurons.append(n) except BaseException: - if on_error == "ignore": + if self.errors == "ignore": logger.warning(f'Failed to read "{file.filename}" from zip.') else: raise @@ -472,7 +471,6 @@ def read_zip( parallel="auto", limit: Optional[int] = None, attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", ) -> "core.NeuronList": """Read files from a zip into a NeuronList. @@ -486,8 +484,6 @@ def read_zip( Limit the number of files read from this directory. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -496,7 +492,7 @@ def read_zip( """ fpath = Path(fpath).expanduser() read_fn = partial( - self.read_from_zip, zippath=fpath, attrs=attrs, on_error=on_error + self.read_from_zip, zippath=fpath, attrs=attrs ) neurons = parallel_read_archive( read_fn=read_fn, @@ -507,67 +503,15 @@ def read_zip( ) return self.format_output(neurons) - def read_from_tar( - self, - files: Union[str, List[str]], - tarpath: os.PathLike, - attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", - ) -> "core.NeuronList": - """Read given files from a tar into a NeuronList. - - Typically not used directly but via `read_tar()` dispatcher. - - Parameters - ---------- - files : tarfile.TarInfo | list thereof - Files inside the tar file to read. - tarpath : str | os.PathLike - Path to tar file. - attrs : dict or None - Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. 
- - Returns - ------- - core.NeuronList - - """ - p = Path(tarpath) - files = utils.make_iterable(files) - - neurons = [] - with tarfile.open(p, "r") as tf: - for file in files: - # Note the `file` is of type tarfile.TarInfo here - props = self.parse_filename(file.name.split("/")[-1]) - props["origin"] = str(p) - try: - n = self.read_bytes( - tf.extractfile(file).read(), attrs=merge_dicts(props, attrs) - ) - neurons.append(n) - except BaseException: - if on_error == "ignore": - logger.warning(f'Failed to read "{file.filename}" from tar.') - else: - raise - - return self.format_output(neurons) - def read_tar( self, fpath: os.PathLike, - parallel="auto", limit: Optional[int] = None, attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", + ignore_hidden: bool = True ) -> "core.NeuronList": """Read files from a tar archive into a NeuronList. - This is a dispatcher for `.read_from_tar`. - Parameters ---------- fpath : str | os.PathLike @@ -584,17 +528,89 @@ def read_tar( core.NeuronList """ - fpath = Path(fpath).expanduser() - read_fn = partial( - self.read_from_tar, tarpath=fpath, attrs=attrs, on_error=on_error - ) - neurons = parallel_read_archive( - read_fn=read_fn, - fpath=fpath, - file_ext=self.is_valid_file, - limit=limit, - parallel=parallel, + p = Path(fpath).expanduser() + file_ext = self.is_valid_file + + # Check the content of the tar file + # N.B. the TarInfo objects are hashable but the hash changes + # when the archive is re-opened. Therefore, we track the + # filenames and not the TarInfo objects. + to_read = [] + with tarfile.open(p, "r") as tf: + for i, file in enumerate(tf): + fpath = file.name # full path inside the tar + fname = fpath.split("/")[-1] # just the filename + if ignore_hidden and fname.startswith("._"): + continue + if callable(file_ext): + if self.is_valid_file(file): + to_read.append(fpath) + elif file_ext == "*": + to_read.append(fpath) + elif file_ext and fname.endswith(file_ext): + to_read.append(fpath) + elif "." not in file.filename: + to_read.append(fpath) + + if isinstance(limit, int) and i >= limit: + break + + if isinstance(limit, list): + to_read = [f for f in to_read if f in limit] + elif isinstance(limit, slice): + to_read = to_read[limit] + elif isinstance(limit, str): + # Check if limit is a regex + if rgx.search(limit): + to_read = [f for f in to_read if re.search(limit, f.split("/")[-1])] + else: + to_read = [f for f in to_read if limit in f.split("/")[-1]] + + # Wrapper for progess bar + prog = partial( + config.tqdm, + desc="Importing", + total=len(to_read), + disable=config.pbar_hide, + leave=config.pbar_leave, ) + + # N.B. tar.gz is a bunch of files concatenated and then compressed! + # In consequence, random access is very slow because we may have to seek + # through the whole archive to find the start of the requested file. + # The workaround is to open the archive in streaming (e.g. "r|gz") mode, + # iterate through the files in sequence and exract if the file is requested. + # This is also why we are not using parallel processing here. 
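The streaming pattern referred to in the comment above boils down to the following minimal sketch; the archive path and member names are placeholders, not part of this patch:

    import tarfile

    wanted = {"neurons/123.swc", "neurons/456.swc"}

    # "r|*" opens the archive for sequential reading with transparent compression;
    # members can only be read while the stream is positioned on them.
    with tarfile.open("skeletons.tar.gz", "r|*") as tf:
        for member in tf:  # yields tarfile.TarInfo objects in archive order
            if member.name not in wanted:
                continue
            data = tf.extractfile(member).read()  # read before advancing the stream
            wanted.discard(member.name)
            if not wanted:  # stop early once every requested file has been read
                break

Because the archive is traversed in a single pass, per-file workers cannot be used, which is why the rewritten `read_tar()` above drops the `parallel` option.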
+ # See also https://tinyurl.com/5n8wz54m (links to StackOverflow) + neurons = [] + to_read = set(to_read) # faster lookup + with prog() as pbar: + # Open the tar file in streaming mode with transparent compression + with tarfile.open(p, "r|*") as tf: + for t in tf: + # Skip files we don't want to read + if t.name not in to_read: + continue + try: + props = self.parse_filename(t.name.split("/")[-1]) + props["origin"] = str(p) + n = self.read_bytes( + tf.extractfile(t).read(), + attrs=merge_dicts(props, attrs), + ) + neurons.append(n) + to_read.remove(t.name) + pbar.update() + except BaseException as e: + if self.errors == "ignore": + logger.warning(f'Failed to read "{t.name}" from tar.') + else: + raise + + # If we have read all (requested) files we can stop + if not len(to_read): + break + return self.format_output(neurons) def read_ftp( @@ -783,7 +799,9 @@ def read_url( r.raise_for_status() props = self.parse_filename(url.split("/")[-1]) props["origin"] = url - return self.read_buffer(io.BytesIO(r.content), attrs=merge_dicts(props, attrs)) + return self.read_buffer( + io.BytesIO(r.content), attrs=merge_dicts(props, attrs) + ) def read_string( self, s: str, attrs: Optional[Dict[str, Any]] = None @@ -803,7 +821,8 @@ def read_string( """ sio = io.StringIO(s) return self.read_buffer( - sio, attrs=merge_dicts({"name": self.name_fallback, "origin": "string"}, attrs) + sio, + attrs=merge_dicts({"name": self.name_fallback, "origin": "string"}, attrs), ) def read_bytes( @@ -824,7 +843,8 @@ def read_bytes( """ sio = io.BytesIO(s) return self.read_buffer( - sio, attrs=merge_dicts({"name": self.name_fallback, "origin": "string"}, attrs) + sio, + attrs=merge_dicts({"name": self.name_fallback, "origin": "string"}, attrs), ) @handle_errors @@ -903,7 +923,9 @@ def read_any_single( p = Path(obj).expanduser() if p.suffix == ".zip": return self.read_zip(p, attrs=attrs) - elif p.suffix in (".tar", "tar.gz", "tar.bz"): + elif any( + str(p).endswith(f) for f in (".tar", "tar.gz", "tar.bz", "tar.bz2") + ): return self.read_tar(p, attrs=attrs) return self.read_file_path(p, attrs=attrs) if obj.startswith("http://") or obj.startswith("https://"): @@ -1001,12 +1023,18 @@ def read_any( core.NeuronObject """ if utils.is_iterable(obj) and not hasattr(obj, "read"): - return self.read_any_multi(obj, parallel, include_subdirs, attrs=attrs) + return self.read_any_multi( + obj, parallel=parallel, include_subdirs=include_subdirs, attrs=attrs + ) else: try: if is_dir(obj): return self.read_directory( - obj, include_subdirs, parallel, limit, attrs=attrs + obj, + include_subdirs, + parallle=parallel, + limit=limit, + attrs=attrs, ) except TypeError: pass @@ -1014,11 +1042,15 @@ def read_any( if os.path.isfile(os.path.expanduser(obj)) and str(obj).endswith( ".zip" ): - return self.read_zip(obj, parallel, limit, attrs=attrs) + return self.read_zip( + obj, parallel=parallel, limit=limit, attrs=attrs + ) if os.path.isfile(os.path.expanduser(obj)) and ".tar" in str(obj): - return self.read_tar(obj, parallel, limit, attrs=attrs) + return self.read_tar(obj, limit=limit, attrs=attrs) if isinstance(obj, str) and obj.startswith("ftp://"): - return self.read_ftp(obj, parallel, limit, attrs=attrs) + return self.read_ftp( + obj, parallel=parallel, limit=limit, attrs=attrs + ) except TypeError: pass return self.read_any_single(obj, attrs=attrs) @@ -1102,10 +1134,7 @@ def _extract_connectors(self, nodes: pd.DataFrame) -> Optional[pd.DataFrame]: class ImageReader(BaseReader): - """Reader for image data. 
- - - """ + """Reader for image data.""" def __init__(self, output, thin, threshold, dotprop_kwargs, **kwargs): super().__init__(**kwargs) @@ -1286,9 +1315,15 @@ def parallel_read(read_fn, objs, parallel="auto") -> List["core.NeuronList"]: def parallel_read_archive( - read_fn, fpath, file_ext, limit=None, parallel="auto", ignore_hidden=True + read_fn, + fpath, + file_ext, + reader=None, + limit=None, + parallel="auto", + ignore_hidden=True, ) -> List["core.NeuronList"]: - """Read neurons from a archive (zip or tar), potentially in parallel. + """Read neurons from a ZIP archive, potentially in parallel. Reader function must be picklable. @@ -1323,43 +1358,23 @@ def parallel_read_archive( # Check zip content p = Path(fpath) to_read = [] - - if p.name.endswith(".zip"): - with ZipFile(p, "r") as zip: - for i, file in enumerate(zip.filelist): - fname = file.filename.split("/")[-1] - if ignore_hidden and fname.startswith("._"): - continue - if callable(file_ext): - if file_ext(file): - to_read.append(file) - elif file_ext == "*": - to_read.append(file) - elif file_ext and fname.endswith(file_ext): - to_read.append(file) - elif "." not in file.filename: - to_read.append(file) - - if isinstance(limit, int) and i >= limit: - break - elif ".tar" in p.name: # can be ".tar", "tar.gz" or "tar.bz" - with tarfile.open(p, "r") as tf: - for i, file in enumerate(tf): - fname = file.name.split("/")[-1] - if ignore_hidden and fname.startswith("._"): - continue - if callable(file_ext): - if file_ext(file): - to_read.append(file) - elif file_ext == "*": - to_read.append(file) - elif file_ext and fname.endswith(file_ext): - to_read.append(file) - elif "." not in file.filename: + with ZipFile(p, "r") as zip: + for i, file in enumerate(zip.filelist): + fname = file.filename.split("/")[-1] + if ignore_hidden and fname.startswith("._"): + continue + if callable(file_ext): + if file_ext(file): to_read.append(file) + elif file_ext == "*": + to_read.append(file) + elif file_ext and fname.endswith(file_ext): + to_read.append(file) + elif "." not in file.filename: + to_read.append(file) - if isinstance(limit, int) and i >= limit: - break + if isinstance(limit, int) and i >= limit: + break if isinstance(limit, list): to_read = [f for f in to_read if f in limit] diff --git a/navis/io/swc_io.py b/navis/io/swc_io.py index 8444af52..debddaa5 100644 --- a/navis/io/swc_io.py +++ b/navis/io/swc_io.py @@ -252,18 +252,20 @@ def read_header_rows(f: TextIO): return out -def read_swc(f: Union[str, pd.DataFrame, Iterable], - connector_labels: Optional[Dict[str, Union[str, int]]] = {}, - soma_label: Union[str, int] = 1, - include_subdirs: bool = False, - delimiter: str = ' ', - parallel: Union[bool, int] = 'auto', - precision: int = 32, - fmt: str = "{name}.swc", - read_meta: bool = True, - limit: Optional[int] = None, - errors: str = 'raise', - **kwargs) -> 'core.NeuronObject': +def read_swc( + f: Union[str, pd.DataFrame, Iterable], + connector_labels: Optional[Dict[str, Union[str, int]]] = {}, + soma_label: Union[str, int] = 1, + include_subdirs: bool = False, + delimiter: str = " ", + parallel: Union[bool, int] = "auto", + precision: int = 32, + fmt: str = "{name}.swc", + read_meta: bool = True, + limit: Optional[int] = None, + errors: str = "raise", + **kwargs, +) -> "core.NeuronObject": """Create Neuron/List from SWC file. This import is following format specified @@ -292,13 +294,15 @@ def read_swc(f: Union[str, pd.DataFrame, Iterable], delimiter : str Delimiter to use. Passed to `pandas.read_csv`. 
parallel : "auto" | bool | int - Defaults to `auto` which means only use parallel - processing if more than 200 SWC are imported. Spawning - and joining processes causes overhead and is - considerably slower for imports of small numbers of - neurons. Integer will be interpreted as the - number of processes to use (defaults to - `os.cpu_count() // 2`). + Whether to use parallel processes for reading: + - "auto" (default): will use parallel processing if + more than 200 SWCs are imported. + - Integers will be interpreted as the number of + processes to use. Defaults to `os.cpu_count() // 2`. + - False will use a single process. + Ignored for tar archives. Please note that spawning + processes incurs an overhead and might not be faster + for small numbers of files. precision : int [8, 16, 32, 64] | None Precision for data. Defaults to 32 bit integers/floats. If `None` will let pandas infer data types - this @@ -397,21 +401,25 @@ def read_swc(f: Union[str, pd.DataFrame, Iterable], # point to an existing file or a folder MUST be a SWC) which will lead to # strange error messages. # The easiest fix is to implement a small sanity check here: - if isinstance(f, str) and '\n' not in f and not utils.is_url(f): + if isinstance(f, str) and "\n" not in f and not utils.is_url(f): # If this looks like a path p = Path(f).expanduser() if not p.is_dir() and not p.is_file(): - raise FileNotFoundError(f'"{f}" looks like a directory or filepath ' - 'but does not appear to exist.') - - reader = SwcReader(connector_labels=connector_labels, - soma_label=soma_label, - delimiter=delimiter, - precision=precision, - read_meta=read_meta, - fmt=fmt, - errors=errors, - attrs=kwargs) + raise FileNotFoundError( + f'"{f}" looks like a directory or filepath ' + "but does not appear to exist." 
+ ) + + reader = SwcReader( + connector_labels=connector_labels, + soma_label=soma_label, + delimiter=delimiter, + precision=precision, + read_meta=read_meta, + fmt=fmt, + errors=errors, + attrs=kwargs, + ) res = reader.read_any(f, include_subdirs, parallel, limit=limit) failed = [] From 6d4390d8342c5c235e0be9fbff356ef1de786402 Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Thu, 2 Jan 2025 18:43:30 +0000 Subject: [PATCH 2/5] read_swc: deal with potential additional columns + formatting --- navis/io/swc_io.py | 222 +++++++++++++++++++++++++-------------------- 1 file changed, 123 insertions(+), 99 deletions(-) diff --git a/navis/io/swc_io.py b/navis/io/swc_io.py index debddaa5..d42faead 100644 --- a/navis/io/swc_io.py +++ b/navis/io/swc_io.py @@ -31,12 +31,12 @@ # Set up logging logger = config.get_logger(__name__) -NODE_COLUMNS = ('node_id', 'label', 'x', 'y', 'z', 'radius', 'parent_id') +NODE_COLUMNS = ("node_id", "label", "x", "y", "z", "radius", "parent_id") COMMENT = "#" DEFAULT_DELIMITER = " " DEFAULT_PRECISION = 32 DEFAULT_FMT = "{name}.swc" -NA_VALUES = [None, 'None'] +NA_VALUES = [None, "None"] class SwcReader(base.BaseReader): @@ -48,17 +48,15 @@ def __init__( precision: int = DEFAULT_PRECISION, read_meta: bool = False, fmt: str = DEFAULT_FMT, - errors: str = 'raise', - attrs: Optional[Dict[str, Any]] = None + errors: str = "raise", + attrs: Optional[Dict[str, Any]] = None, ): - if not fmt.endswith('.swc'): + if not fmt.endswith(".swc"): raise ValueError('`fmt` must end with ".swc"') - super().__init__(fmt=fmt, - attrs=attrs, - file_ext='.swc', - errors=errors, - name_fallback='SWC') + super().__init__( + fmt=fmt, attrs=attrs, file_ext=".swc", errors=errors, name_fallback="SWC" + ) self.connector_labels = connector_labels or dict() self.soma_label = soma_label self.delimiter = delimiter @@ -66,19 +64,19 @@ def __init__( int_, float_ = base.parse_precision(precision) self._dtypes = { - 'node_id': int_, - 'parent_id': int_, - 'label': 'category', - 'x': float_, - 'y': float_, - 'z': float_, - 'radius': float_, + "node_id": int_, + "parent_id": int_, + "label": "category", + "x": float_, + "y": float_, + "z": float_, + "radius": float_, } @base.handle_errors def read_buffer( self, f: IO, attrs: Optional[Dict[str, Any]] = None - ) -> 'core.TreeNeuron': + ) -> "core.TreeNeuron": """Read buffer into a TreeNeuron. Parameters @@ -107,9 +105,22 @@ def read_buffer( skiprows=len(header_rows), comment=COMMENT, header=None, - na_values=NA_VALUES + na_values=NA_VALUES, ) - nodes.columns = NODE_COLUMNS + if len(nodes.columns) < len(NODE_COLUMNS): + raise ValueError("Not enough columns in SWC file.") + elif len(nodes.columns) > len(NODE_COLUMNS): + logger.warning( + f"Found {len(nodes.columns)} instead of the expected 7 " + "columns in SWC file. Assuming additional columns are " + "custom properties. You can silence this warning by setting " + "`navis.set_loggers('ERROR')`." 
+ ) + nodes.columns = ( + list(NODE_COLUMNS) + nodes.columns[len(NODE_COLUMNS) :].tolist() + ) + else: + nodes.columns = NODE_COLUMNS except pd.errors.EmptyDataError: # If file is totally empty, return an empty neuron # Note that the TreeNeuron will still complain but it's a better @@ -119,17 +130,19 @@ def read_buffer( # Check for row with JSON-formatted meta data # Expected format '# Meta: {"id": "12345"}' if self.read_meta: - meta_row = [r for r in header_rows if r.lower().startswith('# meta:')] + meta_row = [r for r in header_rows if r.lower().startswith("# meta:")] if meta_row: meta_data = json.loads(meta_row[0][7:].strip()) attrs = base.merge_dicts(meta_data, attrs) - return self.read_dataframe(nodes, base.merge_dicts({'swc_header': '\n'.join(header_rows)}, attrs)) + return self.read_dataframe( + nodes, base.merge_dicts({"swc_header": "\n".join(header_rows)}, attrs) + ) @base.handle_errors def read_dataframe( self, nodes: pd.DataFrame, attrs: Optional[Dict[str, Any]] = None - ) -> 'core.TreeNeuron': + ) -> "core.TreeNeuron": """Convert a SWC-like DataFrame into a TreeNeuron. Parameters @@ -143,20 +156,19 @@ def read_dataframe( core.TreeNeuron """ n = core.TreeNeuron( - sanitise_nodes( - nodes.astype(self._dtypes, errors='ignore', copy=False) - ), - connectors=self._extract_connectors(nodes)) + sanitise_nodes(nodes.astype(self._dtypes, errors="ignore", copy=False)), + connectors=self._extract_connectors(nodes), + ) if self.soma_label is not None: is_soma_node = n.nodes.label.values == self.soma_label if any(is_soma_node): n.soma = n.nodes.node_id.values[is_soma_node][0] - attrs = self._make_attributes({'name': 'SWC', 'origin': 'DataFrame'}, attrs) + attrs = self._make_attributes({"name": "SWC", "origin": "DataFrame"}, attrs) # SWC is special - we do not want to register it - n.swc_header = attrs.pop('swc_header', '') + n.swc_header = attrs.pop("swc_header", "") # Try adding properties one-by-one. If one fails, we'll keep track of it # in the `.meta` attribute @@ -172,9 +184,7 @@ def read_dataframe( return n - def _extract_connectors( - self, nodes: pd.DataFrame - ) -> Optional[pd.DataFrame]: + def _extract_connectors(self, nodes: pd.DataFrame) -> Optional[pd.DataFrame]: """Infer outgoing/incoming connectors from node labels. 
Parameters @@ -190,14 +200,12 @@ def _extract_connectors( return None to_concat = [ - pd.DataFrame( - [], columns=['node_id', 'connector_id', 'type', 'x', 'y', 'z'] - ) + pd.DataFrame([], columns=["node_id", "connector_id", "type", "x", "y", "z"]) ] for name, val in self.connector_labels.items(): - cn = nodes[nodes.label == val][['node_id', 'x', 'y', 'z']].copy() - cn['connector_id'] = None - cn['type'] = name + cn = nodes[nodes.label == val][["node_id", "x", "y", "z"]].copy() + cn["connector_id"] = None + cn["type"] = name to_concat.append(cn) return pd.concat(to_concat, axis=0) @@ -215,9 +223,9 @@ def sanitise_nodes(nodes: pd.DataFrame, allow_empty=True) -> pd.DataFrame: pandas.DataFrame """ if not allow_empty and nodes.empty: - raise ValueError('No data found in SWC.') + raise ValueError("No data found in SWC.") - is_na = nodes[['node_id', 'parent_id', 'x', 'y', 'z']].isna().any(axis=1) + is_na = nodes[["node_id", "parent_id", "x", "y", "z"]].isna().any(axis=1) if is_na.any(): # Remove nodes with missing data @@ -225,7 +233,7 @@ def sanitise_nodes(nodes: pd.DataFrame, allow_empty=True) -> pd.DataFrame: # Because we removed nodes, we'll have to run a more complicated root # detection - nodes.loc[~nodes.parent_id.isin(nodes.node_id), 'parent_id'] = -1 + nodes.loc[~nodes.parent_id.isin(nodes.node_id), "parent_id"] = -1 return nodes @@ -424,27 +432,31 @@ def read_swc( failed = [] for n in core.NeuronList(res): - if not hasattr(n, 'meta'): + if not hasattr(n, "meta"): continue failed += list(n.meta.keys()) if failed: failed = list(set(failed)) - logger.warning('Some meta data could not be directly attached to the ' - 'neuron(s) - probably some clash with intrinsic ' - 'properties. You can find these data attached as ' - '`.meta` dictionary.') + logger.warning( + "Some meta data could not be directly attached to the " + "neuron(s) - probably some clash with intrinsic " + "properties. You can find these data attached as " + "`.meta` dictionary." + ) return res -def write_swc(x: 'core.NeuronObject', - filepath: Union[str, Path], - header: Optional[str] = None, - write_meta: Union[bool, List[str], dict] = True, - labels: Union[str, dict, bool] = True, - export_connectors: bool = False, - return_node_map: bool = False) -> None: +def write_swc( + x: "core.NeuronObject", + filepath: Union[str, Path], + header: Optional[str] = None, + write_meta: Union[bool, List[str], dict] = True, + labels: Union[str, dict, bool] = True, + export_connectors: bool = False, + return_node_map: bool = False, +) -> None: """Write TreeNeuron(s) to SWC. Follows the format specified @@ -547,40 +559,50 @@ def write_swc(x: 'core.NeuronObject', if not isinstance(n, core.TreeNeuron): msg = f'Can only write TreeNeurons to SWC, not "{type(n)}"' if isinstance(n, core.Dotprops): - msg += (". For Dotprops, you can use either `navis.write_nrrd`" - " or `navis.write_parquet`.") + msg += ( + ". For Dotprops, you can use either `navis.write_nrrd`" + " or `navis.write_parquet`." + ) raise TypeError(msg) elif not isinstance(x, core.TreeNeuron): msg = f'Can only write TreeNeurons to SWC, not "{type(n)}"' if isinstance(n, core.Dotprops): - msg += (". For Dotprops, you can use either `navis.write_nrrd`" - " or `navis.write_parquet`.") + msg += ( + ". For Dotprops, you can use either `navis.write_nrrd`" + " or `navis.write_parquet`." 
+ ) raise TypeError(msg) - writer = base.Writer(write_func=_write_swc, ext='.swc') + writer = base.Writer(write_func=_write_swc, ext=".swc") - return writer.write_any(x, - filepath=filepath, - header=header, - write_meta=write_meta, - labels=labels, - export_connectors=export_connectors, - return_node_map=return_node_map) + return writer.write_any( + x, + filepath=filepath, + header=header, + write_meta=write_meta, + labels=labels, + export_connectors=export_connectors, + return_node_map=return_node_map, + ) -def _write_swc(x: Union['core.TreeNeuron', 'core.Dotprops'], - filepath: Union[str, Path], - header: Optional[str] = None, - write_meta: Union[bool, List[str], dict] = True, - labels: Union[str, dict, bool] = True, - export_connectors: bool = False, - return_node_map: bool = False) -> None: +def _write_swc( + x: Union["core.TreeNeuron", "core.Dotprops"], + filepath: Union[str, Path], + header: Optional[str] = None, + write_meta: Union[bool, List[str], dict] = True, + labels: Union[str, dict, bool] = True, + export_connectors: bool = False, + return_node_map: bool = False, +) -> None: """Write single TreeNeuron to file.""" # Generate SWC table - res = make_swc_table(x, - labels=labels, - export_connectors=export_connectors, - return_node_map=return_node_map) + res = make_swc_table( + x, + labels=labels, + export_connectors=export_connectors, + return_node_map=return_node_map, + ) if return_node_map: swc, node_map = res[0], res[1] @@ -602,7 +624,7 @@ def _write_swc(x: Union['core.TreeNeuron', 'core.Dotprops'], elif isinstance(write_meta, list): props = {k: str(getattr(x, k, None)) for k in write_meta} else: - props = {k: str(getattr(x, k, None)) for k in ['id', 'name', 'units']} + props = {k: str(getattr(x, k, None)) for k in ["id", "name", "units"]} header += f"# Meta: {json.dumps(props)}\n" header += dedent("""\ # PointNo Label X Y Z Radius Parent @@ -613,25 +635,27 @@ def _write_swc(x: Union['core.TreeNeuron', 'core.Dotprops'], header += dedent("""\ # 7 = presynapses, 8 = postsynapses """) - elif not header.endswith('\n'): - header += '\n' + elif not header.endswith("\n"): + header += "\n" - with open(filepath, 'w') as file: + with open(filepath, "w") as file: # Write header file.write(header) # Write data - writer = csv.writer(file, delimiter=' ') + writer = csv.writer(file, delimiter=" ") writer.writerows(swc.astype(str).values) if return_node_map: return node_map -def make_swc_table(x: Union['core.TreeNeuron', 'core.Dotprops'], - labels: Union[str, dict, bool] = None, - export_connectors: bool = False, - return_node_map: bool = False) -> pd.DataFrame: +def make_swc_table( + x: Union["core.TreeNeuron", "core.Dotprops"], + labels: Union[str, dict, bool] = None, + export_connectors: bool = False, + return_node_map: bool = False, +) -> pd.DataFrame: """Generate a node table compliant with the SWC format. 
Follows the format specified @@ -673,28 +697,28 @@ def make_swc_table(x: Union['core.TreeNeuron', 'core.Dotprops'], swc = x.nodes.copy() # Add labels - swc['label'] = 0 + swc["label"] = 0 if isinstance(labels, dict): - swc['label'] = swc.index.map(labels) + swc["label"] = swc.index.map(labels) elif isinstance(labels, str): - swc['label'] = swc[labels] + swc["label"] = swc[labels] elif labels: # Add end/branch labels - swc.loc[swc.type == 'branch', 'label'] = 5 - swc.loc[swc.type == 'end', 'label'] = 6 + swc.loc[swc.type == "branch", "label"] = 5 + swc.loc[swc.type == "end", "label"] = 6 # Add soma label if not isinstance(x.soma, type(None)): soma = utils.make_iterable(x.soma) - swc.loc[swc.node_id.isin(soma), 'label'] = 1 + swc.loc[swc.node_id.isin(soma), "label"] = 1 if export_connectors: # Add synapse label pre_ids = x.presynapses.node_id.values post_ids = x.postsynapses.node_id.values - swc.loc[swc.node_id.isin(pre_ids), 'label'] = 7 - swc.loc[swc.node_id.isin(post_ids), 'label'] = 8 + swc.loc[swc.node_id.isin(pre_ids), "label"] = 7 + swc.loc[swc.node_id.isin(post_ids), "label"] = 8 # Sort such that the parent is always before the child - swc.sort_values('parent_id', ascending=True, inplace=True) + swc.sort_values("parent_id", ascending=True, inplace=True) # Reset index swc.reset_index(drop=True, inplace=True) @@ -702,18 +726,18 @@ def make_swc_table(x: Union['core.TreeNeuron', 'core.Dotprops'], # Generate mapping new_ids = dict(zip(swc.node_id.values, swc.index.values + 1)) - swc['node_id'] = swc.node_id.map(new_ids) + swc["node_id"] = swc.node_id.map(new_ids) # Lambda prevents potential issue with missing parents - swc['parent_id'] = swc.parent_id.map(lambda x: new_ids.get(x, -1)) + swc["parent_id"] = swc.parent_id.map(lambda x: new_ids.get(x, -1)) # Get things in order - swc = swc[['node_id', 'label', 'x', 'y', 'z', 'radius', 'parent_id']] + swc = swc[["node_id", "label", "x", "y", "z", "radius", "parent_id"]] # Make sure radius has no `None` - swc['radius'] = swc.radius.fillna(0) + swc["radius"] = swc.radius.fillna(0) # Adjust column titles - swc.columns = ['PointNo', 'Label', 'X', 'Y', 'Z', 'Radius', 'Parent'] + swc.columns = ["PointNo", "Label", "X", "Y", "Z", "Radius", "Parent"] if return_node_map: return swc, new_ids From 44e4c312d461a133f9e524ab5e2abd9de99bc755 Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Fri, 3 Jan 2025 10:05:56 +0000 Subject: [PATCH 3/5] BaseReader: fix typo in parameter name --- navis/io/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/navis/io/base.py b/navis/io/base.py index 1d6dcf1f..f02c36ee 100644 --- a/navis/io/base.py +++ b/navis/io/base.py @@ -1032,7 +1032,7 @@ def read_any( return self.read_directory( obj, include_subdirs, - parallle=parallel, + parallel=parallel, limit=limit, attrs=attrs, ) From 4940a1ec1754898a56833cb1abb69c5b08113098 Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Fri, 3 Jan 2025 10:35:01 +0000 Subject: [PATCH 4/5] tutorials: change URL download.brainlib.org:8811 -> download.brainimagelibrary.org --- docs/examples/0_io/tutorial_io_00_skeletons.py | 2 +- docs/examples/1_plotting/tutorial_plotting_06_cortex.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/examples/0_io/tutorial_io_00_skeletons.py b/docs/examples/0_io/tutorial_io_00_skeletons.py index 364571fd..b0796aab 100644 --- a/docs/examples/0_io/tutorial_io_00_skeletons.py +++ b/docs/examples/0_io/tutorial_io_00_skeletons.py @@ -75,7 +75,7 @@ # %% # From an FTP folder: -nl = 
navis.read_swc('ftp://download.brainlib.org:8811/biccn/zeng/pseq/morph/200526/', limit=3) +nl = navis.read_swc('ftp://download.brainimagelibrary.org/biccn/zeng/pseq/morph/200526/', limit=3) # !!! tip diff --git a/docs/examples/1_plotting/tutorial_plotting_06_cortex.py b/docs/examples/1_plotting/tutorial_plotting_06_cortex.py index f94d1fed..b2b24377 100644 --- a/docs/examples/1_plotting/tutorial_plotting_06_cortex.py +++ b/docs/examples/1_plotting/tutorial_plotting_06_cortex.py @@ -32,7 +32,7 @@ import navis nl = navis.read_swc( - "ftp://download.brainlib.org:8811/biccn/zeng/pseq/morph/200526/", + "ftp://download.brainimagelibrary.org/biccn/zeng/pseq/morph/200526/", limit=[f"{i}_transformed.swc" for i in ids], # Load only the files we need fmt="{name,id:int}_transformed.swc", # Parse the name and id from the file name ) From 5be8f964de8772f7c3c4597318e1ec34f48b304f Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Fri, 3 Jan 2025 10:39:16 +0000 Subject: [PATCH 5/5] i/o base: remove left-over on_error parameters, use self.errors instead --- navis/io/base.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/navis/io/base.py b/navis/io/base.py index f02c36ee..319a0e59 100644 --- a/navis/io/base.py +++ b/navis/io/base.py @@ -520,8 +520,6 @@ def read_tar( Limit the number of files read from this directory. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -618,8 +616,7 @@ def read_ftp( url, parallel="auto", limit: Optional[int] = None, - attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", + attrs: Optional[Dict[str, Any]] = None ) -> "core.NeuronList": """Read files from an FTP server. @@ -633,8 +630,6 @@ def read_ftp( Limit the number of files read from this directory. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -654,7 +649,7 @@ def read_ftp( else: port = 21 # default port - read_fn = partial(self.read_from_ftp, attrs=attrs, on_error=on_error) + read_fn = partial(self.read_from_ftp, attrs=attrs) neurons = parallel_read_ftp( read_fn=read_fn, server=server, @@ -670,8 +665,7 @@ def read_from_ftp( self, files: Union[str, List[str]], ftp: FTP, - attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", + attrs: Optional[Dict[str, Any]] = None ) -> "core.NeuronList": """Read given files from an FTP server into a NeuronList. @@ -687,8 +681,6 @@ def read_from_ftp( `_FTP` global variable. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -716,7 +708,7 @@ def read_from_ftp( n = self.read_buffer(f, attrs=merge_dicts(props, attrs)) neurons.append(n) except BaseException: - if on_error == "ignore": + if self.errors == "ignore": logger.warning(f'Failed to read "{file}" from FTP.') else: raise
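Taken together, these patches read tar archives in a single streaming pass and let `limit` restrict which members are loaded. A rough usage sketch follows; the archive name and the ID prefix in the regex are placeholders:

    import navis

    # .tar / .tar.gz / .tar.bz2 paths are dispatched to the new streaming read_tar()
    nl = navis.read_swc('skeletons.tar.gz', limit=10)            # int: stop scanning after the first ~10 archive members
    nl = navis.read_swc('skeletons.tar.gz', limit=slice(0, 10))  # slice: first 10 matching files
    nl = navis.read_swc('skeletons.tar.gz', limit='^720575.*')   # regex (or plain substring) matched against file names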