From 7e1823428e627c12efe6314c60fdca6681c728fa Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Thu, 2 Jan 2025 18:42:48 +0000 Subject: [PATCH 1/5] improve reading from tar files --- navis/io/base.py | 261 ++++++++++++++++++++++++--------------------- navis/io/swc_io.py | 70 ++++++------ 2 files changed, 177 insertions(+), 154 deletions(-) diff --git a/navis/io/base.py b/navis/io/base.py index 627890bd..1d6dcf1f 100644 --- a/navis/io/base.py +++ b/navis/io/base.py @@ -100,7 +100,9 @@ def wrapper(*args, **kwargs): break if self.errors == "raise": - raise ReadError(f"Error reading {id}. See above traceback for details.") from e + raise ReadError( + f"Error reading {id}. See above traceback for details." + ) from e elif self.errors == "log": logger.exception(f"Failed to read {id}", exc_info=True) @@ -299,7 +301,7 @@ def __init__( read_binary: bool = False, attrs: Optional[Dict[str, Any]] = None, ignore_hidden=True, - errors="raise" + errors="raise", ): self.attrs = attrs self.fmt = fmt @@ -424,7 +426,6 @@ def read_from_zip( files: Union[str, List[str]], zippath: os.PathLike, attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", ) -> "core.NeuronList": """Read given files from a zip into a NeuronList. @@ -438,8 +439,6 @@ def read_from_zip( Path to zip file. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -459,7 +458,7 @@ def read_from_zip( n = self.read_bytes(zip.read(file), attrs=merge_dicts(props, attrs)) neurons.append(n) except BaseException: - if on_error == "ignore": + if self.errors == "ignore": logger.warning(f'Failed to read "{file.filename}" from zip.') else: raise @@ -472,7 +471,6 @@ def read_zip( parallel="auto", limit: Optional[int] = None, attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", ) -> "core.NeuronList": """Read files from a zip into a NeuronList. @@ -486,8 +484,6 @@ def read_zip( Limit the number of files read from this directory. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -496,7 +492,7 @@ def read_zip( """ fpath = Path(fpath).expanduser() read_fn = partial( - self.read_from_zip, zippath=fpath, attrs=attrs, on_error=on_error + self.read_from_zip, zippath=fpath, attrs=attrs ) neurons = parallel_read_archive( read_fn=read_fn, @@ -507,67 +503,15 @@ def read_zip( ) return self.format_output(neurons) - def read_from_tar( - self, - files: Union[str, List[str]], - tarpath: os.PathLike, - attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", - ) -> "core.NeuronList": - """Read given files from a tar into a NeuronList. - - Typically not used directly but via `read_tar()` dispatcher. - - Parameters - ---------- - files : tarfile.TarInfo | list thereof - Files inside the tar file to read. - tarpath : str | os.PathLike - Path to tar file. - attrs : dict or None - Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. 
- - Returns - ------- - core.NeuronList - - """ - p = Path(tarpath) - files = utils.make_iterable(files) - - neurons = [] - with tarfile.open(p, "r") as tf: - for file in files: - # Note the `file` is of type tarfile.TarInfo here - props = self.parse_filename(file.name.split("/")[-1]) - props["origin"] = str(p) - try: - n = self.read_bytes( - tf.extractfile(file).read(), attrs=merge_dicts(props, attrs) - ) - neurons.append(n) - except BaseException: - if on_error == "ignore": - logger.warning(f'Failed to read "{file.filename}" from tar.') - else: - raise - - return self.format_output(neurons) - def read_tar( self, fpath: os.PathLike, - parallel="auto", limit: Optional[int] = None, attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", + ignore_hidden: bool = True ) -> "core.NeuronList": """Read files from a tar archive into a NeuronList. - This is a dispatcher for `.read_from_tar`. - Parameters ---------- fpath : str | os.PathLike @@ -584,17 +528,89 @@ def read_tar( core.NeuronList """ - fpath = Path(fpath).expanduser() - read_fn = partial( - self.read_from_tar, tarpath=fpath, attrs=attrs, on_error=on_error - ) - neurons = parallel_read_archive( - read_fn=read_fn, - fpath=fpath, - file_ext=self.is_valid_file, - limit=limit, - parallel=parallel, + p = Path(fpath).expanduser() + file_ext = self.is_valid_file + + # Check the content of the tar file + # N.B. the TarInfo objects are hashable but the hash changes + # when the archive is re-opened. Therefore, we track the + # filenames and not the TarInfo objects. + to_read = [] + with tarfile.open(p, "r") as tf: + for i, file in enumerate(tf): + fpath = file.name # full path inside the tar + fname = fpath.split("/")[-1] # just the filename + if ignore_hidden and fname.startswith("._"): + continue + if callable(file_ext): + if self.is_valid_file(file): + to_read.append(fpath) + elif file_ext == "*": + to_read.append(fpath) + elif file_ext and fname.endswith(file_ext): + to_read.append(fpath) + elif "." not in file.filename: + to_read.append(fpath) + + if isinstance(limit, int) and i >= limit: + break + + if isinstance(limit, list): + to_read = [f for f in to_read if f in limit] + elif isinstance(limit, slice): + to_read = to_read[limit] + elif isinstance(limit, str): + # Check if limit is a regex + if rgx.search(limit): + to_read = [f for f in to_read if re.search(limit, f.split("/")[-1])] + else: + to_read = [f for f in to_read if limit in f.split("/")[-1]] + + # Wrapper for progess bar + prog = partial( + config.tqdm, + desc="Importing", + total=len(to_read), + disable=config.pbar_hide, + leave=config.pbar_leave, ) + + # N.B. tar.gz is a bunch of files concatenated and then compressed! + # In consequence, random access is very slow because we may have to seek + # through the whole archive to find the start of the requested file. + # The workaround is to open the archive in streaming (e.g. "r|gz") mode, + # iterate through the files in sequence and exract if the file is requested. + # This is also why we are not using parallel processing here. 
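The streaming pattern referred to in the comment above boils down to the following minimal sketch; the archive path and member names are placeholders, not part of this patch:

    import tarfile

    wanted = {"neurons/123.swc", "neurons/456.swc"}

    # "r|*" opens the archive for sequential reading with transparent compression;
    # members can only be read while the stream is positioned on them.
    with tarfile.open("skeletons.tar.gz", "r|*") as tf:
        for member in tf:  # yields tarfile.TarInfo objects in archive order
            if member.name not in wanted:
                continue
            data = tf.extractfile(member).read()  # read before advancing the stream
            wanted.discard(member.name)
            if not wanted:  # stop early once every requested file has been read
                break

Because the archive is traversed in a single pass, per-file workers cannot be used, which is why the rewritten `read_tar()` above drops the `parallel` option.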
+ # See also https://tinyurl.com/5n8wz54m (links to StackOverflow) + neurons = [] + to_read = set(to_read) # faster lookup + with prog() as pbar: + # Open the tar file in streaming mode with transparent compression + with tarfile.open(p, "r|*") as tf: + for t in tf: + # Skip files we don't want to read + if t.name not in to_read: + continue + try: + props = self.parse_filename(t.name.split("/")[-1]) + props["origin"] = str(p) + n = self.read_bytes( + tf.extractfile(t).read(), + attrs=merge_dicts(props, attrs), + ) + neurons.append(n) + to_read.remove(t.name) + pbar.update() + except BaseException as e: + if self.errors == "ignore": + logger.warning(f'Failed to read "{t.name}" from tar.') + else: + raise + + # If we have read all (requested) files we can stop + if not len(to_read): + break + return self.format_output(neurons) def read_ftp( @@ -783,7 +799,9 @@ def read_url( r.raise_for_status() props = self.parse_filename(url.split("/")[-1]) props["origin"] = url - return self.read_buffer(io.BytesIO(r.content), attrs=merge_dicts(props, attrs)) + return self.read_buffer( + io.BytesIO(r.content), attrs=merge_dicts(props, attrs) + ) def read_string( self, s: str, attrs: Optional[Dict[str, Any]] = None @@ -803,7 +821,8 @@ def read_string( """ sio = io.StringIO(s) return self.read_buffer( - sio, attrs=merge_dicts({"name": self.name_fallback, "origin": "string"}, attrs) + sio, + attrs=merge_dicts({"name": self.name_fallback, "origin": "string"}, attrs), ) def read_bytes( @@ -824,7 +843,8 @@ def read_bytes( """ sio = io.BytesIO(s) return self.read_buffer( - sio, attrs=merge_dicts({"name": self.name_fallback, "origin": "string"}, attrs) + sio, + attrs=merge_dicts({"name": self.name_fallback, "origin": "string"}, attrs), ) @handle_errors @@ -903,7 +923,9 @@ def read_any_single( p = Path(obj).expanduser() if p.suffix == ".zip": return self.read_zip(p, attrs=attrs) - elif p.suffix in (".tar", "tar.gz", "tar.bz"): + elif any( + str(p).endswith(f) for f in (".tar", "tar.gz", "tar.bz", "tar.bz2") + ): return self.read_tar(p, attrs=attrs) return self.read_file_path(p, attrs=attrs) if obj.startswith("http://") or obj.startswith("https://"): @@ -1001,12 +1023,18 @@ def read_any( core.NeuronObject """ if utils.is_iterable(obj) and not hasattr(obj, "read"): - return self.read_any_multi(obj, parallel, include_subdirs, attrs=attrs) + return self.read_any_multi( + obj, parallel=parallel, include_subdirs=include_subdirs, attrs=attrs + ) else: try: if is_dir(obj): return self.read_directory( - obj, include_subdirs, parallel, limit, attrs=attrs + obj, + include_subdirs, + parallle=parallel, + limit=limit, + attrs=attrs, ) except TypeError: pass @@ -1014,11 +1042,15 @@ def read_any( if os.path.isfile(os.path.expanduser(obj)) and str(obj).endswith( ".zip" ): - return self.read_zip(obj, parallel, limit, attrs=attrs) + return self.read_zip( + obj, parallel=parallel, limit=limit, attrs=attrs + ) if os.path.isfile(os.path.expanduser(obj)) and ".tar" in str(obj): - return self.read_tar(obj, parallel, limit, attrs=attrs) + return self.read_tar(obj, limit=limit, attrs=attrs) if isinstance(obj, str) and obj.startswith("ftp://"): - return self.read_ftp(obj, parallel, limit, attrs=attrs) + return self.read_ftp( + obj, parallel=parallel, limit=limit, attrs=attrs + ) except TypeError: pass return self.read_any_single(obj, attrs=attrs) @@ -1102,10 +1134,7 @@ def _extract_connectors(self, nodes: pd.DataFrame) -> Optional[pd.DataFrame]: class ImageReader(BaseReader): - """Reader for image data. 
- - - """ + """Reader for image data.""" def __init__(self, output, thin, threshold, dotprop_kwargs, **kwargs): super().__init__(**kwargs) @@ -1286,9 +1315,15 @@ def parallel_read(read_fn, objs, parallel="auto") -> List["core.NeuronList"]: def parallel_read_archive( - read_fn, fpath, file_ext, limit=None, parallel="auto", ignore_hidden=True + read_fn, + fpath, + file_ext, + reader=None, + limit=None, + parallel="auto", + ignore_hidden=True, ) -> List["core.NeuronList"]: - """Read neurons from a archive (zip or tar), potentially in parallel. + """Read neurons from a ZIP archive, potentially in parallel. Reader function must be picklable. @@ -1323,43 +1358,23 @@ def parallel_read_archive( # Check zip content p = Path(fpath) to_read = [] - - if p.name.endswith(".zip"): - with ZipFile(p, "r") as zip: - for i, file in enumerate(zip.filelist): - fname = file.filename.split("/")[-1] - if ignore_hidden and fname.startswith("._"): - continue - if callable(file_ext): - if file_ext(file): - to_read.append(file) - elif file_ext == "*": - to_read.append(file) - elif file_ext and fname.endswith(file_ext): - to_read.append(file) - elif "." not in file.filename: - to_read.append(file) - - if isinstance(limit, int) and i >= limit: - break - elif ".tar" in p.name: # can be ".tar", "tar.gz" or "tar.bz" - with tarfile.open(p, "r") as tf: - for i, file in enumerate(tf): - fname = file.name.split("/")[-1] - if ignore_hidden and fname.startswith("._"): - continue - if callable(file_ext): - if file_ext(file): - to_read.append(file) - elif file_ext == "*": - to_read.append(file) - elif file_ext and fname.endswith(file_ext): - to_read.append(file) - elif "." not in file.filename: + with ZipFile(p, "r") as zip: + for i, file in enumerate(zip.filelist): + fname = file.filename.split("/")[-1] + if ignore_hidden and fname.startswith("._"): + continue + if callable(file_ext): + if file_ext(file): to_read.append(file) + elif file_ext == "*": + to_read.append(file) + elif file_ext and fname.endswith(file_ext): + to_read.append(file) + elif "." not in file.filename: + to_read.append(file) - if isinstance(limit, int) and i >= limit: - break + if isinstance(limit, int) and i >= limit: + break if isinstance(limit, list): to_read = [f for f in to_read if f in limit] diff --git a/navis/io/swc_io.py b/navis/io/swc_io.py index 8444af52..debddaa5 100644 --- a/navis/io/swc_io.py +++ b/navis/io/swc_io.py @@ -252,18 +252,20 @@ def read_header_rows(f: TextIO): return out -def read_swc(f: Union[str, pd.DataFrame, Iterable], - connector_labels: Optional[Dict[str, Union[str, int]]] = {}, - soma_label: Union[str, int] = 1, - include_subdirs: bool = False, - delimiter: str = ' ', - parallel: Union[bool, int] = 'auto', - precision: int = 32, - fmt: str = "{name}.swc", - read_meta: bool = True, - limit: Optional[int] = None, - errors: str = 'raise', - **kwargs) -> 'core.NeuronObject': +def read_swc( + f: Union[str, pd.DataFrame, Iterable], + connector_labels: Optional[Dict[str, Union[str, int]]] = {}, + soma_label: Union[str, int] = 1, + include_subdirs: bool = False, + delimiter: str = " ", + parallel: Union[bool, int] = "auto", + precision: int = 32, + fmt: str = "{name}.swc", + read_meta: bool = True, + limit: Optional[int] = None, + errors: str = "raise", + **kwargs, +) -> "core.NeuronObject": """Create Neuron/List from SWC file. This import is following format specified @@ -292,13 +294,15 @@ def read_swc(f: Union[str, pd.DataFrame, Iterable], delimiter : str Delimiter to use. Passed to `pandas.read_csv`. 
parallel : "auto" | bool | int - Defaults to `auto` which means only use parallel - processing if more than 200 SWC are imported. Spawning - and joining processes causes overhead and is - considerably slower for imports of small numbers of - neurons. Integer will be interpreted as the - number of processes to use (defaults to - `os.cpu_count() // 2`). + Whether to use parallel processes for reading: + - "auto" (default): will use parallel processing if + more than 200 SWCs are imported. + - Integers will be interpreted as the number of + processes to use. Defaults to `os.cpu_count() // 2`. + - False will use a single process. + Ignored for tar archives. Please note that spawning + processes incurs an overhead and might not be faster + for small numbers of files. precision : int [8, 16, 32, 64] | None Precision for data. Defaults to 32 bit integers/floats. If `None` will let pandas infer data types - this @@ -397,21 +401,25 @@ def read_swc(f: Union[str, pd.DataFrame, Iterable], # point to an existing file or a folder MUST be a SWC) which will lead to # strange error messages. # The easiest fix is to implement a small sanity check here: - if isinstance(f, str) and '\n' not in f and not utils.is_url(f): + if isinstance(f, str) and "\n" not in f and not utils.is_url(f): # If this looks like a path p = Path(f).expanduser() if not p.is_dir() and not p.is_file(): - raise FileNotFoundError(f'"{f}" looks like a directory or filepath ' - 'but does not appear to exist.') - - reader = SwcReader(connector_labels=connector_labels, - soma_label=soma_label, - delimiter=delimiter, - precision=precision, - read_meta=read_meta, - fmt=fmt, - errors=errors, - attrs=kwargs) + raise FileNotFoundError( + f'"{f}" looks like a directory or filepath ' + "but does not appear to exist." 
+ ) + + reader = SwcReader( + connector_labels=connector_labels, + soma_label=soma_label, + delimiter=delimiter, + precision=precision, + read_meta=read_meta, + fmt=fmt, + errors=errors, + attrs=kwargs, + ) res = reader.read_any(f, include_subdirs, parallel, limit=limit) failed = [] From 6d4390d8342c5c235e0be9fbff356ef1de786402 Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Thu, 2 Jan 2025 18:43:30 +0000 Subject: [PATCH 2/5] read_swc: deal with potential additional columns + formatting --- navis/io/swc_io.py | 222 +++++++++++++++++++++++++-------------------- 1 file changed, 123 insertions(+), 99 deletions(-) diff --git a/navis/io/swc_io.py b/navis/io/swc_io.py index debddaa5..d42faead 100644 --- a/navis/io/swc_io.py +++ b/navis/io/swc_io.py @@ -31,12 +31,12 @@ # Set up logging logger = config.get_logger(__name__) -NODE_COLUMNS = ('node_id', 'label', 'x', 'y', 'z', 'radius', 'parent_id') +NODE_COLUMNS = ("node_id", "label", "x", "y", "z", "radius", "parent_id") COMMENT = "#" DEFAULT_DELIMITER = " " DEFAULT_PRECISION = 32 DEFAULT_FMT = "{name}.swc" -NA_VALUES = [None, 'None'] +NA_VALUES = [None, "None"] class SwcReader(base.BaseReader): @@ -48,17 +48,15 @@ def __init__( precision: int = DEFAULT_PRECISION, read_meta: bool = False, fmt: str = DEFAULT_FMT, - errors: str = 'raise', - attrs: Optional[Dict[str, Any]] = None + errors: str = "raise", + attrs: Optional[Dict[str, Any]] = None, ): - if not fmt.endswith('.swc'): + if not fmt.endswith(".swc"): raise ValueError('`fmt` must end with ".swc"') - super().__init__(fmt=fmt, - attrs=attrs, - file_ext='.swc', - errors=errors, - name_fallback='SWC') + super().__init__( + fmt=fmt, attrs=attrs, file_ext=".swc", errors=errors, name_fallback="SWC" + ) self.connector_labels = connector_labels or dict() self.soma_label = soma_label self.delimiter = delimiter @@ -66,19 +64,19 @@ def __init__( int_, float_ = base.parse_precision(precision) self._dtypes = { - 'node_id': int_, - 'parent_id': int_, - 'label': 'category', - 'x': float_, - 'y': float_, - 'z': float_, - 'radius': float_, + "node_id": int_, + "parent_id": int_, + "label": "category", + "x": float_, + "y": float_, + "z": float_, + "radius": float_, } @base.handle_errors def read_buffer( self, f: IO, attrs: Optional[Dict[str, Any]] = None - ) -> 'core.TreeNeuron': + ) -> "core.TreeNeuron": """Read buffer into a TreeNeuron. Parameters @@ -107,9 +105,22 @@ def read_buffer( skiprows=len(header_rows), comment=COMMENT, header=None, - na_values=NA_VALUES + na_values=NA_VALUES, ) - nodes.columns = NODE_COLUMNS + if len(nodes.columns) < len(NODE_COLUMNS): + raise ValueError("Not enough columns in SWC file.") + elif len(nodes.columns) > len(NODE_COLUMNS): + logger.warning( + f"Found {len(nodes.columns)} instead of the expected 7 " + "columns in SWC file. Assuming additional columns are " + "custom properties. You can silence this warning by setting " + "`navis.set_loggers('ERROR')`." 
+ ) + nodes.columns = ( + list(NODE_COLUMNS) + nodes.columns[len(NODE_COLUMNS) :].tolist() + ) + else: + nodes.columns = NODE_COLUMNS except pd.errors.EmptyDataError: # If file is totally empty, return an empty neuron # Note that the TreeNeuron will still complain but it's a better @@ -119,17 +130,19 @@ def read_buffer( # Check for row with JSON-formatted meta data # Expected format '# Meta: {"id": "12345"}' if self.read_meta: - meta_row = [r for r in header_rows if r.lower().startswith('# meta:')] + meta_row = [r for r in header_rows if r.lower().startswith("# meta:")] if meta_row: meta_data = json.loads(meta_row[0][7:].strip()) attrs = base.merge_dicts(meta_data, attrs) - return self.read_dataframe(nodes, base.merge_dicts({'swc_header': '\n'.join(header_rows)}, attrs)) + return self.read_dataframe( + nodes, base.merge_dicts({"swc_header": "\n".join(header_rows)}, attrs) + ) @base.handle_errors def read_dataframe( self, nodes: pd.DataFrame, attrs: Optional[Dict[str, Any]] = None - ) -> 'core.TreeNeuron': + ) -> "core.TreeNeuron": """Convert a SWC-like DataFrame into a TreeNeuron. Parameters @@ -143,20 +156,19 @@ def read_dataframe( core.TreeNeuron """ n = core.TreeNeuron( - sanitise_nodes( - nodes.astype(self._dtypes, errors='ignore', copy=False) - ), - connectors=self._extract_connectors(nodes)) + sanitise_nodes(nodes.astype(self._dtypes, errors="ignore", copy=False)), + connectors=self._extract_connectors(nodes), + ) if self.soma_label is not None: is_soma_node = n.nodes.label.values == self.soma_label if any(is_soma_node): n.soma = n.nodes.node_id.values[is_soma_node][0] - attrs = self._make_attributes({'name': 'SWC', 'origin': 'DataFrame'}, attrs) + attrs = self._make_attributes({"name": "SWC", "origin": "DataFrame"}, attrs) # SWC is special - we do not want to register it - n.swc_header = attrs.pop('swc_header', '') + n.swc_header = attrs.pop("swc_header", "") # Try adding properties one-by-one. If one fails, we'll keep track of it # in the `.meta` attribute @@ -172,9 +184,7 @@ def read_dataframe( return n - def _extract_connectors( - self, nodes: pd.DataFrame - ) -> Optional[pd.DataFrame]: + def _extract_connectors(self, nodes: pd.DataFrame) -> Optional[pd.DataFrame]: """Infer outgoing/incoming connectors from node labels. 
Parameters @@ -190,14 +200,12 @@ def _extract_connectors( return None to_concat = [ - pd.DataFrame( - [], columns=['node_id', 'connector_id', 'type', 'x', 'y', 'z'] - ) + pd.DataFrame([], columns=["node_id", "connector_id", "type", "x", "y", "z"]) ] for name, val in self.connector_labels.items(): - cn = nodes[nodes.label == val][['node_id', 'x', 'y', 'z']].copy() - cn['connector_id'] = None - cn['type'] = name + cn = nodes[nodes.label == val][["node_id", "x", "y", "z"]].copy() + cn["connector_id"] = None + cn["type"] = name to_concat.append(cn) return pd.concat(to_concat, axis=0) @@ -215,9 +223,9 @@ def sanitise_nodes(nodes: pd.DataFrame, allow_empty=True) -> pd.DataFrame: pandas.DataFrame """ if not allow_empty and nodes.empty: - raise ValueError('No data found in SWC.') + raise ValueError("No data found in SWC.") - is_na = nodes[['node_id', 'parent_id', 'x', 'y', 'z']].isna().any(axis=1) + is_na = nodes[["node_id", "parent_id", "x", "y", "z"]].isna().any(axis=1) if is_na.any(): # Remove nodes with missing data @@ -225,7 +233,7 @@ def sanitise_nodes(nodes: pd.DataFrame, allow_empty=True) -> pd.DataFrame: # Because we removed nodes, we'll have to run a more complicated root # detection - nodes.loc[~nodes.parent_id.isin(nodes.node_id), 'parent_id'] = -1 + nodes.loc[~nodes.parent_id.isin(nodes.node_id), "parent_id"] = -1 return nodes @@ -424,27 +432,31 @@ def read_swc( failed = [] for n in core.NeuronList(res): - if not hasattr(n, 'meta'): + if not hasattr(n, "meta"): continue failed += list(n.meta.keys()) if failed: failed = list(set(failed)) - logger.warning('Some meta data could not be directly attached to the ' - 'neuron(s) - probably some clash with intrinsic ' - 'properties. You can find these data attached as ' - '`.meta` dictionary.') + logger.warning( + "Some meta data could not be directly attached to the " + "neuron(s) - probably some clash with intrinsic " + "properties. You can find these data attached as " + "`.meta` dictionary." + ) return res -def write_swc(x: 'core.NeuronObject', - filepath: Union[str, Path], - header: Optional[str] = None, - write_meta: Union[bool, List[str], dict] = True, - labels: Union[str, dict, bool] = True, - export_connectors: bool = False, - return_node_map: bool = False) -> None: +def write_swc( + x: "core.NeuronObject", + filepath: Union[str, Path], + header: Optional[str] = None, + write_meta: Union[bool, List[str], dict] = True, + labels: Union[str, dict, bool] = True, + export_connectors: bool = False, + return_node_map: bool = False, +) -> None: """Write TreeNeuron(s) to SWC. Follows the format specified @@ -547,40 +559,50 @@ def write_swc(x: 'core.NeuronObject', if not isinstance(n, core.TreeNeuron): msg = f'Can only write TreeNeurons to SWC, not "{type(n)}"' if isinstance(n, core.Dotprops): - msg += (". For Dotprops, you can use either `navis.write_nrrd`" - " or `navis.write_parquet`.") + msg += ( + ". For Dotprops, you can use either `navis.write_nrrd`" + " or `navis.write_parquet`." + ) raise TypeError(msg) elif not isinstance(x, core.TreeNeuron): msg = f'Can only write TreeNeurons to SWC, not "{type(n)}"' if isinstance(n, core.Dotprops): - msg += (". For Dotprops, you can use either `navis.write_nrrd`" - " or `navis.write_parquet`.") + msg += ( + ". For Dotprops, you can use either `navis.write_nrrd`" + " or `navis.write_parquet`." 
+ ) raise TypeError(msg) - writer = base.Writer(write_func=_write_swc, ext='.swc') + writer = base.Writer(write_func=_write_swc, ext=".swc") - return writer.write_any(x, - filepath=filepath, - header=header, - write_meta=write_meta, - labels=labels, - export_connectors=export_connectors, - return_node_map=return_node_map) + return writer.write_any( + x, + filepath=filepath, + header=header, + write_meta=write_meta, + labels=labels, + export_connectors=export_connectors, + return_node_map=return_node_map, + ) -def _write_swc(x: Union['core.TreeNeuron', 'core.Dotprops'], - filepath: Union[str, Path], - header: Optional[str] = None, - write_meta: Union[bool, List[str], dict] = True, - labels: Union[str, dict, bool] = True, - export_connectors: bool = False, - return_node_map: bool = False) -> None: +def _write_swc( + x: Union["core.TreeNeuron", "core.Dotprops"], + filepath: Union[str, Path], + header: Optional[str] = None, + write_meta: Union[bool, List[str], dict] = True, + labels: Union[str, dict, bool] = True, + export_connectors: bool = False, + return_node_map: bool = False, +) -> None: """Write single TreeNeuron to file.""" # Generate SWC table - res = make_swc_table(x, - labels=labels, - export_connectors=export_connectors, - return_node_map=return_node_map) + res = make_swc_table( + x, + labels=labels, + export_connectors=export_connectors, + return_node_map=return_node_map, + ) if return_node_map: swc, node_map = res[0], res[1] @@ -602,7 +624,7 @@ def _write_swc(x: Union['core.TreeNeuron', 'core.Dotprops'], elif isinstance(write_meta, list): props = {k: str(getattr(x, k, None)) for k in write_meta} else: - props = {k: str(getattr(x, k, None)) for k in ['id', 'name', 'units']} + props = {k: str(getattr(x, k, None)) for k in ["id", "name", "units"]} header += f"# Meta: {json.dumps(props)}\n" header += dedent("""\ # PointNo Label X Y Z Radius Parent @@ -613,25 +635,27 @@ def _write_swc(x: Union['core.TreeNeuron', 'core.Dotprops'], header += dedent("""\ # 7 = presynapses, 8 = postsynapses """) - elif not header.endswith('\n'): - header += '\n' + elif not header.endswith("\n"): + header += "\n" - with open(filepath, 'w') as file: + with open(filepath, "w") as file: # Write header file.write(header) # Write data - writer = csv.writer(file, delimiter=' ') + writer = csv.writer(file, delimiter=" ") writer.writerows(swc.astype(str).values) if return_node_map: return node_map -def make_swc_table(x: Union['core.TreeNeuron', 'core.Dotprops'], - labels: Union[str, dict, bool] = None, - export_connectors: bool = False, - return_node_map: bool = False) -> pd.DataFrame: +def make_swc_table( + x: Union["core.TreeNeuron", "core.Dotprops"], + labels: Union[str, dict, bool] = None, + export_connectors: bool = False, + return_node_map: bool = False, +) -> pd.DataFrame: """Generate a node table compliant with the SWC format. 
Follows the format specified @@ -673,28 +697,28 @@ def make_swc_table(x: Union['core.TreeNeuron', 'core.Dotprops'], swc = x.nodes.copy() # Add labels - swc['label'] = 0 + swc["label"] = 0 if isinstance(labels, dict): - swc['label'] = swc.index.map(labels) + swc["label"] = swc.index.map(labels) elif isinstance(labels, str): - swc['label'] = swc[labels] + swc["label"] = swc[labels] elif labels: # Add end/branch labels - swc.loc[swc.type == 'branch', 'label'] = 5 - swc.loc[swc.type == 'end', 'label'] = 6 + swc.loc[swc.type == "branch", "label"] = 5 + swc.loc[swc.type == "end", "label"] = 6 # Add soma label if not isinstance(x.soma, type(None)): soma = utils.make_iterable(x.soma) - swc.loc[swc.node_id.isin(soma), 'label'] = 1 + swc.loc[swc.node_id.isin(soma), "label"] = 1 if export_connectors: # Add synapse label pre_ids = x.presynapses.node_id.values post_ids = x.postsynapses.node_id.values - swc.loc[swc.node_id.isin(pre_ids), 'label'] = 7 - swc.loc[swc.node_id.isin(post_ids), 'label'] = 8 + swc.loc[swc.node_id.isin(pre_ids), "label"] = 7 + swc.loc[swc.node_id.isin(post_ids), "label"] = 8 # Sort such that the parent is always before the child - swc.sort_values('parent_id', ascending=True, inplace=True) + swc.sort_values("parent_id", ascending=True, inplace=True) # Reset index swc.reset_index(drop=True, inplace=True) @@ -702,18 +726,18 @@ def make_swc_table(x: Union['core.TreeNeuron', 'core.Dotprops'], # Generate mapping new_ids = dict(zip(swc.node_id.values, swc.index.values + 1)) - swc['node_id'] = swc.node_id.map(new_ids) + swc["node_id"] = swc.node_id.map(new_ids) # Lambda prevents potential issue with missing parents - swc['parent_id'] = swc.parent_id.map(lambda x: new_ids.get(x, -1)) + swc["parent_id"] = swc.parent_id.map(lambda x: new_ids.get(x, -1)) # Get things in order - swc = swc[['node_id', 'label', 'x', 'y', 'z', 'radius', 'parent_id']] + swc = swc[["node_id", "label", "x", "y", "z", "radius", "parent_id"]] # Make sure radius has no `None` - swc['radius'] = swc.radius.fillna(0) + swc["radius"] = swc.radius.fillna(0) # Adjust column titles - swc.columns = ['PointNo', 'Label', 'X', 'Y', 'Z', 'Radius', 'Parent'] + swc.columns = ["PointNo", "Label", "X", "Y", "Z", "Radius", "Parent"] if return_node_map: return swc, new_ids From 44e4c312d461a133f9e524ab5e2abd9de99bc755 Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Fri, 3 Jan 2025 10:05:56 +0000 Subject: [PATCH 3/5] BaseReader: fix typo in parameter name --- navis/io/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/navis/io/base.py b/navis/io/base.py index 1d6dcf1f..f02c36ee 100644 --- a/navis/io/base.py +++ b/navis/io/base.py @@ -1032,7 +1032,7 @@ def read_any( return self.read_directory( obj, include_subdirs, - parallle=parallel, + parallel=parallel, limit=limit, attrs=attrs, ) From 4940a1ec1754898a56833cb1abb69c5b08113098 Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Fri, 3 Jan 2025 10:35:01 +0000 Subject: [PATCH 4/5] tutorials: change URL download.brainlib.org:8811 -> download.brainimagelibrary.org --- docs/examples/0_io/tutorial_io_00_skeletons.py | 2 +- docs/examples/1_plotting/tutorial_plotting_06_cortex.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/examples/0_io/tutorial_io_00_skeletons.py b/docs/examples/0_io/tutorial_io_00_skeletons.py index 364571fd..b0796aab 100644 --- a/docs/examples/0_io/tutorial_io_00_skeletons.py +++ b/docs/examples/0_io/tutorial_io_00_skeletons.py @@ -75,7 +75,7 @@ # %% # From an FTP folder: -nl = 
navis.read_swc('ftp://download.brainlib.org:8811/biccn/zeng/pseq/morph/200526/', limit=3) +nl = navis.read_swc('ftp://download.brainimagelibrary.org/biccn/zeng/pseq/morph/200526/', limit=3) # !!! tip diff --git a/docs/examples/1_plotting/tutorial_plotting_06_cortex.py b/docs/examples/1_plotting/tutorial_plotting_06_cortex.py index f94d1fed..b2b24377 100644 --- a/docs/examples/1_plotting/tutorial_plotting_06_cortex.py +++ b/docs/examples/1_plotting/tutorial_plotting_06_cortex.py @@ -32,7 +32,7 @@ import navis nl = navis.read_swc( - "ftp://download.brainlib.org:8811/biccn/zeng/pseq/morph/200526/", + "ftp://download.brainimagelibrary.org/biccn/zeng/pseq/morph/200526/", limit=[f"{i}_transformed.swc" for i in ids], # Load only the files we need fmt="{name,id:int}_transformed.swc", # Parse the name and id from the file name ) From 5be8f964de8772f7c3c4597318e1ec34f48b304f Mon Sep 17 00:00:00 2001 From: Philipp Schlegel Date: Fri, 3 Jan 2025 10:39:16 +0000 Subject: [PATCH 5/5] i/o base: remove left-over on_error parameters, use self.errors instead --- navis/io/base.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/navis/io/base.py b/navis/io/base.py index f02c36ee..319a0e59 100644 --- a/navis/io/base.py +++ b/navis/io/base.py @@ -520,8 +520,6 @@ def read_tar( Limit the number of files read from this directory. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -618,8 +616,7 @@ def read_ftp( url, parallel="auto", limit: Optional[int] = None, - attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", + attrs: Optional[Dict[str, Any]] = None ) -> "core.NeuronList": """Read files from an FTP server. @@ -633,8 +630,6 @@ def read_ftp( Limit the number of files read from this directory. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -654,7 +649,7 @@ def read_ftp( else: port = 21 # default port - read_fn = partial(self.read_from_ftp, attrs=attrs, on_error=on_error) + read_fn = partial(self.read_from_ftp, attrs=attrs) neurons = parallel_read_ftp( read_fn=read_fn, server=server, @@ -670,8 +665,7 @@ def read_from_ftp( self, files: Union[str, List[str]], ftp: FTP, - attrs: Optional[Dict[str, Any]] = None, - on_error: Union[Literal["ignore", Literal["raise"]]] = "ignore", + attrs: Optional[Dict[str, Any]] = None ) -> "core.NeuronList": """Read given files from an FTP server into a NeuronList. @@ -687,8 +681,6 @@ def read_from_ftp( `_FTP` global variable. attrs : dict or None Arbitrary attributes to include in the TreeNeuron. - on_error : 'ignore' | 'raise' - What do do when error is encountered. Returns ------- @@ -716,7 +708,7 @@ def read_from_ftp( n = self.read_buffer(f, attrs=merge_dicts(props, attrs)) neurons.append(n) except BaseException: - if on_error == "ignore": + if self.errors == "ignore": logger.warning(f'Failed to read "{file}" from FTP.') else: raise
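Taken together, these patches read tar archives in a single streaming pass and let `limit` restrict which members are loaded. A rough usage sketch follows; the archive name and the ID prefix in the regex are placeholders:

    import navis

    # .tar / .tar.gz / .tar.bz2 paths are dispatched to the new streaming read_tar()
    nl = navis.read_swc('skeletons.tar.gz', limit=10)            # int: stop scanning after the first ~10 archive members
    nl = navis.read_swc('skeletons.tar.gz', limit=slice(0, 10))  # slice: first 10 matching files
    nl = navis.read_swc('skeletons.tar.gz', limit='^720575.*')   # regex (or plain substring) matched against file names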