From 32c6a94da42047658ce3c03d025394f39d885912 Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Fri, 3 Apr 2026 13:46:45 -0700 Subject: [PATCH 01/14] cleaning up legacy logging util usage --- orion/build_manager.py | 87 +++++++------- orion/ingest_pipeline.py | 7 +- orion/kgx_file_merger.py | 7 +- orion/kgx_file_normalizer.py | 44 ++++---- orion/kgx_file_writer.py | 18 ++- orion/loader_interface.py | 8 +- orion/logging.py | 49 ++++++++ orion/memgraph_tools.py | 6 +- orion/merging.py | 7 +- orion/neo4j_tools.py | 6 +- orion/normalization.py | 27 ++--- orion/supplementation.py | 19 ++-- orion/utils.py | 106 +++--------------- parsers/LitCoin/src/bagel/bagel_gpt.py | 6 +- parsers/PHAROS/src/legacy_pharos_mysql.py | 5 +- parsers/PHAROS/src/loadPHAROS.py | 2 +- .../src/get_uniref_taxon_indexes.py | 5 +- parsers/ViralProteome/src/loadUniRef.py | 4 +- 18 files changed, 179 insertions(+), 234 deletions(-) create mode 100644 orion/logging.py diff --git a/orion/build_manager.py b/orion/build_manager.py index 5fb6c2ce..33afc1d6 100644 --- a/orion/build_manager.py +++ b/orion/build_manager.py @@ -8,7 +8,8 @@ from pathlib import Path from xxhash import xxh64_hexdigest -from orion.utils import LoggingUtil, GetDataPullError +from orion.utils import GetDataPullError +from orion.logging import get_orion_logger from orion.data_sources import get_available_data_sources, get_data_source_metadata_path from orion.exceptions import DataVersionError, GraphSpecError from orion.ingest_pipeline import IngestPipeline @@ -27,6 +28,8 @@ from orion.kgx_metadata import KGXGraphMetadata, KGXKnowledgeSource, generate_kgx_schema_file +logger = get_orion_logger("orion.build_manager") + NODES_FILENAME = 'nodes.jsonl' EDGES_FILENAME = 'edges.jsonl' REDUNDANT_EDGES_FILENAME = 'redundant_edges.jsonl' @@ -41,10 +44,6 @@ def __init__(self, graph_specs_dir=None, graph_output_dir=None): - self.logger = LoggingUtil.init_logging("ORION.orion.GraphBuilder", - line_format='medium', - 
log_file_path=os.getenv('ORION_LOGS')) - self.graphs_dir = graph_output_dir if graph_output_dir else self.get_graph_output_dir() self.ingest_pipeline = IngestPipeline() # access to the data sources and their metadata self.graph_specs = {} # graph_id -> GraphSpec all potential graphs that could be built, including sub-graphs @@ -54,7 +53,7 @@ def __init__(self, def build_graph(self, graph_spec: GraphSpec): graph_id = graph_spec.graph_id - self.logger.info(f'Building graph {graph_id}...') + logger.info(f'Building graph {graph_id}...') graph_version = self.determine_graph_version(graph_spec) graph_metadata = self.get_graph_metadata(graph_id, graph_version) @@ -64,28 +63,28 @@ def build_graph(self, graph_spec: GraphSpec): # check for previous builds of this same graph build_status = graph_metadata.get_build_status() if build_status == Metadata.IN_PROGRESS: - self.logger.info(f'Graph {graph_id} version {graph_version} has status: in progress. ' + logger.info(f'Graph {graph_id} version {graph_version} has status: in progress. ' f'This means either the graph is already in the process of being built, ' f'or an error occurred previously that could not be handled. ' f'You may need to clean up and/or remove the failed build.') return False if build_status == Metadata.BROKEN or build_status == Metadata.FAILED: - self.logger.info(f'Graph {graph_id} version {graph_version} previously failed to build. Skipping..') + logger.info(f'Graph {graph_id} version {graph_version} previously failed to build. 
Skipping..') return False if build_status == Metadata.STABLE: self.build_results[graph_id] = {'version': graph_version} - self.logger.info(f'Graph {graph_id} version {graph_version} was already built.') + logger.info(f'Graph {graph_id} version {graph_version} was already built.') else: # if we get here we need to build the graph - self.logger.info(f'Building graph {graph_id} version {graph_version}, checking dependencies...') + logger.info(f'Building graph {graph_id} version {graph_version}, checking dependencies...') if not self.build_dependencies(graph_spec): - self.logger.warning(f'Aborting graph {graph_spec.graph_id} version {graph_version}, building ' + logger.warning(f'Aborting graph {graph_spec.graph_id} version {graph_version}, building ' f'dependencies failed.') return False - self.logger.info(f'Building graph {graph_id} version {graph_version}. ' + logger.info(f'Building graph {graph_id} version {graph_version}. ' f'Dependencies ready, merging sources...') graph_metadata.set_build_status(Metadata.IN_PROGRESS) graph_metadata.set_graph_version(graph_version) @@ -106,52 +105,52 @@ def build_graph(self, graph_spec: GraphSpec): if "merge_error" in merge_metadata: graph_metadata.set_build_error(merge_metadata["merge_error"], current_time) graph_metadata.set_build_status(Metadata.FAILED) - self.logger.error(f'Merge error occured while building graph {graph_id}: ' + logger.error(f'Merge error occured while building graph {graph_id}: ' f'{merge_metadata["merge_error"]}') return False graph_metadata.set_build_info(merge_metadata, current_time) graph_metadata.set_build_status(Metadata.STABLE) - self.logger.info(f'Building graph {graph_id} complete!') + logger.info(f'Building graph {graph_id} complete!') self.build_results[graph_id] = {'version': graph_version} nodes_filepath = os.path.join(graph_output_dir, NODES_FILENAME) edges_filepath = os.path.join(graph_output_dir, EDGES_FILENAME) if not graph_metadata.has_qc(): - self.logger.info(f'Running QC for graph 
{graph_id}...') + logger.info(f'Running QC for graph {graph_id}...') qc_results = validate_graph(nodes_file_path=nodes_filepath, edges_file_path=edges_filepath, graph_id=graph_id, graph_version=graph_version, - logger=self.logger) + logger=logger) graph_metadata.set_qc_results(qc_results) if qc_results['pass']: - self.logger.info(f'QC passed for graph {graph_id}.') + logger.info(f'QC passed for graph {graph_id}.') else: - self.logger.warning(f'QC failed for graph {graph_id}.') + logger.warning(f'QC failed for graph {graph_id}.') # Generate KGX metadata and schema files if not self.has_kgx_metadata(graph_output_dir): - self.logger.info(f'Generating KGX metadata for {graph_id}...') + logger.info(f'Generating KGX metadata for {graph_id}...') self.generate_kgx_metadata_files(graph_metadata=graph_metadata, graph_output_dir=graph_output_dir, graph_output_url=graph_output_url) - self.logger.info(f'KGX metadata generated for {graph_id}.') + logger.info(f'KGX metadata generated for {graph_id}.') if not self.has_kgx_schema(graph_output_dir): - self.logger.info(f'Generating KGX Schema for {graph_id}...') + logger.info(f'Generating KGX Schema for {graph_id}...') generate_kgx_schema_file(nodes_filepath=nodes_filepath, edges_filepath=edges_filepath, output_dir=graph_output_dir, graph_output_url=graph_output_url, graph_name=graph_spec.graph_name, biolink_version=graph_metadata.get_biolink_version()) - self.logger.info(f'KGX Schema generated for {graph_id}.') + logger.info(f'KGX Schema generated for {graph_id}.') needs_meta_kg = not self.has_meta_kg(graph_directory=graph_output_dir) needs_test_data = not self.has_test_data(graph_directory=graph_output_dir) if needs_meta_kg or needs_test_data: - self.logger.info(f'Generating MetaKG and test data for {graph_id}...') + logger.info(f'Generating MetaKG and test data for {graph_id}...') self.generate_meta_kg_and_test_data(graph_directory=graph_output_dir, generate_meta_kg=needs_meta_kg, generate_test_data=needs_test_data) @@ -170,16 
+169,16 @@ def build_graph(self, graph_spec: GraphSpec): # combinations, like: # output_format: [['redundant', 'neo4j', 'answercoalesce'], ['collapsed_qualifiers'], ['neo4j']] if 'redundant_jsonl' in output_formats: - self.logger.info(f'Generating redundant edge KG for {graph_id}...') + logger.info(f'Generating redundant edge KG for {graph_id}...') redundant_filepath = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME) generate_redundant_kg(edges_filepath, redundant_filepath) if 'redundant_neo4j' in output_formats: - self.logger.info(f'Generating redundant edge KG for {graph_id}...') + logger.info(f'Generating redundant edge KG for {graph_id}...') redundant_filepath = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME) if not os.path.exists(redundant_filepath): generate_redundant_kg(edges_filepath, redundant_filepath) - self.logger.info(f'Starting Neo4j dump pipeline for redundant {graph_id}...') + logger.info(f'Starting Neo4j dump pipeline for redundant {graph_id}...') dump_success = create_neo4j_dump(nodes_filepath=nodes_filepath, edges_filepath=redundant_filepath, output_directory=graph_output_dir, @@ -192,16 +191,16 @@ def build_graph(self, graph_spec: GraphSpec): dump_url=f'{graph_output_url}graph_{graph_version}_redundant.db.dump') if 'collapsed_qualifiers_jsonl' in output_formats: - self.logger.info(f'Generating collapsed qualifier predicates KG for {graph_id}...') + logger.info(f'Generating collapsed qualifier predicates KG for {graph_id}...') collapsed_qualifiers_filepath = edges_filepath.replace(EDGES_FILENAME, COLLAPSED_QUALIFIERS_FILENAME) generate_collapsed_qualifiers_kg(edges_filepath, collapsed_qualifiers_filepath) if 'collapsed_qualifiers_neo4j' in output_formats: - self.logger.info(f'Generating collapsed qualifier predicates KG for {graph_id}...') + logger.info(f'Generating collapsed qualifier predicates KG for {graph_id}...') collapsed_qualifiers_filepath = edges_filepath.replace(EDGES_FILENAME, 
COLLAPSED_QUALIFIERS_FILENAME) if not os.path.exists(collapsed_qualifiers_filepath): generate_collapsed_qualifiers_kg(edges_filepath, collapsed_qualifiers_filepath) - self.logger.info(f'Starting Neo4j dump pipeline for {graph_id} with collapsed qualifiers...') + logger.info(f'Starting Neo4j dump pipeline for {graph_id} with collapsed qualifiers...') dump_success = create_neo4j_dump(nodes_filepath=nodes_filepath, edges_filepath=collapsed_qualifiers_filepath, output_directory=graph_output_dir, @@ -215,7 +214,7 @@ def build_graph(self, graph_spec: GraphSpec): f'_collapsed_qualifiers.db.dump') if 'neo4j' in output_formats: - self.logger.info(f'Starting Neo4j dump pipeline for {graph_id}...') + logger.info(f'Starting Neo4j dump pipeline for {graph_id}...') dump_success = create_neo4j_dump(nodes_filepath=nodes_filepath, edges_filepath=edges_filepath, output_directory=graph_output_dir, @@ -228,7 +227,7 @@ def build_graph(self, graph_spec: GraphSpec): dump_url=f'{graph_output_url}graph_{graph_version}.db.dump') if 'memgraph' in output_formats: - self.logger.info(f'Starting memgraph dump pipeline for {graph_id}...') + logger.info(f'Starting memgraph dump pipeline for {graph_id}...') dump_success = create_memgraph_dump(nodes_filepath=nodes_filepath, edges_filepath=edges_filepath, output_directory=graph_output_dir, @@ -241,7 +240,7 @@ def build_graph(self, graph_spec: GraphSpec): dump_url=f'{graph_output_url}memgraph_{graph_version}.cypher') if 'answercoalesce' in output_formats: - self.logger.info(f'Generating answercoalesce files for {graph_id}...') + logger.info(f'Generating answercoalesce files for {graph_id}...') if 'redundant_jsonl' in output_formats or 'redundant_neo4j' in output_formats: edge_filepath_to_use = edges_filepath.replace(EDGES_FILENAME, REDUNDANT_EDGES_FILENAME) else: @@ -262,7 +261,7 @@ def determine_graph_version(self, graph_spec: GraphSpec): for source in graph_spec.sources: if not source.source_version: source.source_version = 
self.ingest_pipeline.get_latest_source_version(source.id) - self.logger.info(f'Using {source.id} version: {source.version}') + logger.info(f'Using {source.id} version: {source.version}') # for sub-graphs, if a graph version isn't specified, # use the graph spec for that subgraph to determine a graph version @@ -271,7 +270,7 @@ def determine_graph_version(self, graph_spec: GraphSpec): subgraph_graph_spec = self.graph_specs.get(subgraph.id, None) if subgraph_graph_spec: subgraph.graph_version = self.determine_graph_version(subgraph_graph_spec) - self.logger.info(f'Using subgraph {graph_spec.graph_id} version: {subgraph.graph_version}') + logger.info(f'Using subgraph {graph_spec.graph_id} version: {subgraph.graph_version}') else: raise GraphSpecError(f'Subgraph {subgraph.id} requested for graph {graph_spec.graph_id} ' f'but the version was not specified and could not be determined without ' @@ -293,7 +292,7 @@ def determine_graph_version(self, graph_spec: GraphSpec): for sub_graph_source in graph_spec.subgraphs]) graph_version = xxh64_hexdigest(composite_version_string) graph_spec.graph_version = graph_version - self.logger.info(f'Version determined for graph {graph_spec.graph_id}: {graph_version} ({composite_version_string})') + logger.info(f'Version determined for graph {graph_spec.graph_id}: {graph_version} ({composite_version_string})') return graph_version def build_dependencies(self, graph_spec: GraphSpec): @@ -306,12 +305,12 @@ def build_dependencies(self, graph_spec: GraphSpec): # subgraph as generated by the current graph spec, otherwise we won't be able to build it. 
subgraph_graph_spec = self.graph_specs.get(subgraph_id, None) if not subgraph_graph_spec: - self.logger.warning(f'Subgraph {subgraph_id} version {subgraph_version} was requested for graph ' + logger.warning(f'Subgraph {subgraph_id} version {subgraph_version} was requested for graph ' f'{graph_id} but it was not found and could not be built without a Graph Spec.') return False if subgraph_version != subgraph_graph_spec.graph_version: - self.logger.error(f'Subgraph {subgraph_id} version {subgraph_version} was specified, but that ' + logger.error(f'Subgraph {subgraph_id} version {subgraph_version} was specified, but that ' f'version of the graph could not be found. It can not be built now because the ' f'current version is {subgraph_graph_spec.graph_version}. Either specify a ' f'version that is already built, or remove the subgraph version specification to ' @@ -319,7 +318,7 @@ def build_dependencies(self, graph_spec: GraphSpec): return False # here the graph specs and versions all look right, but we still need to build the subgraph - self.logger.warning(f'Graph {graph_id}, subgraph dependency {subgraph_id} is not ready. Building now..') + logger.warning(f'Graph {graph_id}, subgraph dependency {subgraph_id} is not ready. 
Building now..') subgraph_build_success = self.build_graph(subgraph_graph_spec) if not subgraph_build_success: return False @@ -333,7 +332,7 @@ def build_dependencies(self, graph_spec: GraphSpec): subgraph_edges_path = self.get_graph_edges_file_path(subgraph_dir) subgraph_source.file_paths = [subgraph_nodes_path, subgraph_edges_path] else: - self.logger.warning(f'Attempting to build graph {graph_id} failed, dependency subgraph {subgraph_id} ' + logger.warning(f'Attempting to build graph {graph_id} failed, dependency subgraph {subgraph_id} ' f'version {subgraph_version} was not built successfully.') return False @@ -344,7 +343,7 @@ def build_dependencies(self, graph_spec: GraphSpec): release_version = data_source.generate_version() release_metadata = source_metadata.get_release_info(release_version) if release_metadata is None: - self.logger.info( + logger.info( f'Attempting to build graph {graph_id}, ' f'dependency {source_id} is not ready. Building now...') pipeline_sucess = self.ingest_pipeline.run_pipeline(source_id, @@ -353,7 +352,7 @@ def build_dependencies(self, graph_spec: GraphSpec): normalization_scheme=data_source.normalization_scheme, supplementation_version=data_source.supplementation_version) if not pipeline_sucess: - self.logger.info(f'While attempting to build {graph_spec.graph_id}, ' + logger.info(f'While attempting to build {graph_spec.graph_id}, ' f'data source pipeline failed for dependency {source_id}...') return False release_metadata = source_metadata.get_release_info(release_version) @@ -391,7 +390,7 @@ def generate_meta_kg_and_test_data(self, graph_edges_file_path = os.path.join(graph_directory, EDGES_FILENAME) mkgb = MetaKnowledgeGraphBuilder(nodes_file_path=graph_nodes_file_path, edges_file_path=graph_edges_file_path, - logger=self.logger) + logger=logger) if generate_meta_kg: meta_kg_file_path = os.path.join(graph_directory, META_KG_FILENAME) mkgb.write_meta_kg_to_file(meta_kg_file_path) @@ -522,7 +521,7 @@ def load_graph_specs(self, 
graph_specs_dir=None): graph_specs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'graph_specs') graph_spec_path = os.path.join(graph_specs_dir, graph_spec_file) if os.path.exists(graph_spec_path): - self.logger.info(f'Loading graph spec: {graph_spec_file}') + logger.info(f'Loading graph spec: {graph_spec_file}') with open(graph_spec_path) as graph_spec_file: graph_spec_yaml = yaml.safe_load(graph_spec_file) self.parse_graph_spec(graph_spec_yaml) @@ -634,7 +633,7 @@ def parse_data_source_spec(self, source_yml): source_id = source_yml['source_id'] if source_id not in get_available_data_sources(): error_message = f'Data source {source_id} is not a valid data source id.' - self.logger.error(error_message + " " + + logger.error(error_message + " " + f'Valid sources are: {", ".join(get_available_data_sources())}') raise GraphSpecError(error_message) diff --git a/orion/ingest_pipeline.py b/orion/ingest_pipeline.py index 208011ac..e80eeb7a 100644 --- a/orion/ingest_pipeline.py +++ b/orion/ingest_pipeline.py @@ -7,7 +7,8 @@ from orion.data_sources import SourceDataLoaderClassFactory, RESOURCE_HOGS, get_available_data_sources from orion.exceptions import DataVersionError -from orion.utils import LoggingUtil, GetDataPullError +from orion.utils import GetDataPullError +from orion.logging import get_orion_logger from orion.kgx_file_normalizer import KGXFileNormalizer from orion.kgx_validation import validate_graph from orion.normalization import NormalizationScheme, NodeNormalizer, EdgeNormalizer, NormalizationFailedError @@ -18,9 +19,7 @@ SOURCE_DATA_LOADER_CLASSES = SourceDataLoaderClassFactory() -logger = LoggingUtil.init_logging("ORION.orion.IngestPipeline", - line_format='medium', - log_file_path=os.getenv('ORION_LOGS')) +logger = get_orion_logger("orion.ingest_pipeline") class IngestPipeline: diff --git a/orion/kgx_file_merger.py b/orion/kgx_file_merger.py index 6e0b423a..c9f0fa47 100644 --- a/orion/kgx_file_merger.py +++ b/orion/kgx_file_merger.py 
@@ -3,15 +3,14 @@ import json from datetime import datetime from itertools import chain -from orion.utils import LoggingUtil, quick_jsonl_file_iterator +from orion.utils import quick_jsonl_file_iterator +from orion.logging import get_orion_logger from orion.kgxmodel import GraphSpec, GraphSource, SubGraphSource from orion.biolink_constants import SUBJECT_ID, OBJECT_ID from orion.merging import GraphMerger, DiskGraphMerger, MemoryGraphMerger from orion.ingest_pipeline import RESOURCE_HOGS -logger = LoggingUtil.init_logging("ORION.orion.KGXFileMerger", - line_format='medium', - log_file_path=os.getenv('ORION_LOGS')) +logger = get_orion_logger("orion.kgx_file_merger") CONNECTED_EDGE_SUBSET = 'connected_edge_subset' DONT_MERGE = 'dont_merge_edges' diff --git a/orion/kgx_file_normalizer.py b/orion/kgx_file_normalizer.py index 3e7628ce..1c55238a 100644 --- a/orion/kgx_file_normalizer.py +++ b/orion/kgx_file_normalizer.py @@ -7,7 +7,8 @@ SUBCLASS_OF, ORIGINAL_OBJECT, ORIGINAL_SUBJECT) from orion.normalization import NormalizationScheme, NodeNormalizer, EdgeNormalizer, EdgeNormalizationResult, \ NormalizationFailedError -from orion.utils import LoggingUtil, chunk_iterator +from orion.utils import chunk_iterator +from orion.logging import get_orion_logger from orion.kgx_file_writer import KGXFileWriter @@ -16,16 +17,11 @@ EDGE_NORMALIZATION_BATCH_SIZE = 1_000_000 -# -# This piece takes KGX-like files and normalizes the nodes and edges for biolink compliance. -# Then it writes the normalized nodes and edges to new files. 
-# -class KGXFileNormalizer: +logger = get_orion_logger("orion.kgx_file_normalizer") - logger = LoggingUtil.init_logging("ORION.orion.KGXFileNormalizer", - line_format='medium', - level=logging.INFO, - log_file_path=os.getenv('ORION_LOGS')) +# KGXFileNormalizer takes KGX jsonl files, normalizes nodes using Babel's Node Normalizer and converts edge +# predicates to biolink compliant curies according to biolink model predicate mappings, outputting normalized KGX files. +class KGXFileNormalizer: def __init__(self, source_nodes_file_path: str, @@ -105,7 +101,7 @@ def normalize_node_file(self): variant_nodes_split_count = 0 variant_nodes_post_norm = 0 - self.logger.info(f'Normalizing nodes and writing to file...') + logger.info(f'Normalizing nodes and writing to file...') try: with jsonlines.open(self.source_nodes_file_path) as source_json_reader,\ KGXFileWriter(nodes_output_file_path=self.nodes_output_file_path) as output_file_writer: @@ -131,7 +127,7 @@ def normalize_node_file(self): # because nodes that fail to normalize are removed from the list regular_nodes_pre_norm += len(regular_nodes) if regular_nodes: - self.logger.debug(f'Normalizing {len(regular_nodes)} regular nodes...') + logger.debug(f'Normalizing {len(regular_nodes)} regular nodes...') try: self.node_normalizer.normalize_node_data(regular_nodes) except Exception as e: @@ -139,12 +135,12 @@ def normalize_node_file(self): actual_error=e) regular_nodes_post_norm += len(regular_nodes) if regular_nodes: - self.logger.info(f'Normalized {regular_nodes_pre_norm} nodes so far...') + logger.info(f'Normalized {regular_nodes_pre_norm} nodes so far...') variant_nodes_pre_norm += len(variant_nodes) if self.has_sequence_variants: if not self.sequence_variants_pre_normalized: - self.logger.debug(f'Normalizing {len(variant_nodes)} sequence variant nodes...') + logger.debug(f'Normalizing {len(variant_nodes)} sequence variant nodes...') self.node_normalizer.normalize_sequence_variants(variant_nodes) else: # skip 
normalizing variants but still @@ -165,13 +161,13 @@ def normalize_node_file(self): variant_nodes_split_count = 0 variant_nodes_post_norm += len(variant_nodes) if variant_nodes: - self.logger.info(f'Normalized {variant_nodes_pre_norm} variant nodes so far...') + logger.info(f'Normalized {variant_nodes_pre_norm} variant nodes so far...') if regular_nodes: - self.logger.debug(f'Writing nodes to file...') + logger.debug(f'Writing nodes to file...') output_file_writer.write_normalized_nodes(regular_nodes) if variant_nodes: - self.logger.debug(f'Writing sequence variant nodes to file...') + logger.debug(f'Writing sequence variant nodes to file...') output_file_writer.write_normalized_nodes(variant_nodes) # grab the number of repeat writes from the file writer @@ -186,7 +182,7 @@ def normalize_node_file(self): f'{e.line}' raise NormalizationFailedError(error_message=norm_error_msg, actual_error=e) - self.logger.debug(f'Writing normalization map to file...') + logger.debug(f'Writing normalization map to file...') normalization_map_info = {'normalization_map': self.node_normalizer.node_normalization_lookup} with open(self.node_norm_map_file_path, "w") as node_norm_map_file: json.dump(normalization_map_info, node_norm_map_file, indent=4) @@ -195,7 +191,7 @@ def normalize_node_file(self): regular_node_norm_failures = self.node_normalizer.failed_to_normalize_ids variant_node_norm_failures = self.node_normalizer.failed_to_normalize_variant_ids if regular_node_norm_failures or variant_node_norm_failures: - self.logger.debug(f'Writing normalization failures to file...') + logger.debug(f'Writing normalization failures to file...') with open(self.node_norm_failures_file_path, "w") as failed_norm_file: for failed_node_id in regular_node_norm_failures: failed_norm_file.write(f'{failed_node_id}\n') @@ -247,7 +243,7 @@ def normalize_edge_file(self): current_edge_norm_failures = self.edge_normalizer.normalize_edge_data(edges_subset) if current_edge_norm_failures: 
edge_norm_failures.update(current_edge_norm_failures) - self.logger.error( + logger.error( f'Edge normalization service failed to return results for {edge_norm_failures}') for edge in edges_subset: @@ -263,7 +259,7 @@ def normalize_edge_file(self): else: normalized_object_ids = node_norm_lookup[edge[OBJECT_ID]] except KeyError as e: - self.logger.error(f"One of the node IDs from the edge file was missing from the normalizer look up, " + logger.error(f"One of the node IDs from the edge file was missing from the normalizer look up, " f"it's probably not in the node file. ({e})") if not (normalized_subject_ids and normalized_object_ids): edges_failed_due_to_nodes += 1 @@ -277,7 +273,7 @@ def normalize_edge_file(self): normalized_edge_properties = edge_norm_result.properties except KeyError as e: norm_error_msg = f'Edge norm lookup failure - missing {edge[PREDICATE]}!' - self.logger.error(norm_error_msg) + logger.error(norm_error_msg) raise NormalizationFailedError(error_message=norm_error_msg, actual_error=e) else: normalized_predicate = edge[PREDICATE] @@ -331,14 +327,14 @@ def normalize_edge_file(self): if edge_count > 1: edge_splits += edge_count - 1 - self.logger.info(f'Processed {number_of_source_edges} edges so far...') + logger.info(f'Processed {number_of_source_edges} edges so far...') except OSError as e: norm_error_msg = f'Error normalizing edges file {self.source_edges_file_path}' raise NormalizationFailedError(error_message=norm_error_msg, actual_error=e) try: - self.logger.debug(f'Writing predicate map to file...') + logger.debug(f'Writing predicate map to file...') edge_norm_json = {} for original_predicate, edge_normalization in edge_norm_lookup.items(): edge_norm_json[original_predicate] = edge_normalization.__dict__ diff --git a/orion/kgx_file_writer.py b/orion/kgx_file_writer.py index 01f268ca..0295dfba 100644 --- a/orion/kgx_file_writer.py +++ b/orion/kgx_file_writer.py @@ -2,18 +2,16 @@ import jsonlines import logging -from orion.utils import 
LoggingUtil +from orion.logging import get_orion_logger from orion.kgxmodel import kgxnode, kgxedge from orion.biolink_constants import PRIMARY_KNOWLEDGE_SOURCE, AGGREGATOR_KNOWLEDGE_SOURCES, \ SUBJECT_ID, OBJECT_ID, PREDICATE -class KGXFileWriter: +logger = get_orion_logger("orion.kgx_file_writer") + - logger = LoggingUtil.init_logging("ORION.orion.KGXFileWriter", - line_format='medium', - level=logging.INFO, - log_file_path=os.getenv('ORION_LOGS')) +class KGXFileWriter: """ constructor :param nodes_output_file_path: the file path for the nodes file @@ -35,7 +33,7 @@ def __init__(self, if nodes_output_file_path: if os.path.isfile(nodes_output_file_path): # TODO verify - do we really want to overwrite existing files? we could remove them on previous errors instead - self.logger.warning(f'KGXFileWriter warning.. file already existed: {nodes_output_file_path}! Overwriting it!') + logger.warning(f'KGXFileWriter warning.. file already existed: {nodes_output_file_path}! Overwriting it!') self.nodes_output_file_handler = open(nodes_output_file_path, 'w') self.nodes_jsonl_writer = jsonlines.Writer(self.nodes_output_file_handler) @@ -43,7 +41,7 @@ def __init__(self, if edges_output_file_path: if os.path.isfile(edges_output_file_path): # TODO verify - do we really want to overwrite existing files? we could remove them on previous errors instead - self.logger.warning(f'KGXFileWriter warning.. file already existed: {edges_output_file_path}! Overwriting it!') + logger.warning(f'KGXFileWriter warning.. file already existed: {edges_output_file_path}! 
Overwriting it!') self.edges_output_file_handler = open(edges_output_file_path, 'w') self.edges_jsonl_writer = jsonlines.Writer(self.edges_output_file_handler) @@ -103,7 +101,7 @@ def __write_node_to_file(self, node): self.nodes_jsonl_writer.write(node) self.nodes_written += 1 except jsonlines.InvalidLineError as e: - self.logger.error(f'KGXFileWriter: Failed to write json data: {e.line}.') + logger.error(f'KGXFileWriter: Failed to write json data: {e.line}.') raise e def write_edge(self, @@ -155,5 +153,5 @@ def __write_edge_to_file(self, edge): self.edges_jsonl_writer.write(edge) self.edges_written += 1 except jsonlines.InvalidLineError as e: - self.logger.error(f'KGXFileWriter: Failed to write json data: {e.line}.') + logger.error(f'KGXFileWriter: Failed to write json data: {e.line}.') raise e diff --git a/orion/loader_interface.py b/orion/loader_interface.py index 3c771618..2f033f55 100644 --- a/orion/loader_interface.py +++ b/orion/loader_interface.py @@ -3,7 +3,7 @@ import json import inspect from orion.kgx_file_writer import KGXFileWriter -from orion.utils import LoggingUtil +from orion.logging import get_orion_logger class SourceDataLoader: @@ -46,10 +46,8 @@ def __init__(self, test_mode: bool = False, source_data_dir: str = None): self.output_file_writer: KGXFileWriter = None # create a logger - self.logger = LoggingUtil.init_logging(f"ORION.parsers.{self.get_name()}", - level=logging.INFO, - line_format='medium', - log_file_path=os.getenv('ORION_LOGS')) + # this uses an instance level logger instead of a module level because the name changes based on the ingest + self.logger = get_orion_logger(f"parsers.{self.get_name()}") def get_latest_source_version(self): """Determine and return the latest source version ie. 
a unique identifier associated with the latest version.""" diff --git a/orion/logging.py b/orion/logging.py new file mode 100644 index 00000000..f8a95513 --- /dev/null +++ b/orion/logging.py @@ -0,0 +1,49 @@ +import os +import logging +from logging.handlers import RotatingFileHandler + +from orion.config import config + + +def get_orion_logger(name): + """ + Logging utility controlling format and setting initial logging level + """ + + # get the logger with the specified name + logger = logging.getLogger(name) + + # if it already has handlers, it was already instantiated - return it + if logger.hasHandlers(): + return logger + + formatter = logging.Formatter('%(asctime)-15s - %(funcName)s(): %(message)s') + + level = logging.DEBUG if config.ORION_TEST_MODE else logging.INFO + logger.setLevel(level) + + # if ORION_LOGS is set, write logs to files there + if config.ORION_LOGS is not None: + # create a rotating file handler, 100mb max per file with a max number of 10 files + file_handler = RotatingFileHandler(filename=os.path.join(config.ORION_LOGS, name + '.log'), maxBytes=100000000, backupCount=10) + + # set the formatter + file_handler.setFormatter(formatter) + + # set the log level + file_handler.setLevel(level) + + # add the handler to the logger + logger.addHandler(file_handler) + + # create a stream handler as well (default to console/stdout) + stream_handler = logging.StreamHandler() + + # set the formatter on the console stream + stream_handler.setFormatter(formatter) + + # add the console handler to the logger + logger.addHandler(stream_handler) + + # return to the caller + return logger \ No newline at end of file diff --git a/orion/memgraph_tools.py b/orion/memgraph_tools.py index 35b9157a..c0eedef7 100644 --- a/orion/memgraph_tools.py +++ b/orion/memgraph_tools.py @@ -1,10 +1,8 @@ import os import orion.kgx_file_converter as kgx_file_converter -from orion.utils import LoggingUtil +from orion.logging import get_orion_logger -logger = 
LoggingUtil.init_logging("ORION.orion.memgraph_tools", - line_format='medium', - log_file_path=os.getenv('ORION_LOGS')) +logger = get_orion_logger("orion.memgraph_tools") def create_memgraph_dump(nodes_filepath: str, diff --git a/orion/merging.py b/orion/merging.py index 25bded66..ca41d115 100644 --- a/orion/merging.py +++ b/orion/merging.py @@ -5,7 +5,8 @@ from xxhash import xxh64_hexdigest from orion.biolink_utils import BiolinkUtils from orion.biolink_constants import * -from orion.utils import quick_json_loads, quick_json_dumps, LoggingUtil +from orion.utils import quick_json_loads, quick_json_dumps +from orion.logging import get_orion_logger ORION_UUID_NAMESPACE = uuid.UUID('e2a5b21f-4e4d-4a6e-b64a-1f3c78e2a9d0') @@ -15,9 +16,7 @@ # TODO ideally we'd make the biolink model version configurable here bmt = BiolinkUtils() -logger = LoggingUtil.init_logging("ORION.orion.merging", - line_format='medium', - log_file_path=os.getenv('ORION_LOGS')) +logger = get_orion_logger("orion.merging") # Key functions for identifying duplicates during entity merging. # Add entries to CUSTOM_KEY_FUNCTIONS to define custom matching logic for specific properties. 
diff --git a/orion/neo4j_tools.py b/orion/neo4j_tools.py index 300890b3..75890c0c 100644 --- a/orion/neo4j_tools.py +++ b/orion/neo4j_tools.py @@ -4,12 +4,10 @@ import subprocess import orion.kgx_file_converter as kgx_file_converter from orion.biolink_constants import NAMED_THING -from orion.utils import LoggingUtil +from orion.logging import get_orion_logger -logger = LoggingUtil.init_logging("ORION.orion.neo4j_tools", - line_format='medium', - log_file_path=os.getenv('ORION_LOGS')) +logger = get_orion_logger("orion.neo4j_tools") class Neo4jTools: diff --git a/orion/normalization.py b/orion/normalization.py index 750c1175..ad33a300 100644 --- a/orion/normalization.py +++ b/orion/normalization.py @@ -8,7 +8,9 @@ from robokop_genetics.genetics_normalization import GeneticsNormalizer from orion.biolink_constants import * -from orion.utils import LoggingUtil +from orion.logging import get_orion_logger + +logger = get_orion_logger("orion.normalization") NORMALIZATION_CODE_VERSION = '1.4' @@ -66,7 +68,6 @@ class NodeNormalizer: """ def __init__(self, - log_level=logging.INFO, node_normalization_version: str = 'latest', biolink_version: str = 'latest', strict_normalization: bool = True, @@ -74,14 +75,8 @@ def __init__(self, include_taxa: bool = False): """ constructor - :param log_level - overrides default log level :param node_normalization_version - not implemented yet """ - # create a logger - self.logger = LoggingUtil.init_logging("ORION.orion.NodeNormalizer", - level=log_level, - line_format='medium', - log_file_path=os.getenv('ORION_LOGS')) # storage for regular nodes that failed to normalize self.failed_to_normalize_ids = set() # storage for variant nodes that failed to normalize @@ -121,7 +116,7 @@ def hit_node_norm_service(self, curies, retries=0): raise NormalizationFailedError(error_message=error_message) else: error_message = f'Node norm response code: {resp.status_code} (curies: {curies})' - self.logger.error(error_message) + logger.error(error_message) 
resp.raise_for_status() def normalize_node_data(self, node_list: list, batch_size: int = 5000) -> list: @@ -282,7 +277,7 @@ def normalize_node_data(self, node_list: list, batch_size: int = 5000) -> list: if self.strict_normalization: node_list[:] = [d for d in node_list if d is not None] - self.logger.debug(f'End of normalize_node_data.') + logger.debug(f'End of normalize_node_data.') # return the failed list to the caller return failed_to_normalize @@ -390,14 +385,10 @@ class EdgeNormalizer: DEFAULT_EDGE_NORM_ENDPOINT = f'https://bl-lookup-sri.renci.org/' def __init__(self, - edge_normalization_version: str = 'latest', - log_level=logging.INFO): + edge_normalization_version: str = 'latest'): """ constructor - :param log_level - overrides default log level """ - # create a logger - self.logger = LoggingUtil.init_logging("ORION.orion.EdgeNormalizer", level=log_level, line_format='medium', log_file_path=os.getenv('ORION_LOGS')) # normalization map for future look up of all normalized predicates self.edge_normalization_lookup = {} self.cached_edge_norms = {} @@ -458,7 +449,7 @@ def normalize_edge_data(self, # hit the edge normalization service request_url = f'{self.edge_norm_endpoint}resolve_predicate?version={self.edge_norm_version}&predicate=' request_url += '&predicate='.join(predicate_chunk) - self.logger.debug(f'Sending request: {request_url}') + logger.debug(f'Sending request: {request_url}') resp: requests.models.Response = requests.get(request_url) # if we get a success status code @@ -473,7 +464,7 @@ def normalize_edge_data(self, else: # this is a real error with the edge normalizer so we bail error_message = f'Edge norm response code: {resp.status_code}' - self.logger.error(error_message) + logger.error(error_message) resp.raise_for_status() # move on down the list @@ -505,7 +496,7 @@ def normalize_edge_data(self, # if something failed to normalize output it # if failed_to_normalize: - # self.logger.error(f'Failed to normalize: {", 
".join(failed_to_normalize)}') + # logger.error(f'Failed to normalize: {", ".join(failed_to_normalize)}') # return the failed list to the caller return failed_to_normalize diff --git a/orion/supplementation.py b/orion/supplementation.py index 9f3a7ff9..4d17bf79 100644 --- a/orion/supplementation.py +++ b/orion/supplementation.py @@ -9,10 +9,12 @@ from collections import defaultdict from orion.biolink_constants import * from orion.normalization import FALLBACK_EDGE_PREDICATE, NormalizationScheme -from orion.utils import LoggingUtil +from orion.logging import get_orion_logger from orion.kgx_file_writer import KGXFileWriter from orion.kgx_file_normalizer import KGXFileNormalizer +logger = get_orion_logger("orion.supplementation") + SNPEFF_PROVENANCE = "infores:robokop-snpeff" # These are terms from Sequence Ontology that SNPEFF uses for annotations. SNPEFF doesn't provide the SO identifiers, @@ -61,15 +63,12 @@ class SequenceVariantSupplementation: def __init__(self, output_dir="."): - self.logger = LoggingUtil.init_logging("ORION.orion.SequenceVariantSupplementation", - line_format='medium', - log_file_path=os.getenv('ORION_LOGS')) workspace_dir = os.getenv("ORION_STORAGE", output_dir) # if the snpEff dir exists, assume we already downloaded it self.snpeff_dir = path.join(workspace_dir, "snpEff") if not path.isdir(self.snpeff_dir): - self.logger.info('SNPEFF not found, downloading and installing..') + logger.info('SNPEFF not found, downloading and installing..') # TODO # Snpeff is building their latest versions with Java 21 which is not compatible with the docker @@ -98,15 +97,15 @@ def find_supplemental_data(self, workspace_dir = supp_nodes_norm_file_path.rsplit("/", 1)[0] vcf_file_path = f'{workspace_dir}/variants.vcf' - self.logger.info('Creating VCF file from source nodes..') + logger.info('Creating VCF file from source nodes..') self.create_vcf_from_variant_nodes(nodes_file_path, vcf_file_path) - self.logger.info('Running SNPEFF, creating annotated VCF..') + 
logger.info('Running SNPEFF, creating annotated VCF..') annotated_vcf_path = f'{workspace_dir}/variants_ann.vcf' self.run_snpeff(vcf_file_path, annotated_vcf_path) - self.logger.info('Converting annotated VCF to KGX File..') + logger.info('Converting annotated VCF to KGX File..') supplementation_metadata = self.convert_snpeff_to_kgx(annotated_vcf_path, supp_nodes_file_path, supp_edges_file_path) @@ -114,7 +113,7 @@ def find_supplemental_data(self, os.remove(vcf_file_path) os.remove(annotated_vcf_path) - self.logger.info('Normalizing Supplemental KGX File..') + logger.info('Normalizing Supplemental KGX File..') file_normalizer = KGXFileNormalizer(source_nodes_file_path=supp_nodes_file_path, nodes_output_file_path=supp_nodes_norm_file_path, node_norm_map_file_path=supp_node_norm_map_file_path, @@ -148,7 +147,7 @@ def run_snpeff(self, if snpeff_results.returncode != 0: error_message = f'SNPEFF subprocess error (ExitCode {snpeff_results.returncode}): ' \ f'{snpeff_results.stderr.decode("UTF-8")}' - self.logger.error(error_message) + logger.error(error_message) raise SupplementationFailedError(error_message) def convert_snpeff_to_kgx(self, diff --git a/orion/utils.py b/orion/utils.py index c3a97f7f..843bcb19 100644 --- a/orion/utils.py +++ b/orion/utils.py @@ -1,6 +1,4 @@ import os -import logging -import tarfile import gzip import requests import orjson @@ -14,82 +12,9 @@ from csv import DictReader from ftplib import FTP from datetime import datetime -from logging.handlers import RotatingFileHandler +from orion.logging import get_orion_logger - -class LoggingUtil(object): - """ - creates and configures a logger - """ - @staticmethod - def init_logging(name, level=logging.INFO, line_format='minimum', log_file_path=None): - """ - Logging utility controlling format and setting initial logging level - """ - - # get the logger with the specified name - logger = logging.getLogger(name) - - # if it already has handlers, it was already instantiated - return it - if 
logger.hasHandlers(): - return logger - - # define the various output formats - format_type = { - "minimum": '%(message)s', - "short": '%(funcName)s(): %(message)s', - "medium": '%(asctime)-15s - %(funcName)s(): %(message)s', - "long": '%(asctime)-15s - %(filename)s %(funcName)s() %(levelname)s: %(message)s' - }[line_format] - - # create a formatter - formatter = logging.Formatter(format_type) - - # set the logging level - if os.getenv('ORION_TEST_MODE'): - level = logging.DEBUG - logger.setLevel(level) - - # if there was a file path passed in use it - if log_file_path is not None: - # create a rotating file handler, 100mb max per file with a max number of 10 files - file_handler = RotatingFileHandler(filename=os.path.join(log_file_path, name + '.log'), maxBytes=100000000, backupCount=10) - - # set the formatter - file_handler.setFormatter(formatter) - - # set the log level - file_handler.setLevel(level) - - # add the handler to the logger - logger.addHandler(file_handler) - - # create a stream handler as well (default to console) - stream_handler = logging.StreamHandler() - - # set the formatter on the console stream - stream_handler.setFormatter(formatter) - - # add the console handler to the logger - logger.addHandler(stream_handler) - - # return to the caller - return logger - - @staticmethod - def print_debug_msg(msg: str): - """ - Adds a timestamp to a printed message - - :param msg: the message that gets appended onto a timestamp and output to console - :return: None - """ - - # get the timestamp - now: datetime = datetime.now() - - # output the text - print(f'{now.strftime("%Y/%m/%d %H:%M:%S")} - {msg}') +logger = get_orion_logger("orion.utils") class GetDataPullError(Exception): @@ -102,13 +27,10 @@ class GetData: Class that contains methods that can be used to get various data sets. 
""" - def __init__(self, log_level=logging.INFO): + def __init__(self): """ constructor - :param log_level - overrides default log level """ - # create a logger - self.logger = LoggingUtil.init_logging("ORION.orion.GetData", level=log_level, line_format='medium', log_file_path=os.getenv('ORION_LOGS')) @staticmethod def pull_via_ftp_binary(ftp_site, ftp_dir, ftp_file): @@ -183,7 +105,7 @@ def get_ftp_file_date(self, ftp_site, ftp_dir, ftp_file, exclude_day=False) -> s except Exception as e: error_message = f'Error getting modification date for ftp file: {ftp_site}{ftp_dir}{ftp_file}. {e}' - self.logger.error(error_message) + logger.error(error_message) raise GetDataPullError(error_message) def pull_via_ftp(self, ftp_site: str, ftp_dir: str, ftp_files: list, data_file_path: str) -> int: @@ -212,7 +134,7 @@ def pull_via_ftp(self, ftp_site: str, ftp_dir: str, ftp_files: list, data_file_p # for each file requested for f in ftp_files: - self.logger.debug(f'Retrieving {ftp_site}{ftp_dir}{f} -> {data_file_path}') + logger.debug(f'Retrieving {ftp_site}{ftp_dir}{f} -> {data_file_path}') # does the file exist and has data in it try: @@ -233,15 +155,15 @@ def pull_via_ftp(self, ftp_site: str, ftp_dir: str, ftp_files: list, data_file_p # progress output if file_counter % 50 == 0: - self.logger.debug(f'{file_counter} files retrieved, {len(ftp_files) - file_counter} to go.') + logger.debug(f'{file_counter} files retrieved, {len(ftp_files) - file_counter} to go.') - self.logger.debug(f'{file_counter} file(s) retrieved of {len(ftp_files)} requested.') + logger.debug(f'{file_counter} file(s) retrieved of {len(ftp_files)} requested.') # close the ftp object ftp.quit() except Exception as e: error_message = f'GetDataPullError pull_via_ftp() failed for {ftp_site}. 
Exception: {e}' - self.logger.error(error_message) + logger.error(error_message) raise GetDataPullError(error_message) # return pass/fail to the caller @@ -257,7 +179,7 @@ def get_http_file_modified_date(self, file_url: str): return modified_datetime.strftime("%-m_%-d_%Y") except Exception as e: error_message = f'Error getting modification date for http file: {file_url}. {repr(e)}-{e}' - self.logger.error(error_message) + logger.error(error_message) raise GetDataPullError(error_message) def pull_via_http(self, url: str, data_dir: str, is_gzip=False, saved_file_name: str = None) -> int: @@ -286,7 +208,7 @@ def pull_via_http(self, url: str, data_dir: str, is_gzip=False, saved_file_name: # check if the file exists already if not os.path.exists(os.path.join(data_dir, data_file)): - self.logger.debug(f'Retrieving {url} -> {data_dir}') + logger.debug(f'Retrieving {url} -> {data_dir}') try: hdr = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'} req = request.Request(url, headers=hdr) @@ -314,7 +236,7 @@ def pull_via_http(self, url: str, data_dir: str, is_gzip=False, saved_file_name: fp.write(buffer) except Exception as e: error_message = f'GetDataPullError pull_via_http() failed. URL: {url}. 
Exception: {e}' - self.logger.error(error_message) + logger.error(error_message) raise GetDataPullError(error_message) else: @@ -333,7 +255,7 @@ def get_swiss_prot_id_set(self, data_dir: str, debug_mode=False) -> set: :return: a set of uniprot kb ids """ - self.logger.debug('Start of swiss-prot curated uniprot id retrieval') + logger.debug('Start of swiss-prot curated uniprot id retrieval') # init the return value ret_val: set = set() @@ -362,11 +284,11 @@ def get_swiss_prot_id_set(self, data_dir: str, debug_mode=False) -> set: ret_val.add(item.strip(';\n')) # do not remove the file if in debug mode - # if self.logger.level != logging.DEBUG and not debug_mode: + # if logger.level != logging.DEBUG and not debug_mode: # # remove the target file # os.remove(os.path.join(data_dir, data_file_name)) - self.logger.debug(f'End of swiss-prot uniprot id retrieval. {len(ret_val)} retrieved.') + logger.debug(f'End of swiss-prot uniprot id retrieval. {len(ret_val)} retrieved.') # return the list return ret_val diff --git a/parsers/LitCoin/src/bagel/bagel_gpt.py b/parsers/LitCoin/src/bagel/bagel_gpt.py index 843ba4d3..c38ec552 100644 --- a/parsers/LitCoin/src/bagel/bagel_gpt.py +++ b/parsers/LitCoin/src/bagel/bagel_gpt.py @@ -3,16 +3,14 @@ from collections import defaultdict from orion.config import CONFIG -from orion.utils import LoggingUtil +from orion.logging import get_orion_logger OPENAI_API_KEY = CONFIG.get("OPENAI_API_KEY") LLM_RESULTS = [] -logger = LoggingUtil.init_logging("ORION.orion.BagelGPT", - line_format='medium', - log_file_path=os.getenv('ORION_LOGS')) +logger = get_orion_logger("orion.bagel_gpt") def ask_classes_and_descriptions(text, term, termlist, abstract_id, requests_session): """Get GPT results based only on the labels of the terms.""" diff --git a/parsers/PHAROS/src/legacy_pharos_mysql.py b/parsers/PHAROS/src/legacy_pharos_mysql.py index fc354bef..a5961be8 100644 --- a/parsers/PHAROS/src/legacy_pharos_mysql.py +++ 
b/parsers/PHAROS/src/legacy_pharos_mysql.py @@ -1,12 +1,13 @@ import os import mysql.connector import logging -from orion.utils import LoggingUtil, GetData, NodeNormUtils, EdgeNormUtils +from orion.utils import GetData, NodeNormUtils, EdgeNormUtils +from orion.logging import get_orion_logger from pathlib import Path # create a logger -logger = LoggingUtil.init_logging("ORION.PHAROS.PHAROSLoader", line_format='medium', log_file_path=os.path.join(Path(__file__).parents[2], 'logs')) +logger = get_orion_logger("parsers.pharos") class PharosMySQL(): def __init__(self, context): diff --git a/parsers/PHAROS/src/loadPHAROS.py b/parsers/PHAROS/src/loadPHAROS.py index 44a95f17..e3449523 100644 --- a/parsers/PHAROS/src/loadPHAROS.py +++ b/parsers/PHAROS/src/loadPHAROS.py @@ -108,7 +108,7 @@ def get_latest_source_version(self) -> str: return 'v6_13_4' def get_data(self): - gd: GetData = GetData(self.logger.level) + gd: GetData = GetData() byte_count: int = gd.pull_via_http(f'{self.data_url}{self.data_file}', self.data_path) if not byte_count: diff --git a/parsers/ViralProteome/src/get_uniref_taxon_indexes.py b/parsers/ViralProteome/src/get_uniref_taxon_indexes.py index aa681b58..5f46ae78 100644 --- a/parsers/ViralProteome/src/get_uniref_taxon_indexes.py +++ b/parsers/ViralProteome/src/get_uniref_taxon_indexes.py @@ -1,11 +1,12 @@ import os import argparse # from parsers.ViralProteome.src.loadUniRef import UniRefSimLoader -from orion.utils import LoggingUtil, GetData +from orion.utils import GetData +from orion.logging import get_orion_logger from pathlib import Path # create a logger -logger = LoggingUtil.init_logging("ORION.ViralProteome.get_uniref_taxon_indexes", line_format='medium', log_file_path=os.path.join(Path(__file__).parents[2], 'logs')) +logger = get_orion_logger("parsers.get_uniref_taxon_indexes") if __name__ == '__main__': diff --git a/parsers/ViralProteome/src/loadUniRef.py b/parsers/ViralProteome/src/loadUniRef.py index be5181bf..e8d019f3 100644 --- 
a/parsers/ViralProteome/src/loadUniRef.py +++ b/parsers/ViralProteome/src/loadUniRef.py @@ -5,7 +5,7 @@ import datetime from xml.etree import ElementTree as ETree -from orion.utils import LoggingUtil, GetData +from orion.utils import GetData from orion.kgx_file_writer import KGXFileWriter from orion.loader_interface import SourceDataLoader @@ -81,7 +81,7 @@ def get_uniref_data(self) -> set: """ # get a reference to the get data util class - gd: GetData = GetData(self.logger.level) + gd: GetData = GetData() # are we in test mode if not self.test_mode: From c5fcf13f65da025b5edd0db07f994fd70dea032a Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Fri, 3 Apr 2026 13:59:35 -0700 Subject: [PATCH 02/14] reorganizing .env and set_up_test_env script --- .env | 5 ---- .env.example | 59 ++++++++++++++++++++++++++++++++++++++++++++++ README.md | 2 +- docs/ORION.ipynb | 8 +++++-- set_up_dev_env.sh | 17 +++++++++++++ set_up_test_env.sh | 40 ------------------------------- 6 files changed, 83 insertions(+), 48 deletions(-) delete mode 100644 .env create mode 100644 .env.example create mode 100644 set_up_dev_env.sh delete mode 100644 set_up_test_env.sh diff --git a/.env b/.env deleted file mode 100644 index da0872f7..00000000 --- a/.env +++ /dev/null @@ -1,5 +0,0 @@ -OPENAI_API_KEY=fake-key-do-not-commit-a-real-one!!! -OPENAI_API_ORGANIZATION=fake-org-do-not-commit-a-real-one!!! -BAGEL_SERVICE_USERNAME=fake-username-do-not-commit-a-real-one!!! -BAGEL_SERVICE_PASSWORD=fake-password-do-not-commit-a-real-one!!! 
-SHARED_SOURCE_DATA_PATH=/tmp/shared_data diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..2165f415 --- /dev/null +++ b/.env.example @@ -0,0 +1,59 @@ +# ---- Storage & Output ---- + +# Directory for source data downloads and ingest pipeline files +# ORION_STORAGE= + +# Directory for final graph releases +# ORION_GRAPHS= + +# Directory for log files (if unset, logs go to stdout only) +# ORION_LOGS= + +# Base URL used when generating graph metadata +# ORION_OUTPUT_URL=https://localhost/ + +# ---- Graph Spec ---- + +# Local graph spec filename (set one of ORION_GRAPH_SPEC or ORION_GRAPH_SPEC_URL, not both) +# ORION_GRAPH_SPEC=example-graph-spec.yaml + +# URL pointing to a remote graph spec file +# ORION_GRAPH_SPEC_URL= + +# ---- Mode ---- + +# Enable test/debug mode (sets log level to DEBUG and runs ingests with a smaller subset of data if possible) +# ORION_TEST_MODE=false + +# ---- Normalization Endpoints ---- + +# Edge normalization / BioLink Lookup endpoint +# EDGE_NORMALIZATION_ENDPOINT=https://bl-lookup-sri.renci.org/ + +# Node normalization endpoint +# NODE_NORMALIZATION_ENDPOINT=https://nodenormalization-sri.renci.org/ + +# ---- LitCoin / Bagel (may be removed in the future) ---- + +# Name resolution service URL +# NAMERES_URL=https://name-resolution-sri.renci.org/ + +# SapBERT service URL +# SAPBERT_URL=https://babel-sapbert.apps.renci.org/ + +# Shared source data path for LitCoin pipeline +# SHARED_SOURCE_DATA_PATH=/tmp/shared_data + +# LitCoin predicate mapping service URL +# LITCOIN_PRED_MAPPING_URL=https://pred-mapping.apps.renci.org/ + +# Bagel service endpoint +# BAGEL_ENDPOINT=https://bagel.apps.renci.org/ + +# Bagel service credentials +# BAGEL_SERVICE_USERNAME= +# BAGEL_SERVICE_PASSWORD= + +# OpenAI credentials for LitCoin GPT features +# OPENAI_API_KEY= +# OPENAI_API_ORGANIZATION= \ No newline at end of file diff --git a/README.md b/README.md index 41fa5ec3..6fbb6740 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 
@@ ORION uses three directories for its data, configured via environment variables: You can set these up manually or use the provided script: ```bash -source ./set_up_test_env.sh +source ./set_up_dev_env.sh ``` #### Graph Spec diff --git a/docs/ORION.ipynb b/docs/ORION.ipynb index 46c44dc7..5d5fce00 100644 --- a/docs/ORION.ipynb +++ b/docs/ORION.ipynb @@ -85,7 +85,11 @@ { "cell_type": "code", "id": "g6i460bvtda", - "source": "%%bash\ncd ~/ORION_root/ORION/\nsource ./set_up_test_env.sh", + "source": [ + "%%bash\n", + "cd ~/ORION_root/ORION/\n", + "source ./set_up_dev_env.sh" + ], "metadata": {}, "execution_count": null, "outputs": [] @@ -130,4 +134,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/set_up_dev_env.sh b/set_up_dev_env.sh new file mode 100644 index 00000000..f824f091 --- /dev/null +++ b/set_up_dev_env.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# ORION requires directories to store data ingest files and graph outputs. +# This script creates those directories and sets the environment variables pointing to them. +# See the README for more information. + +# ORION_STORAGE - a directory for storing ingest pipeline files +mkdir -p "$PWD/../ORION_storage" +export ORION_STORAGE="$PWD/../ORION_storage/" + +# ORION_GRAPHS - a directory for storing knowledge graph outputs +mkdir -p "$PWD/../ORION_graphs" +export ORION_GRAPHS="$PWD/../ORION_graphs/" + +# ORION_LOGS - a directory for storing logs +mkdir -p "$PWD/../ORION_logs" +export ORION_LOGS="$PWD/../ORION_logs/" diff --git a/set_up_test_env.sh b/set_up_test_env.sh deleted file mode 100644 index 46ed120c..00000000 --- a/set_up_test_env.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env bash - -#These environment variables are required by Data Services. See the README for more information. 
- -#ORION_STORAGE - a directory for storing data sources -mkdir -p "$PWD/../ORION_storage" -export ORION_STORAGE="$PWD/../ORION_storage/" - -#ORION_GRAPHS - a directory for storing knowledge graphs -mkdir -p "$PWD/../ORION_graphs" -export ORION_GRAPHS="$PWD/../ORION_graphs/" - -#ORION_LOGS - a directory for storing logs -mkdir -p "$PWD/../ORION_logs" -export ORION_LOGS="$PWD/../ORION_logs/" - -#Use EITHER of the following, ORION_GRAPH_SPEC or ORION_GRAPH_SPEC_URL - -#ORION_GRAPH_SPEC - the name of a Graph Spec file located in the graph_specs directory of ORION -export ORION_GRAPH_SPEC=example-graph-spec.yaml - -#ORION_GRAPH_SPEC_URL - a URL pointing to a Graph Spec file -#export ORION_GRAPH_SPEC_URL=https://raw.githubusercontent.com/RENCI-AUTOMAT/ORION/helm_deploy/graph_specs/yeast-graph-spec.yml - -export PYTHONPATH="$PYTHONPATH:$PWD" - -# The following environment variables are optional -# -# export EDGE_NORMALIZATION_ENDPOINT=https://bl-lookup-sri.renci.org/ -# export NODE_NORMALIZATION_ENDPOINT=https://nodenormalization-sri.renci.org/ -# export NAMERES_URL=https://name-resolution-sri.renci.org/ -# export SAPBERT_URL=https://babel-sapbert.apps.renci.org/ -# export LITCOIN_PRED_MAPPING_URL=https://pred-mapping.apps.renci.org/ - -# export ORION_OUTPUT_URL=https://localhost/ # this is currently only used to generate metadata -# export BL_VERSION=4.2.1 - -# if you are building your own docker image and issues occur, setting the correct platform may help -# export DOCKER_PLATFORM=linux/arm64 - From e60309644b2720e7ee9d7be10f2c95edd7fbd0b0 Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Thu, 2 Apr 2026 16:12:38 -0700 Subject: [PATCH 03/14] use pydantic settings config for env vars --- orion/biolink_utils.py | 4 ++- orion/build_manager.py | 9 ++--- orion/config.py | 40 ++++++++++++++++++---- orion/ingest_pipeline.py | 17 ++++----- orion/loader_interface.py | 3 +- orion/neo4j_tools.py | 5 ++- orion/normalization.py | 12 +++---- orion/supplementation.py | 9 +++-- 
parsers/LitCoin/src/NER/nameres.py | 4 +-- parsers/LitCoin/src/NER/sapbert.py | 4 +-- parsers/LitCoin/src/bagel/bagel_gpt.py | 4 +-- parsers/LitCoin/src/bagel/bagel_service.py | 18 ++++------ parsers/LitCoin/src/loadLitCoin.py | 5 +-- pyproject.toml | 2 +- uv.lock | 18 ++++++++-- 15 files changed, 97 insertions(+), 57 deletions(-) diff --git a/orion/biolink_utils.py b/orion/biolink_utils.py index 011f9f96..28e5b178 100644 --- a/orion/biolink_utils.py +++ b/orion/biolink_utils.py @@ -6,7 +6,9 @@ from requests.adapters import HTTPAdapter, Retry from functools import cache -BIOLINK_MODEL_VERSION = os.environ.get("BL_VERSION", "v4.3.4") +from orion.config import config + +BIOLINK_MODEL_VERSION = config.BL_VERSION def get_biolink_model_toolkit(biolink_version: str = None) -> Toolkit: version = biolink_version if biolink_version else BIOLINK_MODEL_VERSION diff --git a/orion/build_manager.py b/orion/build_manager.py index 33afc1d6..505e0950 100644 --- a/orion/build_manager.py +++ b/orion/build_manager.py @@ -10,6 +10,7 @@ from orion.utils import GetDataPullError from orion.logging import get_orion_logger +from orion.config import config from orion.data_sources import get_available_data_sources, get_data_source_metadata_path from orion.exceptions import DataVersionError, GraphSpecError from orion.ingest_pipeline import IngestPipeline @@ -508,8 +509,8 @@ def generate_kgx_metadata_files(self, f.write(kgx_graph_metadata.to_json()) def load_graph_specs(self, graph_specs_dir=None): - graph_spec_file = os.getenv('ORION_GRAPH_SPEC') - graph_spec_url = os.getenv('ORION_GRAPH_SPEC_URL') + graph_spec_file = config.ORION_GRAPH_SPEC + graph_spec_url = config.ORION_GRAPH_SPEC_URL if graph_spec_file and graph_spec_url: raise GraphSpecError(f'Configuration Error - the environment variables ORION_GRAPH_SPEC and ' @@ -685,7 +686,7 @@ def get_graph_dir_path(self, graph_id: str, graph_version: str): @staticmethod def get_graph_output_url(graph_id: str, graph_version: str): - graph_output_url 
= os.environ.get('ORION_OUTPUT_URL', "https://localhost/").removesuffix('/') + graph_output_url = config.ORION_OUTPUT_URL.removesuffix('/') return f'{graph_output_url}/{graph_id}/{graph_version}/' @@ -712,7 +713,7 @@ def get_graph_metadata(self, graph_id: str, graph_version: str): @staticmethod def get_graph_output_dir(): # confirm the directory specified by the environment variable ORION_GRAPHS is valid - graphs_dir = os.getenv('ORION_GRAPHS') + graphs_dir = config.ORION_GRAPHS if graphs_dir and Path(graphs_dir).is_dir(): return graphs_dir diff --git a/orion/config.py b/orion/config.py index f5ec3b7d..2cb314ff 100644 --- a/orion/config.py +++ b/orion/config.py @@ -1,10 +1,38 @@ -import os from pathlib import Path -from dotenv import dotenv_values +from pydantic_settings import BaseSettings, SettingsConfigDict -CONFIG = { - **dotenv_values(Path(__file__).parents[1] / '.env'), # load config variables from .env - **os.environ, # override loaded values with environment variables -} +class Config(BaseSettings): + model_config = SettingsConfigDict( + env_file=Path(__file__).parent.parent/".env", + env_file_encoding="utf-8", + env_ignore_empty=True + ) + ORION_STORAGE: str | None = None + ORION_GRAPHS: str | None = None + ORION_LOGS: str | None = None + + ORION_OUTPUT_URL: str = "https://localhost" + ORION_TEST_MODE: bool = False + + ORION_GRAPH_SPEC: str = "example-graph-spec.yaml" + ORION_GRAPH_SPEC_URL: str = "" + + BL_VERSION: str = "v4.3.4" + + EDGE_NORMALIZATION_ENDPOINT: str = "https://bl-lookup-sri.renci.org/" + NODE_NORMALIZATION_ENDPOINT: str = "https://nodenormalization-sri.renci.org/" + + # the following were used for the LitCoin project and may be removed in the future + NAMERES_URL: str = "https://name-resolution-sri.renci.org" + SAPBERT_URL: str = "https://babel-sapbert.apps.renci.org" + SHARED_SOURCE_DATA_PATH: str = "/tmp/shared_data" + LITCOIN_PRED_MAPPING_URL: str = "https://pred-mapping.apps.renci.org" + BAGEL_ENDPOINT: str = 
"https://bagel.apps.renci.org" + BAGEL_SERVICE_USERNAME: str | None = None + BAGEL_SERVICE_PASSWORD: str | None = None + OPENAI_API_KEY: str | None = None + OPENAI_API_ORGANIZATION: str | None = None + +config = Config() \ No newline at end of file diff --git a/orion/ingest_pipeline.py b/orion/ingest_pipeline.py index e80eeb7a..1f0a41c7 100644 --- a/orion/ingest_pipeline.py +++ b/orion/ingest_pipeline.py @@ -9,6 +9,7 @@ from orion.exceptions import DataVersionError from orion.utils import GetDataPullError from orion.logging import get_orion_logger +from orion.config import config from orion.kgx_file_normalizer import KGXFileNormalizer from orion.kgx_validation import validate_graph from orion.normalization import NormalizationScheme, NodeNormalizer, EdgeNormalizer, NormalizationFailedError @@ -698,14 +699,13 @@ def init_storage_dir(storage_dir: str=None): raise IOError(f'Storage directory not valid: {storage_dir}') # otherwise use the storage directory specified by the environment variable ORION_STORAGE # check to make sure it's set and valid, otherwise fail - storage_dir_from_env = os.getenv("ORION_STORAGE") - if storage_dir_from_env is None: + if config.ORION_STORAGE is None: raise Exception(f'No storage directory was specified. 
You must either provide a path programmatically or ' f'use the environment variable ORION_STORAGE to configure a storage directory.') - if os.path.isdir(storage_dir_from_env): - return storage_dir_from_env + if os.path.isdir(config.ORION_STORAGE): + return config.ORION_STORAGE else: - raise IOError(f'Storage directory not valid: {storage_dir_from_env}') + raise IOError(f'Storage directory not valid: {config.ORION_STORAGE}') def init_source_output_dir(self, source_id: str): source_dir_path = os.path.join(self.storage_dir, source_id) @@ -730,12 +730,7 @@ def main(): 'in the finalized kgx files.') args = parser.parse_args() - if 'ORION_TEST_MODE' in os.environ: - test_mode_from_env = os.environ['ORION_TEST_MODE'] - else: - test_mode_from_env = False - - loader_test_mode = args.test_mode or test_mode_from_env + loader_test_mode = args.test_mode or config.ORION_TEST_MODE loader_strict_normalization = (not args.lenient_normalization) ingest_pipeline = IngestPipeline(test_mode=loader_test_mode, fresh_start_mode=args.fresh_start_mode) diff --git a/orion/loader_interface.py b/orion/loader_interface.py index 2f033f55..23158c50 100644 --- a/orion/loader_interface.py +++ b/orion/loader_interface.py @@ -4,6 +4,7 @@ import inspect from orion.kgx_file_writer import KGXFileWriter from orion.logging import get_orion_logger +from orion.config import config class SourceDataLoader: @@ -36,7 +37,7 @@ def __init__(self, test_mode: bool = False, source_data_dir: str = None): if not os.path.exists(self.data_path): os.mkdir(self.data_path) else: - self.data_path = os.environ.get("ORION_STORAGE") + self.data_path = config.ORION_STORAGE # the final output lists of nodes and edges self.final_node_list: list = [] diff --git a/orion/neo4j_tools.py b/orion/neo4j_tools.py index 75890c0c..107db9a5 100644 --- a/orion/neo4j_tools.py +++ b/orion/neo4j_tools.py @@ -5,6 +5,7 @@ import orion.kgx_file_converter as kgx_file_converter from orion.biolink_constants import NAMED_THING from orion.logging 
import get_orion_logger +from orion.config import config logger = get_orion_logger("orion.neo4j_tools") @@ -22,7 +23,9 @@ def __init__(self, self.http_port = http_port self.https_port = https_port self.bolt_port = bolt_port - self.password = password if password else os.environ.get('ORION_NEO4J_PASSWORD', 'orion-password') + # This is only the password for generating a dump on a temporary neo4j instance + # (so it doesn't really matter what it is or that its secure) + self.password = 'orion-password' self.graph_db_uri = f'bolt://{neo4j_host}:{bolt_port}' self.graph_db_auth = ("neo4j", self.password) self.neo4j_driver = neo4j.GraphDatabase.driver(self.graph_db_uri, auth=self.graph_db_auth) diff --git a/orion/normalization.py b/orion/normalization.py index ad33a300..1e72f6bb 100644 --- a/orion/normalization.py +++ b/orion/normalization.py @@ -9,6 +9,7 @@ from robokop_genetics.genetics_normalization import GeneticsNormalizer from orion.biolink_constants import * from orion.logging import get_orion_logger +from orion.config import config logger = get_orion_logger("orion.normalization") @@ -51,7 +52,7 @@ def __init__(self, error_message: str, actual_error: Exception = None): self.error_message = error_message self.actual_error = actual_error -NODE_NORMALIZATION_URL = os.environ.get('NODE_NORMALIZATION_ENDPOINT', 'https://nodenormalization-sri.renci.org/') +NODE_NORMALIZATION_URL = config.NODE_NORMALIZATION_URL class NodeNormalizer: @@ -382,8 +383,6 @@ class EdgeNormalizer: Class that contains methods relating to edge normalization. 
""" - DEFAULT_EDGE_NORM_ENDPOINT = f'https://bl-lookup-sri.renci.org/' - def __init__(self, edge_normalization_version: str = 'latest'): """ @@ -393,10 +392,7 @@ def __init__(self, self.edge_normalization_lookup = {} self.cached_edge_norms = {} - if 'EDGE_NORMALIZATION_ENDPOINT' in os.environ and os.environ['EDGE_NORMALIZATION_ENDPOINT']: - self.edge_norm_endpoint = os.environ['EDGE_NORMALIZATION_ENDPOINT'] - else: - self.edge_norm_endpoint = self.DEFAULT_EDGE_NORM_ENDPOINT + self.edge_norm_endpoint = config.EDGE_NORMALIZATION_ENDPOINT if edge_normalization_version != 'latest': if self.check_bl_version_valid(edge_normalization_version): @@ -552,7 +548,7 @@ def get_valid_node_types(self): resp.raise_for_status() -NAME_RESOLVER_URL = os.getenv('NAMERES_URL', 'https://name-resolution-sri.renci.org') +NAME_RESOLVER_URL = config.NAMERES_URL NAME_RESOLVER_ENDPOINT = f'{NAME_RESOLVER_URL}/lookup' NAME_RESOLVER_HEADERS = {"accept": "application/json"} NAME_RESOLVER_API_ERROR = 'api_error' diff --git a/orion/supplementation.py b/orion/supplementation.py index 4d17bf79..a1acd252 100644 --- a/orion/supplementation.py +++ b/orion/supplementation.py @@ -10,6 +10,7 @@ from orion.biolink_constants import * from orion.normalization import FALLBACK_EDGE_PREDICATE, NormalizationScheme from orion.logging import get_orion_logger +from orion.config import config from orion.kgx_file_writer import KGXFileWriter from orion.kgx_file_normalizer import KGXFileNormalizer @@ -61,10 +62,12 @@ class SequenceVariantSupplementation: SUPPLEMENTATION_VERSION = "1.1" - def __init__(self, output_dir="."): - - workspace_dir = os.getenv("ORION_STORAGE", output_dir) + def __init__(self, output_dir=None): + workspace_dir = output_dir or config.ORION_STORAGE + if not path.isdir(workspace_dir): + raise RuntimeError(f'Workspace directory not valid for SequenceVariantSupplementation.') + # if the snpEff dir exists, assume we already downloaded it self.snpeff_dir = path.join(workspace_dir, "snpEff") if not 
path.isdir(self.snpeff_dir): diff --git a/parsers/LitCoin/src/NER/nameres.py b/parsers/LitCoin/src/NER/nameres.py index 5156e3c8..774f903b 100644 --- a/parsers/LitCoin/src/NER/nameres.py +++ b/parsers/LitCoin/src/NER/nameres.py @@ -1,12 +1,12 @@ -import os import logging import requests from parsers.LitCoin.src.NER.base import BaseNEREngine +from orion.config import config # Configuration: NameRes -NAMERES_URL = os.getenv('NAMERES_URL', 'https://name-resolution-sri.renci.org/') +NAMERES_URL = config.NAMERES_URL NAMERES_ENDPOINT = f'{NAMERES_URL}lookup' NAMERES_RL_ENDPOINT = f'{NAMERES_URL}reverse_lookup' diff --git a/parsers/LitCoin/src/NER/sapbert.py b/parsers/LitCoin/src/NER/sapbert.py index ab50389b..59907302 100644 --- a/parsers/LitCoin/src/NER/sapbert.py +++ b/parsers/LitCoin/src/NER/sapbert.py @@ -1,12 +1,12 @@ -import os import logging import requests from parsers.LitCoin.src.NER.base import BaseNEREngine +from orion.config import config # Configuration: get the SAPBERT URL and figure out the annotate path. -SAPBERT_URL = os.getenv('SAPBERT_URL', 'https://babel-sapbert.apps.renci.org/') +SAPBERT_URL = config.SAPBERT_URL SAPBERT_ANNOTATE_ENDPOINT = SAPBERT_URL + 'annotate/' SAPBERT_MODEL_NAME = "sapbert" SAPBERT_COUNT = 1000 # We've found that 1000 is about the minimum you need for reasonable results. 
diff --git a/parsers/LitCoin/src/bagel/bagel_gpt.py b/parsers/LitCoin/src/bagel/bagel_gpt.py index c38ec552..a4b863e9 100644 --- a/parsers/LitCoin/src/bagel/bagel_gpt.py +++ b/parsers/LitCoin/src/bagel/bagel_gpt.py @@ -2,10 +2,10 @@ import os from collections import defaultdict -from orion.config import CONFIG +from orion.config import config from orion.logging import get_orion_logger -OPENAI_API_KEY = CONFIG.get("OPENAI_API_KEY") +OPENAI_API_KEY = config.OPENAI_API_KEY LLM_RESULTS = [] diff --git a/parsers/LitCoin/src/bagel/bagel_service.py b/parsers/LitCoin/src/bagel/bagel_service.py index 772691e1..4af5d472 100644 --- a/parsers/LitCoin/src/bagel/bagel_service.py +++ b/parsers/LitCoin/src/bagel/bagel_service.py @@ -1,21 +1,17 @@ import requests from requests.auth import HTTPBasicAuth -from orion.config import CONFIG +from orion.config import config -BAGEL_ENDPOINT = 'https://bagel.apps.renci.org/' -BAGEL_ENDPOINT += 'find_curies_openai' +BAGEL_ENDPOINT = config.BAGEL_ENDPOINT + 'find_curies_openai' -bagel_nameres_url = CONFIG.get('NAMERES_ENDPOINT', 'https://name-resolution-sri.renci.org/') -bagel_nameres_url += 'lookup?autocomplete=false&offset=0&limit=10&string="' +bagel_nameres_url = config.NAMERES_URL + 'lookup?autocomplete=false&offset=0&limit=10&string="' -bagel_sapbert_url = CONFIG.get('SAPBERT_URL', 'https://sap-qdrant.apps.renci.org/') -bagel_sapbert_url += "annotate/" +bagel_sapbert_url = config.SAPBERT_URL + "annotate/" -bagel_nodenorm_url = CONFIG.get('NODE_NORMALIZATION_ENDPOINT', 'https://nodenormalization-sri.renci.org/') -bagel_nodenorm_url += 'get_normalized_nodes' +bagel_nodenorm_url = config.NODE_NORMALIZATION_ENDPOINT + 'get_normalized_nodes' -BAGEL_SERVICE_USERNAME = CONFIG.get("BAGEL_SERVICE_USERNAME", 'default_bagel_username') -BAGEL_SERVICE_PASSWORD = CONFIG.get("BAGEL_SERVICE_PASSWORD", 'default_bagel_password') +BAGEL_SERVICE_USERNAME = config.BAGEL_SERVICE_USERNAME +BAGEL_SERVICE_PASSWORD = config.BAGEL_SERVICE_PASSWORD def 
call_bagel_service(text, entity, entity_type=''): diff --git a/parsers/LitCoin/src/loadLitCoin.py b/parsers/LitCoin/src/loadLitCoin.py index 292365bc..6e708d25 100644 --- a/parsers/LitCoin/src/loadLitCoin.py +++ b/parsers/LitCoin/src/loadLitCoin.py @@ -12,6 +12,7 @@ from orion.utils import GetData, quick_jsonl_file_iterator from orion.normalization import call_name_resolution, NAME_RESOLVER_API_ERROR from orion.prefixes import PUBMED +from orion.config import config from parsers.LitCoin.src.bagel.bagel_service import call_bagel_service @@ -68,7 +69,7 @@ class LITCOIN: ABSTRACT_JOURNAL_EDGE_PROP = 'journal' -LITCOIN_PRED_MAPPING_URL = os.getenv('LITCOIN_PRED_MAPPING_URL', 'https://pred-mapping.apps.renci.org') +LITCOIN_PRED_MAPPING_URL = config.LITCOIN_PRED_MAPPING_URL PRED_MAPPING_ENDPOINT = f'{LITCOIN_PRED_MAPPING_URL}/query/' @@ -133,7 +134,7 @@ def __init__(self, test_mode: bool = False, source_data_dir: str = None): :param source_data_dir - the specific storage directory to save files in """ super().__init__(test_mode=test_mode, source_data_dir=source_data_dir) - self.shared_source_data_path = os.getenv('SHARED_SOURCE_DATA_PATH', None) + self.shared_source_data_path = config.SHARED_SOURCE_DATA_PATH self.data_url = 'https://stars.renci.org/var/data_services/litcoin/' self.version_file = 'litcoin.yaml' self.abstracts_file = 'abstracts_CompAndHeal.json' diff --git a/pyproject.toml b/pyproject.toml index 6fe1b2b9..fcadba77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ dependencies = [ "bmt>=1.4.6", "jsonlines>=4.0.0", "orjson>=3.11.7", - "python-dotenv>=1.0.1", + "pydantic-settings>=2.13.0", "pyyaml>=6.0.1", "requests>=2.33.1", "requests-toolbelt>=1.0.0", diff --git a/uv.lock b/uv.lock index ba525c96..c6eeb1e7 100644 --- a/uv.lock +++ b/uv.lock @@ -733,6 +733,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash 
= "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, ] +[[package]] +name = "pydantic-settings" +version = "2.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -937,7 +951,7 @@ dependencies = [ { name = "bmt" }, { name = "jsonlines" }, { name = "orjson" }, - { name = "python-dotenv" }, + { name = "pydantic-settings" }, { name = "pyyaml" }, { name = "requests" }, { name = "requests-toolbelt" }, @@ -984,8 +998,8 @@ requires-dist = [ { name = "polars", marker = "extra == 'robokop'", specifier = ">=1.19.0" }, { name = "prefixmaps", marker = "extra == 'robokop'", specifier = ">=0.2.6" }, { name = "psycopg2-binary", marker = "extra == 'robokop'", specifier = ">=2.9.9" }, + { name = "pydantic-settings", specifier = ">=2.13.0" }, { name = "pyoxigraph", marker = "extra == 'robokop'", specifier = ">=0.3.22" }, - { name = "python-dotenv", specifier = ">=1.0.1" }, { name = "pyyaml", specifier = ">=6.0.1" }, { name = "redis", marker = "extra == 'robokop'", specifier = ">=5.2.1" }, { name = "requests", specifier = ">=2.33.1" }, From 3a1121ab0af7621fb8826b708b996e6d1da1f523 Mon Sep 17 00:00:00 2001 From: Evan Morris Date: 
Fri, 3 Apr 2026 15:05:46 -0700 Subject: [PATCH 04/14] standardize url slash usage and nomenclature --- .env.example | 27 +++++++++++++--------- helm/orion/templates/graph-builder.yaml | 12 +++++----- orion/build_manager.py | 3 +-- orion/config.py | 14 ++++++++--- orion/normalization.py | 12 +++++----- parsers/LitCoin/src/NER/nameres.py | 4 ++-- parsers/LitCoin/src/NER/sapbert.py | 2 +- parsers/LitCoin/src/bagel/bagel.py | 4 ++-- parsers/LitCoin/src/bagel/bagel_service.py | 8 +++---- 9 files changed, 49 insertions(+), 37 deletions(-) diff --git a/.env.example b/.env.example index 2165f415..97529bc3 100644 --- a/.env.example +++ b/.env.example @@ -10,7 +10,7 @@ # ORION_LOGS= # Base URL used when generating graph metadata -# ORION_OUTPUT_URL=https://localhost/ +# ORION_OUTPUT_URL=https://localhost # ---- Graph Spec ---- @@ -25,30 +25,35 @@ # Enable test/debug mode (sets log level to DEBUG and runs ingests with a smaller subset of data if possible) # ORION_TEST_MODE=false -# ---- Normalization Endpoints ---- +# ---- Biolink Model ---- -# Edge normalization / BioLink Lookup endpoint -# EDGE_NORMALIZATION_ENDPOINT=https://bl-lookup-sri.renci.org/ +# Biolink model version +# BL_VERSION=v4.3.4 -# Node normalization endpoint -# NODE_NORMALIZATION_ENDPOINT=https://nodenormalization-sri.renci.org/ +# ---- Normalization URLs ---- + +# Edge normalization / BioLink Lookup URL +# EDGE_NORMALIZATION_URL=https://bl-lookup-sri.renci.org + +# Node normalization URL +# NODE_NORMALIZATION_URL=https://nodenormalization-sri.renci.org # ---- LitCoin / Bagel (may be removed in the future) ---- # Name resolution service URL -# NAMERES_URL=https://name-resolution-sri.renci.org/ +# NAMERES_URL=https://name-resolution-sri.renci.org # SapBERT service URL -# SAPBERT_URL=https://babel-sapbert.apps.renci.org/ +# SAPBERT_URL=https://babel-sapbert.apps.renci.org # Shared source data path for LitCoin pipeline # SHARED_SOURCE_DATA_PATH=/tmp/shared_data # LitCoin predicate mapping service URL -# 
LITCOIN_PRED_MAPPING_URL=https://pred-mapping.apps.renci.org/ +# LITCOIN_PRED_MAPPING_URL=https://pred-mapping.apps.renci.org -# Bagel service endpoint -# BAGEL_ENDPOINT=https://bagel.apps.renci.org/ +# Bagel service URL +# BAGEL_URL=https://bagel.apps.renci.org # Bagel service credentials # BAGEL_SERVICE_USERNAME= diff --git a/helm/orion/templates/graph-builder.yaml b/helm/orion/templates/graph-builder.yaml index ef2e9a8b..6e6513f8 100644 --- a/helm/orion/templates/graph-builder.yaml +++ b/helm/orion/templates/graph-builder.yaml @@ -70,15 +70,15 @@ spec: - name: BL_VERSION value: {{ .Values.orion.normalization.bl_version }} {{- if .Values.orion.normalization.nodeNormEndpoint }} - - name: NODE_NORMALIZATION_ENDPOINT + - name: NODE_NORMALIZATION_URL value: {{ .Values.orion.normalization.nodeNormEndpoint }} {{- end }} {{- if .Values.orion.normalization.edgeNormEndpoint }} - - name: EDGE_NORMALIZATION_ENDPOINT + - name: EDGE_NORMALIZATION_URL value: {{ .Values.orion.normalization.edgeNormEndpoint }} {{- end }} {{- if .Values.orion.normalization.nameResolverEndpoint }} - - name: NAMERES_ENDPOINT + - name: NAMERES_URL value: {{ .Values.orion.normalization.nameResolverEndpoint }} {{- end }} {{- if .Values.orion.normalization.sapbertEndpoint }} @@ -157,15 +157,15 @@ spec: - name: BL_VERSION value: {{ .Values.orion.normalization.bl_version }} {{- if .Values.orion.normalization.nodeNormEndpoint }} - - name: NODE_NORMALIZATION_ENDPOINT + - name: NODE_NORMALIZATION_URL value: {{ .Values.orion.normalization.nodeNormEndpoint }} {{- end }} {{- if .Values.orion.normalization.edgeNormEndpoint }} - - name: EDGE_NORMALIZATION_ENDPOINT + - name: EDGE_NORMALIZATION_URL value: {{ .Values.orion.normalization.edgeNormEndpoint }} {{- end }} {{- if .Values.orion.normalization.nameResolverEndpoint }} - - name: NAMERES_ENDPOINT + - name: NAMERES_URL value: {{ .Values.orion.normalization.nameResolverEndpoint }} {{- end }} {{- if .Values.orion.normalization.sapbertEndpoint }} diff --git 
a/orion/build_manager.py b/orion/build_manager.py index 505e0950..73e6233e 100644 --- a/orion/build_manager.py +++ b/orion/build_manager.py @@ -686,8 +686,7 @@ def get_graph_dir_path(self, graph_id: str, graph_version: str): @staticmethod def get_graph_output_url(graph_id: str, graph_version: str): - graph_output_url = config.ORION_OUTPUT_URL.removesuffix('/') - return f'{graph_output_url}/{graph_id}/{graph_version}/' + return f'{config.ORION_OUTPUT_URL}/{graph_id}/{graph_version}/' @staticmethod def get_graph_nodes_file_path(graph_output_dir: str): diff --git a/orion/config.py b/orion/config.py index 2cb314ff..f80a4fee 100644 --- a/orion/config.py +++ b/orion/config.py @@ -1,4 +1,5 @@ from pathlib import Path +from pydantic import field_validator from pydantic_settings import BaseSettings, SettingsConfigDict @@ -9,6 +10,13 @@ class Config(BaseSettings): env_ignore_empty=True ) + @field_validator("*", mode="before") + @classmethod + def strip_trailing_slashes(cls, v, info): + if isinstance(v, str) and info.field_name.endswith("_URL"): + return v.rstrip("/") + return v + ORION_STORAGE: str | None = None ORION_GRAPHS: str | None = None ORION_LOGS: str | None = None @@ -21,15 +29,15 @@ class Config(BaseSettings): BL_VERSION: str = "v4.3.4" - EDGE_NORMALIZATION_ENDPOINT: str = "https://bl-lookup-sri.renci.org" - NODE_NORMALIZATION_ENDPOINT: str = "https://nodenormalization-sri.renci.org" + EDGE_NORMALIZATION_URL: str = "https://bl-lookup-sri.renci.org" + NODE_NORMALIZATION_URL: str = "https://nodenormalization-sri.renci.org" # the following were used for the LitCoin project and may be removed in the future NAMERES_URL: str = "https://name-resolution-sri.renci.org" SAPBERT_URL: str = "https://babel-sapbert.apps.renci.org" SHARED_SOURCE_DATA_PATH: str = "/tmp/shared_data" LITCOIN_PRED_MAPPING_URL: str = "https://pred-mapping.apps.renci.org" - BAGEL_ENDPOINT: str = "https://bagel.apps.renci.org" + BAGEL_URL: str = "https://bagel.apps.renci.org" BAGEL_SERVICE_USERNAME: str 
| None = None BAGEL_SERVICE_PASSWORD: str | None = None OPENAI_API_KEY: str | None = None diff --git a/orion/normalization.py b/orion/normalization.py index 1e72f6bb..aff29759 100644 --- a/orion/normalization.py +++ b/orion/normalization.py @@ -100,7 +100,7 @@ def __init__(self, def hit_node_norm_service(self, curies, retries=0): resp: requests.models.Response = \ - self.requests_session.post(f'{NODE_NORMALIZATION_URL}get_normalized_nodes', + self.requests_session.post(f'{NODE_NORMALIZATION_URL}/get_normalized_nodes', json={'curies': curies, 'conflate': self.conflate_node_types, 'drug_chemical_conflate': self.conflate_node_types, @@ -346,7 +346,7 @@ def get_current_node_norm_version(self): Retrieves the current production version from the node normalization service """ # hit the node norm status endpoint - node_norm_status_url = f'{NODE_NORMALIZATION_URL}status' + node_norm_status_url = f'{NODE_NORMALIZATION_URL}/status' resp: requests.models.Response = requests.get(node_norm_status_url) resp.raise_for_status() status: dict = resp.json() @@ -392,7 +392,7 @@ def __init__(self, self.edge_normalization_lookup = {} self.cached_edge_norms = {} - self.edge_norm_endpoint = config.EDGE_NORMALIZATION_ENDPOINT + self.edge_norm_endpoint = config.EDGE_NORMALIZATION_URL if edge_normalization_version != 'latest': if self.check_bl_version_valid(edge_normalization_version): @@ -443,7 +443,7 @@ def normalize_edge_data(self, predicate_chunk: list = predicates_to_normalize_list[start_index: end_index] # hit the edge normalization service - request_url = f'{self.edge_norm_endpoint}resolve_predicate?version={self.edge_norm_version}&predicate=' + request_url = f'{self.edge_norm_endpoint}/resolve_predicate?version={self.edge_norm_version}&predicate=' request_url += '&predicate='.join(predicate_chunk) logger.debug(f'Sending request: {request_url}') resp: requests.models.Response = requests.get(request_url) @@ -515,7 +515,7 @@ def check_bl_version_valid(self, bl_version: str): def 
get_available_versions(self): # call the versions endpoint - edge_norm_versions_url = f'{self.edge_norm_endpoint}versions' + edge_norm_versions_url = f'{self.edge_norm_endpoint}/versions' resp: requests.models.Response = requests.get(edge_norm_versions_url) # did we get a good status code @@ -535,7 +535,7 @@ def check_node_type_valid(self, node_type: str): def get_valid_node_types(self): # call the descendants endpoint with the root node type - edge_norm_descendants_url = f'{self.edge_norm_endpoint}bl/{NAMED_THING}/descendants?version={self.edge_norm_version}' + edge_norm_descendants_url = f'{self.edge_norm_endpoint}/bl/{NAMED_THING}/descendants?version={self.edge_norm_version}' resp: requests.models.Response = requests.get(edge_norm_descendants_url) # did we get a good status code diff --git a/parsers/LitCoin/src/NER/nameres.py b/parsers/LitCoin/src/NER/nameres.py index 774f903b..1c4cbbbc 100644 --- a/parsers/LitCoin/src/NER/nameres.py +++ b/parsers/LitCoin/src/NER/nameres.py @@ -7,8 +7,8 @@ # Configuration: NameRes NAMERES_URL = config.NAMERES_URL -NAMERES_ENDPOINT = f'{NAMERES_URL}lookup' -NAMERES_RL_ENDPOINT = f'{NAMERES_URL}reverse_lookup' +NAMERES_ENDPOINT = f'{NAMERES_URL}/lookup' +NAMERES_RL_ENDPOINT = f'{NAMERES_URL}/reverse_lookup' class NameResNEREngine(BaseNEREngine): diff --git a/parsers/LitCoin/src/NER/sapbert.py b/parsers/LitCoin/src/NER/sapbert.py index 59907302..ae28c66c 100644 --- a/parsers/LitCoin/src/NER/sapbert.py +++ b/parsers/LitCoin/src/NER/sapbert.py @@ -7,7 +7,7 @@ # Configuration: get the SAPBERT URL and figure out the annotate path. SAPBERT_URL = config.SAPBERT_URL -SAPBERT_ANNOTATE_ENDPOINT = SAPBERT_URL + 'annotate/' +SAPBERT_ANNOTATE_ENDPOINT = f'{SAPBERT_URL}/annotate/' SAPBERT_MODEL_NAME = "sapbert" SAPBERT_COUNT = 1000 # We've found that 1000 is about the minimum you need for reasonable results. 
diff --git a/parsers/LitCoin/src/bagel/bagel.py b/parsers/LitCoin/src/bagel/bagel.py index 88ea96f3..4d14eaec 100644 --- a/parsers/LitCoin/src/bagel/bagel.py +++ b/parsers/LitCoin/src/bagel/bagel.py @@ -119,7 +119,7 @@ def augment_results(terms, nameres, taxes): augs = nameres.reverse_lookup(curies) for curie in augs: terms[curie].update(augs[curie]) - resp = requests.get(f"{NODE_NORMALIZATION_URL}get_normalized_nodes?curie="+curie+"&conflate=true&drug_chemical_conflate=true&description=true") + resp = requests.get(f"{NODE_NORMALIZATION_URL}/get_normalized_nodes?curie="+curie+"&conflate=true&drug_chemical_conflate=true&description=true") if resp.status_code == 200: result = resp.json() try: @@ -131,7 +131,7 @@ def augment_results(terms, nameres, taxes): if len(annotation["taxa"]) > 0: tax_id = annotation["taxa"][0] if tax_id not in taxes: - resp = requests.get(f"{NODE_NORMALIZATION_URL}get_normalized_nodes?curie="+tax_id) + resp = requests.get(f"{NODE_NORMALIZATION_URL}/get_normalized_nodes?curie="+tax_id) if resp.status_code == 200: result = resp.json() try: diff --git a/parsers/LitCoin/src/bagel/bagel_service.py b/parsers/LitCoin/src/bagel/bagel_service.py index 4af5d472..3d5e672e 100644 --- a/parsers/LitCoin/src/bagel/bagel_service.py +++ b/parsers/LitCoin/src/bagel/bagel_service.py @@ -2,13 +2,13 @@ from requests.auth import HTTPBasicAuth from orion.config import config -BAGEL_ENDPOINT = config.BAGEL_ENDPOINT + 'find_curies_openai' +BAGEL_ENDPOINT = f'{config.BAGEL_URL}/find_curies_openai' -bagel_nameres_url = config.NAMERES_URL + 'lookup?autocomplete=false&offset=0&limit=10&string="' +bagel_nameres_url = f'{config.NAMERES_URL}/lookup?autocomplete=false&offset=0&limit=10&string="' -bagel_sapbert_url = config.SAPBERT_URL + "annotate/" +bagel_sapbert_url = f'{config.SAPBERT_URL}/annotate/' -bagel_nodenorm_url = config.NODE_NORMALIZATION_ENDPOINT + 'get_normalized_nodes' +bagel_nodenorm_url = f'{config.NODE_NORMALIZATION_URL}/get_normalized_nodes' 
BAGEL_SERVICE_USERNAME = config.BAGEL_SERVICE_USERNAME BAGEL_SERVICE_PASSWORD = config.BAGEL_SERVICE_PASSWORD From a62b56375c0c42439aa4a84520803363b4ee35ba Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 13:12:10 -0700 Subject: [PATCH 05/14] dont hardcode env passthroughs, use env file if there --- docker-compose-worker.yml | 33 ++++++++------------------------- docker-compose.yml | 23 +++-------------------- 2 files changed, 11 insertions(+), 45 deletions(-) diff --git a/docker-compose-worker.yml b/docker-compose-worker.yml index 94018b3f..52857447 100644 --- a/docker-compose-worker.yml +++ b/docker-compose-worker.yml @@ -5,40 +5,23 @@ services: dockerfile: Dockerfile container_name: orion-worker command: [celery, "-A", "celery_worker.celery_app", "worker", "--loglevel=info", "-Q", "orion"] + env_file: + - .env environment: - - CELERY_BROKER_URL=redis://redis:6379/0 - - CELERY_RESULT_BACKEND=redis://redis:6379/0 - - SHARED_SOURCE_DATA_PATH=/tmp/shared_data + # override paths from env, use paths volumes are mounted to inside the container - ORION_STORAGE=/ORION_storage - ORION_GRAPHS=/ORION_graphs - ORION_LOGS=/ORION_logs - - BAGEL_SERVICE_USERNAME=fake-username-do-not-commit-a-real-one!!! - - BAGEL_SERVICE_PASSWORD=fake-password-do-not-commit-a-real-one!!! 
- - ORION_GRAPH_SPEC - - ORION_GRAPH_SPEC_URL - - ORION_OUTPUT_URL - - EDGE_NORMALIZATION_ENDPOINT - - NODE_NORMALIZATION_ENDPOINT - - NAMERES_URL - - SAPBERT_URL - - LITCOIN_PRED_MAPPING_URL - - BL_VERSION - - PHAROS_DB_HOST - - PHAROS_DB_USER - - PHAROS_DB_PASSWORD - - PHAROS_DB_NAME - - PHAROS_DB_PORT - - DRUGCENTRAL_DB_HOST - - DRUGCENTRAL_DB_USER - - DRUGCENTRAL_DB_PASSWORD - - DRUGCENTRAL_DB_NAME - - DRUGCENTRAL_DB_PORT + - SHARED_SOURCE_DATA_PATH=/tmp/shared_data + # specific to celery + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/0 volumes: - .:/ORION - - "${SHARED_SOURCE_DATA_PATH}:/tmp/shared_data" - "${ORION_STORAGE}:/ORION_storage" - "${ORION_GRAPHS}:/ORION_graphs" - "${ORION_LOGS}:/ORION_logs" + - "${SHARED_SOURCE_DATA_PATH}:/tmp/shared_data" user: 1000:7474 networks: - app-network diff --git a/docker-compose.yml b/docker-compose.yml index c8794725..6b9842ef 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,33 +3,16 @@ services: build: context: . 
command: [orion-build, all] + env_file: + - .env environment: + # override paths from env, use paths volumes are mounted to inside the container - ORION_STORAGE=/ORION_storage - ORION_GRAPHS=/ORION_graphs - ORION_LOGS=/ORION_logs - - ORION_GRAPH_SPEC - - ORION_GRAPH_SPEC_URL - - ORION_OUTPUT_URL - - EDGE_NORMALIZATION_ENDPOINT - - NODE_NORMALIZATION_ENDPOINT - - NAMERES_URL - - SAPBERT_URL - - BL_VERSION - - PHAROS_DB_HOST - - PHAROS_DB_USER - - PHAROS_DB_PASSWORD - - PHAROS_DB_NAME - - PHAROS_DB_PORT - - DRUGCENTRAL_DB_HOST - - DRUGCENTRAL_DB_USER - - DRUGCENTRAL_DB_PASSWORD - - DRUGCENTRAL_DB_NAME - - DRUGCENTRAL_DB_PORT volumes: - .:/ORION - "${ORION_STORAGE}:/ORION_storage" - "${ORION_GRAPHS}:/ORION_graphs" - "${ORION_LOGS}:/ORION_logs" user: 7474:7474 - - From 10364adb901ae343937da6d56e99689cdf139bfa Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 16:08:43 -0700 Subject: [PATCH 06/14] making env file optional, removing persistent logs from docker compose --- docker-compose-worker.yml | 3 ++- docker-compose.yml | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose-worker.yml b/docker-compose-worker.yml index 52857447..5a9efc57 100644 --- a/docker-compose-worker.yml +++ b/docker-compose-worker.yml @@ -6,7 +6,8 @@ services: container_name: orion-worker command: [celery, "-A", "celery_worker.celery_app", "worker", "--loglevel=info", "-Q", "orion"] env_file: - - .env + - path: .env + required: false environment: # override paths from env, use paths volumes are mounted to inside the container - ORION_STORAGE=/ORION_storage diff --git a/docker-compose.yml b/docker-compose.yml index 6b9842ef..8898f706 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,15 +4,14 @@ services: context: . 
command: [orion-build, all] env_file: - - .env + - path: .env + required: false environment: # override paths from env, use paths volumes are mounted to inside the container - ORION_STORAGE=/ORION_storage - ORION_GRAPHS=/ORION_graphs - - ORION_LOGS=/ORION_logs volumes: - .:/ORION - "${ORION_STORAGE}:/ORION_storage" - "${ORION_GRAPHS}:/ORION_graphs" - - "${ORION_LOGS}:/ORION_logs" user: 7474:7474 From f51b5ea6fcf928e7981efc22f4407a6809606356 Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 16:10:27 -0700 Subject: [PATCH 07/14] lazy load the storage dir --- orion/ingest_pipeline.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/orion/ingest_pipeline.py b/orion/ingest_pipeline.py index 1f0a41c7..b057ff5d 100644 --- a/orion/ingest_pipeline.py +++ b/orion/ingest_pipeline.py @@ -39,7 +39,9 @@ def __init__(self, logger.info(f'IngestPipeline running in fresh start mode... previous state and files ignored.') # lazy load the storage directory path - self.storage_dir = self.init_storage_dir(storage_dir) + # store the storage_dir parameter to override the Config if provided programmatically or through CLI + self._storage_dir_override = storage_dir + self._storage_dir = None # dict of source_id -> latest source version (to prevent double lookups) self.latest_source_version_lookup = {} @@ -689,8 +691,14 @@ def get_final_file_paths(self, source_id: str, source_version: str, parsing_vers def get_source_version_path(self, source_id: str, source_version: str): return os.path.join(self.storage_dir, source_id, source_version) + @property + def storage_dir(self): + if self._storage_dir is None: + self._storage_dir = self._resolve_storage_dir(self._storage_dir_override) + return self._storage_dir + @staticmethod - def init_storage_dir(storage_dir: str=None): + def _resolve_storage_dir(storage_dir: str = None): # if a dir was provided programmatically try to use that if storage_dir is not None: if os.path.isdir(storage_dir): From 
7662095067f2d69f2c289e49e52f2965e13db9fc Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 16:10:40 -0700 Subject: [PATCH 08/14] adapt tests for new config --- tests/test_graph_spec.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/test_graph_spec.py b/tests/test_graph_spec.py index 91030e90..e76d7a68 100644 --- a/tests/test_graph_spec.py +++ b/tests/test_graph_spec.py @@ -4,16 +4,21 @@ from unittest.mock import MagicMock from orion.build_manager import GraphBuilder, GraphSpecError +from orion import config as config_module + + +def set_config(**overrides): + """Override the config""" + for key, value in overrides.items(): + object.__setattr__(config_module.config, key, value) def clear_graph_spec_config(): - os.environ['ORION_GRAPH_SPEC'] = '' - os.environ['ORION_GRAPH_SPEC_URL'] = '' + set_config(ORION_GRAPH_SPEC='', ORION_GRAPH_SPEC_URL='') def reset_graph_spec_config(): - os.environ['ORION_GRAPH_SPEC'] = 'testing-graph-spec.yaml' - os.environ['ORION_GRAPH_SPEC_URL'] = '' + set_config(ORION_GRAPH_SPEC='testing-graph-spec.yaml', ORION_GRAPH_SPEC_URL='') @pytest.fixture(scope='module') @@ -43,16 +48,14 @@ def test_empty_graph_spec_config(test_graph_spec_dir, test_graph_output_dir): def test_invalid_graph_spec_config(test_graph_spec_dir, test_graph_output_dir): - clear_graph_spec_config() - os.environ['ORION_GRAPH_SPEC'] = 'invalid-spec.yaml' + set_config(ORION_GRAPH_SPEC='invalid-spec.yaml', ORION_GRAPH_SPEC_URL='') with pytest.raises(GraphSpecError): graph_builder = GraphBuilder(graph_specs_dir=test_graph_spec_dir, graph_output_dir=test_graph_output_dir) def test_invalid_graph_spec_url_config(test_graph_output_dir): - clear_graph_spec_config() - os.environ['ORION_GRAPH_SPEC_URL'] = 'http://localhost/invalid_graph_spec_url' + set_config(ORION_GRAPH_SPEC='', ORION_GRAPH_SPEC_URL='http://localhost/invalid_graph_spec_url') with pytest.raises(requests.exceptions.ConnectionError): graph_builder = 
GraphBuilder(graph_output_dir=test_graph_output_dir) From 020ab4ab41c58d08a6ed23009b9f47e20a06d88d Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 16:41:23 -0700 Subject: [PATCH 09/14] updating readme --- .env.example | 4 ++-- README.md | 43 ++++++++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/.env.example b/.env.example index 97529bc3..75cecf91 100644 --- a/.env.example +++ b/.env.example @@ -1,10 +1,10 @@ # ---- Storage & Output ---- # Directory for source data downloads and ingest pipeline files -# ORION_STORAGE= +ORION_STORAGE=~/ORION_storage/ # Directory for final graph releases -# ORION_GRAPHS= +ORION_GRAPHS=~/ORION_graphs/ # Directory for log files (if unset, logs go to stdout only) # ORION_LOGS= diff --git a/README.md b/README.md index 6fbb6740..067d762f 100644 --- a/README.md +++ b/README.md @@ -42,31 +42,34 @@ After installation, the following commands are available (prefix with `uv run` i ### Configuring ORION -ORION uses three directories for its data, configured via environment variables: +ORION is configured via environment variables, which can be set directly or through an `.env` file. -| Variable | Purpose | -|---|--------------------------------------| -| `ORION_STORAGE` | Data ingest pipeline storage | -| `ORION_GRAPHS` | Knowledge graph outputs | -| `ORION_LOGS` | Log files | - -You can set these up manually or use the provided script: +In most cases, you can simply use this provided script to set up a local environment. It will create directories for ORION outputs next to where ORION was installed and set env vars pointing to them. ```bash source ./set_up_dev_env.sh ``` -#### Graph Spec +For more customization and settings, use an .env file. Copy or rename the `.env.example` file to `.env`. -A Graph Spec yaml file defines which sources to include in a knowledge graph. 
Set one of the following environment variables (not both): +Then uncomment and edit `.env` as desired to set values for your environment. -```bash -# Option 1: Name of a file in the graph_specs/ directory -export ORION_GRAPH_SPEC=example-graph-spec.yaml +| Variable | Purpose | Default | +|---|------------------------------------------------------------|---| +| `ORION_STORAGE` | Path to a directory for data ingest pipeline storage | (required) | +| `ORION_GRAPHS` | Path to a directory for Knowledge Graph outputs | (required) | +| `ORION_LOGS` | Path to a Log file directory (if unset, logs go to stdout) | `None` | +| `ORION_GRAPH_SPEC` | Graph Spec filename from `graph_specs/` | `example-graph-spec.yaml` | +| `ORION_GRAPH_SPEC_URL` | URL to a remote Graph Spec file | | -# Option 2: URL pointing to a Graph Spec yaml file -export ORION_GRAPH_SPEC_URL=https://stars.renci.org/var/data_services/graph_specs/default-graph-spec.yaml -``` +Configuration is managed by [pydantic-settings](https://docs.pydantic.dev/latest/concepts/pydantic_settings/) — environment variables override `.env` file values, and sensible defaults are provided where possible. See `orion/config.py` for the full list of settings. + +#### Graph Spec + +A Graph Spec yaml file defines which sources to include in a knowledge graph. Set one of the following (not both): + +- `ORION_GRAPH_SPEC` - name of a file in the `graph_specs/` directory +- `ORION_GRAPH_SPEC_URL` - URL pointing to a Graph Spec yaml file Here is a simple Graph Spec example: @@ -100,6 +103,8 @@ See the `graph_specs/` directory for more examples. ### Running with Docker +Make sure environment variables are set or an `.env` file is configured with at least `ORION_STORAGE` and `ORION_GRAPHS` pointing to valid host directories. The compose file reads these env vars and mounts those directories as volumes in the container. 
+ Build the image: ```bash @@ -115,19 +120,19 @@ docker compose up Build a specific graph: ```bash -docker compose run --rm orion orion-build Example_Graph +docker compose run orion orion-build Example_Graph ``` Run the ingest pipeline for a single data source: ```bash -docker compose run --rm orion orion-ingest DrugCentral +docker compose run orion orion-ingest DrugCentral ``` See available data sources and options: ```bash -docker compose run --rm orion orion-ingest -h +docker compose run orion orion-ingest -h ``` ### Development From 2c53dd31c2c4833195954225b5a2191cbf57daed Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 16:42:27 -0700 Subject: [PATCH 10/14] adding dockerignore --- .dockerignore | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..25573bec --- /dev/null +++ b/.dockerignore @@ -0,0 +1,9 @@ +.git +.env +.idea +.DS_Store +.pytest_cache +.venv +__pycache__ +*.egg-info +dist \ No newline at end of file From e0b0275127b7d97ae3bee96a490f00baa82dd670 Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 20:23:41 -0700 Subject: [PATCH 11/14] removing unnecessary logs --- .github/workflows/test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1b126e2a..945c8e29 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,10 +14,8 @@ jobs: - name: create env params run: | echo "ROBOKOP_HOME=$PWD" >> $GITHUB_ENV - mkdir -p $PWD/tests/workspace/logs mkdir -p $PWD/tests/workspace/storage mkdir -p $PWD/tests/workspace/graphs - echo "ORION_LOGS=$PWD/tests/workspace/logs" >> $GITHUB_ENV echo "ORION_STORAGE=$PWD/tests/workspace/storage" >> $GITHUB_ENV echo "ORION_GRAPHS=$PWD/tests/workspace/graphs" >> $GITHUB_ENV From ca0a15a5780abaf22a2489db383c59159c68ffbd Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 20:24:14 -0700 Subject: 
[PATCH 12/14] updating actions and removing unused command --- .github/workflows/release.yml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index eb959fe5..4dee91cd 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -11,29 +11,28 @@ jobs: push_to_registry: name: Push Docker image to GitHub Packages tagged with "latest" and version number. runs-on: ubuntu-latest + permissions: + contents: read + packages: write steps: - name: Check out the repo uses: actions/checkout@v4 - - name: Get the version - id: get_version - run: echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\//} - name: Login to ghcr - uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a + uses: docker/login-action@v3 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Extract metadata (tags, labels) for Docker id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + uses: docker/metadata-action@v5 with: images: ghcr.io/${{ github.repository }} - name: Push to GitHub Packages - uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671 + uses: docker/build-push-action@v6 with: context: . 
push: true tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - build-args: VERSION=${{ steps.get_version.outputs.VERSION }} + labels: ${{ steps.meta.outputs.labels }} \ No newline at end of file From 88c7d8d70ae0be1c499d11301c3cc048ab4788a3 Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 21:12:23 -0700 Subject: [PATCH 13/14] bumping robokop-genetics --- pyproject.toml | 2 +- uv.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fcadba77..c79d7b72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "pyyaml>=6.0.1", "requests>=2.33.1", "requests-toolbelt>=1.0.0", - "robokop-genetics>=0.7.0", + "robokop-genetics>=0.8.0", "uuid-utils>=0.14.1", "xxhash>=3.6.0", ] diff --git a/uv.lock b/uv.lock index c6eeb1e7..d9e0afa0 100644 --- a/uv.lock +++ b/uv.lock @@ -931,16 +931,16 @@ wheels = [ [[package]] name = "robokop-genetics" -version = "0.7.0" +version = "0.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "bmt" }, { name = "redis" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/41/a6/75b9edf1186d3dbfb485910b40570ea9c7452ffce195934de2a40d17167f/robokop_genetics-0.7.0.tar.gz", hash = "sha256:87eb12250867c18f7e149d869fe9173f664f83e90d6c7b910303fd9ba9efc931", size = 18837, upload-time = "2025-10-07T07:18:29.144Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/17/063c21735ef3ce7fb5abb3d3c7bae122db37e45185ecec510bcab3dfb2b2/robokop_genetics-0.8.0.tar.gz", hash = "sha256:4aeb333e5b373b7e2d72f4d56329748a559f4196d0360e7b7d23d4c1a58a1985", size = 18669, upload-time = "2026-04-07T04:08:32.127Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/96/7e941b2ad392429aac56b0826965534e79edff20784eb767c702cab8fbef/robokop_genetics-0.7.0-py3-none-any.whl", hash = "sha256:fe33f004138f5feb5c43157411b146411bf249b97a1a6900348607f874dd8494", size = 
18695, upload-time = "2025-10-07T07:18:27.895Z" }, + { url = "https://files.pythonhosted.org/packages/ae/e8/dd70c2cc4e0e076d31a0266aaeb710620d987016eddc65c84f2dee08d07f/robokop_genetics-0.8.0-py3-none-any.whl", hash = "sha256:3ea16f1c72d8c0f3a4f73f9e5c7122347c304b425aafc3d67906ebc68d56fefe", size = 18313, upload-time = "2026-04-07T04:08:30.278Z" }, ] [[package]] @@ -1004,7 +1004,7 @@ requires-dist = [ { name = "redis", marker = "extra == 'robokop'", specifier = ">=5.2.1" }, { name = "requests", specifier = ">=2.33.1" }, { name = "requests-toolbelt", specifier = ">=1.0.0" }, - { name = "robokop-genetics", specifier = ">=0.7.0" }, + { name = "robokop-genetics", specifier = ">=0.8.0" }, { name = "uuid-utils", specifier = ">=0.14.1" }, { name = "xxhash", specifier = ">=3.6.0" }, ] From a050f3cbf2306de0a2f93cc8066deef9997d56c5 Mon Sep 17 00:00:00 2001 From: Evan Morris Date: Mon, 6 Apr 2026 21:55:22 -0700 Subject: [PATCH 14/14] adding more comments to the env.example --- .env.example | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.env.example b/.env.example index 75cecf91..856092b2 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,8 @@ +# Copy or rename this file to ".env" to use it for environment variable configurations. +# +# ATTENTION: The only required environment variables are ORION_STORAGE and ORION_GRAPHS. The rest are optional and it's +# usually fine to leave them commented out or delete them, as the ORION config module will assign defaults. + # ---- Storage & Output ---- # Directory for source data downloads and ingest pipeline files @@ -9,7 +14,8 @@ ORION_GRAPHS=~/ORION_graphs/ # Directory for log files (if unset, logs go to stdout only) # ORION_LOGS= -# Base URL used when generating graph metadata +# Base URL utilized to generate URI identifiers utilized by metadata. 
+# For example, ROBOKOP graphs use https://robokop.renci.org/ # ORION_OUTPUT_URL=https://localhost # ---- Graph Spec ---- @@ -27,7 +33,7 @@ ORION_GRAPHS=~/ORION_graphs/ # ---- Biolink Model ---- -# Biolink model version +# Biolink model version (optional - don't set this and ORION will use the latest) # BL_VERSION=v4.3.4 # ---- Normalization URLs ----