Skip to content

Commit 88f65e5

Browse files
committed
Reuse file_path instead of introducing another document_path parameter
1 parent 9e151d1 commit 88f65e5

File tree

6 files changed

+7
-21
lines changed

6 files changed

+7
-21
lines changed

docs/source/user_guide_kg_builder.rst

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,6 @@ Run Parameters
224224

225225
SimpleKGPipeline also accepts additional runtime parameters:
226226

227-
- ``document_path`` (str): only used when ``from_pdf=False``, this is the path property of the ``Document`` node.
228227
- ``document_metadata`` (dict): each item will be saved as a property attached to the ``Document`` node.
229228

230229

examples/build_graph/simple_kg_builder_from_text.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,9 @@ async def define_and_run_pipeline(
8181
)
8282
return await kg_builder.run_async(
8383
text=TEXT,
84-
# optional, specify document path for the Document node
84+
# optional, specify file path for the Document node
8585
# if not, a random name will be generated
86-
# document_path="my_document.txt"
86+
# file_path="my_document.txt"
8787
# optional, add document metadata, each item will
8888
# be saved as a property of the Document node
8989
# document_metadata={"author": "Frank Herbert"},

src/neo4j_graphrag/experimental/pipeline/config/template_pipeline/simple_kg_builder.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -339,8 +339,8 @@ def _get_connections(self) -> list[ConnectionDefinition]:
339339
def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
340340
text = user_input.get("text")
341341
file_path = user_input.get("file_path")
342-
if not ((text is None) ^ (file_path is None)):
343-
# exactly one of text or user_input must be set
342+
if text is None and file_path is None:
343+
# user must provide either text or file_path or both
344344
raise PipelineDefinitionError(
345345
"Use either 'text' (when from_pdf=False) or 'file_path' (when from_pdf=True) argument."
346346
)
@@ -367,7 +367,7 @@ def get_run_params(self, user_input: dict[str, Any]) -> dict[str, Any]:
367367
run_params["schema"]["text"] = text
368368
run_params["extractor"]["document_info"] = dict(
369369
path=user_input.get(
370-
"document_path",
370+
"file_path",
371371
)
372372
or "document.txt",
373373
metadata=user_input.get("document_metadata"),

src/neo4j_graphrag/experimental/pipeline/kg_builder.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,16 +148,14 @@ async def run_async(
148148
self,
149149
file_path: Optional[str] = None,
150150
text: Optional[str] = None,
151-
document_path: Optional[str] = None,
152151
document_metadata: Optional[dict[str, Any]] = None,
153152
) -> PipelineResult:
154153
"""
155154
Asynchronously runs the knowledge graph building process.
156155
157156
Args:
158-
file_path (Optional[str]): The path to the PDF file to process. Required if `from_pdf` is True.
157+
file_path (Optional[str]): The path to the PDF file to process. Required if `from_pdf` is True. If `from_pdf` is False, can be used to set the Document node path property.
159158
text (Optional[str]): The text content to process. Required if `from_pdf` is False.
160-
document_path (Optional[str]): The path to the document to process. Required if `from_pdf` is True.
161159
document_metadata (Optional[dict[str, Any]]): The metadata to attach to the document.
162160
163161
Returns:
@@ -167,7 +165,6 @@ async def run_async(
167165
{
168166
"file_path": file_path,
169167
"text": text,
170-
"document_path": document_path,
171168
"document_metadata": document_metadata,
172169
}
173170
)

tests/unit/experimental/pipeline/config/template_pipeline/test_simple_kg_builder.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -322,16 +322,6 @@ def test_simple_kg_pipeline_config_run_params_no_file_no_text() -> None:
322322
)
323323

324324

325-
def test_simple_kg_pipeline_config_run_params_both_file_and_text() -> None:
326-
config = SimpleKGPipelineConfig(from_pdf=False)
327-
with pytest.raises(PipelineDefinitionError) as excinfo:
328-
config.get_run_params({"text": "my text", "file_path": "my file"})
329-
assert (
330-
"Use either 'text' (when from_pdf=False) or 'file_path' (when from_pdf=True) argument."
331-
in str(excinfo)
332-
)
333-
334-
335325
def test_simple_kg_pipeline_config_process_schema_with_precedence_legacy() -> None:
336326
entities: list[EntityInputType] = [
337327
"Person",

tests/unit/experimental/pipeline/test_kg_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ async def test_knowledge_graph_builder_document_info_with_text(_: Mock) -> None:
9090
) as mock_run:
9191
await kg_builder.run_async(
9292
text=text_input,
93-
document_path="my_document.txt",
93+
file_path="my_document.txt",
9494
document_metadata={"source": "google drive"},
9595
)
9696

0 commit comments

Comments
 (0)