Skip to content

Commit add8928

Browse files
Added new commands to reproschema (#90)
* Added new commands to reproschema * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Removed fstrings * Removed Bastract class * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * removed extra imports and renamed variables * Added examples and documentation * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added descriptions and updated function names * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent e161ae6 commit add8928

37 files changed

+2384
-2
lines changed

README.md

+23-1
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ Commands:
5656
reproschema2redcap Converts reproschema protocol to REDCap CSV format.
5757
serve
5858
validate Validates if the path has a valid reproschema format
59+
reproschema2fhir Generates FHIR questionnaire resources from reproschema activities
60+
output2redcap Generates redcap csv given the audio and survey data from reproschema ui
5961
```
6062

6163
## `reproschema2redcap`
@@ -116,7 +118,7 @@ redcap_version: "X.XX.X"
116118
```
117119
### CLI Usage
118120
119-
The `redcap2reproschema`` function has been integrated into a CLI tool, use the following command:
121+
The `redcap2reproschema` function has been integrated into a CLI tool; use the following command:
120122
```bash
121123
reproschema redcap2reproschema path/to/your_redcap_data_dic.csv path/to/your_redcap2rs.yaml
122124
```
@@ -136,5 +138,25 @@ output_path = "path-to/directory_you_want_to_save_output"
136138
redcap2reproschema(csv_file, yaml_file, output_path)
137139
```
138140

141+
## `output2redcap`
142+
The `output2redcap` function is designed to process the output from reproschema-ui into a REDCap CSV file as seen [here](reproschema/example/redcap).
143+
144+
145+
### CLI Usage
146+
147+
The `output2redcap` function has been integrated into a CLI tool; use the following command:
148+
```bash
149+
reproschema output2redcap ./path/to/your_reproschema-ui_files ./path/to/directory_you_want_to_save_output
150+
```
151+
152+
## `reproschema2fhir`
153+
The `reproschema2fhir` function is designed to convert reproschema activities and items into a FHIR Questionnaire resource as seen [here](reproschema/example/fhir).
154+
155+
### CLI Usage
156+
157+
The `reproschema2fhir` function has been integrated into a CLI tool; use the following command:
158+
```bash
159+
reproschema reproschema2fhir ./path/to/your_reproschema_activities ./path/to/directory_you_want_to_save_output
160+
```
139161
### Notes
140162
1. The script requires an active internet connection to access the GitHub repository.

pyproject.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ dependencies = [
1515
"beautifulsoup4",
1616
"lxml",
1717
"pydantic >= 2.0",
18-
"pandas"
18+
"pandas",
19+
"fhir.resources>=v8.0.0"
1920
]
2021
description = "Reproschema Python library"
2122
# Version from setuptools_scm

reproschema/cli.py

+123
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
1+
import json
12
import os
3+
import shutil
4+
from collections import OrderedDict
25
from pathlib import Path
36

47
import click
8+
import pandas as pd
9+
from fhir.resources.questionnaire import Questionnaire
510

611
from . import __version__, get_logger, set_logger_level
712
from .migrate import migrate2newschema
13+
from .output2redcap import parse_survey
814
from .redcap2reproschema import redcap2reproschema as redcap2rs
15+
from .reproschema2fhir import convert_to_fhir
916
from .reproschema2redcap import reproschema2redcap as rs2redcap
1017

1118
lgr = get_logger()
@@ -179,3 +186,119 @@ def reproschema2redcap(input_path, output_csv_path):
179186
click.echo(
180187
f"Converted reproschema protocol from {input_path} to Redcap CSV at {output_csv_path}"
181188
)
189+
190+
191+
@main.command()
@click.argument("survey_file", type=str)
@click.argument("redcap_csv", type=str)
def output2redcap(survey_file, redcap_csv):
    """
    Generates redcap csv given the audio and survey data from reproschema ui

    survey_file is the location of the surveys generated from reproschema ui
    redcap_csv is the path to store the newly generated redcap csv

    """
    # NOTE: the docstring above doubles as the click help text, so it is kept
    # short.  Implementation notes:
    #   * every file found recursively under ``survey_file`` is read as JSON;
    #     a file that is not valid JSON aborts the command (json.load raises).
    #   * a file that loads but cannot be parsed by ``parse_survey`` is
    #     skipped with a warning (best-effort behaviour).
    #   * ValueError is raised when nothing at all could be parsed.
    #   * the result is written to ``<redcap_csv>/redcap.csv``; ``redcap_csv``
    #     is therefore treated as a directory and created if missing.
    survey_dir = Path(survey_file)

    # The record id is the first whitespace-delimited token of the survey
    # folder's name.  ``Path.name`` ignores a trailing separator, so
    # "out/123 abc/" yields "123" exactly like "out/123 abc" does.  The
    # fallbacks cover paths with no final component (e.g. ".").
    folder_name = survey_dir.name or str(survey_file)
    name_tokens = folder_name.split()
    record_id = name_tokens[0] if name_tokens else folder_name

    # load each file recursively within the folder into its own key
    # (sorted so the row order of the generated CSV is reproducible)
    content = OrderedDict()
    for file in sorted(survey_dir.rglob("*")):
        if file.is_file():
            filename = str(file.relative_to(survey_dir))
            with open(file, "r", encoding="utf-8") as f:
                content[filename] = json.load(f)

    merged_questionnaire_data = []
    for questionnaire, survey_data in content.items():  # activity files
        try:
            merged_questionnaire_data += parse_survey(
                survey_data, record_id, questionnaire
            )
        except Exception as err:
            # Deliberately best-effort: one malformed activity file must not
            # abort the whole export, but the skip should be visible.
            lgr.warning("Skipping %s: %s", questionnaire, err)
            continue

    if not merged_questionnaire_data:
        # pd.concat would fail with an opaque "No objects to concatenate".
        raise ValueError(
            f"No survey data could be parsed from {survey_file}; "
            "check that the folder exists and contains reproschema-ui output"
        )

    survey_df = pd.concat(merged_questionnaire_data, ignore_index=True)
    Path(redcap_csv).mkdir(parents=True, exist_ok=True)

    merged_csv_path = os.path.join(redcap_csv, "redcap.csv")
    survey_df.to_csv(merged_csv_path, index=False)
    click.echo(
        f"Converted reproschema-ui output from {survey_file} to Redcap CSV at {redcap_csv}"
    )
229+
230+
231+
@main.command()
@click.argument("reproschema_questionnaire", type=str)
@click.argument("output", type=str)
def reproschema2fhir(reproschema_questionnaire, output):
    """
    Generates FHIR questionnaire resources from reproschema activities

    reproschema_questionnaire is the location of all reproschema activities
    output is the path to store the newly generated fhir json
    """
    # NOTE: the docstring above doubles as the click help text, so it is kept
    # short.  Implementation notes:
    #   * each immediate sub-directory of ``reproschema_questionnaire`` is
    #     treated as one activity and converted independently.
    #   * FileNotFoundError: the input directory does not exist.
    #   * ValueError: an activity has no ``*_schema`` file, or declares an
    #     unsupported version.
    #   * Exception: the generated questionnaire fails FHIR validation (the
    #     underlying validation error is chained as ``__cause__``).
    #   * output goes to ``<output>/<activity>/<activity>.json``; an existing
    #     ``<output>/<activity>`` directory is deleted first.
    output_path = Path(output)
    activities_root = Path(reproschema_questionnaire)
    if not activities_root.is_dir():
        raise FileNotFoundError(
            f"{activities_root} does not exist. Please check if folder exists and is located at the correct directory"
        )

    # Versions accepted under each of the two keys a schema may carry.
    # NOTE(review): "1.0.0-rc4" is accepted only under "schemaVersion" in the
    # original code; kept as-is, confirm whether that asymmetry is intended.
    # Tuples (not sets) so an unexpected unhashable value still compares.
    supported_versions = {
        "schema:version": ("0.0.1", "1.0.0-rc1", "1.0.0"),
        "schemaVersion": ("0.0.1", "1.0.0-rc1", "1.0.0-rc4", "1.0.0"),
    }

    # Materialise the listing up front so directories created while writing
    # output cannot influence the iteration.
    reproschema_folders = [f for f in activities_root.iterdir() if f.is_dir()]

    for reproschema_folder in reproschema_folders:
        # load each file recursively within the folder into its own key in the reproschema_content dict
        reproschema_content = OrderedDict()
        for file in reproschema_folder.glob("**/*"):
            if file.is_file():
                # key by the path *relative to* the activity folder: files
                # reference one another by relative path, so the relative
                # location has to be preserved
                filename = str(file.relative_to(reproschema_folder))
                with open(file, encoding="utf-8") as f:
                    reproschema_content[filename] = json.load(f)

        schema_names = [
            name for name in reproschema_content if name.endswith("_schema")
        ]
        if not schema_names:
            # Previously surfaced as a bare "IndexError: list index out of range".
            raise ValueError(
                f"No file ending in '_schema' was found in {reproschema_folder}"
            )
        schema_name = schema_names[0]
        reproschema_schema = reproschema_content[schema_name]

        for version_key, accepted in supported_versions.items():
            if (
                version_key in reproschema_schema
                and reproschema_schema[version_key] not in accepted
            ):
                raise ValueError(
                    f"Unable to work with reproschema version "
                    f"{reproschema_schema[version_key]!r} ({version_key} in "
                    f"{reproschema_folder / schema_name}); supported versions "
                    f"are {', '.join(accepted)}"
                )

        fhir_questionnaire = convert_to_fhir(reproschema_content)

        # validate the json using fhir resources
        try:
            Questionnaire.model_validate(fhir_questionnaire)
        except Exception as err:
            # Same exception type as before, but keep the validation details
            # reachable through the chained cause.
            raise Exception("Fhir Questionnaire is not valid") from err

        # the output folder and file are named after the activity folder
        file_name = reproschema_folder.name
        dirpath = output_path / file_name

        # start from a clean directory so stale files from an earlier run
        # do not survive next to the new questionnaire
        if dirpath.is_dir():
            shutil.rmtree(dirpath)
        dirpath.mkdir(parents=True, exist_ok=True)

        with open(dirpath / f"{file_name}.json", "w") as f:
            json.dump(fhir_questionnaire, f)

0 commit comments

Comments
 (0)