diff --git a/CODE/Generate_replicates_for_network_inference.ipynb b/CODE/Generate_replicates_for_network_inference.ipynb deleted file mode 100644 index ecdf46e..0000000 --- a/CODE/Generate_replicates_for_network_inference.ipynb +++ /dev/null @@ -1,174 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "This script is intended to generate 10(0) replicates for each of the DREAM3/4 networks to check inference algo performence\n", - "The pipeline works as follows:\n", - "1) For each network kinetic model file (.xml)\n", - "Repeat 10(0) times:\n", - "2) GNW: generate expression data (it generate it in the app folder due to a bug)\n", - "3) make a new subdirectory for replicate_X (including all parent directories)\n", - "4) move expression data files to the network subdirectory\n", - "5) Combine Steady-State expression data files to one file\n", - "\n", - "Written by: Lior Shachaf\n", - "2020-11-09\n", - "\n", - "2021-07-20: added dream4 option, more comments and variables for path instead of hard-coding\n", - "2021-07-30: Replaced the last bash block responding to appending all steady-state data file into one, with a python equivalent\n", - "\"\"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instructions:\n", - "

make sure settings.txt file in GNW is unchanged or set to default

\n", - "

change dream3 or dream4 according to usage in 3 places in the notebook

\n", - "

change \"targetpath\" to where you want to store the data

" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Change directory to where GNW is installed:\n", - "#path_to_gnw = '/home/local/WIN/lshacha1/GNW/gnw-3.1.2b' # long path\n", - "path_to_gnw = os.path.expanduser('~/GNW/gnw-3.1.2b')\n", - "os.chdir(path_to_gnw)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "path_to_dreamX_networks=\"~/GNW/gnw-3.1.2b/src/ch/epfl/lis/networks/dream4\"\n", - "eval path_to_dreamX_networks=$path_to_dreamX_networks # to expand Tilde\n", - "# iterating over all network XML files\n", - "for file in ${path_to_dreamX_networks}/*;\n", - "do\n", - "echo $file;\n", - "filename=$(basename $file .xml);\n", - "targetpath=\"~/DATA/Networks/Replicates_for_network_inference/dream4\";\n", - "eval targetpath=$targetpath # to expand Tilde\n", - "# generating 10 replicates. 
If this is changed please update cell below as well\n", - "for replicate in {1..10};\n", - "do\n", - "java -jar gnw-3.1.2b.jar --simulate -c settings.txt --input-net ${path_to_dreamX_networks}/${filename}.xml\n", - "mkdir -p ${targetpath}/${filename}/rep_${replicate}/;\n", - "mv ${filename}* ${targetpath}/${filename}/rep_${replicate}/;\n", - "done;\n", - "done;\n", - "\n", - "echo \"done\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Combine Steady-State expression data (wildtype, multifactorial, knockdowns, knockouts, dualknockouts) to one file" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "All done\n" - ] - } - ], - "source": [ - "\"\"\" iterate over all networks \n", - "and for each network iterate over all replicates \n", - "and for each replicate append the 5 steady-state \n", - "data files into one file {Network name}_SS_all.tsv \n", - "\"\"\"\n", - "# Change to specific DREAM data folder containing the different network folders\n", - "path_to_data = os.path.expanduser('../DATA/dream3/')\n", - "# path_to_data = os.path.expanduser('../DATA/dream4/')\n", - "os.chdir(path_to_data)\n", - "\n", - "data_type_list = [\"wildtype\", \"multifactorial\", \"knockdowns\", \"knockouts\", \"dualknockouts\"]\n", - "\n", - "for network_name in os.listdir():\n", - " # if \"100_\" in network_name: # Debug\n", - " # continue\n", - " \n", - " if os.path.isdir(network_name) == True:\n", - " os.chdir(f\"./{network_name}\")\n", - " \n", - " for replicate in os.listdir():\n", - "\n", - " if os.path.isdir(replicate) == True:\n", - " os.chdir(f\"./{replicate}\")\n", - " \n", - " output_file_name = f\"{network_name}_SS_all.tsv\"\n", - " output_file = open(output_file_name, \"w\")\n", - " \n", - " for data_type in data_type_list:\n", - " input_file = f\"{network_name}_{data_type}.tsv\"\n", - " in1 = open(input_file, \"r\")\n", - " data1 = 
in1.readlines()\n", - " in1.close()\n", - " \n", - " for line in data1:\n", - " if \"G1\" not in line:\n", - " output_file.write(line)\n", - " \n", - " output_file.close()\n", - " \n", - " os.chdir('../')\n", - " os.chdir('../')\n", - "\n", - "print(\"All done\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/CODE/Generate_replicates_for_network_inference.py b/CODE/Generate_replicates_for_network_inference.py new file mode 100644 index 0000000..90b7f52 --- /dev/null +++ b/CODE/Generate_replicates_for_network_inference.py @@ -0,0 +1,119 @@ +""" +This script is intended to generate X replicates (default = 10) for each of the DREAM (3/4) +networks to check inference algorithm performance. +The pipeline works as follows: +1) For each network kinetic model file (.xml) +Repeat X times: +2) GNW: generate expression data (it generates it in the app folder due to a bug) +3) Make a new subdirectory for replicate_X (including all parent directories) +4) Move expression data files to the network subdirectory +5) Combine Steady-State expression data files to one file + +Written by: Lior Shachaf +2020-11-09 + +Example usage: +python3 Generate_replicates_for_network_inference.py -r 10 -o ~/Data/ --path_to_gnw ~/Code/genenetweaver/ dream3 +""" + +import argparse +import os +import shutil # Import shutil for moving files across devices +import subprocess + + +def generate_replicates(dream_version, replicates, output_path, path_to_gnw): + """ + Generate replicates for each network XML file in the specified directory. + + Parameters: + dream_version (str): The DREAM version (dream3 or dream4). 
+ replicates (int): Number of replicates to generate. + output_path (str): Path to store the generated data. + path_to_gnw (str): Path to the GeneNetWeaver (GNW) installation. + + Returns: + None + """ + # Define paths based on user input + path_to_gnw = os.path.abspath(os.path.expanduser(path_to_gnw)) + path_to_dreamX_networks = os.path.join(path_to_gnw, f'./src/ch/epfl/lis/networks/{dream_version}') + output_path = os.path.abspath(os.path.join(os.path.expanduser(output_path), dream_version)) + + # Iterate over all network XML files + for file in os.listdir(path_to_dreamX_networks): + if file.endswith('.xml'): + print(file) + filename = os.path.splitext(file)[0] + + # Generate replicates + for replicate in range(1, replicates + 1): + # Create target directory for the replicate + replicate_dir = os.path.join(output_path, filename, f'rep_{replicate}') + os.makedirs(replicate_dir, exist_ok=True) + + # Run the GNW simulation + subprocess.run(['java', '-jar', os.path.join(path_to_gnw, 'gnw-3.1.2b.jar'), + '--simulate', '-c', 'settings.txt', '--input-net', os.path.join(path_to_dreamX_networks, file)]) + + # Move generated files to the target directory + for generated_file in os.listdir(os.getcwd()): + if generated_file.startswith(filename): + shutil.move(os.path.join(os.getcwd(), generated_file), os.path.join(replicate_dir, generated_file)) + + print("\nReplicates generation done\n") + + +def combine_steady_state_data(dream_version, output_path): + """ + Combine Steady-State expression data (wildtype, multifactorial, knockdowns, knockouts, dualknockouts) into one file. + + Parameters: + dream_version (str): The DREAM version (dream3 or dream4). + output_path (str): Path to store the combined data. 
+ + Returns: + None + """ + # Change to specific DREAM data folder containing the different network folders + path_to_data = os.path.abspath(os.path.join(os.path.expanduser(output_path), dream_version)) + os.chdir(path_to_data) + + data_type_list = ["wildtype", "multifactorial", "knockdowns", "knockouts", "dualknockouts"] + + for network_name in os.listdir(): + if os.path.isdir(network_name): + os.chdir(network_name) + + for replicate in os.listdir(): + if os.path.isdir(replicate): + os.chdir(replicate) + + output_file_name = f"{network_name}_SS_all.tsv" + with open(output_file_name, "w") as output_file: + for data_type in data_type_list: + input_file = f"{network_name}_{data_type}.tsv" + with open(input_file, "r") as in1: + data1 = in1.readlines() + + for line in data1: + if "G1" not in line: + output_file.write(line) + + os.chdir('..') + os.chdir('..') + + print("Combine steady-states done") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate replicates and combine steady-state data for DREAM networks.") + parser.add_argument("dream_version", choices=["dream3", "dream4"], help="Specify the DREAM version (dream3 or dream4).") + parser.add_argument("-r", "--replicates", type=int, default=10, help="Number of replicates to generate. Default=10") + parser.add_argument("-o", "--output_path", type=str, default=".", help="Output path to store the data. Default=.") + parser.add_argument("--path_to_gnw", type=str, default="~/genenetweaver/", help="Path to GeneNetWeaver (GNW) installation.") + + args = parser.parse_args() + + generate_replicates(args.dream_version, args.replicates, args.output_path, args.path_to_gnw) + combine_steady_state_data(args.dream_version, args.output_path) diff --git a/README.md b/README.md index 4fd69c8..7799ad5 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ deactivate ### Simulating/generating gene expression data:
The software GeneNetWeaver used to generate the datasets in the current study is available in the GitHub repository, [https://github.com/tschaffter/genenetweaver](https://github.com/tschaffter/genenetweaver)
-We used our Jupyter notebook [Generate_replicates_for_network_inference.ipynb](/CODE/Generate_replicates_for_network_inference.ipynb) to call GeneNetWeaver to generate multiple replicates of steady-state and time-series gene expression datasets for realistic in silico networks of sizes of 50, and 100 genes containing various experimental conditions (knockouts, knockdowns, multifactorial perturbation, etc.).
+We used our Python script [Generate_replicates_for_network_inference.py](/CODE/Generate_replicates_for_network_inference.py) to call GeneNetWeaver to generate multiple replicates of steady-state and time-series gene expression datasets for realistic in silico networks of sizes 50 and 100 genes, containing various experimental conditions (knockouts, knockdowns, multifactorial perturbation, etc.).
We then combine all the steady-state expression data to a single file: {network name}_all.tsv

The data we used for the paper can also be downloaded from the [DATA](/DATA/) folder in this repo.