diff --git a/CODE/Generate_replicates_for_network_inference.ipynb b/CODE/Generate_replicates_for_network_inference.ipynb deleted file mode 100644 index ecdf46e..0000000 --- a/CODE/Generate_replicates_for_network_inference.ipynb +++ /dev/null @@ -1,174 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "This script is intended to generate 10(0) replicates for each of the DREAM3/4 networks to check inference algo performence\n", - "The pipeline works as follows:\n", - "1) For each network kinetic model file (.xml)\n", - "Repeat 10(0) times:\n", - "2) GNW: generate expression data (it generate it in the app folder due to a bug)\n", - "3) make a new subdirectory for replicate_X (including all parent directories)\n", - "4) move expression data files to the network subdirectory\n", - "5) Combine Steady-State expression data files to one file\n", - "\n", - "Written by: Lior Shachaf\n", - "2020-11-09\n", - "\n", - "2021-07-20: added dream4 option, more comments and variables for path instead of hard-coding\n", - "2021-07-30: Replaced the last bash block responding to appending all steady-state data file into one, with a python equivalent\n", - "\"\"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Instructions:\n", - "
make sure settings.txt file in GNW is unchanged or set to default
\n", - "change dream3 or dream4 according to usage in 3 places in the notebook
\n", - "change \"targetpath\" to where you want to store the data
" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Change directory to where GNW is installed:\n", - "#path_to_gnw = '/home/local/WIN/lshacha1/GNW/gnw-3.1.2b' # long path\n", - "path_to_gnw = os.path.expanduser('~/GNW/gnw-3.1.2b')\n", - "os.chdir(path_to_gnw)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%bash\n", - "path_to_dreamX_networks=\"~/GNW/gnw-3.1.2b/src/ch/epfl/lis/networks/dream4\"\n", - "eval path_to_dreamX_networks=$path_to_dreamX_networks # to expand Tilde\n", - "# iterating over all network XML files\n", - "for file in ${path_to_dreamX_networks}/*;\n", - "do\n", - "echo $file;\n", - "filename=$(basename $file .xml);\n", - "targetpath=\"~/DATA/Networks/Replicates_for_network_inference/dream4\";\n", - "eval targetpath=$targetpath # to expand Tilde\n", - "# generating 10 replicates. 
If this is changed please update cell below as well\n", - "for replicate in {1..10};\n", - "do\n", - "java -jar gnw-3.1.2b.jar --simulate -c settings.txt --input-net ${path_to_dreamX_networks}/${filename}.xml\n", - "mkdir -p ${targetpath}/${filename}/rep_${replicate}/;\n", - "mv ${filename}* ${targetpath}/${filename}/rep_${replicate}/;\n", - "done;\n", - "done;\n", - "\n", - "echo \"done\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Combine Steady-State expression data (wildtype, multifactorial, knockdowns, knockouts, dualknockouts) to one file" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "All done\n" - ] - } - ], - "source": [ - "\"\"\" iterate over all networks \n", - "and for each network iterate over all replicates \n", - "and for each replicate append the 5 steady-state \n", - "data files into one file {Network name}_SS_all.tsv \n", - "\"\"\"\n", - "# Change to specific DREAM data folder containing the different network folders\n", - "path_to_data = os.path.expanduser('../DATA/dream3/')\n", - "# path_to_data = os.path.expanduser('../DATA/dream4/')\n", - "os.chdir(path_to_data)\n", - "\n", - "data_type_list = [\"wildtype\", \"multifactorial\", \"knockdowns\", \"knockouts\", \"dualknockouts\"]\n", - "\n", - "for network_name in os.listdir():\n", - " # if \"100_\" in network_name: # Debug\n", - " # continue\n", - " \n", - " if os.path.isdir(network_name) == True:\n", - " os.chdir(f\"./{network_name}\")\n", - " \n", - " for replicate in os.listdir():\n", - "\n", - " if os.path.isdir(replicate) == True:\n", - " os.chdir(f\"./{replicate}\")\n", - " \n", - " output_file_name = f\"{network_name}_SS_all.tsv\"\n", - " output_file = open(output_file_name, \"w\")\n", - " \n", - " for data_type in data_type_list:\n", - " input_file = f\"{network_name}_{data_type}.tsv\"\n", - " in1 = open(input_file, \"r\")\n", - " data1 = 
in1.readlines()\n", - " in1.close()\n", - " \n", - " for line in data1:\n", - " if \"G1\" not in line:\n", - " output_file.write(line)\n", - " \n", - " output_file.close()\n", - " \n", - " os.chdir('../')\n", - " os.chdir('../')\n", - "\n", - "print(\"All done\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.11" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}
diff --git a/CODE/Generate_replicates_for_network_inference.py b/CODE/Generate_replicates_for_network_inference.py
new file mode 100644
index 0000000..90b7f52
--- /dev/null
+++ b/CODE/Generate_replicates_for_network_inference.py
@@ -0,0 +1,147 @@
+"""
+This script is intended to generate X replicates (default = 10) for each of the DREAM (3/4)
+networks to check inference algorithm performance.
+The pipeline works as follows:
+1) For each network kinetic model file (.xml)
+Repeat X times:
+2) GNW: generate expression data (it generates it in the app folder due to a bug)
+3) Make a new subdirectory for replicate_X (including all parent directories)
+4) Move expression data files to the network subdirectory
+5) Combine Steady-State expression data files to one file
+
+Note: 'settings.txt' is passed to GNW as a relative path and the generated files are collected
+from the current working directory, so run this script from the directory holding settings.txt.
+
+Written by: Lior Shachaf
+2020-11-09
+
+Example usage:
+python3 Generate_replicates_for_network_inference.py -r 10 -o ~/Data/ --path_to_gnw ~/Code/genenetweaver/ dream3
+"""
+
+import argparse
+import os
+import shutil  # Import shutil for moving files across devices
+import subprocess
+import sys
+
+
+def generate_replicates(dream_version, replicates, output_path, path_to_gnw):
+    """
+    Generate replicates for each network XML file in the specified directory.
+
+    For every '.xml' file in <path_to_gnw>/src/ch/epfl/lis/networks/<dream_version>, GNW is run
+    'replicates' times from the current working directory. After each run, every regular file in
+    the working directory whose name starts with the network name is moved to
+    <output_path>/<dream_version>/<network name>/rep_<N>/.
+
+    Parameters:
+    dream_version (str): The DREAM version (dream3 or dream4).
+    replicates (int): Number of replicates to generate.
+    output_path (str): Path to store the generated data.
+    path_to_gnw (str): Path to the GeneNetWeaver (GNW) installation.
+
+    Returns:
+    None
+    """
+    # Define paths based on user input
+    path_to_gnw = os.path.abspath(os.path.expanduser(path_to_gnw))
+    path_to_dreamX_networks = os.path.join(path_to_gnw, 'src', 'ch', 'epfl', 'lis', 'networks', dream_version)
+    output_path = os.path.abspath(os.path.join(os.path.expanduser(output_path), dream_version))
+    gnw_jar = os.path.join(path_to_gnw, 'gnw-3.1.2b.jar')
+    # Directory GNW is launched from; its output files are collected from here
+    work_dir = os.getcwd()
+
+    # Iterate over all network XML files (sorted so the processing order is reproducible)
+    for file in sorted(os.listdir(path_to_dreamX_networks)):
+        if not file.endswith('.xml'):
+            continue
+        print(file)
+        filename = os.path.splitext(file)[0]
+        network_xml = os.path.join(path_to_dreamX_networks, file)
+
+        # Generate replicates
+        for replicate in range(1, replicates + 1):
+            # Create target directory for the replicate
+            replicate_dir = os.path.join(output_path, filename, f'rep_{replicate}')
+            os.makedirs(replicate_dir, exist_ok=True)
+
+            # Run the GNW simulation
+            result = subprocess.run(['java', '-jar', gnw_jar, '--simulate', '-c', 'settings.txt',
+                                     '--input-net', network_xml])
+            if result.returncode != 0:
+                # Report the failure instead of silently leaving an empty replicate directory
+                print(f"Warning: GNW exited with code {result.returncode} for {file}, replicate {replicate}",
+                      file=sys.stderr)
+
+            # Move generated files to the target directory. Only regular files are moved so that
+            # an output directory located in the working directory is never moved into itself.
+            for generated_file in os.listdir(work_dir):
+                source = os.path.join(work_dir, generated_file)
+                if generated_file.startswith(filename) and os.path.isfile(source):
+                    shutil.move(source, os.path.join(replicate_dir, generated_file))
+
+    print("\nReplicates generation done\n")
+
+
+def combine_steady_state_data(dream_version, output_path):
+    """
+    Combine Steady-State expression data (wildtype, multifactorial, knockdowns, knockouts, dualknockouts) into one file.
+
+    For every <output_path>/<dream_version>/<network name>/<replicate>/ directory, the five
+    <network name>_<data type>.tsv files are concatenated in the order listed above into
+    <network name>_SS_all.tsv in the same directory. Lines containing "G1" are left out
+    (presumably the header row of gene names - confirm against the GNW output format).
+    The current working directory is not changed.
+
+    Parameters:
+    dream_version (str): The DREAM version (dream3 or dream4).
+    output_path (str): Path to store the combined data.
+
+    Returns:
+    None
+
+    Raises:
+    FileNotFoundError: If the data folder or one of the five input files is missing.
+    """
+    # Specific DREAM data folder containing the different network folders
+    path_to_data = os.path.abspath(os.path.join(os.path.expanduser(output_path), dream_version))
+
+    data_type_list = ["wildtype", "multifactorial", "knockdowns", "knockouts", "dualknockouts"]
+
+    for network_name in sorted(os.listdir(path_to_data)):
+        network_dir = os.path.join(path_to_data, network_name)
+        if not os.path.isdir(network_dir):
+            continue
+
+        for replicate in sorted(os.listdir(network_dir)):
+            replicate_dir = os.path.join(network_dir, replicate)
+            if not os.path.isdir(replicate_dir):
+                continue
+
+            # Read all inputs before creating the output, so that a missing input file
+            # does not leave a truncated combined file behind
+            combined_lines = []
+            for data_type in data_type_list:
+                input_file = os.path.join(replicate_dir, f"{network_name}_{data_type}.tsv")
+                with open(input_file, "r") as in1:
+                    combined_lines.extend(line for line in in1 if "G1" not in line)
+
+            output_file_name = os.path.join(replicate_dir, f"{network_name}_SS_all.tsv")
+            with open(output_file_name, "w") as output_file:
+                output_file.writelines(combined_lines)
+
+    print("Combine steady-states done")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate replicates and combine steady-state data for DREAM networks.")
+    parser.add_argument("dream_version", choices=["dream3", "dream4"], help="Specify the DREAM version (dream3 or dream4).")
+    parser.add_argument("-r", "--replicates", type=int, default=10, help="Number of replicates to generate. Default=10")
+    parser.add_argument("-o", "--output_path", type=str, default=".", help="Output path to store the data. Default=.")
+    parser.add_argument("--path_to_gnw", type=str, default="~/genenetweaver/", help="Path to GeneNetWeaver (GNW) installation.")
+
+    args = parser.parse_args()
+
+    generate_replicates(args.dream_version, args.replicates, args.output_path, args.path_to_gnw)
+    combine_steady_state_data(args.dream_version, args.output_path)
diff --git a/README.md b/README.md index 4fd69c8..7799ad5 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ deactivate ### Simulating/generating gene expression data: