Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 0 additions & 174 deletions CODE/Generate_replicates_for_network_inference.ipynb

This file was deleted.

119 changes: 119 additions & 0 deletions CODE/Generate_replicates_for_network_inference.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""
This script is intended to generate X replicates (default = 10) for each of the DREAM (3/4)
networks to check inference algorithm performance.
The pipeline works as follows:
1) For each network kinetic model file (.xml)
Repeat X times:
2) GNW: generate expression data (it generates it in the app folder due to a bug)
3) Make a new subdirectory for replicate_X (including all parent directories)
4) Move expression data files to the network subdirectory
5) Combine Steady-State expression data files to one file

Written by: Lior Shachaf
2020-11-09

Example usage:
python3 Generate_replicates_for_network_inference.py -r 10 -o ~/Data/ --path_to_gnw ~/Code/genenetweaver/ dream3
"""

import argparse
import os
import shutil # Import shutil for moving files across devices
import subprocess


def generate_replicates(dream_version, replicates, output_path, path_to_gnw):
"""
Generate replicates for each network XML file in the specified directory.

Parameters:
dream_version (str): The DREAM version (dream3 or dream4).
replicates (int): Number of replicates to generate.
output_path (str): Path to store the generated data.
path_to_gnw (str): Path to the GeneNetWeaver (GNW) installation.

Returns:
None
"""
# Define paths based on user input
path_to_gnw = os.path.abspath(os.path.expanduser(path_to_gnw))
path_to_dreamX_networks = os.path.join(path_to_gnw, f'./src/ch/epfl/lis/networks/{dream_version}')
output_path = os.path.abspath(os.path.join(os.path.expanduser(output_path), dream_version))

# Iterate over all network XML files
for file in os.listdir(path_to_dreamX_networks):
if file.endswith('.xml'):
print(file)
filename = os.path.splitext(file)[0]

# Generate replicates
for replicate in range(1, replicates + 1):
# Create target directory for the replicate
replicate_dir = os.path.join(output_path, filename, f'rep_{replicate}')
os.makedirs(replicate_dir, exist_ok=True)

# Run the GNW simulation
subprocess.run(['java', '-jar', os.path.join(path_to_gnw, 'gnw-3.1.2b.jar'),
'--simulate', '-c', 'settings.txt', '--input-net', os.path.join(path_to_dreamX_networks, file)])

# Move generated files to the target directory
for generated_file in os.listdir(os.getcwd()):
if generated_file.startswith(filename):
shutil.move(os.path.join(os.getcwd(), generated_file), os.path.join(replicate_dir, generated_file))

print("\nReplicates generation done\n")


def combine_steady_state_data(dream_version, output_path):
"""
Combine Steady-State expression data (wildtype, multifactorial, knockdowns, knockouts, dualknockouts) into one file.

Parameters:
dream_version (str): The DREAM version (dream3 or dream4).
output_path (str): Path to store the combined data.

Returns:
None
"""
# Change to specific DREAM data folder containing the different network folders
path_to_data = os.path.abspath(os.path.join(os.path.expanduser(output_path), dream_version))
os.chdir(path_to_data)

data_type_list = ["wildtype", "multifactorial", "knockdowns", "knockouts", "dualknockouts"]

for network_name in os.listdir():
if os.path.isdir(network_name):
os.chdir(network_name)

for replicate in os.listdir():
if os.path.isdir(replicate):
os.chdir(replicate)

output_file_name = f"{network_name}_SS_all.tsv"
with open(output_file_name, "w") as output_file:
for data_type in data_type_list:
input_file = f"{network_name}_{data_type}.tsv"
with open(input_file, "r") as in1:
data1 = in1.readlines()

for line in data1:
if "G1" not in line:
output_file.write(line)

os.chdir('..')
os.chdir('..')

print("Combine steady-states done")


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate replicates and combine steady-state data for DREAM networks.")
parser.add_argument("dream_version", choices=["dream3", "dream4"], help="Specify the DREAM version (dream3 or dream4).")
parser.add_argument("-r", "--replicates", type=int, default=10, help="Number of replicates to generate. Default=10")
parser.add_argument("-o", "--output_path", type=str, default=".", help="Output path to store the data. Default=.")
parser.add_argument("--path_to_gnw", type=str, default="~/genenetweaver/", help="Path to GeneNetWeaver (GNW) installation.")

args = parser.parse_args()

generate_replicates(args.dream_version, args.replicates, args.output_path, args.path_to_gnw)
combine_steady_state_data(args.dream_version, args.output_path)
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ deactivate

### Simulating/generating gene expression data:<br>
The software GeneNetWeaver used to generate the datasets in the current study is available in the GitHub repository, [https://github.com/tschaffter/genenetweaver](https://github.com/tschaffter/genenetweaver)<br>
We used our Jupyter notebook [Generate_replicates_for_network_inference.ipynb](/CODE/Generate_replicates_for_network_inference.ipynb) to call GeneNetWeaver to generate multiple replicates of steady-state and time-series gene expression datasets for realistic in silico networks of sizes of 50, and 100 genes containing various experimental conditions (knockouts, knockdowns, multifactorial perturbation, etc.).<br>
We used our Jupyter notebook [Generate_replicates_for_network_inference.py](/CODE/Generate_replicates_for_network_inference.py) to call GeneNetWeaver to generate multiple replicates of steady-state and time-series gene expression datasets for realistic in silico networks of sizes of 50, and 100 genes containing various experimental conditions (knockouts, knockdowns, multifactorial perturbation, etc.).<br>
We then combine all the steady-state expression data to a single file: {network name}_all.tsv<br>
<br>
The data we used for the paper can also be downloaded from the [DATA](/DATA/) folder in this repo.<br>
Expand Down