From ab14cc9ac7729ec833bab4ae2ef7a6e49d7d3dbd Mon Sep 17 00:00:00 2001 From: Taveluz Date: Wed, 15 Nov 2017 16:06:08 -0500 Subject: [PATCH 1/5] The name of the package was changed from minval to g2f --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 64e6b2d..f8d0e7f 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -g2f : Find and Fill Gaps in Metabolic Networks +g2fg2f : Find and Fill Gaps in Metabolic Networks ====== The **g2f** package was designed as a tool to find and fill gaps in metabolic networks. For a given metabolic network, this package finds the gaps (metabolites not produced or not consumed in any other reaction), and fills it from the stoichiometric reactions of a reference metabolic reconstruction using a weighting function. Also the option to download all the set of gene-associated stoichiometric reactions for a specific organism from the KEGG database is available. @@ -11,7 +11,7 @@ For install the latest stable version this package directly from GitHub: # Install 'devtools' R package install.packages("devtools") -# Install 'minval' package +# Install 'g2f' package devtools::install_github("gibbslab/g2f") library("g2f") ``` From ac8f3696669b1811ed9aebffa92cb2ca47ad1847 Mon Sep 17 00:00:00 2001 From: Taveluz Date: Wed, 25 Apr 2018 10:27:51 -0500 Subject: [PATCH 2/5] Adding the evaluation file with g2f and GapFill, from Cobrapy, code --- Evaluation.Rmd | 235 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 Evaluation.Rmd diff --git a/Evaluation.Rmd b/Evaluation.Rmd new file mode 100644 index 0000000..32eccbd --- /dev/null +++ b/Evaluation.Rmd @@ -0,0 +1,235 @@ +--- +title: "Evaluation" +author: "Tain Velasco Luquez" +date: "19/04/2018" +output: html_document +editor_options: + chunk_output_type: console +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +## Opening +This file contain sthe comparison of g2f against other widely used algortihms to find a nd fill gaps in GEMs. + +### Required packages +```{r message=FALSE} +library("minval") +library("sybilSBML") +library("sybil") +library("glpkAPI") +library("g2f") +library("tictoc") +library("readr") +``` + +### Useful functions + +If the models has some syntax incompatibilities, this functions enables the localisation of the incompatibilities and replace them by the user define syntax. +```{r message=FALSE} +# FUN by Theodore lytras in https://stackoverflow.com/questions/15253954/replace-multiple-letters-with-accents-with-gsub/15256643?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa +mgsub <- function(pattern, replacement, x, ...) { + if (length(pattern)!=length(replacement)) { + stop("pattern and replacement do not have the same length.") + } + result <- x + for (i in 1:length(pattern)) { + result <- gsub(pattern[i], replacement[i], result, ...) + } + result +} +``` + +This functions retrieves an organism's GEM, in .xml extension from biomodels, and the complete set of reactions, in .csv extension from KEGG. +```{r message=FALSE} +my_models <- function(biomodel_ID, kegg_ID){ + # Both arguments must be of class character + # Go to http://rest.kegg.jp/list/organism for the KEGG_ID code + # Go to https://wwwdev.ebi.ac.uk/biomodels/ for the BioModel ID + + # Let's replace the url for the selected biomodel ID + my_model_url <- gsub("MODEL(.{10})", biomodel_ID, "https://wwwdev.ebi.ac.uk/biomodels/model/download/MODEL1507180017.2?filename=MODEL1507180017_url.xml") + + # And download it + download.file(my_model_url, paste0(kegg_ID, ".xml")) + + # Now lets get the reaction list and save it + write.csv(g2f::getReference(kegg_ID), file = paste0(kegg_ID, "_all.csv")) + + message("It's done MTF! -.-") +} +``` + +Extract reactions from a model +```{r message=FALSE} +# Function extracted from MinVal, by aniel Osorio +# modelData must be of class modelorg and kegg_ID of class character +my_reactions <- function(modelData, kegg_ID){ + + modelData <- minval:::convertData(model = modelData) + + outputData <- NULL + outputData$abbreviation <- modelData[["ID"]] + outputData$name <- modelData[["DESCRIPTION"]] + outputData$equation <- minval:::rearmReactions(S = minval::stoichiometricMatrix(modelData[["REACTION"]]), + reversible = grepl("<=>", modelData[["REACTION"]]), + type = "TSV", boundary = boundary) + outputData$reversible <- ifelse(test = grepl("<=>", modelData[["REACTION"]]), + yes = "reversible", no = "irreversible") + outputData$compartment <- unlist(lapply(modelData[["REACTION"]], + function(reaction) { + paste0(minval::compartments(reaction), collapse = " , ") + })) + outputData$lowbnd <- as.character(modelData[["LOWER.BOUND"]]) + outputData$uppbnd <- as.character(modelData[["UPPER.BOUND"]]) + outputData$obj_coef <- as.character(modelData[["OBJECTIVE"]]) + outputData$rule <- modelData[["GPR"]] + outputData$subsystem <- rep("", length(modelData[["REACTION"]])) + outputData <- data.frame(outputData) + + write.table(outputData, file = paste0(kegg_ID, "_react.csv"), + quote = TRUE, row.names = FALSE, sep = ",") +} +``` + +## Warming up + +We are going to load several GEMs their reaction lists and complete set of reactions from KEGG. +```{r message=FALSE} +# Download the models and its associated KEGG reactions +mapply(my_models, c("MODEL1507180048", "MODEL1507180017"), c("tma", "stm")) + +# Load all .xml models in the working directory to the .GlobalEnv and rename them adding _model +for (i in as.list(list.files(pattern = "\\.xml$"))) { + assign(gsub(".xml", "_model", i), sybilSBML::readSBMLmod(i), envir = .GlobalEnv) +} +# Load all reaction lists from KEGG per model +for (i in as.list(list.files(pattern = "\\_all.csv$"))) { + assign(gsub(".csv", "", i), read.csv(i), envir = .GlobalEnv) +} + +# Extract the reaction list of every .xml file in the working directory, save such reaction list to a .csv with the same name as the model +mapply(my_reactions, mget(setdiff(ls(pattern = "model"), lsf.str())), gsub("_all.csv", "", list.files(pattern = "\\_all.csv$"))) + +# And load them to the workspace +for (i in as.list(list.files(pattern = "\\_react.csv$"))) { + assign(gsub(".csv", "", i), read.csv(i), envir = .GlobalEnv) +} +``` + +Inasmuch as the gap filling functions is sensitive to incorrect syntax, let's check it's correctness +```{r} +# Is the syntax for the models correct? +sapply(mget(setdiff(ls(pattern = "react"), lsf.str())), function(i) { + suppressMessages(suppressWarnings(sum( + minval::validateSyntax(reactionList = i$equation) + ) == nrow(i))) +}) +``` + +As seen, the reaction list have an incorrect syntax. General mistakes are described in `r ?minval::validateSyntax `. Let's fix them: +```{r message=FALSE, eval=FALSE} +# Value for all data files (excluding functions) in the global environment are returned in the form of list +temp_var <- mget(setdiff(ls(pattern = "react"), lsf.str())) + +sapply(temp_var[[i]][, 3], function(i) { + assign(temp_var[[i]][, 3], mgsub( + c("-->", '<==>', '->', "\\(|\\)"), + c("=>", '<=>', "=>", ""), + temp_var[[i]][, 3] + ), envir = .GlobalEnv) +}) + + + +``` +```{r} +stm_react$equation <- mgsub(c("-->", '<==>', '->', "\\(|\\)"), c("=>", '<=>', "=>", ""), stm_react$equation) + +tma_react$equation <- mgsub(c("-->", '<==>', '->', "\\(|\\)"), c("=>", '<=>', "=>", ""), tma_react$equation) + +# Is the syntax for the models correct? +sapply(mget(setdiff(ls(pattern = "react"), lsf.str())), function(i) { + suppressMessages(suppressWarnings(sum( + minval::validateSyntax(reactionList = i$equation) + ) == nrow(i))) +}) +``` + +## The workout +Once the syntax is checked, it is time to do the gapfinding and gapfilling +```{r} +# lets fire it up ! +tictoc::tic("Gapfilling time") +gapFill(reactionList = unlist(stm_react$equation), + reference = stm_all$reaction, + limit = 0.25, + woCompartment = TRUE, + consensus = FALSE) +tictoc::toc() + +#1050 Orphan reactants found +#Error in reactionList[validateSyntax(reactionList)] : +# invalid subscript type 'list' +``` + +## Now in pyhton +Inasmuch as it is somewhat troublesome to import a model in +```{python} +# Tain Velasco-Luquez +# taveluz@javeriana.edu.co + +# This script will get a .xml GEM and a metabolite name. It will retrieve the following info: +# 1. The GEM dimensions +# 2. The number of reactions your metabolites participate in +# 3. The number of blocked reactions and its execution time +# 4. Gap filling time, No reactions added, id added reactions + +# The number of gaps created after deleting all reactions in which the given metabolite participate in +# 3. The file called model_universal containing the reactions deleted from the model, +# 4. The file model_gaps.xml containing the model with the gaps created +# 5. +# 6. The execution time for calculation of blo +from __future__ import print_function +from cobra.flux_analysis import gapfill +import cobra.test, time + + +def my_gems_gaps(model, metabo_code): + # Printing the GEM dimmensions + print( "GEM dimmensions (reactionsxmetabolites): {} x {}".format( len( model.reactions ), + len( model.metabolites ) ) ) + # Number of reactions the metabolite is participating in + print( "Your metabolite participate in {} reactions".format( len( model.metabolites.metabo_code.reactions ) ) ) + + # Initialising universal and deleting the reactions associated to the metabolite + universal = cobra.Model( "universal_reactions" ) + for i in [i.id for i in model.metabolites.metabo_code.reactions]: + reaction = model.reactions.get_by_id( i ) + universal.add_reaction( reaction.copy( ) ) + model.remove_reactions( [reaction] ) + + # Number of blocked reactions + start_time = time.time( ) + model_blocked_reactions = cobra.flux_analysis.find_blocked_reactions( model ) + print( "The number of blocked reactions after creating the gaps is {}".format( len( model_blocked_reactions ) ) ) + print( "Calculate the number of blocked reaction took %s seconds " % (time.time( ) - start_time) ) + + # Gap filling + start_time = time.time( ) + solution = gapfill( model, universal ) + print( "Gap filling process took %s seconds " % (time.time( ) - start_time) ) + print( "The number of filled reactions is:".format( len( solution.reactions ) ) ) + print( "These reactions are:" ) + for reaction in solution[0]: + print( reaction.id ) + + # Exporting the files + cobra.io.write_sbml_model( model, "model.xml" ) + cobra.io.write_sbml_model( universal, "universal.xml" ) + +``` + + From 69af3dd900c5c98eba2e2850a82f6fe73c8299e8 Mon Sep 17 00:00:00 2001 From: TainVelasco-Luquez Date: Wed, 25 Apr 2018 11:17:56 -0500 Subject: [PATCH 3/5] Name was changed from g2fg2f to g2f --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f8d0e7f..5bccbc6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -g2fg2f : Find and Fill Gaps in Metabolic Networks +g2f : Find and Fill Gaps in Metabolic Networks ====== The **g2f** package was designed as a tool to find and fill gaps in metabolic networks. For a given metabolic network, this package finds the gaps (metabolites not produced or not consumed in any other reaction), and fills it from the stoichiometric reactions of a reference metabolic reconstruction using a weighting function. Also the option to download all the set of gene-associated stoichiometric reactions for a specific organism from the KEGG database is available. From 155a22b3a4a72143a68c87ed5d7c4aa6e55bbf92 Mon Sep 17 00:00:00 2001 From: TainVelasco-Luquez Date: Wed, 25 Apr 2018 11:41:21 -0500 Subject: [PATCH 4/5] Removing the error messages to place them as comments to a pull request --- Evaluation.Rmd | 83 ++++++++++++++++++++------------------------------ 1 file changed, 33 insertions(+), 50 deletions(-) diff --git a/Evaluation.Rmd b/Evaluation.Rmd index 32eccbd..d630d9d 100644 --- a/Evaluation.Rmd +++ b/Evaluation.Rmd @@ -12,7 +12,7 @@ knitr::opts_chunk$set(echo = TRUE) ``` ## Opening -This file contain sthe comparison of g2f against other widely used algortihms to find a nd fill gaps in GEMs. +This file contain sthe comparison of g2f against other widely used algortihms to find and fill gaps in GEMs. ### Required packages ```{r message=FALSE} @@ -27,7 +27,7 @@ library("readr") ### Useful functions -If the models has some syntax incompatibilities, this functions enables the localisation of the incompatibilities and replace them by the user define syntax. +If the models has some syntax incompatibilities, this function enables the localisation of the incompatibilities and replace them by the user define syntax. ```{r message=FALSE} # FUN by Theodore lytras in https://stackoverflow.com/questions/15253954/replace-multiple-letters-with-accents-with-gsub/15256643?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa mgsub <- function(pattern, replacement, x, ...) { @@ -42,7 +42,7 @@ mgsub <- function(pattern, replacement, x, ...) { } ``` -This functions retrieves an organism's GEM, in .xml extension from biomodels, and the complete set of reactions, in .csv extension from KEGG. +This function retrieves an organism's GEM, in .xml extension from biomodels, and the complete set of reactions, in .csv extension from KEGG. ```{r message=FALSE} my_models <- function(biomodel_ID, kegg_ID){ # Both arguments must be of class character @@ -64,7 +64,7 @@ my_models <- function(biomodel_ID, kegg_ID){ Extract reactions from a model ```{r message=FALSE} -# Function extracted from MinVal, by aniel Osorio +# Function extracted from MinVal, by Daniel Osorio # modelData must be of class modelorg and kegg_ID of class character my_reactions <- function(modelData, kegg_ID){ @@ -119,7 +119,7 @@ for (i in as.list(list.files(pattern = "\\_react.csv$"))) { } ``` -Inasmuch as the gap filling functions is sensitive to incorrect syntax, let's check it's correctness +Inasmuch as the gap filling function is sensitive to incorrect syntax, let's check it's correctness ```{r} # Is the syntax for the models correct? sapply(mget(setdiff(ls(pattern = "react"), lsf.str())), function(i) { @@ -170,65 +170,48 @@ gapFill(reactionList = unlist(stm_react$equation), consensus = FALSE) tictoc::toc() -#1050 Orphan reactants found -#Error in reactionList[validateSyntax(reactionList)] : -# invalid subscript type 'list' ``` ## Now in pyhton -Inasmuch as it is somewhat troublesome to import a model in +Now lets compare the g2f results against the gapfill algorithm implemented in Copbrapy ```{python} -# Tain Velasco-Luquez -# taveluz@javeriana.edu.co - -# This script will get a .xml GEM and a metabolite name. It will retrieve the following info: -# 1. The GEM dimensions -# 2. The number of reactions your metabolites participate in -# 3. The number of blocked reactions and its execution time -# 4. Gap filling time, No reactions added, id added reactions - -# The number of gaps created after deleting all reactions in which the given metabolite participate in -# 3. The file called model_universal containing the reactions deleted from the model, -# 4. The file model_gaps.xml containing the model with the gaps created -# 5. -# 6. The execution time for calculation of blo +# Importing the modules from __future__ import print_function from cobra.flux_analysis import gapfill import cobra.test, time +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import glob +``` +# In order to supplement the universal reaction files to the gapfill algorithm, we are going to: +1. Read several .csv files at once +```{python} +all_all = {} +for file_all in glob.glob( '*_all.csv' ): + name = os.path.splitext( file_all )[0] + all_all[name] = cobra.Model( pd.read_csv( file_all ) ) +``` -def my_gems_gaps(model, metabo_code): - # Printing the GEM dimmensions - print( "GEM dimmensions (reactionsxmetabolites): {} x {}".format( len( model.reactions ), - len( model.metabolites ) ) ) - # Number of reactions the metabolite is participating in - print( "Your metabolite participate in {} reactions".format( len( model.metabolites.metabo_code.reactions ) ) ) +2. Import the models +```{python} +all_model = {} +for file_model in glob.glob( '*.xml' ): + name = os.path.splitext( file_model )[0] + all_model[name] = cobra.io.read_sbml_model( file_model ) +``` - # Initialising universal and deleting the reactions associated to the metabolite - universal = cobra.Model( "universal_reactions" ) - for i in [i.id for i in model.metabolites.metabo_code.reactions]: - reaction = model.reactions.get_by_id( i ) - universal.add_reaction( reaction.copy( ) ) - model.remove_reactions( [reaction] ) +3. let's gapfill them! +```{python} +solution={} - # Number of blocked reactions - start_time = time.time( ) - model_blocked_reactions = cobra.flux_analysis.find_blocked_reactions( model ) - print( "The number of blocked reactions after creating the gaps is {}".format( len( model_blocked_reactions ) ) ) - print( "Calculate the number of blocked reaction took %s seconds " % (time.time( ) - start_time) ) +for every_model_key, every_all_key in zip(all_model.keys(), all_all.keys() ): - # Gap filling start_time = time.time( ) - solution = gapfill( model, universal ) + solution[every_model_key] = gapfill( every_model_key, every_all_key ) print( "Gap filling process took %s seconds " % (time.time( ) - start_time) ) - print( "The number of filled reactions is:".format( len( solution.reactions ) ) ) - print( "These reactions are:" ) - for reaction in solution[0]: - print( reaction.id ) - - # Exporting the files - cobra.io.write_sbml_model( model, "model.xml" ) - cobra.io.write_sbml_model( universal, "universal.xml" ) + print( "The number of filled reactions is:".format( len( solution.key( )[every_model_key].reactions ) ) ) ``` From 3048d4de8bc2f36b8bf10a0d2e8a8d1b9649fb60 Mon Sep 17 00:00:00 2001 From: Tain Date: Wed, 25 Apr 2018 19:53:56 -0500 Subject: [PATCH 5/5] fixing some comments --- Evaluation.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Evaluation.Rmd b/Evaluation.Rmd index d630d9d..9c236f0 100644 --- a/Evaluation.Rmd +++ b/Evaluation.Rmd @@ -58,7 +58,7 @@ my_models <- function(biomodel_ID, kegg_ID){ # Now lets get the reaction list and save it write.csv(g2f::getReference(kegg_ID), file = paste0(kegg_ID, "_all.csv")) - message("It's done MTF! -.-") + message("It's done!") } ```