gibbslab · tluquez · Nov 15, 2017 · Apr 25, 2018 · Apr 25, 2018 · Apr 25, 2018
diff --git a/Evaluation.Rmd b/Evaluation.Rmd
@@ -0,0 +1,218 @@
+---
+title: "Evaluation"
+author: "Tain Velasco Luquez"
+date: "19/04/2018"
+output: html_document
+editor_options: 
+  chunk_output_type: console
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+## Opening
+This file contain sthe comparison of g2f against other widely used algortihms to find and fill gaps in GEMs.
+
+### Required packages
+```{r message=FALSE}
+library("minval")
+library("sybilSBML")
+library("sybil")
+library("glpkAPI")
+library("g2f")
+library("tictoc")
+library("readr")
+```
+
+### Useful functions
+
+If the models has some syntax incompatibilities, this function enables the localisation of the incompatibilities and replace them by the user define syntax.
+```{r message=FALSE}
+# FUN by Theodore lytras in https://stackoverflow.com/questions/15253954/replace-multiple-letters-with-accents-with-gsub/15256643?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa
+mgsub <- function(pattern, replacement, x, ...) {
+  if (length(pattern)!=length(replacement)) {
+    stop("pattern and replacement do not have the same length.")
+  }
+  result <- x
+  for (i in 1:length(pattern)) {
+    result <- gsub(pattern[i], replacement[i], result, ...)
+  }
+  result
+}
+```
+
+This function retrieves an organism's GEM, in .xml extension from biomodels, and the complete set of reactions, in .csv extension from KEGG.
+```{r message=FALSE}
+my_models <- function(biomodel_ID, kegg_ID){
+  # Both arguments must be of class character
+  # Go to http://rest.kegg.jp/list/organism for the KEGG_ID code
+  # Go to https://wwwdev.ebi.ac.uk/biomodels/ for the BioModel ID
+
+  # Let's replace the url for the selected biomodel ID
+  my_model_url <- gsub("MODEL(.{10})", biomodel_ID, "https://wwwdev.ebi.ac.uk/biomodels/model/download/MODEL1507180017.2?filename=MODEL1507180017_url.xml")
+
+  # And download it
+  download.file(my_model_url, paste0(kegg_ID, ".xml"))
+
+  # Now lets get the reaction list and save it
+  write.csv(g2f::getReference(kegg_ID), file = paste0(kegg_ID, "_all.csv"))
+
+  message("It's done!")
+}
+```
+
+Extract reactions from a model
+```{r message=FALSE}
+# Function extracted from MinVal, by Daniel Osorio
+# modelData must be of class modelorg and kegg_ID of class character
+my_reactions <- function(modelData, kegg_ID){
+
+  modelData <- minval:::convertData(model = modelData)
+
+  outputData <- NULL
+  outputData$abbreviation <- modelData[["ID"]]
+  outputData$name <- modelData[["DESCRIPTION"]]
+  outputData$equation <- minval:::rearmReactions(S = minval::stoichiometricMatrix(modelData[["REACTION"]]), 
+    reversible = grepl("<=>", modelData[["REACTION"]]), 
+    type = "TSV", boundary = boundary)
+  outputData$reversible <- ifelse(test = grepl("<=>", modelData[["REACTION"]]), 
+    yes = "reversible", no = "irreversible")
+  outputData$compartment <- unlist(lapply(modelData[["REACTION"]], 
+    function(reaction) {
+      paste0(minval::compartments(reaction), collapse = " , ")
+    }))
+  outputData$lowbnd <- as.character(modelData[["LOWER.BOUND"]])
+  outputData$uppbnd <- as.character(modelData[["UPPER.BOUND"]])
+  outputData$obj_coef <- as.character(modelData[["OBJECTIVE"]])
+  outputData$rule <- modelData[["GPR"]]
+  outputData$subsystem <- rep("", length(modelData[["REACTION"]]))
+  outputData <- data.frame(outputData)
+
+  write.table(outputData, file = paste0(kegg_ID, "_react.csv"), 
+    quote = TRUE, row.names = FALSE, sep = ",")
+}
+```
+
+## Warming up
+
+We are going to load several GEMs their reaction lists and complete set of reactions from KEGG.
+```{r message=FALSE}
+# Download the models and its associated KEGG reactions
+mapply(my_models, c("MODEL1507180048", "MODEL1507180017"), c("tma", "stm"))
+
+# Load all .xml models in the working directory to the .GlobalEnv and rename them adding _model
+for (i in as.list(list.files(pattern = "\\.xml$"))) {
+  assign(gsub(".xml", "_model", i), sybilSBML::readSBMLmod(i), envir = .GlobalEnv)
+}
+# Load all reaction lists from KEGG per model 
+for (i in as.list(list.files(pattern = "\\_all.csv$"))) {
+  assign(gsub(".csv", "", i), read.csv(i), envir = .GlobalEnv)
+}
+
+# Extract the reaction list of every .xml file in the working directory, save such reaction list to a .csv with the same name as the model
+mapply(my_reactions, mget(setdiff(ls(pattern = "model"), lsf.str())), gsub("_all.csv", "", list.files(pattern = "\\_all.csv$")))
+
+# And load them to the workspace
+for (i in as.list(list.files(pattern = "\\_react.csv$"))) {
+  assign(gsub(".csv", "", i), read.csv(i), envir = .GlobalEnv)
+}
+```
+
+Inasmuch as the gap filling function is sensitive to incorrect syntax, let's check it's correctness
+```{r}
+# Is the syntax for the models correct?
+sapply(mget(setdiff(ls(pattern = "react"), lsf.str())), function(i) {
+  suppressMessages(suppressWarnings(sum(
+    minval::validateSyntax(reactionList = i$equation)
+  ) == nrow(i)))
+})
+```
+
+As seen, the reaction list have an incorrect syntax. General mistakes are described in `r ?minval::validateSyntax `. Let's fix them:
+```{r message=FALSE, eval=FALSE}
+# Value for all data files (excluding functions) in the global environment are returned in the form of list
+temp_var <- mget(setdiff(ls(pattern = "react"), lsf.str()))
+
+sapply(temp_var[[i]][, 3], function(i) {
+  assign(temp_var[[i]][, 3], mgsub(
+    c("-->", '<==>', '->', "\\(|\\)"),
+    c("=>", '<=>', "=>", ""),
+    temp_var[[i]][, 3]
+  ), envir = .GlobalEnv)
+})
+
+
+
+```
+```{r}
+stm_react$equation <- mgsub(c("-->", '<==>', '->', "\\(|\\)"), c("=>", '<=>', "=>", ""), stm_react$equation)
+
+tma_react$equation <- mgsub(c("-->", '<==>', '->', "\\(|\\)"), c("=>", '<=>', "=>", ""), tma_react$equation)
+
+# Is the syntax for the models correct?
+sapply(mget(setdiff(ls(pattern = "react"), lsf.str())), function(i) {
+  suppressMessages(suppressWarnings(sum(
+    minval::validateSyntax(reactionList = i$equation)
+  ) == nrow(i)))
+})
+```
+
+## The workout
+Once the syntax is checked, it is time to do the gapfinding and gapfilling
+```{r}
+# lets fire it up !
+tictoc::tic("Gapfilling time")
+gapFill(reactionList = unlist(stm_react$equation),
+  reference = stm_all$reaction,
+  limit = 0.25,
+  woCompartment = TRUE,
+  consensus = FALSE)
+tictoc::toc()
+
+```
+
+## Now in pyhton
+Now lets compare the g2f results against the gapfill algorithm implemented in Copbrapy
+```{python}
+# Importing the modules
+from __future__ import print_function
+from cobra.flux_analysis import gapfill
+import cobra.test, time
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import glob
+```
+
+# In order to supplement the universal reaction files to the gapfill algorithm, we are going to:
+1. Read several .csv files at once
+```{python}
+all_all = {}
+for file_all in glob.glob( '*_all.csv' ):
+    name = os.path.splitext( file_all )[0]
+    all_all[name] = cobra.Model( pd.read_csv( file_all ) )
+```
+
+2. Import the models
+```{python}
+all_model = {}
+for file_model in glob.glob( '*.xml' ):
+    name = os.path.splitext( file_model )[0]
+    all_model[name] = cobra.io.read_sbml_model( file_model )
+```    
+
+3. let's gapfill them!
+```{python}
+solution={}
+
+for every_model_key, every_all_key in zip(all_model.keys(), all_all.keys() ):
+
+    start_time = time.time( )
+    solution[every_model_key] = gapfill( every_model_key, every_all_key )
+    print( "Gap filling process took %s seconds " % (time.time( ) - start_time) )
+    print( "The number of filled reactions is:".format( len( solution.key( )[every_model_key].reactions ) ) )
+
+```
+
+
diff --git a/README.md b/README.md
@@ -11,7 +11,7 @@ For install the latest stable version this package directly from GitHub:
 # Install 'devtools' R package
 install.packages("devtools")
 
-# Install 'minval' package
+# Install 'g2f' package
 devtools::install_github("gibbslab/g2f")
 library("g2f")
 ```