diff --git a/.gitattributes b/.gitattributes index 7f4d5667..3e0ca4ab 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,4 +10,3 @@ mafft binary .gitattributes export-ignore .gitignore export-ignore .github export-ignore -*.mat filter=lfs diff=lfs merge=lfs -text diff --git a/doc/external/kegg/getGenesFromKEGG.html b/doc/external/kegg/getGenesFromKEGG.html index 2035078a..cba93c72 100644 --- a/doc/external/kegg/getGenesFromKEGG.html +++ b/doc/external/kegg/getGenesFromKEGG.html @@ -102,7 +102,7 @@

CROSS-REFERENCE INFORMATION ^
 
 <h2><a name=SUBFUNCTIONS ^

+
  • function allKOs=getAllKOs(keggPath)
  • function downloadKEGGgenes()
  • SOURCE CODE ^

    0001 function model=getGenesFromKEGG(keggPath,koList)
    @@ -178,273 +178,339 @@ 

    SOURCE CODE ^'external','kegg','keggGenes.mat'); -0074 if exist(genesFile, 'file') -0075 fprintf(['Importing KEGG genes from ' strrep(genesFile,'\','/') '... ']); -0076 load(genesFile); -0077 else -0078 fprintf(['NOTE: Cannot locate ' strrep(genesFile,'\','/') ', it will therefore be generated from the local KEGG database\n']); -0079 if ~isfile(fullfile(keggPath,'ko')) || ~isfile(fullfile(keggPath,'reaction')) -0080 EM=fprintf(['The files ''ko'' and ''reaction'' cannot be located at ' strrep(keggPath,'\','/') '/ and should be downloaded from the KEGG FTP\n']); -0081 dispEM(EM); -0082 else -0083 fprintf('Generating keggGenes.mat file...\n'); -0084 %Get all KOs that are associated to reactions -0085 allKOs=getAllKOs(keggPath); -0086 -0087 %Since the list of genes will be accessed many times the hash table -0088 %is established -0089 geneMap=containers.Map('KeyType','char','ValueType','int32'); -0090 -0091 %Add new functionality in the order specified in models -0092 model.id='KEGG'; -0093 model.name='Automatically generated from KEGG database'; -0094 -0095 %Preallocate memory -0096 model.rxns=cell(numel(allKOs),1); -0097 model.rxnNames=cell(numel(allKOs),1); -0098 -0099 %Reserve space for 500000 genes -0100 model.genes=cell(500000,1); +0074 if ~exist(genesFile, 'file') +0075 try +0076 downloadKEGGgenes(); +0077 catch +0078 %Download failed; fall through to local regeneration below +0079 end +0080 end +0081 if exist(genesFile, 'file') +0082 fprintf(['Importing KEGG genes from ' strrep(genesFile,'\','/') '... ']); +0083 load(genesFile); +0084 else +0085 fprintf(['NOTE: Cannot locate ' strrep(genesFile,'\','/') ', it will therefore be generated from the local KEGG database\n']); +0086 if ~isfile(fullfile(keggPath,'ko')) || ~isfile(fullfile(keggPath,'reaction')) +0087 EM=fprintf(['The files ''ko'' and ''reaction'' cannot be located at ' strrep(keggPath,'\','/') '/ and should be downloaded from the KEGG FTP\n']); +0088 dispEM(EM); +0089 else +0090 fprintf('Generating keggGenes.mat file...\n'); +0091 %Get all KOs that are associated to reactions +0092 allKOs=getAllKOs(keggPath); +0093 +0094 %Since the list of genes will be accessed many times the hash table +0095 %is established +0096 geneMap=containers.Map('KeyType','char','ValueType','int32'); +0097 +0098 %Add new functionality in the order specified in models +0099 model.id='KEGG'; +0100 model.name='Automatically generated from KEGG database'; 0101 -0102 %Load information on KO ID, name, and associated genes -0103 fid = fopen(fullfile(keggPath,'ko'), 'r'); -0104 -0105 %Keeps track of how many KOs that have been added -0106 koCounter=0; -0107 nGenes=0; -0108 addingGenes=false; -0109 -0110 %These contain the mapping between KOs and genes and are used to -0111 %generate the model.rxnGeneMat in the end -0112 koIndex=zeros(5000000,1); -0113 geneIndex=koIndex; -0114 nMappings=0; -0115 -0116 skipRead=false; -0117 -0118 %Loop through the file -0119 while 1 -0120 %Get the next line -0121 if skipRead==false -0122 tline = fgetl(fid); -0123 else -0124 skipRead=false; -0125 end -0126 -0127 %Abort at end of file -0128 if ~ischar(tline) -0129 break; -0130 end -0131 -0132 %Skip '///' -0133 if numel(tline)<12 -0134 addingGenes=false; -0135 continue; -0136 end -0137 -0138 %Check if it's a new reaction -0139 if strcmp(tline(1:12),'ENTRY ') -0140 %Check if it should be added -0141 koID=tline(13:18); -0142 -0143 if ismember(koID,allKOs) -0144 addMe=true; -0145 koCounter=koCounter+1; -0146 else -0147 addMe=false; -0148 continue; -0149 end -0150 -0151 %Add reaction ID (always 6 characters) -0152 model.rxns{koCounter}=koID; -0153 -0154 model.rxnNames{koCounter}=''; -0155 %Will be overwritten if it exists -0156 end -0157 -0158 %To get here we must be in a KO that should be added -0159 if addMe==true -0160 %Add name -0161 if strcmp(tline(1:12),'DEFINITION ') -0162 model.rxnNames{koCounter}=tline(13:end); -0163 end -0164 -0165 %Check if it's time to start adding genes -0166 if strcmp(tline(1:12),'GENES ') -0167 addingGenes=true; -0168 end -0169 -0170 %Add genes -0171 if addingGenes==true -0172 %We are now adding genes for the current KO. All gene -0173 %rows start with 12 spaces. If this is not true it -0174 %means that there is additional annotation such as -0175 %AUTHORS. This is not common but it exists -0176 if strcmp(tline(1:12),' ') || strcmp(tline(1:12),'GENES ') -0177 geneLine=tline(13:end); -0178 -0179 %Check if the line is from a new organism of from -0180 %the same as before -0181 if strcmp(geneLine(4),':') -0182 %If organism id contains 3 letters; -0183 currentOrganism=geneLine(1:3); -0184 %Parse the string -0185 genes=regexp(geneLine(6:end),' ','split'); -0186 genes=strcat(currentOrganism,':',genes(:)); -0187 elseif strcmp(geneLine(5),':') -0188 %If organism id contains 4 letters; -0189 currentOrganism=geneLine(1:4); -0190 %Parse the string -0191 genes=regexp(geneLine(7:end),' ','split'); -0192 genes=strcat(currentOrganism,':',genes(:)); -0193 end -0194 -0195 %Add the genes to the gene list -0196 for i=1:numel(genes) -0197 geneString=genes{i}; -0198 if geneMap.isKey(geneString) -0199 index=geneMap(geneString); -0200 else -0201 nGenes=nGenes+1; -0202 model.genes{nGenes}=geneString; -0203 index=nGenes; -0204 geneMap(geneString)=index; -0205 end -0206 -0207 %Add the connection to the KO -0208 nMappings=nMappings+1; -0209 koIndex(nMappings)=koCounter; -0210 geneIndex(nMappings)=index; -0211 end -0212 else -0213 %Now we want to throw away everything until the -0214 %next entry -0215 while 1 -0216 tline = fgetl(fid); -0217 if strcmp(tline,'///') -0218 %When the new entry is found, skip reading -0219 %one line to fit with the rest of the code -0220 skipRead=true; -0221 break; -0222 end -0223 end -0224 end -0225 end -0226 end -0227 end -0228 %Close the file -0229 fclose(fid); -0230 -0231 %If too much space was allocated, shrink the model -0232 model.rxns=model.rxns(1:koCounter); -0233 model.rxnNames=model.rxnNames(1:koCounter); -0234 model.genes=model.genes(1:nGenes); -0235 -0236 %Retrieve and add the gene associations -0237 model.rxnGeneMat=sparse(koIndex(1:nMappings),geneIndex(1:nMappings),ones(nMappings,1)); -0238 -0239 %To make sure the size is correct if the last KOs don't have genes -0240 if size(model.rxnGeneMat,1)~=koCounter -0241 model.rxnGeneMat(koCounter,1)=0; -0242 end -0243 -0244 %Trim the genes so that they only contain information that can be -0245 %matched to the KEGG file of protein sequences (remove all -0246 %information after first parenthesis) -0247 %NOTE: For some reason the organism abbreviation should be in lower -0248 %case in this database. Fix this here -0249 for i=1:numel(model.genes) -0250 parIndex=strfind(model.genes{i},'('); -0251 if any(parIndex) -0252 model.genes{i}=model.genes{i}(1:parIndex-1); -0253 end -0254 colIndex=strfind(model.genes{i},':'); -0255 model.genes{i}=[lower(model.genes{i}(1:colIndex-1)) model.genes{i}(colIndex:end)]; -0256 end -0257 -0258 %Save the model structure -0259 save(genesFile,'model'); -0260 end -0261 end -0262 -0263 %Only get the KOs in koList -0264 if nargin>1 -0265 I=~ismember(model.rxns,koList); -0266 model=removeReactions(model,I,true,true); -0267 end -0268 fprintf('COMPLETE\n'); -0269 end -0270 -0271 function allKOs=getAllKOs(keggPath) -0272 %Retrieves all KOs that are associated to reactions. This is because the -0273 %number of genes in KEGG is very large so without this parsing it would -0274 %take many hours -0275 -0276 allKOs={}; +0102 %Preallocate memory +0103 model.rxns=cell(numel(allKOs),1); +0104 model.rxnNames=cell(numel(allKOs),1); +0105 +0106 %Reserve space for 500000 genes +0107 model.genes=cell(500000,1); +0108 +0109 %Load information on KO ID, name, and associated genes +0110 fid = fopen(fullfile(keggPath,'ko'), 'r'); +0111 +0112 %Keeps track of how many KOs that have been added +0113 koCounter=0; +0114 nGenes=0; +0115 addingGenes=false; +0116 +0117 %These contain the mapping between KOs and genes and are used to +0118 %generate the model.rxnGeneMat in the end +0119 koIndex=zeros(5000000,1); +0120 geneIndex=koIndex; +0121 nMappings=0; +0122 +0123 skipRead=false; +0124 +0125 %Loop through the file +0126 while 1 +0127 %Get the next line +0128 if skipRead==false +0129 tline = fgetl(fid); +0130 else +0131 skipRead=false; +0132 end +0133 +0134 %Abort at end of file +0135 if ~ischar(tline) +0136 break; +0137 end +0138 +0139 %Skip '///' +0140 if numel(tline)<12 +0141 addingGenes=false; +0142 continue; +0143 end +0144 +0145 %Check if it's a new reaction +0146 if strcmp(tline(1:12),'ENTRY ') +0147 %Check if it should be added +0148 koID=tline(13:18); +0149 +0150 if ismember(koID,allKOs) +0151 addMe=true; +0152 koCounter=koCounter+1; +0153 else +0154 addMe=false; +0155 continue; +0156 end +0157 +0158 %Add reaction ID (always 6 characters) +0159 model.rxns{koCounter}=koID; +0160 +0161 model.rxnNames{koCounter}=''; +0162 %Will be overwritten if it exists +0163 end +0164 +0165 %To get here we must be in a KO that should be added +0166 if addMe==true +0167 %Add name +0168 if strcmp(tline(1:12),'DEFINITION ') +0169 model.rxnNames{koCounter}=tline(13:end); +0170 end +0171 +0172 %Check if it's time to start adding genes +0173 if strcmp(tline(1:12),'GENES ') +0174 addingGenes=true; +0175 end +0176 +0177 %Add genes +0178 if addingGenes==true +0179 %We are now adding genes for the current KO. All gene +0180 %rows start with 12 spaces. If this is not true it +0181 %means that there is additional annotation such as +0182 %AUTHORS. This is not common but it exists +0183 if strcmp(tline(1:12),' ') || strcmp(tline(1:12),'GENES ') +0184 geneLine=tline(13:end); +0185 +0186 %Check if the line is from a new organism of from +0187 %the same as before +0188 if strcmp(geneLine(4),':') +0189 %If organism id contains 3 letters; +0190 currentOrganism=geneLine(1:3); +0191 %Parse the string +0192 genes=regexp(geneLine(6:end),' ','split'); +0193 genes=strcat(currentOrganism,':',genes(:)); +0194 elseif strcmp(geneLine(5),':') +0195 %If organism id contains 4 letters; +0196 currentOrganism=geneLine(1:4); +0197 %Parse the string +0198 genes=regexp(geneLine(7:end),' ','split'); +0199 genes=strcat(currentOrganism,':',genes(:)); +0200 end +0201 +0202 %Add the genes to the gene list +0203 for i=1:numel(genes) +0204 geneString=genes{i}; +0205 if geneMap.isKey(geneString) +0206 index=geneMap(geneString); +0207 else +0208 nGenes=nGenes+1; +0209 model.genes{nGenes}=geneString; +0210 index=nGenes; +0211 geneMap(geneString)=index; +0212 end +0213 +0214 %Add the connection to the KO +0215 nMappings=nMappings+1; +0216 koIndex(nMappings)=koCounter; +0217 geneIndex(nMappings)=index; +0218 end +0219 else +0220 %Now we want to throw away everything until the +0221 %next entry +0222 while 1 +0223 tline = fgetl(fid); +0224 if strcmp(tline,'///') +0225 %When the new entry is found, skip reading +0226 %one line to fit with the rest of the code +0227 skipRead=true; +0228 break; +0229 end +0230 end +0231 end +0232 end +0233 end +0234 end +0235 %Close the file +0236 fclose(fid); +0237 +0238 %If too much space was allocated, shrink the model +0239 model.rxns=model.rxns(1:koCounter); +0240 model.rxnNames=model.rxnNames(1:koCounter); +0241 model.genes=model.genes(1:nGenes); +0242 +0243 %Retrieve and add the gene associations +0244 model.rxnGeneMat=sparse(koIndex(1:nMappings),geneIndex(1:nMappings),ones(nMappings,1)); +0245 +0246 %To make sure the size is correct if the last KOs don't have genes +0247 if size(model.rxnGeneMat,1)~=koCounter +0248 model.rxnGeneMat(koCounter,1)=0; +0249 end +0250 +0251 %Trim the genes so that they only contain information that can be +0252 %matched to the KEGG file of protein sequences (remove all +0253 %information after first parenthesis) +0254 %NOTE: For some reason the organism abbreviation should be in lower +0255 %case in this database. Fix this here +0256 for i=1:numel(model.genes) +0257 parIndex=strfind(model.genes{i},'('); +0258 if any(parIndex) +0259 model.genes{i}=model.genes{i}(1:parIndex-1); +0260 end +0261 colIndex=strfind(model.genes{i},':'); +0262 model.genes{i}=[lower(model.genes{i}(1:colIndex-1)) model.genes{i}(colIndex:end)]; +0263 end +0264 +0265 %Save the model structure +0266 save(genesFile,'model'); +0267 end +0268 end +0269 +0270 %Only get the KOs in koList +0271 if nargin>1 +0272 I=~ismember(model.rxns,koList); +0273 model=removeReactions(model,I,true,true); +0274 end +0275 fprintf('COMPLETE\n'); +0276 end 0277 -0278 %First check if the reactions have already been parsed -0279 ravenPath=findRAVENroot; -0280 rxnsFile=fullfile(ravenPath,'external','kegg','keggRxns.mat'); -0281 if exist(rxnsFile, 'file') -0282 fprintf(['NOTE: Importing KEGG ORTHOLOGY list from ' strrep(rxnsFile,'\','/') '.\n']); -0283 load(rxnsFile,'model'); -0284 -0285 %Loop through the reactions and add the corresponding genes -0286 for i=1:numel(model.rxns) -0287 if isstruct(model.rxnMiriams{i}) -0288 %Get all KOs -0289 allKOs=[allKOs;model.rxnMiriams{i}.value(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology'))]; -0290 end -0291 end -0292 else -0293 %Parse the reaction file instead First load information on reaction ID, -0294 %reaction name, KO, pathway and ec-number -0295 fid = fopen(fullfile(keggPath,'reaction'), 'r'); -0296 orthology=false; -0297 while 1 -0298 %Get the next line -0299 tline = fgetl(fid); -0300 -0301 %Abort at end of file -0302 if ~ischar(tline) -0303 break; -0304 end -0305 -0306 %Skip '///' -0307 if numel(tline)<12 -0308 continue; -0309 end -0310 -0311 %Check if it's a new reaction -0312 if strcmp(tline(1:12),'ENTRY ') -0313 orthology=false; -0314 end -0315 -0316 if strcmp(tline(1:9),'REFERENCE') -0317 orthology=false; -0318 end -0319 -0320 %Add KO-ids -0321 if numel(tline)>16 -0322 if strcmp(tline(1:16),'ORTHOLOGY KO: ') || strcmp(tline(1:16),' KO: ') || strcmp(tline(1:12),'ORTHOLOGY ') || orthology==true -0323 if strcmp(tline(13:16),'KO:') -0324 %This is in the old version -0325 allKOs=[allKOs;tline(17:22)]; -0326 elseif strcmp(tline(1:12),'ORTHOLOGY ') -0327 %This means that it found one KO in the new format and -0328 %that subsequent lines might be other KOs -0329 orthology=true; -0330 allKOs=[allKOs;tline(13:18)]; -0331 end -0332 end -0333 end -0334 end -0335 -0336 %Close the file -0337 fclose(fid); -0338 end -0339 allKOs=unique(allKOs); -0340 end

    +0278 function allKOs=getAllKOs(keggPath) +0279 %Retrieves all KOs that are associated to reactions. This is because the +0280 %number of genes in KEGG is very large so without this parsing it would +0281 %take many hours +0282 +0283 allKOs={}; +0284 +0285 %First check if the reactions have already been parsed +0286 ravenPath=findRAVENroot; +0287 rxnsFile=fullfile(ravenPath,'external','kegg','keggRxns.mat'); +0288 if exist(rxnsFile, 'file') +0289 fprintf(['NOTE: Importing KEGG ORTHOLOGY list from ' strrep(rxnsFile,'\','/') '.\n']); +0290 load(rxnsFile,'model'); +0291 +0292 %Loop through the reactions and add the corresponding genes +0293 for i=1:numel(model.rxns) +0294 if isstruct(model.rxnMiriams{i}) +0295 %Get all KOs +0296 allKOs=[allKOs;model.rxnMiriams{i}.value(strcmpi(model.rxnMiriams{i}.name,'kegg.orthology'))]; +0297 end +0298 end +0299 else +0300 %Parse the reaction file instead First load information on reaction ID, +0301 %reaction name, KO, pathway and ec-number +0302 fid = fopen(fullfile(keggPath,'reaction'), 'r'); +0303 orthology=false; +0304 while 1 +0305 %Get the next line +0306 tline = fgetl(fid); +0307 +0308 %Abort at end of file +0309 if ~ischar(tline) +0310 break; +0311 end +0312 +0313 %Skip '///' +0314 if numel(tline)<12 +0315 continue; +0316 end +0317 +0318 %Check if it's a new reaction +0319 if strcmp(tline(1:12),'ENTRY ') +0320 orthology=false; +0321 end +0322 +0323 if strcmp(tline(1:9),'REFERENCE') +0324 orthology=false; +0325 end +0326 +0327 %Add KO-ids +0328 if numel(tline)>16 +0329 if strcmp(tline(1:16),'ORTHOLOGY KO: ') || strcmp(tline(1:16),' KO: ') || strcmp(tline(1:12),'ORTHOLOGY ') || orthology==true +0330 if strcmp(tline(13:16),'KO:') +0331 %This is in the old version +0332 allKOs=[allKOs;tline(17:22)]; +0333 elseif strcmp(tline(1:12),'ORTHOLOGY ') +0334 %This means that it found one KO in the new format and +0335 %that subsequent lines might be other KOs +0336 orthology=true; +0337 allKOs=[allKOs;tline(13:18)]; +0338 end +0339 end +0340 end +0341 end +0342 +0343 %Close the file +0344 fclose(fid); +0345 end +0346 allKOs=unique(allKOs); +0347 end +0348 +0349 function downloadKEGGgenes() +0350 % downloadKEGGgenes +0351 % Downloads keggGenes.mat from the RAVEN GitHub release into +0352 % external/kegg/. This file is distributed as a release asset rather +0353 % than shipped in the repository because it exceeds GitHub's 100 MB +0354 % file-size limit. +0355 % +0356 % The following asset is expected on the release tag defined below: +0357 % keggGenes.zip containing keggGenes.mat +0358 % +0359 % To force a refresh (for example, after a new KEGG snapshot has been +0360 % published with a new RAVEN release), delete +0361 % external/kegg/keggGenes.mat and call getGenesFromKEGG again. +0362 % +0363 % Usage: downloadKEGGgenes() +0364 +0365 % Release tag that hosts the data archive. Bump this for any RAVEN +0366 % release that ships an updated keggGenes.mat. +0367 releaseTag = 'v2.11.1'; +0368 +0369 archiveName = 'keggGenes.zip'; +0370 targetDir = fullfile(findRAVENroot(),'external','kegg'); +0371 targetFile = fullfile(targetDir,'keggGenes.mat'); +0372 url = ['https://github.com/SysBioChalmers/RAVEN/releases/download/',... +0373 releaseTag,'/',archiveName]; +0374 zipPath = fullfile(targetDir,archiveName); +0375 +0376 fprintf(['Downloading ' archiveName ' from RAVEN ' releaseTag ' release... ']); +0377 try +0378 websave(zipPath,url); +0379 catch ME +0380 if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') +0381 error(['Failed to download ' archiveName ': the server returned ' ... +0382 'a 404 error. If this persists, please report it at ' ... +0383 'https://github.com/SysBioChalmers/RAVEN/issues']); +0384 else +0385 rethrow(ME); +0386 end +0387 end +0388 fprintf('COMPLETE\n'); +0389 +0390 fprintf(['Extracting ' archiveName ' to ' strrep(targetDir,'\','/') '... ']); +0391 try +0392 unzip(zipPath,targetDir); +0393 catch ME +0394 if isfile(zipPath); delete(zipPath); end +0395 error(['Failed to extract ' archiveName '. The downloaded archive ' ... +0396 'may be corrupted.\nOriginal error: %s'],ME.message); +0397 end +0398 fprintf('COMPLETE\n'); +0399 +0400 if isfile(zipPath); delete(zipPath); end +0401 +0402 if ~isfile(targetFile) +0403 error(['keggGenes.mat was not found in ' archiveName ... +0404 '. Please verify the release asset contents.']); +0405 end +0406 end
    Generated by m2html © 2005
    \ No newline at end of file diff --git a/doc/external/kegg/index.html b/doc/external/kegg/index.html index 6bb246fa..ee196128 100644 --- a/doc/external/kegg/index.html +++ b/doc/external/kegg/index.html @@ -23,7 +23,7 @@

    Matlab files in this directory:

    Other Matlab-specific files in this directory:

    +
  • keggMets.mat
  • keggPhylDist.mat
  • keggRxns.mat

  • Generated by m2html © 2005
    diff --git a/external/kegg/getGenesFromKEGG.m b/external/kegg/getGenesFromKEGG.m index 12d267ca..0f29305a 100755 --- a/external/kegg/getGenesFromKEGG.m +++ b/external/kegg/getGenesFromKEGG.m @@ -71,6 +71,13 @@ ravenPath=findRAVENroot(); genesFile=fullfile(ravenPath,'external','kegg','keggGenes.mat'); +if ~exist(genesFile, 'file') + try + downloadKEGGgenes(); + catch + %Download failed; fall through to local regeneration below + end +end if exist(genesFile, 'file') fprintf(['Importing KEGG genes from ' strrep(genesFile,'\','/') '... ']); load(genesFile); @@ -338,3 +345,62 @@ end allKOs=unique(allKOs); end + +function downloadKEGGgenes() +% downloadKEGGgenes +% Downloads keggGenes.mat from the RAVEN GitHub release into +% external/kegg/. This file is distributed as a release asset rather +% than shipped in the repository because it exceeds GitHub's 100 MB +% file-size limit. +% +% The following asset is expected on the release tag defined below: +% keggGenes.zip containing keggGenes.mat +% +% To force a refresh (for example, after a new KEGG snapshot has been +% published with a new RAVEN release), delete +% external/kegg/keggGenes.mat and call getGenesFromKEGG again. +% +% Usage: downloadKEGGgenes() + +% Release tag that hosts the data archive. Bump this for any RAVEN +% release that ships an updated keggGenes.mat. +releaseTag = 'v2.11.1'; + +archiveName = 'keggGenes.zip'; +targetDir = fullfile(findRAVENroot(),'external','kegg'); +targetFile = fullfile(targetDir,'keggGenes.mat'); +url = ['https://github.com/SysBioChalmers/RAVEN/releases/download/',... + releaseTag,'/',archiveName]; +zipPath = fullfile(targetDir,archiveName); + +fprintf(['Downloading ' archiveName ' from RAVEN ' releaseTag ' release... ']); +try + websave(zipPath,url); +catch ME + if strcmp(ME.identifier,'MATLAB:webservices:HTTP404StatusCodeError') + error(['Failed to download ' archiveName ': the server returned ' ... + 'a 404 error. If this persists, please report it at ' ... + 'https://github.com/SysBioChalmers/RAVEN/issues']); + else + rethrow(ME); + end +end +fprintf('COMPLETE\n'); + +fprintf(['Extracting ' archiveName ' to ' strrep(targetDir,'\','/') '... ']); +try + unzip(zipPath,targetDir); +catch ME + if isfile(zipPath); delete(zipPath); end + error(['Failed to extract ' archiveName '. The downloaded archive ' ... + 'may be corrupted.\nOriginal error: %s'],ME.message); +end +fprintf('COMPLETE\n'); + +if isfile(zipPath); delete(zipPath); end + +if ~isfile(targetFile) + error(['keggGenes.mat was not found in ' archiveName ... + '. Please verify the release asset contents.']); +end +end diff --git a/external/kegg/keggGenes.mat b/external/kegg/keggGenes.mat deleted file mode 100644 index 2494cbe9..00000000 --- a/external/kegg/keggGenes.mat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d19545a9ec9bc2c0fc69dbfd80e26ffad18f5c7144a8dbc149f8702a1c895061 -size 117040308 diff --git a/external/kegg/keggMets.mat b/external/kegg/keggMets.mat index 784901c0..f0b11b9b 100644 Binary files a/external/kegg/keggMets.mat and b/external/kegg/keggMets.mat differ diff --git a/external/kegg/keggPhylDist.mat b/external/kegg/keggPhylDist.mat index 0f05bf94..16c9cb98 100644 Binary files a/external/kegg/keggPhylDist.mat and b/external/kegg/keggPhylDist.mat differ diff --git a/external/kegg/keggRxns.mat b/external/kegg/keggRxns.mat index f5a62f8d..b1c512e4 100644 Binary files a/external/kegg/keggRxns.mat and b/external/kegg/keggRxns.mat differ diff --git a/external/metacyc/metaCycEnzymes.mat b/external/metacyc/metaCycEnzymes.mat index 7b03db05..1caf660f 100644 Binary files a/external/metacyc/metaCycEnzymes.mat and b/external/metacyc/metaCycEnzymes.mat differ diff --git a/external/metacyc/metaCycMets.mat b/external/metacyc/metaCycMets.mat index 26707d6a..cb86ac0c 100644 Binary files a/external/metacyc/metaCycMets.mat and b/external/metacyc/metaCycMets.mat differ diff --git a/external/metacyc/metaCycRxns.mat b/external/metacyc/metaCycRxns.mat index e5c680ce..97a069a1 100644 Binary files a/external/metacyc/metaCycRxns.mat and b/external/metacyc/metaCycRxns.mat differ diff --git a/software/m2html/private/m2htmltoolbarimages.mat b/software/m2html/private/m2htmltoolbarimages.mat index 19e9f16b..2fd358ee 100644 Binary files a/software/m2html/private/m2htmltoolbarimages.mat and b/software/m2html/private/m2htmltoolbarimages.mat differ diff --git a/testing/unit_tests/test_data/ecoli_textbook.mat b/testing/unit_tests/test_data/ecoli_textbook.mat index 67e344ad..ac202963 100644 Binary files a/testing/unit_tests/test_data/ecoli_textbook.mat and b/testing/unit_tests/test_data/ecoli_textbook.mat differ diff --git a/testing/unit_tests/test_data/expBlastResults.mat b/testing/unit_tests/test_data/expBlastResults.mat index 7b81eac8..051bfa72 100644 Binary files a/testing/unit_tests/test_data/expBlastResults.mat and b/testing/unit_tests/test_data/expBlastResults.mat differ diff --git a/testing/unit_tests/test_data/expCdhitMafftOutput.mat b/testing/unit_tests/test_data/expCdhitMafftOutput.mat index d13059f6..6d80662d 100644 Binary files a/testing/unit_tests/test_data/expCdhitMafftOutput.mat and b/testing/unit_tests/test_data/expCdhitMafftOutput.mat differ diff --git a/testing/unit_tests/test_data/expDiamondResults.mat b/testing/unit_tests/test_data/expDiamondResults.mat index c242a16a..d064d42c 100644 Binary files a/testing/unit_tests/test_data/expDiamondResults.mat and b/testing/unit_tests/test_data/expDiamondResults.mat differ diff --git a/testing/unit_tests/test_data/importExportResults.mat b/testing/unit_tests/test_data/importExportResults.mat index 4a3f9c35..83f86ac8 100644 Binary files a/testing/unit_tests/test_data/importExportResults.mat and b/testing/unit_tests/test_data/importExportResults.mat differ diff --git a/testing/unit_tests/test_data/miriamTestOutput.mat b/testing/unit_tests/test_data/miriamTestOutput.mat index 8035ad4e..d5fc3b09 100644 Binary files a/testing/unit_tests/test_data/miriamTestOutput.mat and b/testing/unit_tests/test_data/miriamTestOutput.mat differ diff --git a/testing/unit_tests/test_data/solverTestOutput.mat b/testing/unit_tests/test_data/solverTestOutput.mat index aaec2fc2..a0e8dd76 100644 Binary files a/testing/unit_tests/test_data/solverTestOutput.mat and b/testing/unit_tests/test_data/solverTestOutput.mat differ diff --git a/tutorial/empty.mat b/tutorial/empty.mat index 7e05da08..936aead1 100644 Binary files a/tutorial/empty.mat and b/tutorial/empty.mat differ diff --git a/tutorial/iMK1208+suppInfo.mat b/tutorial/iMK1208+suppInfo.mat index 86a5fc7c..9aa45a08 100644 Binary files a/tutorial/iMK1208+suppInfo.mat and b/tutorial/iMK1208+suppInfo.mat differ diff --git a/tutorial/pathway.mat b/tutorial/pathway.mat index 5ffd49bc..bc01714f 100644 Binary files a/tutorial/pathway.mat and b/tutorial/pathway.mat differ diff --git a/tutorial/pcPathway.mat b/tutorial/pcPathway.mat index d696b633..acfa80b5 100644 Binary files a/tutorial/pcPathway.mat and b/tutorial/pcPathway.mat differ