@@ -433,111 +433,6 @@ def best_structures(uniprot_id, outname=None, outdir=None, seq_ident_cutoff=0.0,
433433 return data
434434
435435
def _parse_blast_hit(hit, len_orig, seq_ident_cutoff):
    """Convert one BLAST ``Hit`` XML element into a result dictionary.

    Only the first ``Hsp`` of the hit is used: one PDB entry can align to several
    parts of the query, and the first HSP listed is taken as the top alignment.

    Args:
        hit: XML element for a single ``Hit``.
        len_orig (float): Length of the query sequence, used to turn counts into fractions.
        seq_ident_cutoff (float): Minimum fraction (identical residues / query length) to keep the hit.

    Returns:
        dict: Parsed hit information, or None if the hit has no HSP or is below the identity cutoff.

    """
    info = {}
    hit_label = 'unknown hit'

    hitdef = hit.find('Hit_def')
    if hitdef is not None:
        # The first '|'-separated field is split on ':'; item 0 is used as the
        # PDB ID and item 2 as the comma-separated list of chains.
        id_fields = hitdef.text.split('|')[0].split(':')
        hit_label = id_fields[0]
        info['hit_pdb'] = id_fields[0].lower()
        info['hit_pdb_chains'] = id_fields[2].split(',')

    hsp_list = hit.findall('Hit_hsps/Hsp')
    if not hsp_list:
        # Previously an IndexError; a hit without alignments carries no usable data
        log.debug('{}: no HSP found for this hit, skipping'.format(hit_label))
        return None
    hsp = hsp_list[0]

    # Number of identical residues
    hspi = hsp.find('Hsp_identity')
    if hspi is not None:
        num_ident = int(hspi.text)
        percent_ident = num_ident / len_orig
        info['hit_num_ident'] = num_ident
        info['hit_percent_ident'] = percent_ident

        if percent_ident < seq_ident_cutoff:
            # hit_label is used (not hitdef.text) so a missing Hit_def cannot crash the log call
            log.debug('{}: does not meet sequence identity cutoff'.format(hit_label))
            return None

    # Number of similar residues (positive hits)
    hspp = hsp.find('Hsp_positive')
    if hspp is not None:
        num_similar = int(hspp.text)
        info['hit_num_similar'] = num_similar
        info['hit_percent_similar'] = num_similar / len_orig

    # Total number of gaps (unable to align in either query or subject)
    hspg = hsp.find('Hsp_gaps')
    if hspg is not None:
        num_gaps = int(hspg.text)
        info['hit_num_gaps'] = num_gaps
        info['hit_percent_gaps'] = num_gaps / len_orig

    # E-value of BLAST
    hspe = hsp.find('Hsp_evalue')
    if hspe is not None:
        info['hit_evalue'] = float(hspe.text)

    # Score of BLAST
    hsps = hsp.find('Hsp_score')
    if hsps is not None:
        info['hit_score'] = float(hsps.text)

    return info


def blast_pdb(seq, outfile='', outdir='', evalue=0.0001, seq_ident_cutoff=0.0, link=False, force_rerun=False):
    """Returns a list of BLAST hits of a sequence to available structures in the PDB.

    Results are requested from the RCSB ``getBlastPDB1`` REST endpoint as XML, unless
    ``ssbio.utils.force_rerun`` reports that the results saved at ``outfile`` can be reused.

    Args:
        seq (str): Your sequence, in string format
        outfile (str): Name of output file. If empty, the XML results are not saved.
        outdir (str, optional): Path to output directory. Default is the current directory.
        evalue (float, optional): Cutoff for the E-value - filters for significant hits.
            0.001 is liberal, 0.0001 is stringent (default).
        seq_ident_cutoff (float, optional): Hits whose number of identical residues divided by the
            length of ``seq`` is below this value (in decimal form) are dropped
        link (bool, optional): Set to True if a link to the HTML results should be displayed
        force_rerun (bool, optional): If existing BLAST results should not be used, set to True.
            Default is False

    Returns:
        list: List of BLAST hits as dictionaries, in the order they appear in the BLAST output.
        An empty list is returned if the server does not answer with HTTP status 200.

    Raises:
        ValueError: If ``seq`` is shorter than 12 residues.

    """

    if len(seq) < 12:
        raise ValueError('Sequence must be at least 12 residues long.')

    # Single template for both the human-readable (HTML) and the parsed (XML) request
    blast_url = ('http://www.rcsb.org/pdb/rest/getBlastPDB1?sequence={}&eCutOff={}'
                 '&maskLowComplexity=yes&matrix=BLOSUM62&outputFormat={}')

    if link:
        print('PDB results page: ' + blast_url.format(seq, evalue, 'HTML'))

    parser = etree.XMLParser(ns_clean=True)

    # Only build a path when a file name was given; joining outdir with an empty
    # name yields the directory itself, which must not be used as the cache file.
    if outfile:
        outfile = op.join(outdir, outfile)

    if ssbio.utils.force_rerun(force_rerun, outfile):
        # Request fresh BLAST XML results from the REST server
        req = requests.get(blast_url.format(seq, evalue, 'XML'))
        if req.status_code != 200:
            log.error('BLAST request failed with HTTP status code {}'.format(req.status_code))
            return []
        response = req.text

        # Save the XML file
        if outfile:
            with open(outfile, 'w') as f:
                f.write(response)

        # Parse the XML string
        tree = etree.ElementTree(etree.fromstring(response, parser))
        log.debug('Loaded BLAST results from REST server')
    else:
        tree = etree.parse(outfile, parser)
        log.debug('{}: Loaded existing BLAST XML results'.format(outfile))

    # Get length of original sequence to calculate percentages
    len_orig = float(len(seq))

    root = tree.getroot()
    hit_list = []
    for hit in root.findall('BlastOutput_iterations/Iteration/Iteration_hits/Hit'):
        info = _parse_blast_hit(hit, len_orig, seq_ident_cutoff)
        if info is not None:
            hit_list.append(info)

    log.debug("{}: Number of BLAST hits".format(len(hit_list)))
    return hit_list
539-
540-
541436def blast_pdb_df (blast_results ):
542437 """Make a dataframe of BLAST results"""
543438 cols = ['hit_pdb' , 'hit_pdb_chains' , 'hit_evalue' , 'hit_score' , 'hit_num_ident' , 'hit_percent_ident' ,
@@ -738,7 +633,7 @@ def get_bioassembly_info(pdb_id, biomol_num, cache=False, outdir=None, force_rer
738633def download_biomol (pdb_id , biomol_num , outdir , file_type = 'pdb' , force_rerun = False ):
739634 import zlib
740635 from six .moves .urllib_error import URLError
741- from six .moves .urllib .request import urlopen , urlretrieve
636+ from six .moves .urllib .request import urlopen
742637 import contextlib
743638
744639 ssbio .utils .make_dir (outdir )
0 commit comments