Grepsel

From PyMOLWiki
Revision as of 12:11, 20 July 2005 by Gawells (talk | contribs)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
#Create named selections using regular expressions for the protein sequence

import re

import pymol
from pymol import cmd

# Three-letter residue name -> one-letter code. Names that are not listed
# here are rendered as '-' by seqoneint().
aa = {
   "ASP": "D", "GLU": "E", "GLN": "Q", "ASN": "N", "SER": "S",
   "THR": "T", "CYS": "C", "HIS": "H", "ARG": "R", "LYS": "K",
   "MET": "M", "ALA": "A", "ILE": "I", "LEU": "L", "VAL": "V",
   "GLY": "G", "PRO": "P", "TRP": "W", "PHE": "F", "TYR": "Y",
   # non-standard / ambiguous codes
   "SCY": "U", "ASX": "B", "GLX": "Z", "XXX": "X",
}

# Written before PyMOL had a sequence viewer; there is probably a more direct way to get the sequence now.

def seqoneint(model):
   """Return the one-letter-code sequence of ``model`` as a string.

   One character is produced for every atom matched by
   "<model> and name ca", in the order cmd.iterate visits them. Residue
   names that are not keys of the ``aa`` table are written as '-', so
   the returned string always has exactly one character per matched atom.

   Side effect: pymol.stored.seq is overwritten with the list of residue
   names.
   """
   pymol.stored.seq = []
   cmd.iterate("%s and name ca"%model,"stored.seq.append(resn)")
   # dict.get() instead of has_key(): has_key() no longer exists in Python 3.
   return "".join([aa.get(resn, '-') for resn in pymol.stored.seq])



def grepsel(model="(all)",stretch="",prefix="",combined="0",single="1"):
   '''
DESCRIPTION

    Make selections matching regular expressions. The expression is
    upper-cased and searched for in the one-letter sequence of the
    C-alpha atoms of "selection". Selections are labelled
    "prefix_expression_###C", where ### is the residue identifier of the
    first residue of the match and C is its chain. Prefix defaults to
    the selection name. combined = 1 creates one selection
    ("prefix_expression") holding all occurrences. single = 1 creates one
    selection for each occurrence (the default).

USAGE

    grepsel selection, expression, [prefix, [combined, [single ]]]

EXAMPLES

    Make selections for all motifs matching "ESS" (model_ESS_###,...):
    grepsel model, ess

    Make selections for the PxGY motif with prefix m (m_P.GY_###,...):
    grepsel model, p.gy, m

NOTES

    An invalid regular expression raises re.error. Zero-length matches
    are ignored.
    '''

   def _residue_macro(chain, resi):
      # "chain/resi/" selects every atom of one residue. With an empty
      # chain the macro would start with "/" and be read as an
      # object-level macro, so fall back to a plain resi selector.
      if chain:
         return "%s/%s/"%(chain,resi)
      return "resi %s"%resi

   if model == "(all)":
      model = "all"
   if prefix == "":
      prefix = model

   # The PyMOL command line passes strings, Python callers may pass ints.
   want_combined = str(combined) == "1"
   want_single = str(single) == "1"

   stretch = stretch.upper()
   seq = seqoneint(model)

   # Same atom query as seqoneint(), so index i in seq corresponds to
   # resis[i] / chains[i].
   pymol.stored.resi = []
   pymol.stored.chain = []
   cmd.iterate("%s and name ca"%model,"stored.resi.append(resi);stored.chain.append(chain)")
   resis = pymol.stored.resi
   chains = pymol.stored.chain

   motif = re.compile(stretch)
   combined_name = "%s_%s"%(prefix,stretch)
   if want_combined:
      cmd.select(combined_name, "none")

   for match in motif.finditer(seq):
      first, stop = match.start(), match.end()
      if first == stop:
         continue
      # Use the span of the actual match; the length of the pattern text
      # differs from it as soon as the pattern has classes or quantifiers.
      residues = " | ".join([_residue_macro(chains[i], resis[i])
                             for i in range(first, stop)])
      # Keep whole residues, but only in the object(s) that were searched.
      hit = "(byobject (%s)) and (%s)"%(model,residues)
      if want_single:
         # The name is fixed by the first residue of the match.
         cmd.select("%s_%s_%s%s"%(prefix,stretch,resis[first],chains[first]), hit)
      if want_combined:
         cmd.select(combined_name, "%s | (%s)"%(combined_name,hit))

   # Clear the active selection left behind by the calls above.
   # NOTE(review): delete("sel*") also removes any user selection whose name
   # starts with "sel"; kept for backward compatibility - confirm it is wanted.
   cmd.select("none")
   cmd.delete("sel*")

# Register grepsel as a PyMOL command so it can be typed at the prompt.
cmd.extend("grepsel", grepsel)