Grepsel

From PyMOLWiki
Revision as of 11:11, 20 July 2005 by Gawells (talk | contribs)
Jump to navigation Jump to search
#Create named selections using regular expressions for the protein sequence

import pymol
import re

# Lookup table: three-letter residue name (as reported by PyMOL's "resn")
# mapped to the one-letter code used to build the searchable sequence.
aa = {
   'ASP': 'D',
   'GLU': 'E',
   'GLN': 'Q',
   'ASN': 'N',
   'SER': 'S',
   'THR': 'T',
   'CYS': 'C',
   'HIS': 'H',
   'ARG': 'R',
   'LYS': 'K',
   'MET': 'M',
   'ALA': 'A',
   'ILE': 'I',
   'LEU': 'L',
   'VAL': 'V',
   'GLY': 'G',
   'PRO': 'P',
   'TRP': 'W',
   'PHE': 'F',
   'TYR': 'Y',
   'SCY': 'U',
   'ASX': 'B',
   'GLX': 'Z',
   'XXX': 'X',
}

# Written before PyMOL had a sequence viewer; there is probably a simpler way to get the sequence now.

def seqoneint(model):
   """Return the one-letter sequence for the CA atoms of *model*.

   model is a PyMOL selection string. The residue name of every atom
   matching "<model> and name ca" is collected (via pymol.stored.seq) and
   translated with the module-level ``aa`` table. Residue names that are
   not in the table are written as '-', so the returned string always has
   exactly one character per CA atom visited.
   """
   pymol.stored.seq = []
   cmd.iterate("%s and name ca"%model,"stored.seq.append(resn)")
   # dict.has_key() no longer exists in Python 3; dict.get() with a default
   # behaves the same on Python 2 and 3, and join avoids repeated
   # string concatenation.
   return "".join(aa.get(resn, '-') for resn in pymol.stored.seq)



def grepsel(model="(all)",stretch="",prefix="",combined="0",single="1"):
   '''
DESCRIPTION

    Make selections matching regular expressions. Selections are labelled
    as "prefix_expression_###", where ### is the residue identifier of the
    first residue of the match followed by its chain. Prefix defaults to
    the model name. combined = 1 creates one selection
    ("prefix_expression") for all occurrences. single = 1 creates one
    selection for each occurrence (the default).

    The expression is matched case-insensitively against the one-letter
    sequence built from the CA atoms of the selection; the upper-cased
    expression is used in the selection names.

USAGE

    grepsel selection, expression, [prefix, [combined, [single ]]]

EXAMPLES

    Make selections for all motifs matching "ESS" (model_ESS_###,...):
    grepsel model, ess

    Make selections for the PxGY motif with prefix m (m_P.GY_###,...):
    grepsel model, p.gy, m

    '''

   if model == "(all)":
      model = "all"
   if prefix == "":
      prefix = model

   # Arguments are strings when typed at the PyMOL prompt but may be ints
   # when the function is called from Python; accept both.
   want_combined = str(combined).strip() == "1"
   want_single = str(single).strip() == "1"

   if not stretch:
      print("grepsel: no expression given")
      return

   # Only the label is upper-cased. Upper-casing the pattern itself would
   # turn escapes such as \d into \D, so match case-insensitively instead.
   label = stretch.upper()
   motif = re.compile(stretch, re.IGNORECASE)

   # seqoneint() walks "<model> and name ca"; walking the same selection
   # here keeps index i of the sequence aligned with residue record i.
   # NOTE: the sequence is one string over every CA atom visited, so a
   # match can run across a chain (or object) boundary.
   seq = seqoneint(model)
   pymol.stored.grepsel_residues = []
   cmd.iterate("%s and name ca"%model,
               "stored.grepsel_residues.append((model,chain,resi))")
   residues = pymol.stored.grepsel_residues

   combined_name = "%s_%s"%(prefix,label)
   if want_combined:
      cmd.select(combined_name, "none")

   for match in motif.finditer(seq):
      # Slice by the span of the match, not by the length of the pattern
      # text: "P.{2,4}GY" or "[ST]P" match a different number of residues
      # than they have characters.
      hits = residues[match.start():match.end()]
      if not hits:
         continue   # zero-width match, nothing to select

      # Fully qualified macro /object/segi/chain/resi. It stays inside the
      # object the match came from and is still well formed when the chain
      # identifier is empty.
      expr = " | ".join("/%s//%s/%s"%hit for hit in hits)

      if want_single:
         # Name the selection once, from the first residue of the match.
         first_chain, first_resi = hits[0][1], hits[0][2]
         cmd.select("%s_%s_%s%s"%(prefix,label,first_resi,first_chain), expr)
      if want_combined:
         cmd.select(combined_name, "%s | %s"%(combined_name,expr))

   # Hide the selection indicators. The previous idiom created a temporary
   # selection and then deleted everything named "sel*", which also removed
   # unrelated user selections (and the new ones if prefix began with "sel").
   cmd.deselect()

# Register grepsel with PyMOL so it can be called as the "grepsel" command
# (see the USAGE section of its docstring).
cmd.extend("grepsel",grepsel)