Tiff2ccp4

From PyMOLWiki
Jump to: navigation, search

Overview

This script will convert tiff stacks to CCP4 maps for reading in PyMOL.

If someone knows how to determine the file's dimensions from the file itself (not the image size, but the physical dimension size) please let me know.

Note: requires Python 2.7 or later (for argparse).

Usage

#
# convert input.tiff to output.ccp4 which has cell dimensions
# x=40 A, y=80.5 A, Z=125.2 A
#
python tiff2ccp4.py input.tiff output.ccp4 -x 40 -y 80.5 -z 125.2
 
#
# print help
#
python tiff2ccp4.py -h

The Code

#
# tiff2ccp4.py -- convert a TIFF stack to a CCP4 map
#
# Author: Jason Vertrees
# Date  : 2011-09-16
#
# Notes : To get help, type "python tiff2ccp4.py -h"
#
import struct
import sys
import string
 
# key   = field_type
# value = # bytes for that field
field_types = {                                                                                                                
    1 : 1,                                                                                                                     
    2 : 1,                                                                                                                     
    3 : 2,                                                                                                                     
    4 : 4,                                                                                                                     
    5 : 8,                                                                                                                     
    6 : 1,                                                                                                                     
    7 : 1,                                                                                                                     
    8 : 2,                                                                                                                     
    9 : 4,                                                                                                                     
    10: 8,                                                                                                                     
    11: 4,                                                                                                                     
    12: 8 }                                                                                                                    
 
field_fmt = {                                                                                                                  
    1 : "B",                                                                                                                   
    2 : "c",                                                                                                                   
    3 : "H",                                                                                                                   
    4 : "I",                                                                                                                   
    5 : "II",                                                                                                                  
    6 : "i",                                                                                                                   
    7 : "B",                                                                                                                   
    8 : "h",                                                                                                                   
    9 : "i",                                                                                                                   
    10: "ii",                                                                                                                  
    11: "f",                                                                                                                   
    12: "d" }                                                                                                                  
 
tag_types = {                                                                                                                  
    "NewSubfileType" : 254,                                                                                                    
    "ImageWidth"  : 256,  # # cols (number of pixels per scanline)                                                             
    "ImageLength" : 257,  # # rows (number of scanlines)                                                                       
    "BitsPerSample" : 258,                                                                                                     
    "PhotometricInterpretation" : 262,                                                                                         
    "ImageDescription" : 270,                                                                                                  
    "StripOffsets" : 273,                                                                                                      
    "SamplesPerPixel" : 277,                                                                                                   
    "RowsPerStrip" : 278,                                                                                                      
    "StripByteCounts" : 279,                                                                                                   
    "XResolution" : 282,                                                                                                       
    "YResolution" : 283,                                                                                                       
    "ZResolution" : 284,                                                                                                       
    "ResolutionUnit" : 296,                                                                                                    
    }
 
 
class TIFFStack:
 
    def __init__(self,filename):
 
        self._filename = filename
        self._f = open(self._filename, 'rb')
 
        self._file_size = self.get_file_size()
 
        # read and store the file header
 
        self._header = self.get_header()
 
        # read all IFDs and store
 
        self._IFDs = self.get_IFDs()
 
 
    def __str__(self):
        s  = ""
        s += "[TIFFStack]\n"
        s += "Filename   : %s\n" % self._filename
        s += "File size  : %s\n" % self._file_size
        s += "Header     : %s\n" % self._header
        s += "IFDs       : %s\n" % self._IFDs
        return s
 
 
    def get_file_size(self):
        """
        file size in bytes
        """
        if not self._f:
            print "Invalid: Must have open file handle."
            return -1
 
        # less typing
 
        f = self._f
 
        # get file size
 
        curP = f.tell()
        f.seek(0,2)
        sz = f.tell()
 
        # return to previous location
 
        f.seek(curP)
 
        return sz
 
 
    def get_header(self):
        return TIFFHeader(self._f)
 
 
    def get_IFDs(self):
        return IFD_Store(self._f, self._header)
 
    def get_data(self):
 
        f = self._f
 
        # bits per sample
 
        bps = 258
 
        # StripOffsets
 
        offset = 273
 
        # byte counts per strip
 
        byte_count = 279
 
        sample_format = 339
 
        data = []
 
        ifds = self._IFDs._store
 
        for curIFD in ifds:
 
            curOffset = curIFD[offset]._val
            curLength = curIFD[byte_count]._val
            curBPS = curIFD[bps]._val
            bytesPerSample = curBPS / 8.0
 
            fld = self.get_field_fmt(curIFD)
 
            # use "B" for now; support 16-bit later using tag 339 (SampleFormat)
            unpackStr = self._header._e_flg + fld * int(curLength/bytesPerSample)
 
            f.seek(curOffset)
 
            data.extend(struct.unpack(unpackStr, f.read(curLength)))
 
        return data
 
    def get_field_fmt(self,ifd):
        """
        Determines the Python struct code given
        BitsPerSample and SampleFormat from the IFD
        """
        bits_per_sample, sample_fmt = 258, 339
 
        bps = ifd[bits_per_sample]._val
 
        fmt = None
 
        if sample_fmt in ifd.keys():
            fmt = ifd[sample_fmt]._val
        else:
            fmt = 1
 
        if bps==8:
            # 1-byte unsigned int
            if fmt==1:
                return "B"
            # 1-byte signed
            elif fmt==2:
                return "b"
        elif bps==16:
            # 2-byte unsigned
            if fmt==1:
                return "H"
            elif fmt==2:
                return "h"
 
    def get_size(self):
        fX, fY = 256, 257
        x = self._IFDs._store[0][fX]._val
        y = self._IFDs._store[0][fY]._val
        z = len(self._IFDs._store)
 
        return x, y, z
 
    def get_axes_scale(self):
        # for now
        return 1,1,1
 
 
    def asCCP4(self,filename,dimX=-1,dimY=-1,dimZ=-1):
        data = self.get_data()
 
        # pixels size x,y,z
 
        nX, nY, nZ = self.get_size()
 
        # dimension scaling
 
        if -1 in (dimX,dimY,dimZ):
            dimX, dimY, dimZ = self.get_axes_scale()
            dimX *= nX
            dimY *= nY
            dimZ *= nZ
 
        m, avg, M = min(data), sum(data)/len(data), max(data)
 
        outFile = open(filename, 'wb')
 
        outFile.write(struct.pack('i',nX)) # col
        outFile.write(struct.pack('i',nY)) # row
        outFile.write(struct.pack('i',nZ)) # section
        outFile.write(struct.pack('i',2))  # mode = 2
 
        outFile.write(struct.pack('i',1))  # number of first col 
        outFile.write(struct.pack('i',1))  # '' row
        outFile.write(struct.pack('i',1))  # '' section
 
        outFile.write(struct.pack('i',nX)) # nIntervals
        outFile.write(struct.pack('i',nY)) #
        outFile.write(struct.pack('i',nZ)) #
 
        outFile.write(struct.pack('f',dimX)) # length in X
        outFile.write(struct.pack('f',dimY)) # length in Y
        outFile.write(struct.pack('f',dimZ)) # length in Z
 
        outFile.write(struct.pack('f',90.)) # alpha 
        outFile.write(struct.pack('f',90.)) # beta
        outFile.write(struct.pack('f',90.)) # gamma
 
        outFile.write(struct.pack('i',1)) # axis cols
        outFile.write(struct.pack('i',2)) # axis rows
        outFile.write(struct.pack('i',3)) # axis section
 
        outFile.write(struct.pack('f', m)) # min density
        outFile.write(struct.pack('f', avg))  # max density
        outFile.write(struct.pack('f', M))  # mean density
 
        outFile.write(struct.pack('i',0)) # space gp ?
 
        # header info; blank for us
        for x in range(24,257): outFile.write(struct.pack('i',0))
 
        # assume constant data in file
        norm = 255.
        bps = tag_types["BitsPerSample"]
        max_bits = self._IFDs._store[0][bps]._val
        norm = float(2**max_bits-1.)
 
        # read/write data
        for x in data:
            outFile.write(struct.pack('f', x/norm))
 
        outFile.close()  
 
 
class TIFFHeader:
    def __init__(self,fileHandle):
        self._endian, self._e_flg = self.get_endian(fileHandle)
        self._magic_number = self.get_magic_number(fileHandle)
        self._first_IFD = self.get_first_IFDOffset(fileHandle)
 
    def __str__(self):
        s  = "\n"
        s += "  [TIFF Header]\n"
        s += "  Endian        : %s\n" % self._endian
        s += "  Endian Flag   : %s\n" % self._e_flg
        s += "  Magic Number  : %s\n" % self._magic_number
        s += "  IFD[0] Offset : %s" % self._first_IFD
        return s
 
    # for struct.unpackx
    def _1byte(self,n=1):
        return self._e_flg + "C"*n
    def _2byte(self,n=1):
        return self._e_flg + "H"*n
    def _4byte(self,n=1):
        return self._e_flg + "I"*n
    def _IFDbyte(self,n=1):
        return self._e_flg + "HHII"*n
 
    def get_endian(self,fileHandle):
        f = fileHandle
        f.seek(0)
        code = struct.unpack("cc", f.read(2))
        code = "".join(code)
        flg = ""
        if code=="II":
            flg = "<"
        elif code=="MM":
            flg = ">"
        else:
            print "This file is not a valid TIFF (bad endian tag)."
            flg = "?"
        return code,flg
 
    def get_magic_number(self,fileHandle):
        f = fileHandle
        f.seek(2)
        # read the magic number
        idx = 0
        if self._endian == "II": idx = 1
        _42 = struct.unpack(self._2byte(), f.read(2))[0]
        if _42!=42:
            print "Error: Improperly formatted TIFF file (bad magic number)."
            return None
        return _42
 
    def get_first_IFDOffset(self,fileHandle):
        f=fileHandle
        f.seek(4)
        off = struct.unpack(self._4byte(), f.read(4))[0]
        return off
 
 
class IFD_Store:
    def __init__(self,fileHandle,fileHeader):
        self._f = fileHandle
        self._header = fileHeader
        self._first_offset = self._header._first_IFD
        # array of IFDs
        self._store = self.read_ifds()
 
    def __str__(self):
        s  = "\n"
        s += "  [IFD_Store]\n"
        s += "  First Offset : %d\n" % self._first_offset
        s += "  Store :\n"
        for st in self._store:
            s += "\n\n  IFD Store =>\n"
            for k in st:
                s += "    %s => %s" % (k,st[k])
        return s
 
    def read_ifds(self):
 
        f = self._f
 
        pos = self._first_offset
 
        ifds = []
 
        while pos!=0:
 
            ifds.append({})
 
            # get number of IFD_Entries
 
            f.seek(pos)
 
            # read number of IFDs
            num_entries = struct.unpack(self._header._2byte(), f.read(2))
            num_entries = num_entries[0]
 
            # read all into IFD[x]
            for x in range(num_entries):
                # pull the current record from file
                curParams = struct.unpack(self._header._IFDbyte(), f.read(12))
 
                # format the data if necessary
                if (self._header._e_flg==">" and sys.byteorder=="little") and \
                        curParams[0] not in (270,50838,50839):
                    scale = 32 - (8 * field_types[curParams[1]])
                    scaledData = curParams[3] >> scale
                else:
                    scaledData = curParams[3]
 
 
                ifds[-1][curParams[0]] = IFDEntry(curParams[0], curParams[1], curParams[2], scaledData)
 
            # read next offset
            pos = struct.unpack(self._header._4byte(), f.read(4))[0]
 
        return ifds
 
class IFDEntry:
    def __init__(self,theTag=None,theType=None,theCount=None,theValue=None):
        self._tag = theTag
        self._type = theType
        self._count = theCount
        self._val = theValue
 
    def __str__(self):
        s  = "\n"
        s += "  [IFD_Entry]\n"
        s += " Tag    : %s\n" % self._tag
        s += " Type   : %s\n" % self._type
        s += " Count  : %s\n" % self._count
        s += " Val/Off: %s\n" % self._val
        return s
 
if __name__=="__main__":
 
    """
    Running,
 
    python tiff2ccp4.py
 
    will convert all TIFF stacks in the current directory to 
    CCP4 maps.
    """
    import argparse
    from string import split
 
    parser = argparse.ArgumentParser(description="Convert a TIFF Stack to a CCP4 Map")
    parser.add_argument("input", type=str, help="input file name (usually .tif, .tiff)")
    parser.add_argument("output", type=str, help="output file name (usually .ccp4)")
    parser.add_argument('-x',"--x", help="length of x-dimension",default=-1.,type=float)
    parser.add_argument('-y',"--y", help="length of y-dimension",default=-1.,type=float)
    parser.add_argument('-z',"--z", help="length of z-dimension",default=-1.,type=float)
 
    args = parser.parse_args()
 
    s = TIFFStack(args.input)
    s.asCCP4(args.output, args.x, args.y, args.z)
Personal tools
Namespaces
Variants
Actions
Navigation
Toolbox