Tiff2ccp4

From PyMOLWiki
Revision as of 09:23, 17 August 2016 by Speleo3 (talk | contribs) (see also load_img_stack)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Overview

This script will convert tiff stacks to CCP4 maps for reading in PyMOL.

See also the load_img_stack script for loading image stacks directly into PyMOL.

If someone knows how to determine the file's dimensions from the file itself (not the image size, but the physical dimension size) please let me know.

Note: requires Python 2.7 or later (for argparse).

Usage

#
# convert input.tiff to output.ccp4 which has cell dimensions
# x=40 A, y=80.5 A, Z=125.2 A
#
python tiff2ccp4.py input.tiff output.ccp4 -x 40 -y 80.5 -z 125.2

#
# print help
#
python tiff2ccp4.py -h

The Code

#
# tiff2ccp4.py -- convert a TIFF stack to a CCP4 map
#
# Author: Jason Vertrees
# Date  : 2011-09-16
#
# Notes : To get help, type "python tiff2ccp4.py -h"
#
import struct
import sys
import string

# key   = field_type
# value = # bytes for that field
field_types = {                                                                                                                
    1 : 1,                                                                                                                     
    2 : 1,                                                                                                                     
    3 : 2,                                                                                                                     
    4 : 4,                                                                                                                     
    5 : 8,                                                                                                                     
    6 : 1,                                                                                                                     
    7 : 1,                                                                                                                     
    8 : 2,                                                                                                                     
    9 : 4,                                                                                                                     
    10: 8,                                                                                                                     
    11: 4,                                                                                                                     
    12: 8 }                                                                                                                    
                                                                                                                               
field_fmt = {                                                                                                                  
    1 : "B",                                                                                                                   
    2 : "c",                                                                                                                   
    3 : "H",                                                                                                                   
    4 : "I",                                                                                                                   
    5 : "II",                                                                                                                  
    6 : "i",                                                                                                                   
    7 : "B",                                                                                                                   
    8 : "h",                                                                                                                   
    9 : "i",                                                                                                                   
    10: "ii",                                                                                                                  
    11: "f",                                                                                                                   
    12: "d" }                                                                                                                  
                                                                                                                               
tag_types = {                                                                                                                  
    "NewSubfileType" : 254,                                                                                                    
    "ImageWidth"  : 256,  # # cols (number of pixels per scanline)                                                             
    "ImageLength" : 257,  # # rows (number of scanlines)                                                                       
    "BitsPerSample" : 258,                                                                                                     
    "PhotometricInterpretation" : 262,                                                                                         
    "ImageDescription" : 270,                                                                                                  
    "StripOffsets" : 273,                                                                                                      
    "SamplesPerPixel" : 277,                                                                                                   
    "RowsPerStrip" : 278,                                                                                                      
    "StripByteCounts" : 279,                                                                                                   
    "XResolution" : 282,                                                                                                       
    "YResolution" : 283,                                                                                                       
    "ZResolution" : 284,                                                                                                       
    "ResolutionUnit" : 296,                                                                                                    
    }


class TIFFStack:

    def __init__(self,filename):

        self._filename = filename
        self._f = open(self._filename, 'rb')
        
        self._file_size = self.get_file_size()

        # read and store the file header

        self._header = self.get_header()

        # read all IFDs and store

        self._IFDs = self.get_IFDs()


    def __str__(self):
        s  = ""
        s += "[TIFFStack]\n"
        s += "Filename   : %s\n" % self._filename
        s += "File size  : %s\n" % self._file_size
        s += "Header     : %s\n" % self._header
        s += "IFDs       : %s\n" % self._IFDs
        return s


    def get_file_size(self):
        """
        file size in bytes
        """
        if not self._f:
            print "Invalid: Must have open file handle."
            return -1

        # less typing

        f = self._f

        # get file size

        curP = f.tell()
        f.seek(0,2)
        sz = f.tell()

        # return to previous location

        f.seek(curP)

        return sz


    def get_header(self):
        return TIFFHeader(self._f)


    def get_IFDs(self):
        return IFD_Store(self._f, self._header)

    def get_data(self):

        f = self._f

        # bits per sample

        bps = 258

        # StripOffsets

        offset = 273

        # byte counts per strip

        byte_count = 279

        sample_format = 339

        data = []

        ifds = self._IFDs._store

        for curIFD in ifds:

            curOffset = curIFD[offset]._val
            curLength = curIFD[byte_count]._val
            curBPS = curIFD[bps]._val
            bytesPerSample = curBPS / 8.0
            
            fld = self.get_field_fmt(curIFD)

            # use "B" for now; support 16-bit later using tag 339 (SampleFormat)
            unpackStr = self._header._e_flg + fld * int(curLength/bytesPerSample)

            f.seek(curOffset)

            data.extend(struct.unpack(unpackStr, f.read(curLength)))

        return data
        
    def get_field_fmt(self,ifd):
        """
        Determines the Python struct code given
        BitsPerSample and SampleFormat from the IFD
        """
        bits_per_sample, sample_fmt = 258, 339

        bps = ifd[bits_per_sample]._val

        fmt = None

        if sample_fmt in ifd.keys():
            fmt = ifd[sample_fmt]._val
        else:
            fmt = 1

        if bps==8:
            # 1-byte unsigned int
            if fmt==1:
                return "B"
            # 1-byte signed
            elif fmt==2:
                return "b"
        elif bps==16:
            # 2-byte unsigned
            if fmt==1:
                return "H"
            elif fmt==2:
                return "h"

    def get_size(self):
        fX, fY = 256, 257
        x = self._IFDs._store[0][fX]._val
        y = self._IFDs._store[0][fY]._val
        z = len(self._IFDs._store)
        
        return x, y, z

    def get_axes_scale(self):
        # for now
        return 1,1,1


    def asCCP4(self,filename,dimX=-1,dimY=-1,dimZ=-1):
        data = self.get_data()
        
        # pixels size x,y,z

        nX, nY, nZ = self.get_size()

        # dimension scaling

        if -1 in (dimX,dimY,dimZ):
            dimX, dimY, dimZ = self.get_axes_scale()
            dimX *= nX
            dimY *= nY
            dimZ *= nZ

        m, avg, M = min(data), sum(data)/len(data), max(data)
        
        outFile = open(filename, 'wb')
        
        outFile.write(struct.pack('i',nX)) # col
        outFile.write(struct.pack('i',nY)) # row
        outFile.write(struct.pack('i',nZ)) # section
        outFile.write(struct.pack('i',2))  # mode = 2
        
        outFile.write(struct.pack('i',1))  # number of first col 
        outFile.write(struct.pack('i',1))  # '' row
        outFile.write(struct.pack('i',1))  # '' section
        
        outFile.write(struct.pack('i',nX)) # nIntervals
        outFile.write(struct.pack('i',nY)) #
        outFile.write(struct.pack('i',nZ)) #
        
        outFile.write(struct.pack('f',dimX)) # length in X
        outFile.write(struct.pack('f',dimY)) # length in Y
        outFile.write(struct.pack('f',dimZ)) # length in Z
        
        outFile.write(struct.pack('f',90.)) # alpha 
        outFile.write(struct.pack('f',90.)) # beta
        outFile.write(struct.pack('f',90.)) # gamma
        
        outFile.write(struct.pack('i',1)) # axis cols
        outFile.write(struct.pack('i',2)) # axis rows
        outFile.write(struct.pack('i',3)) # axis section
        
        outFile.write(struct.pack('f', m)) # min density
        outFile.write(struct.pack('f', avg))  # max density
        outFile.write(struct.pack('f', M))  # mean density
        
        outFile.write(struct.pack('i',0)) # space gp ?

        # header info; blank for us
        for x in range(24,257): outFile.write(struct.pack('i',0))

        # assume constant data in file
        norm = 255.
        bps = tag_types["BitsPerSample"]
        max_bits = self._IFDs._store[0][bps]._val
        norm = float(2**max_bits-1.)

        # read/write data
        for x in data:
            outFile.write(struct.pack('f', x/norm))

        outFile.close()  


class TIFFHeader:
    def __init__(self,fileHandle):
        self._endian, self._e_flg = self.get_endian(fileHandle)
        self._magic_number = self.get_magic_number(fileHandle)
        self._first_IFD = self.get_first_IFDOffset(fileHandle)
        
    def __str__(self):
        s  = "\n"
        s += "  [TIFF Header]\n"
        s += "  Endian        : %s\n" % self._endian
        s += "  Endian Flag   : %s\n" % self._e_flg
        s += "  Magic Number  : %s\n" % self._magic_number
        s += "  IFD[0] Offset : %s" % self._first_IFD
        return s

    # for struct.unpackx
    def _1byte(self,n=1):
        return self._e_flg + "C"*n
    def _2byte(self,n=1):
        return self._e_flg + "H"*n
    def _4byte(self,n=1):
        return self._e_flg + "I"*n
    def _IFDbyte(self,n=1):
        return self._e_flg + "HHII"*n

    def get_endian(self,fileHandle):
        f = fileHandle
        f.seek(0)
        code = struct.unpack("cc", f.read(2))
        code = "".join(code)
        flg = ""
        if code=="II":
            flg = "<"
        elif code=="MM":
            flg = ">"
        else:
            print "This file is not a valid TIFF (bad endian tag)."
            flg = "?"
        return code,flg

    def get_magic_number(self,fileHandle):
        f = fileHandle
        f.seek(2)
        # read the magic number
        idx = 0
        if self._endian == "II": idx = 1
        _42 = struct.unpack(self._2byte(), f.read(2))[0]
        if _42!=42:
            print "Error: Improperly formatted TIFF file (bad magic number)."
            return None
        return _42

    def get_first_IFDOffset(self,fileHandle):
        f=fileHandle
        f.seek(4)
        off = struct.unpack(self._4byte(), f.read(4))[0]
        return off


class IFD_Store:
    def __init__(self,fileHandle,fileHeader):
        self._f = fileHandle
        self._header = fileHeader
        self._first_offset = self._header._first_IFD
        # array of IFDs
        self._store = self.read_ifds()

    def __str__(self):
        s  = "\n"
        s += "  [IFD_Store]\n"
        s += "  First Offset : %d\n" % self._first_offset
        s += "  Store :\n"
        for st in self._store:
            s += "\n\n  IFD Store =>\n"
            for k in st:
                s += "    %s => %s" % (k,st[k])
        return s

    def read_ifds(self):

        f = self._f

        pos = self._first_offset

        ifds = []

        while pos!=0:

            ifds.append({})
            
            # get number of IFD_Entries

            f.seek(pos)
        
            # read number of IFDs
            num_entries = struct.unpack(self._header._2byte(), f.read(2))
            num_entries = num_entries[0]

            # read all into IFD[x]
            for x in range(num_entries):
                # pull the current record from file
                curParams = struct.unpack(self._header._IFDbyte(), f.read(12))

                # format the data if necessary
                if (self._header._e_flg==">" and sys.byteorder=="little") and \
                        curParams[0] not in (270,50838,50839):
                    scale = 32 - (8 * field_types[curParams[1]])
                    scaledData = curParams[3] >> scale
                else:
                    scaledData = curParams[3]


                ifds[-1][curParams[0]] = IFDEntry(curParams[0], curParams[1], curParams[2], scaledData)

            # read next offset
            pos = struct.unpack(self._header._4byte(), f.read(4))[0]

        return ifds

class IFDEntry:
    def __init__(self,theTag=None,theType=None,theCount=None,theValue=None):
        self._tag = theTag
        self._type = theType
        self._count = theCount
        self._val = theValue

    def __str__(self):
        s  = "\n"
        s += "  [IFD_Entry]\n"
        s += " Tag    : %s\n" % self._tag
        s += " Type   : %s\n" % self._type
        s += " Count  : %s\n" % self._count
        s += " Val/Off: %s\n" % self._val
        return s

if __name__=="__main__":

    """
    Running,

    python tiff2ccp4.py

    will convert all TIFF stacks in the current directory to 
    CCP4 maps.
    """
    import argparse
    from string import split

    parser = argparse.ArgumentParser(description="Convert a TIFF Stack to a CCP4 Map")
    parser.add_argument("input", type=str, help="input file name (usually .tif, .tiff)")
    parser.add_argument("output", type=str, help="output file name (usually .ccp4)")
    parser.add_argument('-x',"--x", help="length of x-dimension",default=-1.,type=float)
    parser.add_argument('-y',"--y", help="length of y-dimension",default=-1.,type=float)
    parser.add_argument('-z',"--z", help="length of z-dimension",default=-1.,type=float)
    
    args = parser.parse_args()

    s = TIFFStack(args.input)
    s.asCCP4(args.output, args.x, args.y, args.z)