Source code for columns

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#    Copyright 2009 Glencoe Software, Inc. All rights reserved.
#    Use is subject to license terms supplied in LICENSE.txt
#

"""
Concrete implementations of the omero.grid.Column
type hierarchy which know how to convert themselves
to PyTables types.
"""

import omero
import Ice
import IceImport
IceImport.load("omero_Tables_ice")
python_sys = __import__("sys")  # Python sys

# Optional dependencies: numpy and PyTables.
#   has_pytables  - True when both imports succeeded.
#   has_pytables3 - True when the imported PyTables exposes ``open_file``;
#                   this module then uses the snake_case API
#                   (read_coordinates, create_vlarray) instead of the
#                   camelCase one.
# Both flags are always defined, so code that reads has_pytables3 does not
# fail with a NameError when the import is unavailable.
try:
    import numpy
    tables = __import__("tables")  # Pytables
except ImportError:
    has_pytables = False
    has_pytables3 = False
else:
    has_pytables = True
    has_pytables3 = hasattr(tables, "open_file")



[docs]
def columns2definition(cols):
    """
    Takes the given columns and converts them into a map
    from names to tables.* column descriptors.

    Each column is asked for its descriptor with ``pos`` set to its index
    in ``cols``; the result is stored under the column's ``name``.

    :param cols: iterable of column objects exposing ``name`` and
        ``descriptor(pos=...)``; any iterable is accepted, not only lists
    :return: dict mapping each column name to its descriptor
    :raises omero.ApiUsageException: if two columns share the same name
    """
    definition = {}
    for pos, column in enumerate(cols):
        # The descriptor is requested before the duplicate check so that
        # any error it raises is reported first.
        instance = column.descriptor(pos=pos)
        if column.name in definition:
            raise omero.ApiUsageException(
                None, None, "Duplicate column name: %s" % column.name)
        definition[column.name] = instance
        # Descriptions are handled separately
    return definition




[docs]
class AbstractColumn(object):
    """
    Base logic for all columns.

    Subclasses must provide ``descriptor(pos)``. The constructor calls
    ``self.descriptor(None)`` and records, for every sub-column, its
    record-array type (``self._types``) and its name suffix
    (``self._subnames``); ``dtypes()`` later combines these with
    ``self.name``.
    """

    def __init__(self):
        # Note: don't rely on any properties such as self.name being set if
        # this has been called through Ice
        d = self.descriptor(None)
        if isinstance(d, tables.IsDescription):
            # Compound description: one entry per nested field, ordered by
            # the field's own position.
            cols = d.columns
            try:
                # "_v_pos" is position metadata, not a nested field
                del cols["_v_pos"]
            except KeyError:
                pass
            self._types = [None] * len(cols)
            self._subnames = [None] * len(cols)
            for k, v in cols.items():
                self._types[v._v_pos] = v.recarrtype
                self._subnames[v._v_pos] = "/" + k
        else:
            self._types = [d.recarrtype]
            self._subnames = [""]

    def settable(self, tbl):
        """
        Called by tables.py when first initializing columns.
        Can be used to complete further initialization.

        :param tbl: the table this column belongs to; it is stored on the
            instance
        """
        self.__table = tbl

    def append(self, tbl):
        """
        Hook called by tables.py with the table being written to.
        By default, does nothing; subclasses which store data outside of
        the main table (e.g. MaskColumnI) override it.
        """
        pass

    def readCoordinates(self, tbl, rowNumbers):
        """
        Load this column's values for the given rows.

        :param tbl: table to read from
        :param rowNumbers: row indices to read; ``None`` or an empty
            sequence reads every row
        """
        # len() rather than truthiness: rowNumbers may be an array type
        # whose truth value is ambiguous.
        if rowNumbers is None or len(rowNumbers) == 0:
            # Restrict the read to this column's field, as the other
            # branches do: fromrows() is told below that it receives the
            # field only.
            rows = tbl.read(field=self.name)
        else:
            if has_pytables3:
                rows = tbl.read_coordinates(rowNumbers, field=self.name)
            else:
                rows = tbl.readCoordinates(rowNumbers, field=self.name)
        self.fromrows(rows, field_only=True)

    def read(self, tbl, start, stop):
        """
        Load this column's values from ``tbl.read(start, stop)``,
        restricted to this column's field.
        """
        rows = tbl.read(start, stop, field=self.name)
        self.fromrows(rows, field_only=True)

    def getsize(self):
        """
        Return the number of values held, or None if ``values`` is None.

        Any method which does not use the "values" field
        will need to override this method.
        """
        if self.values is None:
            return None
        return len(self.values)

    def setsize(self, size):
        """
        Reset ``values`` to a list of ``size`` None entries, or to None if
        ``size`` is None.

        Any method which does not use the "values" field
        will need to override this method.
        """
        if size is None:
            self.values = None
        else:
            self.values = [None] * size

    def arrays(self):
        """
        Return the column data as a list with one entry per sub-column.

        Any method which does not use the "values" field
        will need to override this method.
        """
        return [self.values]

    def dtypes(self):
        """
        Return a list of (name, type) pairs, one per sub-column.

        Override this method if descriptor() doesn't return the correct data
        type/size at initialisation- this is mostly a problem for array types
        """
        names = [self.name + sn for sn in self._subnames]
        return list(zip(names, self._types))

    def fromrows(self, rows, field_only=False):
        """
        Populate ``values`` from rows read out of the table.

        Any method which does not use the "values" field
        will need to override this method.

        :param rows: the rows read; must support ``tolist()``
        :param field_only: True if ``rows`` already contains only this
            column's field, False if the field must first be selected by
            ``self.name``
        """
        if not field_only:
            rows = rows[self.name]

        # WORKAROUND:
        # http://www.zeroc.com/forums/bug-reports/4165-icepy-can-not-handle-buffers-longs-i64.html#post20468
        # see ticket:1951 and #2160
        # The conversion to a plain list was meant to be limited to
        # string and 64-bit integer data, but that check is disabled
        # until Ice 3.4, so every column is converted.
        self.values = rows.tolist()





[docs]
class FileColumnI(AbstractColumn, omero.grid.FileColumn):
    """
    omero.grid.FileColumn implementation, described to PyTables as a
    64-bit integer column.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the omero.grid base with the given arguments, then
        # let AbstractColumn derive the record types from descriptor().
        omero.grid.FileColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Int64Col for this column at position ``pos``.
        """
        return tables.Int64Col(pos=pos)





[docs]
class ImageColumnI(AbstractColumn, omero.grid.ImageColumn):
    """
    omero.grid.ImageColumn implementation, described to PyTables as a
    64-bit integer column.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the omero.grid base with the given arguments, then
        # let AbstractColumn derive the record types from descriptor().
        omero.grid.ImageColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Int64Col for this column at position ``pos``.
        """
        return tables.Int64Col(pos=pos)





[docs]
class WellColumnI(AbstractColumn, omero.grid.WellColumn):
    """
    omero.grid.WellColumn implementation, described to PyTables as a
    64-bit integer column.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the omero.grid base with the given arguments, then
        # let AbstractColumn derive the record types from descriptor().
        omero.grid.WellColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Int64Col for this column at position ``pos``.
        """
        return tables.Int64Col(pos=pos)





[docs]
class PlateColumnI(AbstractColumn, omero.grid.PlateColumn):
    """
    omero.grid.PlateColumn implementation, described to PyTables as a
    64-bit integer column.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the omero.grid base with the given arguments, then
        # let AbstractColumn derive the record types from descriptor().
        omero.grid.PlateColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Int64Col for this column at position ``pos``.
        """
        return tables.Int64Col(pos=pos)




[docs]
class DatasetColumnI(AbstractColumn, omero.grid.DatasetColumn):
    """
    omero.grid.DatasetColumn implementation, described to PyTables as a
    64-bit integer column.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the omero.grid base with the given arguments, then
        # let AbstractColumn derive the record types from descriptor().
        omero.grid.DatasetColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Int64Col for this column at position ``pos``.
        """
        return tables.Int64Col(pos=pos)





[docs]
class RoiColumnI(AbstractColumn, omero.grid.RoiColumn):
    """
    omero.grid.RoiColumn implementation, described to PyTables as a
    64-bit integer column.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the omero.grid base with the given arguments, then
        # let AbstractColumn derive the record types from descriptor().
        omero.grid.RoiColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Int64Col for this column at position ``pos``.
        """
        return tables.Int64Col(pos=pos)





[docs]
class BoolColumnI(AbstractColumn, omero.grid.BoolColumn):
    """
    omero.grid.BoolColumn implementation, described to PyTables as a
    boolean column.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the omero.grid base with the given arguments, then
        # let AbstractColumn derive the record types from descriptor().
        omero.grid.BoolColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.BoolCol for this column at position ``pos``.
        """
        return tables.BoolCol(pos=pos)





[docs]
class DoubleColumnI(AbstractColumn, omero.grid.DoubleColumn):
    """
    omero.grid.DoubleColumn implementation, described to PyTables as a
    64-bit float column.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the omero.grid base with the given arguments, then
        # let AbstractColumn derive the record types from descriptor().
        omero.grid.DoubleColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Float64Col for this column at position ``pos``.
        """
        return tables.Float64Col(pos=pos)





[docs]
class LongColumnI(AbstractColumn, omero.grid.LongColumn):
    """
    omero.grid.LongColumn implementation, described to PyTables as a
    64-bit integer column.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the omero.grid base with the given arguments, then
        # let AbstractColumn derive the record types from descriptor().
        omero.grid.LongColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Int64Col for this column at position ``pos``.
        """
        return tables.Int64Col(pos=pos)





[docs]
class StringColumnI(AbstractColumn, omero.grid.StringColumn):
    """
    StringColumns are actually numpy dtype 'S': "zero-terminated bytes (not recommended)"
    https://github.com/ome/omero-py/blob/v5.6.dev8/src/omero/columns.py#L269
    https://docs.scipy.org/doc/numpy-1.15.1/reference/arrays.dtypes.html#specifying-and-constructing-data-types
    In any case HDF5 doesn't seem to properly support unicode,
    and numexpr doesn't even pretend to support it:
    - https://github.com/PyTables/PyTables/issues/499
    - https://github.com/pydata/numexpr/issues/142
    - https://github.com/pydata/numexpr/issues/150
    - https://github.com/pydata/numexpr/issues/263
    - https://github.com/pydata/numexpr/blob/v2.7.0/numexpr/necompiler.py#L340-L341

    > import numexpr
    > a = "£"
    > numexpr.evaluate('a=="£"')
    ValueError: unknown type str32
    > b = "£".encode()
    > numexpr.evaluate('b=="£"')
    UnicodeEncodeError: 'ascii' codec can't encode character '\xa3' in position 0: ordinal not in range(128)

    You should be able to store/load unicode data but you can't use
    unicode in a where condition.

    Values are encoded to bytes on the way into the table (arrays) and
    decoded from UTF-8 on the way out (fromrows).
    """

    def __init__(self, name="Unknown", *args):
        omero.grid.StringColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def settable(self, tbl):
        """
        Store the table and take the column width (in bytes) from the
        item size of the table's column of the same name.
        """
        AbstractColumn.settable(self, tbl)
        table_column = getattr(tbl.cols, self.name)
        self.size = table_column.dtype.itemsize

    def arrays(self):
        """
        Encode every value and check that none is longer than the
        initialised column width.
        This will always return bytes.

        :raises omero.ValidationException: if an encoded value is longer
            than ``self.size`` bytes
        """
        encoded = [value.encode() for value in self.values]
        if any(len(item) > self.size for item in encoded):
            raise omero.ValidationException(
                None, None,
                "Maximum string (byte) length in column %s is %d" %
                (self.name, self.size))
        return [encoded]

    def dtypes(self):
        """
        Overriding to correct for size: returns a single
        (name, "S", size) entry.
        (Testing suggests this may not be necessary, the size appears to be
        correctly set at initialisation)
        """
        entry = (self.name, "S", self.size)
        return [entry]

    def descriptor(self, pos):
        """
        Return the tables.StringCol for this column at position ``pos``.

        :raises omero.ApiUsageException: if ``pos`` is not None and
            ``self.size`` is less than 1
        """
        if pos is None:
            # During initialization, size might be zero
            # to prevent exceptions we temporarily assume size 1
            itemsize = 1
        else:
            if self.size < 1:
                raise omero.ApiUsageException(
                    None, None, "String size must be > 0 (Column: %s)"
                    % self.name)
            itemsize = self.size
        return tables.StringCol(pos=pos, itemsize=itemsize)

    def fromrows(self, rows, field_only=False):
        """
        Load the values as AbstractColumn does, then decode every bytes
        entry from UTF-8 in place.
        """
        AbstractColumn.fromrows(self, rows, field_only=field_only)
        for index, item in enumerate(self.values):
            if not isinstance(item, bytes):
                continue
            self.values[index] = item.decode("utf-8")





[docs]
class AbstractArrayColumn(AbstractColumn):
    """
    Additional base logic for array columns: every value is an array of
    ``self.size`` elements.
    """

    def __init__(self):
        AbstractColumn.__init__(self)

    def settable(self, tbl):
        """
        Store the table and read the array length of this column from the
        table's description.
        """
        AbstractColumn.settable(self, tbl)

        # Pytables 2.1 has the array size in Column.dtype.shape
        # shape = getattr(tbl.cols, self.name).dtype.shape
        # self.size = shape[0]

        # Pytables 2.2 and later replaced this with Column.shape
        # shape = getattr(tbl.cols, self.name).shape
        # assert(len(shape) == 2)
        # self.size = shape[1]

        # http://www.pytables.org/trac-bck/ticket/231
        # http://www.pytables.org/trac-bck/ticket/232
        # TODO: Clean this up

        # Taken from http://www.pytables.org/trac-bck/changeset/4176
        table_column = getattr(tbl.cols, self.name)
        column_dtype = table_column.descr._v_dtypes[table_column.name]
        self.size = column_dtype.shape[0]

    def arrays(self):
        """
        Validate the length of every value and return the column data.

        Arrays of size 1 have to be converted to scalars, otherwise the
        column-to-row conversion in HdfStorage.append() will fail.
        This is messy, but I can't think of a better way.

        :raises omero.ValidationException: if any value's length differs
            from ``self.size``
        """
        if any(len(item) != self.size for item in self.values):
            raise omero.ValidationException(
                None, None, "Column %s requires arrays of length %d" %
                (self.name, self.size))

        if self.size != 1:
            return [self.values]
        scalars = [item[0] for item in self.values]
        return [scalars]

    def dtypes(self):
        """
        Overriding to correct for size: returns a single
        (name, type, size) entry.
        """
        entry = (self.name, self._types[0], self.size)
        return [entry]





[docs]
class FloatArrayColumnI(AbstractArrayColumn, omero.grid.FloatArrayColumn):
    """
    omero.grid.FloatArrayColumn implementation, described to PyTables as
    a 32-bit float column whose shape is the array length.
    """

    def __init__(self, name="Unknown", *args):
        omero.grid.FloatArrayColumn.__init__(self, name, *args)
        AbstractArrayColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Float32Col for this column at position ``pos``.

        :raises omero.ApiUsageException: if ``pos`` is not None and
            ``self.size`` is less than 1
        """
        # During initialization (pos is None), size might be zero, so it
        # is neither checked nor used.
        if pos is not None and self.size < 1:
            raise omero.ApiUsageException(
                None, None, "Array length must be > 0 (Column: %s)"
                % self.name)
        if pos is None:
            return tables.Float32Col(pos=pos)
        return tables.Float32Col(pos=pos, shape=self.size)





[docs]
class DoubleArrayColumnI(AbstractArrayColumn, omero.grid.DoubleArrayColumn):
    """
    omero.grid.DoubleArrayColumn implementation, described to PyTables as
    a 64-bit float column whose shape is the array length.
    """

    def __init__(self, name="Unknown", *args):
        omero.grid.DoubleArrayColumn.__init__(self, name, *args)
        AbstractArrayColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Float64Col for this column at position ``pos``.

        :raises omero.ApiUsageException: if ``pos`` is not None and
            ``self.size`` is less than 1
        """
        # During initialization (pos is None), size might be zero, so it
        # is neither checked nor used.
        if pos is not None and self.size < 1:
            raise omero.ApiUsageException(
                None, None, "Array length must be > 0 (Column: %s)"
                % self.name)
        if pos is None:
            return tables.Float64Col(pos=pos)
        return tables.Float64Col(pos=pos, shape=self.size)





[docs]
class LongArrayColumnI(AbstractArrayColumn, omero.grid.LongArrayColumn):
    """
    omero.grid.LongArrayColumn implementation, described to PyTables as
    a 64-bit integer column whose shape is the array length.
    """

    def __init__(self, name="Unknown", *args):
        omero.grid.LongArrayColumn.__init__(self, name, *args)
        AbstractArrayColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return the tables.Int64Col for this column at position ``pos``.

        :raises omero.ApiUsageException: if ``pos`` is not None and
            ``self.size`` is less than 1
        """
        # During initialization (pos is None), size might be zero, so it
        # is neither checked nor used.
        if pos is not None and self.size < 1:
            raise omero.ApiUsageException(
                None, None, "Array length must be > 0 (Column: %s)"
                % self.name)
        if pos is None:
            return tables.Int64Col(pos=pos)
        return tables.Int64Col(pos=pos, shape=self.size)





[docs]
class MaskColumnI(AbstractColumn, omero.grid.MaskColumn):
    """
    omero.grid.MaskColumn implementation.

    The fields imageId, theZ, theT, x, y, w and h are stored in the main
    table as the nested fields i, z, t, x, y, w, h. The mask bytes are
    stored separately, in a variable-length array named
    "<table name>_masks" next to the table (see _getmasks).
    """

    def __init__(self, name="Unknown", *args):
        omero.grid.MaskColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def __noneorsame(self, a, b):
        """
        Accept two values that are both None or that have equal length.

        :raises omero.ValidationException: in every other case, including
            when only one of the two is None
        """
        if a is None and b is None:
            return
        if a is not None and b is not None and len(a) == len(b):
            return
        raise omero.ValidationException(None, None, "Columns don't match")

    def __sanitycheck(self):
        """
        Check that every field, including bytes, is consistent with
        imageId: all None, or all of the same length.
        """
        for other in (self.theZ, self.theT, self.x, self.y,
                      self.w, self.h, self.bytes):
            self.__noneorsame(self.imageId, other)

    def descriptor(self, pos):
        """
        Return a tables.IsDescription instance with the nested fields
        i, z, t, x, y, w, h, positioned at ``pos`` in the parent table.
        """
        class MaskDescription(tables.IsDescription):
            _v_pos = pos
            i = tables.Int64Col(pos=0)
            z = tables.Int32Col(pos=1)
            t = tables.Int32Col(pos=2)
            x = tables.Float64Col(pos=3)
            y = tables.Float64Col(pos=4)
            w = tables.Float64Col(pos=5)
            h = tables.Float64Col(pos=6)
        return MaskDescription()

    def arrays(self):
        """
        Return the seven table fields in descriptor order, after checking
        their consistency. The mask bytes are not included; they are
        written by append().
        """
        self.__sanitycheck()
        return [
            self.imageId,
            self.theZ,
            self.theT,
            self.x,
            self.y,
            self.w,
            self.h,
        ]

    def getsize(self):
        """
        Return the number of rows held (the length of imageId), or None
        if imageId is None.
        """
        self.__sanitycheck()
        if self.imageId is None:
            return None
        return len(self.imageId)

    def setsize(self, size):
        """
        Reset the seven table fields: to None if ``size`` is None,
        otherwise to zero-filled numpy arrays of length ``size`` whose
        types come from the matching entries of dtypes().
        """
        if size is None:
            self.imageId = None
            self.theZ = None
            self.theT = None
            self.x = None
            self.y = None
            self.w = None
            self.h = None
        else:
            # dtypes() yields (name, type) pairs; only the type is a
            # valid numpy dtype specification.
            kinds = [entry[1] for entry in self.dtypes()]
            self.imageId = numpy.zeros(size, dtype=kinds[0])
            self.theZ = numpy.zeros(size, dtype=kinds[1])
            self.theT = numpy.zeros(size, dtype=kinds[2])
            self.x = numpy.zeros(size, dtype=kinds[3])
            self.y = numpy.zeros(size, dtype=kinds[4])
            self.w = numpy.zeros(size, dtype=kinds[5])
            self.h = numpy.zeros(size, dtype=kinds[6])

    def readCoordinates(self, tbl, rowNumbers):
        """
        Load the table fields and the mask bytes for the given rows;
        None or an empty ``rowNumbers`` selects every mask row.
        """
        self.__sanitycheck()
        # calls fromrows
        AbstractColumn.readCoordinates(self, tbl, rowNumbers)
        masks = self._getmasks(tbl)
        if rowNumbers is None or len(rowNumbers) == 0:
            rowNumbers = list(range(masks.nrows))
        self.getbytes(masks, rowNumbers)

    def read(self, tbl, start, stop):
        """
        Load the table fields and the mask bytes for rows
        ``start`` (inclusive) to ``stop`` (exclusive).
        """
        self.__sanitycheck()
        # calls fromrows
        AbstractColumn.read(self, tbl, start, stop)
        masks = self._getmasks(tbl)
        rowNumbers = list(range(start, stop))
        self.getbytes(masks, rowNumbers)

    def getbytes(self, masks, rowNumbers):
        """
        Set ``self.bytes`` to the mask data (as lists) found in ``masks``
        at each index of ``rowNumbers``.
        """
        self.bytes = [masks[idx].tolist() for idx in rowNumbers]

    def fromrows(self, rows, field_only=False):
        """
        Populate the seven table fields from rows read out of the table.

        :param rows: the rows read
        :param field_only: True if ``rows`` already contains only this
            column's field, False if it must first be selected by
            ``self.name``
        """
        if not field_only:
            rows = rows[self.name]

        # WORKAROUND:
        # http://www.zeroc.com/forums/bug-reports/4165-icepy-can-not-handle-buffers-longs-i64.html#post20468
        self.imageId = rows["i"].tolist()
        self.theZ = rows["z"].tolist()  # ticket:1665
        self.theT = rows["t"].tolist()  # ticket:1665
        self.x = rows["x"]
        self.y = rows["y"]
        self.w = rows["w"]
        self.h = rows["h"]

    def append(self, tbl):
        """
        Append every entry of ``self.bytes`` to the masks array that
        accompanies ``tbl``, as an array of unsigned 8-bit values.
        """
        self.__sanitycheck()
        masks = self._getmasks(tbl)
        for x in self.bytes:
            if isinstance(x, list):
                # This occurs primarily in testing.
                masks.append(numpy.array(x, dtype=numpy.uint8))
            else:
                # frombuffer replaces numpy.fromstring, whose binary mode
                # is deprecated.
                masks.append(numpy.frombuffer(x, dtype=numpy.uint8))

    def _getmasks(self, tbl):
        """
        Return the "<table name>_masks" node from the table's parent,
        creating it as a variable-length UInt8 array if it does not exist.
        """
        f = tbl._v_file
        p = tbl._v_parent
        node_name = "%s_masks" % tbl._v_name
        # http://doc.zeroc.com/display/Ice/Basic+Types
        # Ice::Byte can be -128 to 127 OR 0 to 255, but using UInt8 for the
        # moment
        try:
            masks = getattr(p, node_name)
        except tables.NoSuchNodeError:
            if has_pytables3:
                masks = f.create_vlarray(p, node_name, tables.UInt8Atom())
            else:
                masks = f.createVLArray(p, node_name, tables.UInt8Atom())
        return masks


# Helpers
# ========================================================================


# Conversion classes are for omero.model <--> ome.model only (no python)

[docs]
class ObjectFactory(Ice.ObjectFactory):
    """
    Ice object factory which creates instances by calling ``f`` and
    registers itself under the static Ice id of ``cls``.
    """

    def __init__(self, cls, f):
        """
        :param cls: class whose ``ice_staticId()`` provides the id used
            by register()
        :param f: zero-argument callable returning a new instance
        """
        # Best effort: if the id cannot be obtained, self.id is simply
        # left unset.
        try:
            self.id = cls.ice_staticId()
        except Exception:
            pass

        self.f = f

    def create(self, string):
        """
        Return a new instance from the wrapped callable; the ``string``
        argument is not used.
        """
        return self.f()

    def destroy(self):
        """
        Nothing to clean up.
        """
        pass

    def register(self, ic):
        """
        Add this factory to the communicator ``ic`` under ``self.id``.
        """
        ic.addObjectFactory(self, self.id)




# Object factories
# =========================================================================

# One factory per concrete column class, keyed by the class. Each class is
# also its own zero-argument constructor, so it is passed as the factory
# callable as well.
ObjectFactories = {
    column_class: ObjectFactory(column_class, column_class)
    for column_class in (
        FileColumnI,
        ImageColumnI,
        RoiColumnI,
        WellColumnI,
        PlateColumnI,
        DatasetColumnI,
        BoolColumnI,
        DoubleColumnI,
        LongColumnI,
        StringColumnI,
        FloatArrayColumnI,
        DoubleArrayColumnI,
        LongArrayColumnI,
        MaskColumnI,
    )
}
Source code for columns

OMERO.py

Navigation

Related Topics