Source code for columns

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#    Copyright 2009 Glencoe Software, Inc. All rights reserved.
#    Use is subject to license terms supplied in LICENSE.txt
#

"""
Concrete implementations of the omero.grid.Column
type hierarchy which know how to convert themselves
to PyTables types.
"""

import omero
import Ice
import IceImport
IceImport.load("omero_Tables_ice")
python_sys = __import__("sys")  # Python sys

try:
    import numpy
    tables = __import__("tables")  # Pytables
    has_pytables = True
    if hasattr(tables, "open_file"):
        has_pytables3 = True
    else:
        has_pytables3 = False
except ImportError:
    has_pytables = False


def columns2definition(cols):
    """
    Build a PyTables table definition from a list of columns.

    Each column's ``descriptor(pos=...)`` is called with the column's index
    in ``cols`` and the result is stored under the column's ``name``.

    :param cols: sequence of column objects exposing ``name`` and
        ``descriptor(pos)``
    :return: dict mapping column name to its descriptor
    :raises omero.ApiUsageException: if two columns share the same name
    """
    definition = {}
    for position, column in enumerate(cols):
        instance = column.descriptor(pos=position)
        if column.name in definition:
            raise omero.ApiUsageException(
                None, None, "Duplicate column name: %s" % column.name)
        definition[column.name] = instance
        # Descriptions are handled separately
    return definition
class AbstractColumn(object):
    """
    Base logic for all columns.

    Subclasses are expected to provide ``descriptor(pos)``, which returns
    either a single PyTables column descriptor or a ``tables.IsDescription``
    instance for compound columns. The default implementations of the
    size/array/row helpers operate on the ``values`` attribute.
    """

    def __init__(self):
        # Note: don't rely on any properties such as self.name being set if
        # this has been called through Ice
        d = self.descriptor(None)
        if isinstance(d, tables.IsDescription):
            cols = d.columns
            # Remove the "_v_pos" entry if there is one, so that only the
            # sub-columns are left.
            cols.pop("_v_pos", None)
            self._types = [None] * len(cols)
            self._subnames = [None] * len(cols)
            # Order the sub-columns by their declared position.
            for k, v in cols.items():
                self._types[v._v_pos] = v.recarrtype
                self._subnames[v._v_pos] = "/" + k
        else:
            self._types = [d.recarrtype]
            self._subnames = [""]

    def settable(self, tbl):
        """
        Called by tables.py when first initializing columns.
        Can be used to complete further initialization.
        """
        self.__table = tbl

    def append(self, tbl):
        """
        Called by tables.py to give columns. By default, does nothing.
        """
        pass

    def readCoordinates(self, tbl, rowNumbers):
        """
        Load this column's values for the given rows of ``tbl``.

        If ``rowNumbers`` is None or empty, every row is read. In both
        cases only this column's field is requested, because the result is
        handed to ``fromrows`` with ``field_only=True``.
        """
        if rowNumbers is None or len(rowNumbers) == 0:
            # Restrict the read to this column; fromrows() below expects
            # the field data, not whole rows.
            rows = tbl.read(field=self.name)
        elif has_pytables3:
            rows = tbl.read_coordinates(rowNumbers, field=self.name)
        else:
            rows = tbl.readCoordinates(rowNumbers, field=self.name)
        self.fromrows(rows, field_only=True)

    def read(self, tbl, start, stop):
        """
        Load this column's values for rows ``start`` to ``stop`` of ``tbl``.
        """
        rows = tbl.read(start, stop, field=self.name)
        self.fromrows(rows, field_only=True)

    def getsize(self):
        """
        Any method which does not use the "values" field
        will need to override this method.

        Returns None if ``values`` is None, otherwise ``len(values)``.
        """
        if self.values is None:
            return None
        return len(self.values)

    def setsize(self, size):
        """
        Any method which does not use the "values" field
        will need to override this method.

        Sets ``values`` to None if ``size`` is None, otherwise to a list of
        ``size`` None entries.
        """
        if size is None:
            self.values = None
        else:
            self.values = [None] * size

    def arrays(self):
        """
        Any method which does not use the "values" field
        will need to override this method.

        Returns a one-element list containing ``values``.
        """
        return [self.values]

    def dtypes(self):
        """
        Override this method if descriptor() doesn't return the correct data
        type/size at initialisation- this is mostly a problem for array types

        Returns a list of ``(name, type)`` pairs, where each name is the
        column name followed by the sub-column suffix.
        """
        names = [self.name + sn for sn in self._subnames]
        return list(zip(names, self._types))

    def fromrows(self, rows, field_only=False):
        """
        Any method which does not use the "values" field
        will need to override this method.

        Stores ``rows`` in ``values`` as a plain Python list. Unless
        ``field_only`` is true, ``rows`` is first indexed by this column's
        name.
        """
        if not field_only:
            rows = rows[self.name]
        # WORKAROUND:
        # http://www.zeroc.com/forums/bug-reports/4165-icepy-can-not-handle-buffers-longs-i64.html#post20468
        # see ticket:1951 and #2160
        # d = self.recarrtypes[0][1]
        # Disabled until Ice 3.4
        # if isinstance(d, str):
        #     d = numpy.dtype(d)
        # if d.kind == "S" or (d.kind == "i" and d.itemsize == "8"):
        self.values = rows.tolist()
class FileColumnI(AbstractColumn, omero.grid.FileColumn):
    """omero.grid.FileColumn implementation described by a tables.Int64Col."""

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.FileColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """Return the PyTables descriptor for this column at ``pos``."""
        column = tables.Int64Col(pos=pos)
        return column
class ImageColumnI(AbstractColumn, omero.grid.ImageColumn):
    """omero.grid.ImageColumn implementation described by a tables.Int64Col."""

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.ImageColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """Return the PyTables descriptor for this column at ``pos``."""
        column = tables.Int64Col(pos=pos)
        return column
class WellColumnI(AbstractColumn, omero.grid.WellColumn):
    """omero.grid.WellColumn implementation described by a tables.Int64Col."""

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.WellColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """Return the PyTables descriptor for this column at ``pos``."""
        column = tables.Int64Col(pos=pos)
        return column
class PlateColumnI(AbstractColumn, omero.grid.PlateColumn):
    """omero.grid.PlateColumn implementation described by a tables.Int64Col."""

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.PlateColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """Return the PyTables descriptor for this column at ``pos``."""
        column = tables.Int64Col(pos=pos)
        return column
class DatasetColumnI(AbstractColumn, omero.grid.DatasetColumn):
    """
    omero.grid.DatasetColumn implementation described by a tables.Int64Col.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.DatasetColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """Return the PyTables descriptor for this column at ``pos``."""
        column = tables.Int64Col(pos=pos)
        return column
class RoiColumnI(AbstractColumn, omero.grid.RoiColumn):
    """omero.grid.RoiColumn implementation described by a tables.Int64Col."""

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.RoiColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """Return the PyTables descriptor for this column at ``pos``."""
        column = tables.Int64Col(pos=pos)
        return column
class BoolColumnI(AbstractColumn, omero.grid.BoolColumn):
    """omero.grid.BoolColumn implementation described by a tables.BoolCol."""

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.BoolColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """Return the PyTables descriptor for this column at ``pos``."""
        column = tables.BoolCol(pos=pos)
        return column
class DoubleColumnI(AbstractColumn, omero.grid.DoubleColumn):
    """
    omero.grid.DoubleColumn implementation described by a tables.Float64Col.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.DoubleColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """Return the PyTables descriptor for this column at ``pos``."""
        column = tables.Float64Col(pos=pos)
        return column
class LongColumnI(AbstractColumn, omero.grid.LongColumn):
    """omero.grid.LongColumn implementation described by a tables.Int64Col."""

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.LongColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def descriptor(self, pos):
        """Return the PyTables descriptor for this column at ``pos``."""
        column = tables.Int64Col(pos=pos)
        return column
class StringColumnI(AbstractColumn, omero.grid.StringColumn):
    """
    StringColumns are actually numpy dtype 'S':
    "zero-terminated bytes (not recommended)"
    https://github.com/ome/omero-py/blob/v5.6.dev8/src/omero/columns.py#L269
    https://docs.scipy.org/doc/numpy-1.15.1/reference/arrays.dtypes.html#specifying-and-constructing-data-types

    In any case HDF5 doesn't seem to properly support unicode,
    and numexpr doesn't even pretend to support it:
    - https://github.com/PyTables/PyTables/issues/499
    - https://github.com/pydata/numexpr/issues/142
    - https://github.com/pydata/numexpr/issues/150
    - https://github.com/pydata/numexpr/issues/263
    - https://github.com/pydata/numexpr/blob/v2.7.0/numexpr/necompiler.py#L340-L341

        > import numexpr
        > a = "£"
        > numexpr.evaluate('a=="£"')
        ValueError: unknown type str32
        > b = "£".encode()
        > numexpr.evaluate('b=="£"')
        UnicodeEncodeError: 'ascii' codec can't encode character '\xa3' in
        position 0: ordinal not in range(128)

    You should be able to store/load unicode data but you can't use unicode
    in a where condition
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.StringColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def settable(self, tbl):
        """
        Record the table and take ``size`` from the item size of the
        matching table column's dtype.
        """
        AbstractColumn.settable(self, tbl)
        table_column = getattr(tbl.cols, self.name)
        self.size = table_column.dtype.itemsize

    def arrays(self):
        """
        Encode every value to bytes and reject any whose encoded length
        exceeds the initialised column width.

        This will always return bytes.

        :raises omero.ValidationException: if an encoded value is longer
            than ``size``
        """
        encoded = [text.encode() for text in self.values]
        if any(len(raw) > self.size for raw in encoded):
            raise omero.ValidationException(
                None, None,
                "Maximum string (byte) length in column %s is %d" %
                (self.name, self.size))
        return [encoded]

    def dtypes(self):
        """
        Overriding to correct for size.
        (Testing suggests this may not be necessary, the size appears to be
        correctly set at initialisation)
        """
        entry = (self.name, "S", self.size)
        return [entry]

    def descriptor(self, pos):
        """
        Return a tables.StringCol for this column.

        :raises omero.ApiUsageException: if ``pos`` is not None and
            ``size`` is less than 1
        """
        # During initialization, size might be zero
        # to prevent exceptions we temporarily assume size 1
        if pos is None:
            return tables.StringCol(pos=pos, itemsize=1)
        if not self.size < 1:
            return tables.StringCol(pos=pos, itemsize=self.size)
        raise omero.ApiUsageException(
            None, None, "String size must be > 0 (Column: %s)" % self.name)

    def fromrows(self, rows, field_only=False):
        """
        Load values as in AbstractColumn.fromrows, then decode any bytes
        entries as UTF-8 in place.
        """
        AbstractColumn.fromrows(self, rows, field_only=field_only)
        loaded = self.values
        for idx in range(len(loaded)):
            item = loaded[idx]
            if isinstance(item, bytes):
                loaded[idx] = item.decode("utf-8")
class AbstractArrayColumn(AbstractColumn):
    """
    Additional base logic for array columns
    """

    def __init__(self):
        AbstractColumn.__init__(self)

    def settable(self, tbl):
        """
        Record the table and take ``size`` from the first dimension of the
        shape of this column's dtype in the table description.
        """
        AbstractColumn.settable(self, tbl)

        # Pytables 2.1 has the array size in Column.dtype.shape
        # shape = getattr(tbl.cols, self.name).dtype.shape
        # self.size = shape[0]

        # Pytables 2.2 and later replaced this with Column.shape
        # shape = getattr(tbl.cols, self.name).shape
        # assert(len(shape) == 2)
        # self.size = shape[1]

        # http://www.pytables.org/trac-bck/ticket/231
        # http://www.pytables.org/trac-bck/ticket/232
        # TODO: Clean this up

        # Taken from http://www.pytables.org/trac-bck/changeset/4176
        column = getattr(tbl.cols, self.name)
        column_dtype = column.descr._v_dtypes[column.name]
        self.size = column_dtype.shape[0]

    def arrays(self):
        """
        Arrays of size 1 have to be converted to scalars, otherwise the
        column-to-row conversion in HdfStorage.append() will fail.
        This is messy, but I can't think of a better way.

        :raises omero.ValidationException: if any value's length differs
            from ``size``
        """
        for entry in self.values:
            if len(entry) == self.size:
                continue
            raise omero.ValidationException(
                None, None, "Column %s requires arrays of length %d" %
                (self.name, self.size))

        if self.size != 1:
            return [self.values]
        scalars = [entry[0] for entry in self.values]
        return [scalars]

    def dtypes(self):
        """
        Overriding to correct for size.
        """
        entry = (self.name, self._types[0], self.size)
        return [entry]
class FloatArrayColumnI(AbstractArrayColumn, omero.grid.FloatArrayColumn):
    """
    omero.grid.FloatArrayColumn implementation described by a
    tables.Float32Col.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared array-column state.
        omero.grid.FloatArrayColumn.__init__(self, name, *args)
        AbstractArrayColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return a tables.Float32Col for this column.

        :raises omero.ApiUsageException: if ``pos`` is not None and
            ``size`` is less than 1
        """
        # During initialization, size might be zero
        if pos is None:
            return tables.Float32Col(pos=pos)
        if not self.size < 1:
            return tables.Float32Col(pos=pos, shape=self.size)
        raise omero.ApiUsageException(
            None, None, "Array length must be > 0 (Column: %s)" % self.name)
class DoubleArrayColumnI(AbstractArrayColumn, omero.grid.DoubleArrayColumn):
    """
    omero.grid.DoubleArrayColumn implementation described by a
    tables.Float64Col.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared array-column state.
        omero.grid.DoubleArrayColumn.__init__(self, name, *args)
        AbstractArrayColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return a tables.Float64Col for this column.

        :raises omero.ApiUsageException: if ``pos`` is not None and
            ``size`` is less than 1
        """
        # During initialization, size might be zero
        if pos is None:
            return tables.Float64Col(pos=pos)
        if not self.size < 1:
            return tables.Float64Col(pos=pos, shape=self.size)
        raise omero.ApiUsageException(
            None, None, "Array length must be > 0 (Column: %s)" % self.name)
class LongArrayColumnI(AbstractArrayColumn, omero.grid.LongArrayColumn):
    """
    omero.grid.LongArrayColumn implementation described by a
    tables.Int64Col.
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared array-column state.
        omero.grid.LongArrayColumn.__init__(self, name, *args)
        AbstractArrayColumn.__init__(self)

    def descriptor(self, pos):
        """
        Return a tables.Int64Col for this column.

        :raises omero.ApiUsageException: if ``pos`` is not None and
            ``size`` is less than 1
        """
        # During initialization, size might be zero
        if pos is None:
            return tables.Int64Col(pos=pos)
        if not self.size < 1:
            return tables.Int64Col(pos=pos, shape=self.size)
        raise omero.ApiUsageException(
            None, None, "Array length must be > 0 (Column: %s)" % self.name)
class MaskColumnI(AbstractColumn, omero.grid.MaskColumn):
    """
    omero.grid.MaskColumn implementation.

    The scalar fields (imageId, theZ, theT, x, y, w, h) are described by a
    compound ``tables.IsDescription``. The per-row ``bytes`` payloads are
    kept in a separate variable-length array node named ``<table>_masks``
    under the table's parent (see ``_getmasks``).
    """

    def __init__(self, name="Unknown", *args):
        # Initialise the Ice base first, then the shared column state.
        omero.grid.MaskColumn.__init__(self, name, *args)
        AbstractColumn.__init__(self)

    def __noneorsame(self, a, b):
        """
        Accept ``a`` and ``b`` only if both are None or both are sized with
        equal length.

        :raises omero.ValidationException: in every other case, including
            when exactly one of the two is None
        """
        if a is None and b is None:
            return
        if a is not None and b is not None and len(a) == len(b):
            return
        raise omero.ValidationException(None, None, "Columns don't match")

    def __sanitycheck(self):
        """Check every field against imageId with __noneorsame."""
        self.__noneorsame(self.imageId, self.theZ)
        self.__noneorsame(self.imageId, self.theT)
        self.__noneorsame(self.imageId, self.x)
        self.__noneorsame(self.imageId, self.y)
        self.__noneorsame(self.imageId, self.w)
        self.__noneorsame(self.imageId, self.h)
        self.__noneorsame(self.imageId, self.bytes)

    def descriptor(self, pos):
        """
        Return a compound description holding the seven scalar mask fields,
        positioned at ``pos`` within the table.
        """
        class MaskDescription(tables.IsDescription):
            _v_pos = pos
            i = tables.Int64Col(pos=0)
            z = tables.Int32Col(pos=1)
            t = tables.Int32Col(pos=2)
            x = tables.Float64Col(pos=3)
            y = tables.Float64Col(pos=4)
            w = tables.Float64Col(pos=5)
            h = tables.Float64Col(pos=6)
        return MaskDescription()

    def arrays(self):
        """
        Return the seven scalar field sequences in descriptor order, after
        validating that all fields are consistent.
        """
        self.__sanitycheck()
        return [
            self.imageId,
            self.theZ,
            self.theT,
            self.x,
            self.y,
            self.w,
            self.h,
        ]

    def getsize(self):
        """
        Return the number of rows held (``len(imageId)``), or None if
        ``imageId`` is None.
        """
        self.__sanitycheck()
        if self.imageId is None:
            return None
        return len(self.imageId)

    def setsize(self, size):
        """
        Reset the seven scalar fields: to None if ``size`` is None,
        otherwise to zero-filled numpy arrays of length ``size`` typed
        according to ``dtypes()``. ``bytes`` is left untouched.
        """
        if size is None:
            self.imageId = None
            self.theZ = None
            self.theT = None
            self.x = None
            self.y = None
            self.w = None
            self.h = None
        else:
            # dtypes() yields (name, type) pairs; numpy only wants the type.
            kinds = [kind for _name, kind in self.dtypes()]
            self.imageId = numpy.zeros(size, dtype=kinds[0])
            self.theZ = numpy.zeros(size, dtype=kinds[1])
            self.theT = numpy.zeros(size, dtype=kinds[2])
            self.x = numpy.zeros(size, dtype=kinds[3])
            self.y = numpy.zeros(size, dtype=kinds[4])
            self.w = numpy.zeros(size, dtype=kinds[5])
            self.h = numpy.zeros(size, dtype=kinds[6])

    def readCoordinates(self, tbl, rowNumbers):
        """
        Load the scalar fields and the mask bytes for the given rows.

        If ``rowNumbers`` is None or empty, the mask bytes of every row in
        the masks array are loaded.
        """
        self.__sanitycheck()
        # calls fromrows
        AbstractColumn.readCoordinates(self, tbl, rowNumbers)
        masks = self._getmasks(tbl)
        if rowNumbers is None or len(rowNumbers) == 0:
            rowNumbers = list(range(masks.nrows))
        self.getbytes(masks, rowNumbers)

    def read(self, tbl, start, stop):
        """
        Load the scalar fields and the mask bytes for rows ``start`` to
        ``stop``.
        """
        self.__sanitycheck()
        # calls fromrows
        AbstractColumn.read(self, tbl, start, stop)
        masks = self._getmasks(tbl)
        rowNumbers = list(range(start, stop))
        self.getbytes(masks, rowNumbers)

    def getbytes(self, masks, rowNumbers):
        """
        Set ``bytes`` to the list form of ``masks[idx]`` for each index in
        ``rowNumbers``.
        """
        self.bytes = [masks[idx].tolist() for idx in rowNumbers]

    def fromrows(self, rows, field_only=False):
        """
        Populate the scalar fields from ``rows``. Unless ``field_only`` is
        true, ``rows`` is first indexed by this column's name.
        """
        if not field_only:
            rows = rows[self.name]
        # WORKAROUND:
        # http://www.zeroc.com/forums/bug-reports/4165-icepy-can-not-handle-buffers-longs-i64.html#post20468
        self.imageId = rows["i"].tolist()
        self.theZ = rows["z"].tolist()  # ticket:1665
        self.theT = rows["t"].tolist()  # ticket:1665
        self.x = rows["x"]
        self.y = rows["y"]
        self.w = rows["w"]
        self.h = rows["h"]

    def append(self, tbl):
        """
        Append every entry of ``bytes`` to the masks array belonging to
        ``tbl``.
        """
        self.__sanitycheck()
        masks = self._getmasks(tbl)
        for x in self.bytes:
            if isinstance(x, list):
                # This occurs primarily in testing.
                masks.append(numpy.array(x, dtype=tables.UInt8Atom()))
            else:
                # frombuffer replaces the deprecated binary mode of
                # numpy.fromstring.
                masks.append(numpy.frombuffer(
                    x, dtype=tables.UInt8Atom(), count=len(x)))

    def _getmasks(self, tbl):
        """
        Return the ``<table name>_masks`` node under the parent of ``tbl``,
        creating it as a UInt8 variable-length array if it does not exist.
        """
        n = tbl._v_name
        f = tbl._v_file
        p = tbl._v_parent
        # http://doc.zeroc.com/display/Ice/Basic+Types
        # Ice::Byte can be -128 to 127 OR 0 to 255, but using UInt8 for the
        # moment
        try:
            masks = getattr(p, "%s_masks" % n)
        except tables.NoSuchNodeError:
            if has_pytables3:
                masks = f.create_vlarray(p, "%s_masks" % n, tables.UInt8Atom())
            else:
                masks = f.createVLArray(p, "%s_masks" % n, tables.UInt8Atom())
        return masks
# Helpers # ======================================================================== # Conversion classes are for omero.model <--> ome.model only (no python)
class ObjectFactory(Ice.ObjectFactory):
    """
    Ice object factory that builds instances by calling ``f``.

    ``id`` is taken from ``cls.ice_staticId()``. If that call raises, the
    error is swallowed and ``id`` is left unset, in which case ``register``
    fails with AttributeError.
    """

    def __init__(self, cls, f):
        self.f = f
        try:
            self.id = cls.ice_staticId()
        except Exception:
            pass

    def create(self, string):
        """Return a new object from ``f``; ``string`` is not used."""
        return self.f()

    def destroy(self):
        """Do nothing."""
        pass

    def register(self, ic):
        """Add this factory to the communicator ``ic`` under ``id``."""
        ic.addObjectFactory(self, self.id)
# Object factories
# =========================================================================

# One ObjectFactory per concrete column class, keyed by the class itself.
# Each class serves as its own zero-argument constructor.
ObjectFactories = {
    column_class: ObjectFactory(column_class, column_class)
    for column_class in (
        FileColumnI,
        ImageColumnI,
        RoiColumnI,
        WellColumnI,
        PlateColumnI,
        DatasetColumnI,
        BoolColumnI,
        DoubleColumnI,
        LongColumnI,
        StringColumnI,
        FloatArrayColumnI,
        DoubleArrayColumnI,
        LongArrayColumnI,
        MaskColumnI,
    )
}