Source code for util.populate_metadata

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Populate bulk metadata tables from delimited text files.
"""

#
#  Copyright (C) 2011-2014 University of Dundee. All rights reserved.
#
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#


import logging
import gzip
import sys
import csv
import re
import json
from getpass import getpass
from getopt import getopt, GetoptError

from collections import defaultdict
import warnings

import omero.clients
from omero import CmdError
from omero.rtypes import rlist, rstring, unwrap
from omero.model import DatasetAnnotationLinkI, DatasetI, FileAnnotationI
from omero.model import OriginalFileI, PlateI, PlateAnnotationLinkI, ScreenI
from omero.model import PlateAcquisitionI, WellI, WellSampleI, ImageI
from omero.model import ProjectAnnotationLinkI, ProjectI
from omero.model import ScreenAnnotationLinkI
from omero.model import MapAnnotationI, NamedValue
from omero.grid import ImageColumn, LongColumn, PlateColumn, RoiColumn
from omero.grid import StringColumn, WellColumn, DoubleColumn, BoolColumn
from omero.grid import DatasetColumn
from omero.util.metadata_mapannotations import (
    CanonicalMapAnnotation, MapAnnotationPrimaryKeyException,
    MapAnnotationManager)
from omero.util.metadata_utils import (
    KeyValueListPassThrough, KeyValueGroupList, NSBULKANNOTATIONSCONFIG)
from omero.util import pydict_text_io
from omero import client

from omero.util.populate_roi import ThreadPool


log = logging.getLogger("omero.util.populate_metadata")

warnings.warn(
    "This module is deprecated as of OMERO 5.4.8. Use the module"
    " in the omero-metadata project available from"
    " https://pypi.org/project/omero-metadata/ instead.",
    DeprecationWarning)


def usage(error):
    """
    Print the given error followed by the command-line help, then exit.

    The process is terminated with exit status 2.

    :param error: message printed on the first line of the output
    """
    program = sys.argv[0]
    template = """%s
Usage: %s [options] <target_object> <file>
Runs metadata population code for a given object.

Options:
  -s    OMERO hostname to use [defaults to "localhost"]
  -p    OMERO port to use [defaults to 4064]
  -u    OMERO username to use
  -w    OMERO password
  -k    OMERO session key to use
  --columns Column configuration, Specify as comma separated list.
            Supported types: plate, well, image, roi,
                             d (double), l (long), s (string), b (boolean)
            Supported Boolean True Values: "yes", "true", "t", "1".
  -i    Dump measurement information and exit (no population)
  -d    Print debug statements
  -c    Use an alternative context (for expert users only)

Examples:
  %s -s localhost -p 14064 -u bob --columns l,image,d,l Plate:6 metadata.csv

Report bugs to ome-devel@lists.openmicroscopy.org.uk"""
    # The program name appears twice: in the synopsis and in the example.
    print(template % (error, program, program))
    sys.exit(2)
# Global thread pool for use by workers thread_pool = None # Special column names we may add depending on the data type BOOLEAN_TRUE = ["yes", "true", "t", "1"] PLATE_NAME_COLUMN = 'Plate Name' WELL_NAME_COLUMN = 'Well Name' DATASET_NAME_COLUMN = 'Dataset Name' IMAGE_NAME_COLUMN = 'Image Name' COLUMN_TYPES = { 'plate': PlateColumn, 'well': WellColumn, 'image': ImageColumn, 'roi': RoiColumn, 'd': DoubleColumn, 'l': LongColumn, 's': StringColumn, 'b': BoolColumn } REGEX_HEADER_SPECIFIER = r'# header '
class MetadataError(Exception):
    """
    Signals an error condition encountered by the metadata parsing
    context.
    """
class HeaderResolver(object):
    """
    Turns the header row of a delimited file into the list of column
    objects used to create the OMERO.tables instance.

    Column classes come either from explicitly supplied type codes (see
    ``COLUMN_TYPES``) or, when none are given, from the well-known header
    names listed in the ``*_keys`` tables below; unknown headers become
    string columns.
    """

    # Initial width of string columns.
    DEFAULT_COLUMN_SIZE = 1

    # Well-known (lower-cased) header names per target type.
    dataset_keys = {
        'image': ImageColumn,
        'image_name': StringColumn,
    }

    project_keys = {
        'dataset': DatasetColumn,
        'dataset_name': StringColumn,
        'image': ImageColumn,
        'image_name': StringColumn,
    }

    plate_keys = {
        'well': WellColumn,
        'field': ImageColumn,
        'row': LongColumn,
        'column': LongColumn,
        'wellsample': ImageColumn,
        'image': ImageColumn,
    }

    # A screen understands everything a plate does, plus 'plate'.
    screen_keys = {'plate': PlateColumn}
    screen_keys.update(plate_keys)

    def __init__(self, target_object, headers, column_types=None):
        """
        :param target_object: the Screen/Plate/Dataset/Project being
            annotated
        :param headers: list of header strings, one per file column
        :param column_types: optional list of ``COLUMN_TYPES`` keys, one
            per header
        """
        self.target_object = target_object
        self.headers = headers
        self.headers_as_lower = [header.lower() for header in headers]
        self.types = column_types

    @staticmethod
    def is_row_column_types(row):
        """Return True when the first cell of `row` contains '# header'."""
        return "# header" in row[0]

    @staticmethod
    def get_column_types(row):
        """
        Extract the column types declared by a '# header' row.

        :return: the result of ``parse_column_types`` on the row's cells
            (with the '# header ' prefix removed from the first one), or
            None when the row is not a type declaration
        """
        if not "# header" in row[0]:
            return None
        first_type = re.sub(REGEX_HEADER_SPECIFIER, '', row[0])
        declared = [first_type] + list(row[1:])
        return parse_column_types(declared)

    def create_columns(self):
        """
        Create the columns appropriate for the class of the target object.

        :raises MetadataError: if the target is not a Screen, Plate,
            Dataset or Project
        """
        target_class = self.target_object.__class__
        target_id = self.target_object.id.val
        handlers = (
            (ScreenI, 'Creating columns for Screen:%d',
             self.create_columns_screen),
            (PlateI, 'Creating columns for Plate:%d',
             self.create_columns_plate),
            (DatasetI, 'Creating columns for Dataset:%d',
             self.create_columns_dataset),
            (ProjectI, 'Creating columns for Project:%d',
             self.create_columns_project),
        )
        for model_class, message, factory in handlers:
            if model_class is target_class:
                log.debug(message % target_id)
                return factory()
        raise MetadataError(
            'Unsupported target object class: %s' % target_class)

    def columns_sanity_check(self, columns):
        """
        Reject column sets that contain both a well and an image column.

        :raises MetadataError: if both column classes are present
        """
        classes = [column.__class__ for column in columns]
        conflicting = WellColumn in classes and ImageColumn in classes
        if not conflicting:
            log.debug('Sanity check passed')
            return
        log.debug(classes)
        raise MetadataError(
            ('Well Column and Image Column cannot be resolved at '
             'the same time. Pick one.'))

    def create_columns_screen(self):
        """Create columns using the screen header table."""
        return self._create_columns("screen")

    def create_columns_plate(self):
        """Create columns using the plate header table."""
        return self._create_columns("plate")

    def create_columns_dataset(self):
        """Create columns using the dataset header table."""
        return self._create_columns("dataset")

    def create_columns_project(self):
        """Create columns using the project header table."""
        return self._create_columns("project")

    @staticmethod
    def _parse_header(header):
        """
        Split a raw header into (column name, description).

        A header of the form ``name%%key=value`` yields a JSON description
        ``{"key": "value"}``; ``name%%text`` keeps ``text`` as is.
        """
        name, description = header, ""
        if "%%" in name:
            name, description = name.split("%%", 1)
            name = name.strip()
            # description is key=value. Convert to json
            if "=" in description:
                key, val = description.split("=", 1)
                description = json.dumps({key.strip(): val.strip()})
        # HDF5 does not allow / in column names
        return name.replace('/', '\\'), description

    def _create_columns(self, klass):
        """
        Build the column list for the header table named ``<klass>_keys``.

        :raises MetadataError: if explicit types were given but their
            count differs from the header count, or if the sanity check
            fails
        """
        types = self.types
        if types is not None and len(types) != len(self.headers):
            message = "Number of columns and column types not equal."
            raise MetadataError(message)

        columns = []
        for index, lowered in enumerate(self.headers_as_lower):
            name, description = self._parse_header(self.headers[index])
            if types is None:
                # No explicit types: fall back on well-known header names
                # and default to a string column.
                try:
                    keys = getattr(self, "%s_keys" % klass)
                    column = keys[lowered](
                        name, description, list())
                except KeyError:
                    column = StringColumn(
                        name, description, self.DEFAULT_COLUMN_SIZE, list())
            else:
                column_class = COLUMN_TYPES[types[index]]
                if column_class is StringColumn:
                    column = column_class(
                        name, description, self.DEFAULT_COLUMN_SIZE, list())
                else:
                    column = column_class(name, description, list())
            columns.append(column)

        # Identifier columns get a companion name column appended.
        companions = (
            (PlateColumn, PLATE_NAME_COLUMN),
            (WellColumn, WELL_NAME_COLUMN),
            (ImageColumn, IMAGE_NAME_COLUMN),
        )
        extras = []
        for column in columns:
            for column_class, companion_name in companions:
                if column.__class__ is column_class:
                    extras.append(StringColumn(
                        companion_name, '', self.DEFAULT_COLUMN_SIZE,
                        list()))
            # Currently hard-coded, but "if image name, then add image id"
            if column.name == IMAGE_NAME_COLUMN:
                extras.append(ImageColumn("image", '', list()))
        columns.extend(extras)
        self.columns_sanity_check(columns)
        return columns
class ValueResolver(object):
    """
    Value resolver for column types which is responsible for filling up
    non-metadata columns with their OMERO data model identifiers.

    Target-specific lookups are delegated to a wrapper object chosen from
    the class of the target (Plate, Dataset, Screen or Project).
    """

    AS_ALPHA = [chr(v) for v in range(97, 122 + 1)]  # a-z
    # Support more than 26 rows: append aa-az
    AS_ALPHA.extend(['a' + letter for letter in AS_ALPHA])
    WELL_REGEX = re.compile(r'^([a-zA-Z]+)(\d+)$')

    def __init__(self, client, target_object):
        """
        :param client: connected OMERO client
        :param target_object: the object being annotated
        :raises MetadataError: if the target class is not supported
        """
        self.client = client
        self.target_object = target_object
        self.target_class = self.target_object.__class__
        self.target_type = self.target_object.ice_staticId().split('::')[-1]
        self.target_id = self.target_object.id.val
        q = "select x.details.group.id from %s x where x.id = %d " % (
            self.target_type, self.target_id
        )
        result = self.client.sf.getQueryService().projection(q, None)
        self.target_group = result[0][0].val
        # The goal is to make this the only instance of
        # a if/elif/else block on the target_class. All
        # logic should be placed in a the concrete wrapper
        # implementation
        if PlateI is self.target_class:
            self.wrapper = PlateWrapper(self)
        elif DatasetI is self.target_class:
            self.wrapper = DatasetWrapper(self)
        elif ScreenI is self.target_class:
            self.wrapper = ScreenWrapper(self)
        elif ProjectI is self.target_class:
            self.wrapper = ProjectWrapper(self)
        else:
            raise MetadataError(
                'Unsupported target object class: %s' % self.target_class)

    def get_plate_name_by_id(self, plate):
        """Delegate to the wrapper's ``get_plate_name_by_id``."""
        return self.wrapper.get_plate_name_by_id(plate)

    def get_well_name(self, well_id, plate=None):
        """
        Return the human readable name of a well, e.g. ``b3``.

        The row is rendered as a lower-case letter and the column as a
        1-based number.
        """
        well = self.wrapper.get_well_by_id(well_id, plate)
        row = well.row.val
        col = well.column.val
        row = self.AS_ALPHA[row]
        return '%s%d' % (row, col + 1)

    def get_image_id_by_name(self, iname, dname=None):
        """Delegate to the wrapper's ``get_image_id_by_name``."""
        return self.wrapper.get_image_id_by_name(iname, dname)

    def get_image_name_by_id(self, iid, pid=None):
        """Delegate to the wrapper's ``get_image_name_by_id``."""
        return self.wrapper.get_image_name_by_id(iid, pid)

    def subselect(self, valuerows, names):
        """Delegate to the wrapper's ``subselect``."""
        return self.wrapper.subselect(valuerows, names)

    def _resolve_image(self, value, row):
        """
        Resolve an image identifier found in an image column.

        :param value: the raw cell value, convertible with ``int``
        :param row: list of ``(column, value)`` pairs for the whole row
        :return: the image id, or -1 if the image is unknown
        :raises MetadataError: if several image mappings exist and the
            row has no plate column to choose between them
        """
        wrapper = self.wrapper
        images_by_id = None
        if len(wrapper.images_by_id) == 1:
            images_by_id = list(wrapper.images_by_id.values())[0]
        else:
            # The lookup tables live on the wrapper, not on the resolver.
            for row_column, plate in row:
                if row_column.__class__ is PlateColumn:
                    plate_id = wrapper.plates_by_name[plate].id.val
                    images_by_id = wrapper.images_by_id[plate_id]
                    log.debug("Got plate %i", plate_id)
                    break
        if images_by_id is None:
            raise MetadataError(
                'Unable to locate Plate column in Row: %r' % row
            )
        try:
            return images_by_id[int(value)].id.val
        except KeyError:
            # value is the raw cell text, so it must not be formatted
            # with an integer conversion.
            log.debug('Image Id: %s not found!', value)
            return -1

    def resolve(self, column, value, row):
        """
        Convert a raw cell value into the value stored in `column`.

        :param column: the column object the value belongs to
        :param value: the raw cell value (a string)
        :param row: list of ``(column, value)`` pairs for the whole row
        :return: the converted value; image and well lookups that fail
            yield -1
        :raises MetadataError: if the column class is not supported
        """
        column_class = column.__class__
        column_as_lower = column.name.lower()
        if ImageColumn is column_class:
            return self._resolve_image(value, row)
        if WellColumn is column_class:
            return self.wrapper.resolve_well(column, row, value)
        if PlateColumn is column_class:
            return self.wrapper.resolve_plate(column, row, value)
        if column_as_lower in ('row', 'column') \
           and column_class is LongColumn:
            try:
                # The value is not 0 offsetted
                return int(value) - 1
            except ValueError:
                # Not a number: interpret it as a row letter.
                return int(self.AS_ALPHA.index(value.lower()))
        if StringColumn is column_class:
            return value
        if LongColumn is column_class:
            return int(value)
        if DoubleColumn is column_class:
            return float(value)
        if BoolColumn is column_class:
            return value.lower() in BOOLEAN_TRUE
        raise MetadataError('Unsupported column class: %s' % column_class)
class PlateData(object):
    """
    Lightweight stand-in for a queried Plate: it keeps only the id, the
    name and the wells, avoiding the cost of holding every Ice field.
    """

    def __init__(self, plate):
        self.id = plate.id
        self.name = plate.name
        # Each well is reduced to a WellData instance as well.
        self.wells = [WellData(well) for well in plate.copyWells()]
class WellData(object):
    """
    Lightweight stand-in for a queried Well: it keeps only the id, row,
    column and well samples, avoiding the cost of holding every Ice field.
    """

    def __init__(self, well):
        self.id = well.id
        self.row = well.row
        self.column = well.column
        # Each well sample is reduced to a WellSampleData instance.
        self.well_samples = [
            WellSampleData(sample) for sample in well.copyWellSamples()
        ]
class WellSampleData(object):
    """
    Lightweight stand-in for a queried WellSample: it keeps only the id
    and the image, avoiding the cost of holding every Ice field.
    """

    def __init__(self, well_sample):
        image = well_sample.getImage()
        self.id = well_sample.id
        self.image = ImageData(image)
class ImageData(object):
    """
    Lightweight stand-in for a queried Image: it keeps only the id and
    the name, avoiding the cost of holding every Ice field.
    """

    def __init__(self, image):
        self.id, self.name = image.id, image.name
class ValueWrapper(object):
    """
    Base class of the target-specific helpers used by the value resolver.

    It copies the client and target information from the resolver so that
    subclasses can use them directly.
    """

    def __init__(self, value_resolver):
        self.resolver = value_resolver
        self.client = self.resolver.client
        self.target_object = self.resolver.target_object
        self.target_class = self.resolver.target_class

    def subselect(self, rows, names):
        """Return `rows` unchanged; the default is to keep every row."""
        return rows
class SPWWrapper(ValueWrapper):
    """
    Shared behaviour of the Screen and Plate wrappers.

    Subclasses are expected to populate ``self.images_by_id`` and
    ``self.wells_by_location``, which the methods below read.
    """

    def __init__(self, value_resolver):
        super(SPWWrapper, self).__init__(value_resolver)
        self.AS_ALPHA = value_resolver.AS_ALPHA
        self.WELL_REGEX = value_resolver.WELL_REGEX

    def get_well_by_id(self, well_id, plate=None):
        """Look up a well; must be provided by subclasses."""
        raise Exception("to be implemented by subclasses")

    def get_image_name_by_id(self, iid, pid=None):
        """
        Return the name of the image with id `iid`.

        :param iid: image id
        :param pid: optional key into ``self.images_by_id``. When it is
            missing, or unknown while only a single image mapping exists,
            that first mapping is used.
        :raises Exception: if no image mapping can be selected
        """
        if not self.images_by_id:
            raise Exception("Cannot resolve image to plate")
        if pid in self.images_by_id:
            images = self.images_by_id[pid]
        elif not pid or len(self.images_by_id) == 1:
            # Previously any call that supplied a pid raised; fall back
            # on the first mapping instead, as is done without a pid.
            images = next(iter(self.images_by_id.values()))
        else:
            raise Exception("Cannot resolve image to plate")
        return images[iid].name.val

    def parse_plate(self, plate, wells_by_location, wells_by_id,
                    images_by_id):
        """
        Index the wells and images of a plate into the given dictionaries.

        Accepts PlateData instances.

        :param plate: the plate to index
        :param wells_by_location: filled as ``{row letter: {column
            number as string (1-based): well}}``
        :param wells_by_id: filled as ``{well id: well}``
        :param images_by_id: filled as ``{image id: image}``
        """
        # TODO: This should use the PlateNamingConvention. We're assuming rows
        # as alpha and columns as numeric.
        for well in plate.wells:
            wells_by_id[well.id.val] = well
            row_name = self.AS_ALPHA[well.row.val]
            # 0 offsetted is not what people use in reality
            column = str(well.column.val + 1)
            wells_by_location.setdefault(row_name, dict())[column] = well

            for well_sample in well.well_samples:
                image = well_sample.image
                images_by_id[image.id.val] = image
        log.debug('Completed parsing plate: %s' % plate.name.val)
        for row in wells_by_location:
            log.debug('%s: %r' % (row, list(wells_by_location[row].keys())))

    def resolve_well(self, column, row, value):
        """
        Resolve a well identifier such as ``A1`` into a well id.

        :param column: the well column (not used for the lookup)
        :param row: list of ``(column, value)`` pairs for the whole row
        :param value: the raw well identifier
        :return: the well id, or -1 if there is no well at that location
        :raises MetadataError: if the identifier cannot be parsed, or if
            several plates are loaded and the row has no plate column
        """
        m = self.WELL_REGEX.match(value)
        if m is None or len(m.groups()) != 2:
            msg = 'Cannot parse well identifier "%s" from row: %r'
            msg = msg % (value, [o[1] for o in row])
            raise MetadataError(msg)
        plate_row = m.group(1).lower()
        # Normalise the column number, e.g. "01" -> "1".
        plate_column = str(int(m.group(2)))
        wells_by_location = None
        if len(self.wells_by_location) == 1:
            wells_by_location = list(self.wells_by_location.values())[0]
            log.debug(
                'Parsed "%s" row: %s column: %s' % (
                    value, plate_row, plate_column))
        else:
            for row_column, plate in row:
                if row_column.__class__ is PlateColumn:
                    wells_by_location = self.wells_by_location[plate]
                    log.debug(
                        'Parsed "%s" row: %s column: %s plate: %s' % (
                            value, plate_row, plate_column, plate))
                    break
        if wells_by_location is None:
            raise MetadataError(
                'Unable to locate Plate column in Row: %r' % row
            )
        try:
            return wells_by_location[plate_row][plate_column].id.val
        except KeyError:
            log.debug('Row: %s Column: %s not found!' % (
                plate_row, plate_column))
            return -1
class ScreenWrapper(SPWWrapper):
    """
    Wrapper for a Screen target: loads every plate linked to the screen
    together with its wells and images.
    """

    def __init__(self, value_resolver):
        super(ScreenWrapper, self).__init__(value_resolver)
        self._load()

    def get_plate_name_by_id(self, plate):
        """Return the name of the plate with the given id."""
        plate = self.plates_by_id[plate]
        return plate.name.val

    def get_well_by_id(self, well_id, plate=None):
        """Return the well `well_id` of the plate with id `plate`."""
        wells = self.wells_by_id[plate]
        return wells[well_id]

    def resolve_plate(self, column, row, value):
        """
        Resolve a plate name into a plate id.

        :return: the plate id, or a ``Skip`` instance when the screen has
            no plate of that name
        """
        try:
            return self.plates_by_name[value].id.val
        except KeyError:
            # Logger.warn is a deprecated alias of Logger.warning.
            log.warning('Screen is missing plate: %s' % value)
            return Skip()

    def _load(self):
        """
        Load the screen and its plates and build the lookup dictionaries.

        All images are stored in a single mapping registered in
        ``self.images_by_id`` under the screen id; wells are indexed per
        plate (by plate name for locations, by plate id for well ids).

        :raises MetadataError: if the screen cannot be found
        """
        query_service = self.client.getSession().getQueryService()
        parameters = omero.sys.ParametersI()
        parameters.addId(self.target_object.id.val)
        log.debug('Loading Screen:%d' % self.target_object.id.val)
        self.target_object = query_service.findByQuery((
            'select s from Screen as s '
            'join fetch s.plateLinks as p_link '
            'join fetch p_link.child as p '
            'where s.id = :id'), parameters, {'omero.group': '-1'})
        if self.target_object is None:
            raise MetadataError('Could not find target object!')
        self.target_name = unwrap(self.target_object.getName())
        self.images_by_id = dict()
        self.wells_by_location = dict()
        self.wells_by_id = dict()
        self.plates_by_name = dict()
        self.plates_by_id = dict()
        images_by_id = dict()
        self.images_by_id[self.target_object.id.val] = images_by_id
        for link in self.target_object.copyPlateLinks():
            plate = link.child
            parameters = omero.sys.ParametersI()
            parameters.addId(plate.id.val)
            plate = query_service.findByQuery((
                'select p from Plate p '
                'join fetch p.wells as w '
                'join fetch w.wellSamples as ws '
                'join fetch ws.image as i '
                'where p.id = :id'), parameters, {'omero.group': '-1'})
            plate = PlateData(plate)
            self.plates_by_name[plate.name.val] = plate
            self.plates_by_id[plate.id.val] = plate
            wells_by_location = dict()
            wells_by_id = dict()
            self.wells_by_location[plate.name.val] = wells_by_location
            self.wells_by_id[plate.id.val] = wells_by_id
            self.parse_plate(
                plate, wells_by_location, wells_by_id, images_by_id
            )
class PlateWrapper(SPWWrapper):
    """
    Wrapper for a Plate target: loads the plate with its wells and
    images.
    """

    def __init__(self, value_resolver):
        super(PlateWrapper, self).__init__(value_resolver)
        self._load()

    def get_well_by_id(self, well_id, plate=None):
        """
        Return the well `well_id` of the target plate.

        The `plate` argument is ignored: the target plate is always used.
        """
        plate = self.target_object.id.val
        wells = self.wells_by_id[plate]
        return wells[well_id]

    def subselect(self, rows, names):
        """
        If we're processing a plate but the bulk-annotations file contains
        a plate column then select rows for this plate only

        :param rows: list of value rows
        :param names: header names, in the same order as the row cells
        :return: the rows whose 'plate' cell equals the name of the target
            plate, or all rows when there is no 'plate' header
        """
        for i, name in enumerate(names):
            if name.lower() == 'plate':
                # The plate name is stored on this wrapper by _load().
                valuerows = [row for row in rows if row[i] ==
                             self.target_name]
                log.debug(
                    'Selected %d/%d rows for plate "%s"',
                    len(valuerows), len(rows), self.target_name)
                return valuerows
        return rows

    def _load(self):
        """
        Load the plate and build the lookup dictionaries.

        Well locations are registered under the plate name; well ids and
        images under the plate id.

        :raises MetadataError: if the plate cannot be found
        """
        query_service = self.client.getSession().getQueryService()
        parameters = omero.sys.ParametersI()
        parameters.addId(self.target_object.id.val)
        log.debug('Loading Plate:%d' % self.target_object.id.val)
        self.target_object = query_service.findByQuery((
            'select p from Plate as p '
            'join fetch p.wells as w '
            'join fetch w.wellSamples as ws '
            'join fetch ws.image as i '
            'where p.id = :id'), parameters, {'omero.group': '-1'})
        if self.target_object is None:
            raise MetadataError('Could not find target object!')
        self.target_name = unwrap(self.target_object.getName())
        self.wells_by_location = dict()
        self.wells_by_id = dict()
        wells_by_location = dict()
        wells_by_id = dict()

        self.images_by_id = dict()
        images_by_id = dict()

        self.wells_by_location[self.target_object.name.val] = wells_by_location
        self.wells_by_id[self.target_object.id.val] = wells_by_id
        self.images_by_id[self.target_object.id.val] = images_by_id
        self.parse_plate(
            PlateData(self.target_object),
            wells_by_location, wells_by_id, images_by_id
        )
class PDIWrapper(ValueWrapper):
    """Common base of the Project and Dataset wrappers."""

    def get_image_id_by_name(self, iname, dname=None):
        """Look up an image id by name; must be provided by subclasses."""
        message = "to be implemented by subclasses"
        raise Exception(message)
class DatasetWrapper(PDIWrapper):
    """
    Wrapper for a Dataset target: loads the ids and names of the images
    linked to the dataset.
    """

    def __init__(self, value_resolver):
        super(DatasetWrapper, self).__init__(value_resolver)
        self.images_by_id = dict()
        self.images_by_name = dict()
        self._load()

    def get_image_id_by_name(self, iname, dname=None):
        """
        Return the id of the image named `iname`.

        `dname` is ignored since only one dataset is loaded.

        :raises KeyError: if the dataset has no image of that name
        """
        return self.images_by_name[iname]

    def _load(self):
        """
        Load the dataset and index its images by id and by name.

        :raises MetadataError: if the dataset cannot be found or has no
            images
        :raises Exception: if two images share the same name
        """
        query_service = self.client.getSession().getQueryService()
        parameters = omero.sys.ParametersI()
        parameters.addId(self.target_object.id.val)
        log.debug('Loading Dataset:%d' % self.target_object.id.val)

        parameters.page(0, 1)
        self.target_object = unwrap(query_service.findByQuery(
            'select d from Dataset d where d.id = :id',
            parameters, {'omero.group': '-1'}))
        # Fail with the module's own error, as the Screen and Plate
        # wrappers do, rather than with an AttributeError below.
        if self.target_object is None:
            raise MetadataError('Could not find target object!')
        self.target_name = self.target_object.name.val

        # Fetch the images page by page, 1000 rows at a time.
        data = list()
        while True:
            parameters.page(len(data), 1000)
            rv = unwrap(query_service.projection((
                'select distinct i.id, i.name from Dataset as d '
                'join d.imageLinks as l '
                'join l.child as i '
                'where d.id = :id order by i.id desc'),
                parameters, {'omero.group': '-1'}))
            if len(rv) == 0:
                break
            data.extend(rv)
        if not data:
            raise MetadataError('Could not find target object!')

        for iid, iname in data:
            self.images_by_id[iid] = iname
            if iname in self.images_by_name:
                raise Exception("Image named %s(id=%d) present. (id=%s)" % (
                    iname, self.images_by_name[iname], iid
                ))
            self.images_by_name[iname] = iid
        log.debug('Completed parsing dataset: %s' % self.target_name)
class ProjectWrapper(PDIWrapper):
    """
    Wrapper for a Project target: loads the datasets linked to the
    project and the images linked to those datasets.
    """

    def __init__(self, value_resolver):
        super(ProjectWrapper, self).__init__(value_resolver)
        # {dataset id: {image id: row}} and
        # {dataset name: {image name: row}}, where row is
        # (dataset id, dataset name, image id, image name).
        self.graph_by_id = defaultdict(dict)
        self.graph_by_name = defaultdict(dict)
        self._load()

    def get_image_id_by_name(self, iname, dname=None):
        """
        Return the id of the image `iname` within the dataset `dname`.

        :raises KeyError: if the dataset has no image of that name
        """
        return self.graph_by_name[dname][iname][2]

    def _load(self):
        """
        Load the project and index its datasets and images.

        :raises MetadataError: if the project cannot be found or contains
            no images
        :raises Exception: if two datasets share a name, or two images
            of one dataset share a name
        """
        query_service = self.client.getSession().getQueryService()
        parameters = omero.sys.ParametersI()
        parameters.addId(self.target_object.id.val)
        log.debug('Loading Project:%d' % self.target_object.id.val)

        parameters.page(0, 1)
        self.target_object = unwrap(query_service.findByQuery(
            'select p from Project p where p.id = :id',
            parameters, {'omero.group': '-1'}))
        # Fail with the module's own error, as the Screen and Plate
        # wrappers do, rather than with an AttributeError below.
        if self.target_object is None:
            raise MetadataError('Could not find target object!')
        self.target_name = self.target_object.name.val

        # Fetch the rows page by page, 1000 rows at a time.
        data = list()
        while True:
            parameters.page(len(data), 1000)
            rv = unwrap(query_service.projection((
                'select distinct d.id, d.name, i.id, i.name '
                'from Project p '
                'join p.datasetLinks as pdl '
                'join pdl.child as d '
                'join d.imageLinks as l '
                'join l.child as i '
                'where p.id = :id order by i.id desc'),
                parameters, {'omero.group': '-1'}))
            if len(rv) == 0:
                break
            data.extend(rv)
        if not data:
            raise MetadataError('Could not find target object!')

        # Tracks dataset name -> dataset id and
        # (dataset id, image name) -> image id to detect duplicates.
        seen = dict()
        for row in data:
            did, dname, iid, iname = row
            if dname in seen and seen[dname] != did:
                raise Exception("Duplicate datasets: '%s' = %s, %s" % (
                    dname, seen[dname], did
                ))
            seen[dname] = did

            ikey = (did, iname)
            if ikey in seen and iid != seen[ikey]:
                raise Exception("Duplicate image: '%s' = %s, %s (Dataset:%s)"
                                % (iname, seen[ikey], iid, did))
            seen[ikey] = iid

            self.graph_by_id[did][iid] = row
            self.graph_by_name[dname][iname] = row
        log.debug('Completed parsing project: %s' % self.target_object.id.val)
class ParsingContext(object):
    """Generic parsing context for CSV files."""

    def __init__(self, client, target_object, file=None, fileid=None,
                 cfg=None, cfgid=None, attach=False, column_types=None,
                 options=None):
        """
        :param client: connected OMERO client
        :param target_object: the object being annotated
        :param file: path of the CSV file (optionally gzip-compressed)
            read by :meth:`parse`
        :param column_types: optional list of column type codes
        :param fileid: not used by this class
        :param cfg: not used by this class
        :param cfgid: not used by this class
        :param attach: not used by this class
        :param options: not used by this class
        """
        # These checks should be handled outside of the constructor:
        #
        # if not file:
        #     raise MetadataError('file required for %s' % type(self))
        # if fileid and not file:
        #     raise MetadataError('fileid not supported for %s' % type(self))
        # if cfg:
        #     raise MetadataError('cfg not supported for %s' % type(self))
        # if cfgid:
        #     raise MetadataError('cfgid not supported for %s' % type(self))
        self.client = client
        self.target_object = target_object
        self.file = file
        self.column_types = column_types
        self.value_resolver = ValueResolver(self.client, self.target_object)

    def get_column_widths(self):
        """
        Return the ``size`` of every column, or None for columns that
        have no such attribute.
        """
        return [getattr(column, 'size', None) for column in self.columns]

    def parse_from_handle(self, data):
        """
        Parse comma separated data from an open text handle.

        The first row may declare column types (see
        ``HeaderResolver.is_row_column_types``); the header row follows
        it, otherwise the first row is the header. The remaining rows are
        resolved into ``self.columns``.

        :param data: iterable of text lines
        :raises MetadataError: if there is no header row
        :raises Exception: if a header cell is empty
        """
        rows = list(csv.reader(data, delimiter=','))
        if not rows:
            raise MetadataError('No header row found in CSV')
        first_row_is_types = HeaderResolver.is_row_column_types(rows[0])
        header_index = 0
        rows_index = 1
        if first_row_is_types:
            header_index = 1
            rows_index = 2
        if len(rows) <= header_index:
            raise MetadataError('No header row found in CSV')
        headers = rows[header_index]
        log.debug('Header: %r' % headers)
        # Validate the header row itself, which is not the first row when
        # the file starts with a column type declaration.
        for h in headers:
            if not h:
                raise Exception('Empty column header in CSV: %s'
                                % headers)
        if self.column_types is None and first_row_is_types:
            self.column_types = HeaderResolver.get_column_types(rows[0])
        log.debug('Column types: %r' % self.column_types)
        self.header_resolver = HeaderResolver(
            self.target_object, headers,
            column_types=self.column_types)
        self.columns = self.header_resolver.create_columns()
        log.debug('Columns: %r' % self.columns)

        valuerows = rows[rows_index:]
        log.debug('Got %d rows', len(valuerows))
        valuerows = self.value_resolver.subselect(valuerows, headers)
        self.populate(valuerows)
        self.post_process()
        log.debug('Column widths: %r' % self.get_column_widths())
        log.debug('Columns: %r' % [
            (o.name, len(o.values)) for o in self.columns])

    def parse(self):
        """
        Open ``self.file`` (through gzip if it ends in ``.gz``) and parse
        it with :meth:`parse_from_handle`.
        """
        opener = gzip.open if self.file.endswith(".gz") else open
        with opener(self.file, "rt") as data:
            return self.parse_from_handle(data)

    def populate(self, rows):
        """
        Resolve every cell of `rows` and append the results to the
        columns.

        A row is dropped entirely as soon as one of its cells resolves to
        a ``Skip`` instance.

        :param rows: list of rows, each a list of raw cell values
        :raises IndexError: if a row has no value for a column whose
            values are not derived later by :meth:`post_process`
        """
        nrows = len(rows)
        derived_names = (PLATE_NAME_COLUMN, WELL_NAME_COLUMN,
                         IMAGE_NAME_COLUMN)
        for (r, row) in enumerate(rows):
            values = list()
            row = [(self.columns[i], value) for i, value in enumerate(row)]
            # Tracked explicitly so that an empty row never inspects a
            # value left over from the previous row.
            skipped = False
            for column, original_value in row:
                log.debug('Row %d/%d Original value %s, %s',
                          r + 1, nrows, original_value, column.name)
                value = self.value_resolver.resolve(
                    column, original_value, row)
                if value.__class__ is Skip:
                    skipped = True
                    break
                values.append(value)
                try:
                    log.debug("Value's class: %s" % value.__class__)
                    if isinstance(value, str):
                        # Grow the string column to fit its widest value.
                        column.size = max(column.size, len(value))
                except TypeError:
                    log.error('Original value "%s" now "%s" of bad type!' % (
                        original_value, value))
                    raise
            if skipped:
                continue
            # Reversed so that pop() hands the values out in file order.
            values.reverse()
            for column in self.columns:
                if values:
                    column.values.append(values.pop())
                elif isinstance(column, ImageColumn) or \
                        column.name in derived_names:
                    # Then assume that the values will be calculated
                    # later based on another column.
                    continue
                else:
                    msg = 'Column %s has no values.' % column.name
                    log.error(msg)
                    raise IndexError(msg)

    def post_process(self):
        """
        Fill the derived columns (well name, image name or id, plate
        name) from the identifier columns populated by :meth:`populate`.

        Does nothing when none of the derived name columns exist.
        """
        target_class = self.target_object.__class__
        columns_by_name = dict()
        well_column = None
        well_name_column = None
        plate_name_column = None
        image_column = None
        image_name_column = None
        for column in self.columns:
            columns_by_name[column.name] = column
            if column.__class__ is PlateColumn:
                log.warning("PlateColumn is unimplemented")
            elif column.__class__ is WellColumn:
                well_column = column
            elif column.name == WELL_NAME_COLUMN:
                well_name_column = column
            elif column.name == PLATE_NAME_COLUMN:
                plate_name_column = column
            elif column.name == IMAGE_NAME_COLUMN:
                image_name_column = column
            elif column.__class__ is ImageColumn:
                image_column = column

        if well_name_column is None and plate_name_column is None \
                and image_name_column is None:
            log.info('Nothing to do during post processing.')
            return

        sz = max(len(x.values) for x in self.columns)
        for i in range(0, sz):
            if well_name_column is not None:
                v = ''
                try:
                    well_id = well_column.values[i]
                    plate = None
                    if "Plate" in columns_by_name:  # FIXME
                        plate = columns_by_name["Plate"].values[i]
                    v = self.value_resolver.get_well_name(well_id, plate)
                except KeyError:
                    # The well name stays empty for this row.
                    log.warning(
                        'Skipping table row %d! Missing well row or column '
                        'for well name population!' % i, exc_info=True
                    )
                well_name_column.size = max(well_name_column.size, len(v))
                well_name_column.values.append(v)
            else:
                log.info('Missing well name column, skipping.')

            if image_name_column is not None and (
                    DatasetI is target_class or
                    ProjectI is target_class):
                # Image names are given: derive the image ids.
                iid = -1
                try:
                    iname = image_name_column.values[i]
                    did = None
                    if "Dataset Name" in columns_by_name:  # FIXME
                        did = columns_by_name["Dataset Name"].values[i]
                    iid = self.value_resolver.get_image_id_by_name(
                        iname, did)
                except KeyError:
                    log.warning(
                        "%s not found in image names" % iname)
                assert i == len(image_column.values)
                image_column.values.append(iid)
                image_name_column.size = max(
                    image_name_column.size, len(iname))
            elif image_name_column is not None and (
                    ScreenI is target_class or
                    PlateI is target_class):
                # Image ids are given: derive the image names.
                iid = image_column.values[i]
                log.info("Checking image %s", iid)
                pid = None
                if 'Plate' in columns_by_name:
                    pid = columns_by_name['Plate'].values[i]
                iname = self.value_resolver.get_image_name_by_id(iid, pid)
                image_name_column.values.append(iname)
                image_name_column.size = max(
                    image_name_column.size, len(iname)
                )
            else:
                log.info('Missing image name column, skipping.')

            if plate_name_column is not None:
                plate = columns_by_name['Plate'].values[i]  # FIXME
                v = self.value_resolver.get_plate_name_by_id(plate)
                plate_name_column.size = max(plate_name_column.size, len(v))
                plate_name_column.values.append(v)
            else:
                log.info('Missing plate name column, skipping.')

    def write_to_omero(self, batch_size=1000, loops=10, ms=500):
        """
        Write the populated columns to a new OMERO table and link it to
        the target object as a file annotation.

        :param batch_size: number of rows sent per ``addData`` call
        :param loops: not used by this method
        :param ms: not used by this method
        :raises MetadataError: if the table cannot be created
        """
        sf = self.client.getSession()
        group = self.value_resolver.target_group
        sr = sf.sharedResources()
        update_service = sf.getUpdateService()
        name = 'bulk_annotations'
        table = sr.newTable(1, name, {'omero.group': str(group)})
        if table is None:
            raise MetadataError(
                "Unable to create table: %s" % name)
        # Close the table even if initialisation or a batch fails.
        try:
            original_file = table.getOriginalFile()
            log.info('Created new table OriginalFile:%d'
                     % original_file.id.val)

            # Detach the values so that the table is initialised with
            # empty columns; all columns must hold the same row count.
            values = []
            length = -1
            for x in self.columns:
                if length < 0:
                    length = len(x.values)
                else:
                    assert length == len(x.values)
                values.append(x.values)
                x.values = None

            table.initialize(self.columns)
            log.info('Table initialized with %d columns.'
                     % (len(self.columns)))

            i = 0
            for pos in range(0, length, batch_size):
                i += 1
                for idx, x in enumerate(values):
                    self.columns[idx].values = x[pos:pos+batch_size]
                table.addData(self.columns)
                count = min(batch_size, length - pos)
                log.info('Added %s rows of column data (batch %s)',
                         count, i)
        finally:
            table.close()

        file_annotation = FileAnnotationI()
        file_annotation.ns = rstring(
            'openmicroscopy.org/omero/bulk_annotations')
        file_annotation.description = rstring(name)
        file_annotation.file = OriginalFileI(original_file.id.val, False)
        # NOTE(review): create_annotation_link is not defined in the part
        # of this class visible here - confirm where it is provided.
        link = self.create_annotation_link()
        link.parent = self.target_object
        link.child = file_annotation
        update_service.saveObject(link, {'omero.group': str(group)})
class _QueryContext(object):
    """
    Helper class container query methods
    """

    def __init__(self, client):
        """
        :param client: client object; `projection` calls
               `client.getSession().getQueryService()` on it
        """
        self.client = client

    def _batch(self, i, sz=1000):
        """
        Generate batches of size sz (by default 1000) from the input
        iterable `i`.

        :param i: any iterable, it is copied into a list first
        :param sz: maximum number of elements per batch
        :return: a generator of lists
        """
        i = list(i)  # Copying list to handle sets and modifications
        for pos in range(0, len(i), sz):
            yield i[pos:pos + sz]

    def _grouped_batch(self, groups, sz=1000):
        """
        In some cases groups of objects must be kept together.
        This method attempts to return up to sz objects without breaking
        up groups.
        If a single group is greater than sz the whole group is returned.
        Callers are responsible for checking the size of the returned batch.

        The input groups are never modified, each yielded batch is a new
        list.

        :param groups: an iterable of lists/tuples of objects
        :param sz: the preferred maximum number of objects per batch
        :return: a list of the next batch of objects, if a single group has
                 more than sz elements it will be returned as a single
                 over-sized batch
        """
        batch = []
        for group in groups:
            if (len(batch) == 0) or (len(batch) + len(group) <= sz):
                batch.extend(group)
            else:
                toyield = batch
                # Copy the group: binding `batch` to the caller's own
                # object would let the next extend() modify the caller's
                # list, and would fail outright for a tuple.
                batch = list(group)
                yield toyield
        if batch:
            yield batch

    def projection(self, q, ids, nss=None, batch_size=None):
        """
        Run a projection query designed to return scalars only

        :param q: The query to be projected, should contain either `:ids`
               or `:id` as a parameter
        :param ids: Either a list of IDs to be passed as `:ids` parameter
               or a single scalar id to be passed as `:id` parameter in
               query
        :param nss: Optional, Either a list of namespaces to be passed as
               `:nss` parameter or a single string to be passed as `:ns`
               parameter in query
        :param batch_size: Optional batch_size (default: all) defining the
               number of IDs that will be queried at once. Methods that
               expect to have more than several thousand input IDs should
               consider an appropriate batch size. By default, however, no
               batch size is applied since this could change the
               interpretation of the query string (e.g. use of `distinct`).
        :return: a flat list containing every value of every returned row
        """
        qs = self.client.getSession().getQueryService()
        params = omero.sys.ParametersI()

        # Anything without a length is treated as a single scalar id.
        try:
            nids = len(ids)
            single_id = None
        except TypeError:
            nids = 1
            single_id = ids

        if isinstance(nss, str):
            params.addString("ns", nss)
        elif nss:
            params.map['nss'] = rlist(rstring(s) for s in nss)

        log.debug("Query: %s len(IDs): %d namespace(s): %s", q, nids, nss)

        if single_id is not None:
            params.addId(single_id)
            rss = unwrap(qs.projection(q, params))
        elif batch_size is None:
            params.addIds(ids)
            rss = unwrap(qs.projection(q, params))
        else:
            rss = []
            for batch in self._batch(ids, sz=batch_size):
                # NOTE(review): relies on addIds replacing the previous
                # `ids` binding rather than appending to it -- confirm
                # against omero.sys.ParametersI.
                params.addIds(batch)
                rss.extend(unwrap(qs.projection(q, params)))

        return [r for rs in rss for r in rs]
def get_config(session, cfg=None, cfgid=None):
    """
    Load a bulk-annotations configuration and split it into its sections.

    :param session: Session passed to the loader when `cfgid` is used
    :param cfg: Location of a configuration file, ignored if `cfgid` is
           given
    :param cfgid: OriginalFile ID of a configuration file
    :return: A tuple (defaults, columns, advanced) holding the values of
             the `defaults`, `columns` and `advanced` keys of the loaded
             configuration; `advanced` falls back to an empty dict
    :raises Exception: if neither `cfg` nor `cfgid` is given, or if both
            the `defaults` and `columns` sections are empty
    """
    if not cfgid and not cfg:
        raise Exception("Configuration file required")

    if cfgid:
        loaded = pydict_text_io.load(
            'OriginalFile:%d' % cfgid, session=session)
    else:
        loaded = pydict_text_io.load(cfg)

    defaults = loaded.get("defaults")
    columns = loaded.get("columns")
    advanced = loaded.get("advanced", {})

    if not (defaults or columns):
        raise Exception(
            "Configuration defaults and columns were both empty")
    return defaults, columns, advanced
class BulkToMapAnnotationContext(_QueryContext):
    """
    Processor for creating MapAnnotations from BulkAnnotations.
    """

    def __init__(self, client, target_object, file=None, fileid=None,
                 cfg=None, cfgid=None, attach=False, options=None):
        """
        :param client: OMERO client object
        :param target_object: The object to be annotated
        :param file: Not supported
        :param fileid: The OriginalFile ID of the bulk-annotations table,
               default is to use a bulk-annotation attached to
               target_object
        :param cfg: Path to a configuration file, ignored if cfgid given
        :param cfgid: OriginalFile ID of configuration file, either cfgid
               or cfg must be given
        :param attach: Not used by this class
        :param options: Optional dict of options, the `ns` entry (a
               namespace or list of namespaces) restricts which
               namespaces are processed
        :raises MetadataError: if `file` is given without `fileid`, or if
                no bulk-annotations file can be found
        """
        super(BulkToMapAnnotationContext, self).__init__(client)

        if file and not fileid:
            raise MetadataError('file not supported for %s' % type(self))

        # Reload object to get .details
        self.target_object = self.get_target(target_object)
        if fileid:
            self.ofileid = fileid
        else:
            self.ofileid = self.get_bulk_annotation_file()
        if not self.ofileid:
            raise MetadataError("Unable to find bulk-annotations file")

        self.default_cfg, self.column_cfgs, self.advanced_cfgs = \
            get_config(self.client.getSession(), cfg=cfg, cfgid=cfgid)

        # Maps a namespace to its list of primary keys
        self.pkmap = {}
        self.mapannotations = MapAnnotationManager()
        self._init_namespace_primarykeys()

        self.options = {}
        if options:
            self.options = options

    def _init_namespace_primarykeys(self):
        """
        Read `primary_group_keys` from the advanced configuration, record
        the keys of each namespace in `self.pkmap` and load the matching
        annotations into `self.mapannotations`.

        Does nothing if the advanced configuration has no
        `primary_group_keys` entry.

        :raises Exception: if an entry lacks `namespace` or `keys`, if
                `keys` is not a list, or if a namespace is repeated
        """
        try:
            pkcfg = self.advanced_cfgs['primary_group_keys']
        except (TypeError, KeyError):
            return None

        for pk in pkcfg:
            try:
                gns = pk['namespace']
                keys = pk['keys']
            except KeyError:
                raise Exception('Invalid primary_group_keys: %s' % pk)
            if keys:
                if not isinstance(keys, list):
                    raise Exception('keys must be a list')
                if gns in self.pkmap:
                    raise Exception('Duplicate namespace in keys: %s' % gns)

                self.pkmap[gns] = keys
                self.mapannotations.add_from_namespace_query(
                    self.client.getSession(), gns, keys)
                log.debug('Loaded ns:%s primary-keys:%s', gns, keys)

    def _get_ns_primary_keys(self, ns):
        """
        :return: The primary keys configured for namespace `ns`, or None
        """
        return self.pkmap.get(ns, None)

    def _get_selected_namespaces(self):
        """
        :return: The `ns` option as a list of namespaces, or None if the
                 option was not given
        """
        try:
            nss = self.options['ns']
            if isinstance(nss, list):
                return nss
            return [nss]
        except KeyError:
            return None

    def get_target(self, target_object):
        """
        Fetch `target_object` again through the query service.

        :return: The result of `IQuery.find` for the object's type and ID
        """
        qs = self.client.getSession().getQueryService()
        return qs.find(target_object.ice_staticId().split('::')[-1],
                       target_object.id.val)

    def get_bulk_annotation_file(self):
        """
        Look up the bulk-annotations file linked to the target object.

        :return: The last file ID returned by the query (which orders by
                 link ID), or None if there is no such annotation
        """
        otype = self.target_object.ice_staticId().split('::')[-1]
        q = ("SELECT child.file.id FROM %sAnnotationLink link "
             "WHERE parent.id=:id AND child.ns=:ns ORDER by id") % otype
        r = self.projection(q, unwrap(self.target_object.getId()),
                            omero.constants.namespaces.NSBULKANNOTATIONS)
        if r:
            return r[-1]

    def _create_cmap_annotation(self, targets, rowkvs, ns):
        """
        Build a `CanonicalMapAnnotation` from the key-values of one row.

        :param targets: Iterable of (type-name, ID) pairs to add as
               parents
        :param rowkvs: Iterable of (key, value) pairs where value is
               either a single value or a list/tuple of values
        :param ns: Namespace of the new annotation
        :return: The `CanonicalMapAnnotation`, or None if `rowkvs`
                 produced no key-value pairs
        """
        pks = self._get_ns_primary_keys(ns)

        ma = MapAnnotationI()
        ma.setNs(rstring(ns))
        mv = []
        for k, vs in rowkvs:
            if not isinstance(vs, (tuple, list)):
                vs = [vs]
            mv.extend(NamedValue(k, str(v)) for v in vs)

        if not mv:
            log.debug('Empty MapValue, ignoring: %s', rowkvs)
            return

        ma.setMapValue(mv)

        log.debug('Creating CanonicalMapAnnotation ns:%s pks:%s kvs:%s',
                  ns, pks, rowkvs)
        cma = CanonicalMapAnnotation(ma, primary_keys=pks)
        for (otype, oid) in targets:
            cma.add_parent(otype, oid)
        return cma

    def _create_map_annotation_links(self, cma):
        """
        Converts a `CanonicalMapAnnotation` object into OMERO
        `MapAnnotations` and `AnnotationLinks`. `AnnotationLinks` will have
        the parent and child set, where the child is a single
        `MapAnnotation` object shared amongst all links.

        If the number of `AnnotationLinks` is small callers may choose to
        save the links and `MapAnnotation` using `UpdateService` directly,
        ignoring the second element of the tuple.

        If the number of `AnnotationLinks` is large the caller should first
        save the `MapAnnotation`, call `setChild` on all `AnnotationLinks`
        to reference the saved `MapAnnotation`, and finally save all
        `AnnotationLinks`.

        This complexity is unfortunately required to avoid going over the
        Ice message size.

        :return: A tuple of (`AnnotationLinks`, `MapAnnotation`)
        """
        links = []
        ma = cma.get_mapann()
        for (otype, oid) in cma.get_parents():
            link = getattr(omero.model, '%sAnnotationLinkI' % otype)()
            link.setParent(getattr(omero.model, '%sI' % otype)(oid, False))
            link.setChild(ma)
            links.append(link)
        return links, ma

    def _save_annotation_links(self, links):
        """
        Save `AnnotationLinks` including the child annotation in one go.
        See `_create_map_annotation_links`

        :return: The array returned by `saveAndReturnArray`
        """
        sf = self.client.getSession()
        group = str(self.target_object.details.group.id)
        update_service = sf.getUpdateService()
        arr = update_service.saveAndReturnArray(
            links, {'omero.group': str(group)})
        return arr

    def _save_annotation_and_links(self, links, ann, batch_size):
        """
        Save a single `Annotation`, followed by the `AnnotationLinks` to
        that Annotation and return the number of links saved.

        All `AnnotationLinks` must have `ann` as their child.
        Links will be saved in batches of `batch_size`.

        See `_create_map_annotation_links`

        :return: The number of links saved
        """
        sf = self.client.getSession()
        group = str(self.target_object.details.group.id)
        update_service = sf.getUpdateService()
        ctx = {'omero.group': str(group)}

        # Save the annotation in the same group as its links, as
        # `_save_annotation_links` does, so that the annotation and the
        # links pointing at it cannot end up in different groups.
        annobj = update_service.saveAndReturnObject(ann, ctx)
        annobj.unload()

        sz = 0
        for batch in self._batch(links, sz=batch_size):
            for link in batch:
                link.setChild(annobj)
            update_service.saveArray(batch, ctx)
            sz += len(batch)
        return sz

    def parse(self):
        """
        Open the bulk-annotations table and populate the map-annotations
        from it. The table is always closed afterwards.

        :return: The result of `populate`
        :raises MetadataError: if the table could not be opened
        """
        tableid = self.ofileid
        sr = self.client.getSession().sharedResources()
        log.debug('Loading table OriginalFile:%d', self.ofileid)
        table = sr.openTable(omero.model.OriginalFileI(tableid, False))
        # An explicit check instead of `assert`, which is removed when
        # Python runs with -O.
        if not table:
            raise MetadataError(
                "Unable to open table OriginalFile:%d" % tableid)

        try:
            return self.populate(table)
        finally:
            table.close()

    def _get_additional_targets(self, target):
        """
        Return further objects that should receive the same annotation as
        `target`.

        If the advanced configuration enables `well_to_images` and the
        target is a Well, these are the images of the Well's WellSamples.

        :param target: A (type-name, ID) pair
        :return: A list of ('Image', ID) pairs, possibly empty
        """
        iids = []
        try:
            if self.advanced_cfgs['well_to_images'] and target[0] == 'Well':
                q = 'SELECT image.id FROM WellSample WHERE well.id=:id'
                iids = self.projection(q, target[1])
        except (KeyError, TypeError):
            # Option missing or no advanced configuration at all
            pass
        return [('Image', i) for i in iids]

    def populate(self, table):
        """
        Read all rows of `table` and register one map-annotation per row
        and transformer with `self.mapannotations`.

        Only Image and Well ID columns are used as annotation targets.
        Rows without any positive ID are skipped.

        :param table: An open OMERO table
        :raises MapAnnotationPrimaryKeyException: if a primary key is
                missing, unless the advanced configuration sets
                `ignore_missing_primary_key`
        """
        def idcolumn_to_omeroclass(col):
            clsname = re.search(r'::(\w+)Column$', col.ice_staticId()).group(1)
            return str(clsname)

        try:
            ignore_missing_primary_key = self.advanced_cfgs[
                'ignore_missing_primary_key']
        except (KeyError, TypeError):
            ignore_missing_primary_key = False

        nrows = table.getNumberOfRows()
        data = table.readCoordinates(list(range(nrows)))

        # Don't create annotations on higher-level objects
        # idcoltypes = set(HeaderResolver.screen_keys.values())
        idcoltypes = set((ImageColumn, WellColumn))
        idcols = []
        for n, col in enumerate(data.columns):
            if col.__class__ in idcoltypes:
                omeroclass = idcolumn_to_omeroclass(col)
                idcols.append((omeroclass, n))

        headers = [c.name for c in data.columns]
        if self.default_cfg or self.column_cfgs:
            kvgl = KeyValueGroupList(
                headers, self.default_cfg, self.column_cfgs)
            trs = kvgl.get_transformers()
        else:
            trs = [KeyValueListPassThrough(headers)]

        selected_nss = self._get_selected_namespaces()
        for row in zip(*(c.values for c in data.columns)):
            targets = []
            for omerotype, n in idcols:
                if row[n] > 0:
                    # Be aware this has implications for client UIs, since
                    # Wells and Images may be treated as one when it comes
                    # to annotations
                    obj = (omerotype, int(row[n]))
                    targets.append(obj)
                    targets.extend(self._get_additional_targets(obj))
                else:
                    log.warning(
                        "Invalid Id:%d found in row %s", row[n], row)
            if targets:
                for tr in trs:
                    rowkvs = tr.transform(row)
                    ns = tr.name
                    if not ns:
                        ns = omero.constants.namespaces.NSBULKANNOTATIONS
                    if (selected_nss is not None) and (ns not in selected_nss):
                        log.debug('Skipping namespace: %s', ns)
                        continue
                    try:
                        cma = self._create_cmap_annotation(targets, rowkvs, ns)
                        if cma:
                            self.mapannotations.add(cma)
                            log.debug('Added MapAnnotation: %s', cma)
                        else:
                            log.debug(
                                'Empty MapAnnotation: %s', rowkvs)
                    except MapAnnotationPrimaryKeyException as e:
                        c = ''
                        if ignore_missing_primary_key:
                            c = ' (Continuing)'
                        log.error(
                            'Missing primary keys%s: %s %s ', c, e, rowkvs)
                        if not ignore_missing_primary_key:
                            raise

    def _write_log(self, text):
        """
        Log `text` at debug level with a fixed prefix.
        """
        log.debug("BulkToMapAnnotation:write_to_omero - %s" % text)

    def write_to_omero(self, batch_size=1000, loops=10, ms=500):
        """
        Save all map-annotations held by `self.mapannotations` together
        with the links to their parents.

        Annotations with fewer than `batch_size` links are accumulated
        and saved together with their links. An annotation with
        `batch_size` or more links is saved on its own first, followed by
        its links in batches.

        :param batch_size: Preferred maximum number of links per save
        :param loops: Not used by this method
        :param ms: Not used by this method
        """
        i = 0
        cur = 0
        links = []

        # This may be many-links-to-one-new-mapann so everything must
        # be kept together to avoid duplication of the mapann
        self._write_log("Start")
        cmas = self.mapannotations.get_map_annotations()
        self._write_log("found %s annotations" % len(cmas))
        for cma in cmas:
            batch, ma = self._create_map_annotation_links(cma)
            self._write_log("found batch of size %s" % len(batch))
            if len(batch) < batch_size:
                links.append(batch)
                cur += len(batch)
                # Flush once enough links have been accumulated
                if cur > 10 * batch_size:
                    self._write_log("running batches. accumulated: %s" % cur)
                    i += self._write_links(links, batch_size, i)
                    links = []
                    cur = 0
            else:
                self._write_log("running grouped_batch")
                sz = self._save_annotation_and_links(batch, ma, batch_size)
                i += sz
                log.info('Created/linked %d MapAnnotations (total %s)',
                         sz, i)
        # Handle any remaining writes
        i += self._write_links(links, batch_size, i)

    def _write_links(self, links, batch_size, i):
        """
        Save groups of links without splitting any group across saves.

        :param links: A list of lists of links, each inner list holds the
               links of one annotation
        :param batch_size: Preferred maximum number of links per save
        :param i: Running total saved so far, only used for logging
        :return: The number of links saved by this call
        """
        count = 0
        for batch in self._grouped_batch(links, sz=batch_size):
            self._write_log("batch size: %s" % len(batch))
            arr = self._save_annotation_links(batch)
            count += len(arr)
            log.info('Created/linked %d MapAnnotations (total %s)',
                     len(arr), i+count)
        return count
class DeleteMapAnnotationContext(_QueryContext):
    """
    Processor for deleting MapAnnotations in the configured namespaces
    (by default the BulkAnnotations namespaces) from the target object
    and the objects below it.

    The hierarchy is resolved for these types:
    Screen Plate PlateAcquisition Well WellSample Image Project Dataset

    WellSamples are only used to find Images, their annotations are not
    looked up.
    """

    def __init__(self, client, target_object, file=None, fileid=None,
                 cfg=None, cfgid=None, attach=False, options=None):
        """
        :param client: OMERO client object
        :param target_object: The object to be processed
        :param file, fileid: Ignored
        :param cfg, cfgid: Configuration file
        :param attach: Delete all attached config files (recursive,
               default False)
        :param options: Optional dict of options, the `ns` entry (a
               namespace or list of namespaces) overrides the namespaces
               taken from the configuration
        """
        super(DeleteMapAnnotationContext, self).__init__(client)
        self.target_object = target_object
        self.attach = attach

        if cfg or cfgid:
            self.default_cfg, self.column_cfgs, self.advanced_cfgs = \
                get_config(self.client.getSession(), cfg=cfg, cfgid=cfgid)
        else:
            self.default_cfg = None
            self.column_cfgs = None
            self.advanced_cfgs = None

        self.options = {}
        if options:
            self.options = options

        # Filled in by populate(); created here so that write_to_omero()
        # is a no-op rather than an AttributeError if nothing was parsed.
        self.mapannids = dict()
        self.fileannids = set()

    def parse(self):
        """
        Collect the IDs to delete, see `populate`.
        """
        return self.populate()

    def _get_annotations_for_deletion(
            self, objtype, objids, anntype, nss, getlink=False):
        """
        Find annotations of one type linked to a set of objects.

        :param objtype: Name of the parent type, e.g. `Image`
        :param objids: IDs of the parent objects, may be None or empty
        :param anntype: Name of the annotation class, e.g. `MapAnnotation`
        :param nss: Namespaces the annotations must be in
        :param getlink: If True return the IDs of the annotation links,
               otherwise the IDs of the annotations themselves
        :return: A list of IDs, empty if `objids` is None or empty
        """
        r = []
        if getlink:
            fetch = ''
        else:
            fetch = 'child.'
        if objids:
            q = ("SELECT %sid FROM %sAnnotationLink WHERE "
                 "child.class=%s AND parent.id in (:ids) "
                 "AND child.ns in (:nss)")
            r = self.projection(q % (fetch, objtype, anntype), objids, nss,
                                batch_size=10000)
            log.debug("%s: %d %s(s)", objtype, len(set(r)), anntype)
        return r

    def _get_configured_namespaces(self):
        """
        :return: The `ns` option as a list if it was given. Otherwise the
                 two bulk-annotations namespaces plus every
                 `group.namespace` found in the column configuration.
        """
        try:
            nss = self.options['ns']
            if isinstance(nss, list):
                return nss
            return [nss]
        except KeyError:
            pass

        nss = set([
            omero.constants.namespaces.NSBULKANNOTATIONS,
            NSBULKANNOTATIONSCONFIG,
        ])
        if self.column_cfgs:
            for c in self.column_cfgs:
                try:
                    ns = c['group']['namespace']
                    nss.add(ns)
                except KeyError:
                    continue
        return list(nss)

    def populate(self):
        """
        Resolve the IDs of the target object and of the objects below it,
        then collect the annotation link IDs (and, if `attach` is set, the
        config FileAnnotation IDs) to be deleted into `self.mapannids`
        and `self.fileannids`.
        """
        # Hierarchy: Screen, Plate, {PlateAcquistion, Well}, WellSample, Image
        parentids = {
            "Screen": None,
            "Plate": None,
            "PlateAcquisition": None,
            "Well": None,
            "WellSample": None,
            "Image": None,
            "Dataset": None,
            "Project": None,
        }

        target = self.target_object
        ids = [unwrap(target.getId())]

        if isinstance(target, ScreenI):
            parentids["Screen"] = ids
        if parentids["Screen"]:
            q = ("SELECT child.id FROM ScreenPlateLink "
                 "WHERE parent.id in (:ids)")
            parentids["Plate"] = self.projection(q, parentids["Screen"])

        if isinstance(target, PlateI):
            parentids["Plate"] = ids
        if parentids["Plate"]:
            q = "SELECT id FROM PlateAcquisition WHERE plate.id IN (:ids)"
            parentids["PlateAcquisition"] = self.projection(
                q, parentids["Plate"])
            q = "SELECT id FROM Well WHERE plate.id IN (:ids)"
            parentids["Well"] = self.projection(q, parentids["Plate"])

        if isinstance(target, PlateAcquisitionI):
            parentids["PlateAcquisition"] = ids
        if parentids["PlateAcquisition"] and not isinstance(target, PlateI):
            # WellSamples are linked to PlateAcqs and Plates, so only get
            # if they haven't been obtained via a Plate
            # Also note that we do not get Wells if the parent is a
            # PlateAcquisition since this only refers to the fields in
            # the well
            q = "SELECT id FROM WellSample WHERE plateAcquisition.id IN (:ids)"
            parentids["WellSample"] = self.projection(
                q, parentids["PlateAcquisition"])

        if isinstance(target, WellI):
            parentids["Well"] = ids
        if parentids["Well"]:
            q = "SELECT id FROM WellSample WHERE well.id IN (:ids)"
            parentids["WellSample"] = self.projection(
                q, parentids["Well"], batch_size=10000)

        if isinstance(target, WellSampleI):
            parentids["WellSample"] = ids
        if parentids["WellSample"]:
            q = "SELECT image.id FROM WellSample WHERE id IN (:ids)"
            parentids["Image"] = self.projection(
                q, parentids["WellSample"], batch_size=10000)

        if isinstance(target, ProjectI):
            parentids["Project"] = ids
        if parentids["Project"]:
            q = ("SELECT ds.id FROM ProjectDatasetLink link "
                 "join link.parent prj "
                 "join link.child as ds WHERE prj.id IN (:ids)")
            parentids["Dataset"] = self.projection(q, parentids["Project"])

        if isinstance(target, DatasetI):
            parentids["Dataset"] = ids
        if parentids["Dataset"]:
            q = ("SELECT i.id FROM DatasetImageLink link "
                 "join link.parent ds "
                 "join link.child as i WHERE ds.id IN (:ids)")
            parentids["Image"] = self.projection(q, parentids["Dataset"])

        if isinstance(target, ImageI):
            parentids["Image"] = ids

        # TODO: This should really include:
        #    raise Exception("Unknown target: %s" % target.__class__.__name__)

        # A conditional expression is used so that an empty ID list is
        # reported as 0; only an unresolved type is reported as NA.
        log.debug("Parent IDs: %s",
                  ["%s:%s" % (k, len(v) if v is not None else "NA")
                   for k, v in parentids.items()])

        self.mapannids = dict()
        self.fileannids = set()
        not_annotatable = ('WellSample',)

        # Currently deleting AnnotationLinks should automatically delete
        # orphaned MapAnnotations:
        # https://github.com/openmicroscopy/openmicroscopy/pull/4907
        # Note this may change in future:
        # https://trello.com/c/Gnoi9mTM/141-never-delete-orphaned-map-annotations
        nss = self._get_configured_namespaces()
        for objtype, objids in parentids.items():
            if objtype in not_annotatable:
                continue
            r = self._get_annotations_for_deletion(
                objtype, objids, 'MapAnnotation', nss, getlink=True)
            if r:
                self.mapannids.setdefault(objtype, set()).update(r)

        log.info("Total MapAnnotationLinks in %s: %d",
                 nss, sum(len(v) for v in self.mapannids.values()))
        log.debug("MapAnnotationLinks in %s: %s", nss, self.mapannids)

        if self.attach and NSBULKANNOTATIONSCONFIG in nss:
            for objtype, objids in parentids.items():
                if objtype in not_annotatable:
                    continue
                r = self._get_annotations_for_deletion(
                    objtype, objids, 'FileAnnotation',
                    [NSBULKANNOTATIONSCONFIG])
                self.fileannids.update(r)

            log.info("Total FileAnnotations in %s: %d",
                     [NSBULKANNOTATIONSCONFIG], len(set(self.fileannids)))
            log.debug("FileAnnotations in %s: %s",
                      [NSBULKANNOTATIONSCONFIG], self.fileannids)

    def write_to_omero(self, batch_size=1000, loops=10, ms=500):
        """
        Delete the annotation links and FileAnnotations collected by
        `populate`, in batches.

        :param batch_size: Maximum number of IDs per delete command
        :param loops: Passed on to `client.submit`
        :param ms: Passed on to `client.submit`
        """
        for objtype, maids in self.mapannids.items():
            for batch in self._batch(maids, sz=batch_size):
                self._write_to_omero_batch(
                    {"%sAnnotationLink" % objtype: batch}, loops, ms)
        for batch in self._batch(self.fileannids, sz=batch_size):
            self._write_to_omero_batch({"FileAnnotation": batch}, loops, ms)

    def _write_to_omero_batch(self, to_delete, loops=10, ms=500):
        """
        Submit one `Delete2` command and log what was deleted.

        :param to_delete: Dict of type-name to list of IDs
        :param loops: Passed on to `client.submit`
        :param ms: Passed on to `client.submit`
        :raises Exception: wrapping the error of a failed command
        """
        delCmd = omero.cmd.Delete2(targetObjects=to_delete)
        try:
            callback = self.client.submit(
                delCmd, loops=loops, ms=ms, failontimeout=True)
        except CmdError as ce:
            log.error("Failed to delete: %s" % to_delete)
            raise Exception(ce.err)

        # At this point, we're sure that there's a response OR
        # an exception has been thrown (likely LockTimeout)
        rsp = callback.getResponse()
        try:
            deleted = rsp.deletedObjects
            for k, v in deleted.items():
                log.info("Deleted: %s %d", k, len(v))
                log.debug("Deleted: %s %s", k, v)
        except AttributeError:
            # The response has no usable `deletedObjects`
            log.error("Delete failed: %s", rsp)
def parse_target_object(target_object):
    """
    Convert a `Type:ID` string into an unloaded OMERO model object.

    :param target_object: String such as `Plate:123`; the type must be
           one of `Dataset`, `Plate` or `Screen`
    :return: An unloaded `DatasetI`, `PlateI` or `ScreenI` with that ID
    :raises ValueError: if the string does not have exactly two
            `:`-separated parts, the type is not supported, or the ID of
            a supported type is not an integer
    """
    kind, ident = target_object.split(':')
    if kind not in ('Dataset', 'Plate', 'Screen'):
        raise ValueError('Unsupported target object: %s' % target_object)

    # The ID is only converted once the type is known to be supported
    oid = int(ident)
    if kind == 'Dataset':
        return DatasetI(oid, False)
    if kind == 'Plate':
        return PlateI(oid, False)
    return ScreenI(oid, False)
def parse_column_types(column_type_list):
    """
    Validate a list of column type names and return them in lower case.

    :param column_type_list: Iterable of column type names, compared
           case-insensitively against the keys of `COLUMN_TYPES`
    :return: A list of the lower-cased names, in input order
    :raises MetadataError: on the first name that is not in
            `COLUMN_TYPES`
    """
    column_types = []
    for column_type in column_type_list:
        lowered = column_type.lower()
        if lowered not in COLUMN_TYPES:
            message = (
                "\nColumn type '%s' unknown.\nChoose from following: %s"
                % (column_type, ",".join(COLUMN_TYPES.keys())))
            raise MetadataError(message)
        column_types.append(lowered)
    return column_types
if __name__ == "__main__":
    # Command line entry point:
    #   [options] <Type:ID> <file>
    # NOTE(review): `usage` is defined elsewhere in this file and is
    # presumed not to return (the code after each call relies on that)
    # -- confirm.
    try:
        # "t:" is included so that the -t (thread count) option handled
        # below can actually be given on the command line.
        options, args = getopt(
            sys.argv[1:], "s:p:u:w:k:c:t:id", ["columns="])
    except GetoptError as e:
        usage(e.msg)

    try:
        target_object, file = args
        target_object = parse_target_object(target_object)
    except ValueError:
        usage('Target object and file must be a specified!')

    username = None
    password = None
    hostname = 'localhost'
    port = 4064  # SSL
    info = False
    session_key = None
    logging_level = logging.INFO
    thread_count = 1
    column_types = None
    context_class = ParsingContext
    for option, argument in options:
        if option == "-u":
            username = argument
        elif option == "-w":
            password = argument
        elif option == "-s":
            hostname = argument
        elif option == "-p":
            port = int(argument)
        elif option == "-i":
            info = True
        elif option == "-k":
            session_key = argument
        elif option == "-d":
            logging_level = logging.DEBUG
        elif option == "-t":
            thread_count = int(argument)
        elif option == "--columns":
            column_types = parse_column_types(argument.split(','))
        elif option == "-c":
            try:
                context_class = globals()[argument]
            except KeyError:
                usage("Invalid context class")
    if session_key is None and username is None:
        usage("Username must be specified!")
    if session_key is None and hostname is None:
        usage("Host name must be specified!")
    if session_key is None and password is None:
        password = getpass()

    logging.basicConfig(level=logging_level)
    client = client(hostname, port)
    client.setAgent("OMERO.populate_metadata")
    client.enableKeepAlive(60)
    try:
        if session_key is not None:
            client.joinSession(session_key)
            client.sf.detachOnDestroy()
        else:
            client.createSession(username, password)

        log.debug('Creating pool of %d threads' % thread_count)
        thread_pool = ThreadPool(thread_count)
        # NOTE(review): `column_types` is always passed as a keyword;
        # the two context classes visible in this part of the file do
        # not accept it, so `-c` with those would fail here -- confirm
        # which classes are meant to be selectable.
        ctx = context_class(
            client, target_object, file, column_types=column_types)
        ctx.parse()
        if not info:
            ctx.write_to_omero()
    finally:
        # Always close the session, also when parsing or writing fails
        client.closeSession()