Source code for plugins.import

#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Copyright (C) 2009-2016 Glencoe Software, Inc. All Rights Reserved.
# Use is subject to license terms supplied in LICENSE.txt
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

"""
   Startup plugin for command-line importer.

"""

from io import BytesIO
import os
import csv
import sys
import shlex
import requests
import re
from zipfile import ZipFile


from omero.cli import BaseControl, CLI
import omero.java
from omero.util import get_omero_user_cache_dir
from omero.util.temp_files import gettempdir
from omero_ext.argparse import SUPPRESS
from omero_ext.path import path

START_CLASS = "ome.formats.importer.cli.CommandLineImporter"
TEST_CLASS = "ome.formats.test.util.TestEngine"

HELP = """Run the Java-based command-line importer

This is a Python wrapper around the Java importer. Login is handled by Python
OMERO.cli. To see more options, use "--javahelp".

Options marked with "**" are passed strictly to Java. If they interfere with
any of the Python arguments, you may need to end precede your arguments with a
"--".

Bulk imports:

Rather than passing one or more files to the import command, a single
dictionary-like file (e.g. yml or json) can be passed to the `--bulk`
argument. Most keys in the bulk file will be treated like additional
command-line arguments. Special keys include:

 * columns      A list of columns for parsing the value of path
 * continue     Like the "-c" changes error handling
 * dry_run      If true, print out additional arguments rather than run them.
                If another string other than false, use as a template for
                storing the import commands. (e.g. /tmp/%s.sh)
 * include      Relative path (from the bulk file) of a parent bulk file
 * path         A file which will be parsed line by line based on its file
                ending. Lines containing zero or more keys along with a
                single file to be imported. Options for formats include:
                  - .tsv and .csv files will be parsed by the existing library
                  - other files will be parsed with shlex
                  - unless no columns are specified, in which case each line
                    is treated as a file

"""
EXAMPLES = """
Examples:

  # Display help
  $ omero import -h
  # Import foo.tiff using current login
  $ omero import ~/Data/my_file.dv
  # Import foo.tiff using input credentials
  $ omero import -s localhost -u user -w password foo.tiff
  # Set Java debugging level to ALL
  $ omero import foo.tiff -- --debug=ALL
  # Display used files for importing foo.tiff
  $ omero import foo.tiff -f
  # Limit debugging output
  $ omero import -- --debug=ERROR foo.tiff

For additional information, see:
https://docs.openmicroscopy.org/latest/omero/users/cli/import.html
Report bugs at https://forum.image.sc/
"""
TESTHELP = """Run the Importer TestEngine suite (devs-only)"""
DEBUG_CHOICES = ["ALL", "DEBUG", "ERROR", "FATAL", "INFO", "TRACE", "WARN"]
OUTPUT_CHOICES = ["ids", "legacy", "yaml"]
SKIP_CHOICES = ['all', 'checksum', 'minmax', 'thumbnails', 'upgrade']
NO_ARG = object()

OMERO_JAVA_ZIP = (
    'https://downloads.openmicroscopy.org/omero/{version}/OMERO.java.zip'
)


[docs] class CommandArguments(object): def __init__(self, ctx, args): self.__ctx = ctx self.__args = args self.__accepts = set() self.__added = dict() self.__java_initial = list() self.__java_additional = list() self.__py_initial = list() self.__py_additional = list() # Python arguments self.__py_keys = ( "javahelp", "skip", "file", "errs", "logback", "port", "password", "group", "create", "func", "bulk", "prog", "user", "key", "path", "logprefix", "JAVA_DEBUG", "quiet", "server", "depth", "clientdir", "fetch_jars", "sudo") self.set_login_arguments(ctx, args) self.set_skip_arguments(args) for key in vars(args): self.__accepts.add(key) val = getattr(args, key) if key in self.__py_keys: # Place the Python elements on the CommandArguments # instance so that it behaves like `args` setattr(self, key, val) self.append_arg(self.__py_initial, key, val) elif not val: # If there's no value, do nothing pass else: self.append_arg(self.__java_initial, key, val)
[docs] def append_arg(self, cmd_list, key, val=NO_ARG): arg_list = self.build_arg_list(key, val) cmd_list.extend(arg_list)
[docs] def reset_arg(self, cmd_list, idx, key, val=NO_ARG): arg_list = self.build_arg_list(key, val) cmd_list[idx:idx+len(arg_list)] = arg_list
[docs] def build_arg_list(self, key, val=NO_ARG): arg_list = [] if len(key) == 1: arg_list.append("-"+key) if val != NO_ARG: if isinstance(val, str): arg_list.append(val) else: key = key.replace("_", "-") if val == NO_ARG: arg_list.append("--%s" % key) elif isinstance(val, str): arg_list.append( "--%s=%s" % (key, val)) else: arg_list.append("--%s" % key) return arg_list
[docs] def set_path(self, path): if not isinstance(path, list): self.__ctx.die(202, "Path is not a list") else: self.path = path
[docs] def java_args(self): rv = list() rv.extend(self.__java_initial) rv.extend(self.__java_additional) rv.extend(self.path) if self.JAVA_DEBUG: # Since "args.debug" is used by omero/cli.py itself, # uses of "--debug" *after* the `import` command are # handled by placing them in this special variable. rv.append("--debug=%s" % self.JAVA_DEBUG) return rv
[docs] def initial_args(self): rv = list() rv.extend(self.__py_initial) rv.extend(self.__java_initial) return rv
[docs] def added_args(self): rv = list() rv.extend(self.__py_additional) rv.extend(self.__java_additional) rv.extend(self.path) return rv
[docs] def accepts(self, key): return key in self.__accepts
[docs] def add(self, key, val=NO_ARG): idx = None if key in self.__added: idx = self.__added[key] if key in self.__py_keys: # First we check if this is a Python argument, in which # case it's set directly on the instance itself. This # may need to be later set elsewhere if multiple bulk # files are supported. setattr(self, key, val) where = self.__py_additional elif not self.accepts(key): self.__ctx.die(200, "Unknown argument: %s" % key) else: where = self.__java_additional if idx is None: idx = len(where) self.append_arg(where, key, val) self.__added[key] = idx else: self.reset_arg(where, idx, key, val)
[docs] def set_login_arguments(self, ctx, args): """Set the connection arguments""" if args.javahelp: self.__java_initial.append("-h") # Connection is required unless help arguments or -f is passed connection_required = ("-h" not in self.__java_initial and not args.f and not args.advanced_help) if connection_required: client = ctx.conn(args) host = client.getProperty("omero.host") port = client.getProperty("omero.port") session = client.getSessionId() self.__java_initial.extend(["-s", host]) self.__java_initial.extend(["-p", port]) self.__java_initial.extend(["-k", session])
[docs] def set_skip_arguments(self, args): """Set the arguments to skip steps during import""" if not args.skip: return self.set_skip_values(args.skip)
[docs] def set_skip_values(self, skip): """Set the arguments to skip steps during import""" if ('all' in skip or 'checksum' in skip): self.__java_initial.append("--checksum-algorithm=File-Size-64") if ('all' in skip or 'thumbnails' in skip): self.__java_initial.append("--no-thumbnails") if ('all' in skip or 'minmax' in skip): self.__java_initial.append("--no-stats-info") if ('all' in skip or 'upgrade' in skip): self.__java_initial.append("--no-upgrade-check")
[docs] def open_files(self, mode="w"): # Open file handles for stdout/stderr if applicable out = self.open_log(self.__args.file, self.__args.logprefix, mode=mode) err = self.open_log(self.__args.errs, self.__args.logprefix, mode=mode) return out, err
[docs] def open_log(self, file, prefix=None, mode="w"): if not file: return None if prefix: file = os.path.sep.join([prefix, file]) file = os.path.abspath(file) dir = os.path.dirname(file) if not os.path.exists(dir): os.makedirs(dir) return open(file, mode)
[docs] class ImportControl(BaseControl): COMMAND = [START_CLASS] def _configure(self, parser): parser.add_login_arguments() parser.add_argument( "--javahelp", "--java-help", action="store_true", help="Show the Java help text") # The following arguments are strictly used by Python # The "---" form is kept for backwards compatibility. py_group = parser.add_argument_group( 'Python arguments', 'Optional arguments which are used to configure import.') def add_python_argument(*args, **kwargs): py_group.add_argument(*args, **kwargs) for name, help in ( ("bulk", "Bulk YAML file for driving multiple imports"), ("logprefix", "Directory or file prefix for --file and --errs"), ( "file", "File for storing the standard output from the Java process" ), ( "errs", "File for storing the standard error from the Java process" ) ): add_python_argument("--%s" % name, nargs="?", help=help) add_python_argument("---%s" % name, nargs="?", help=SUPPRESS) add_python_argument( "--clientdir", type=str, help="Path to the directory containing the client JARs. " " Default: lib/client") add_python_argument( "--logback", type=str, help="Path to a logback xml file. " " Default: etc/logback-cli.xml") add_python_argument( "--fetch-jars", type=str, help="Download OMERO.java jars by version or URL, then exit") # The following arguments are strictly passed to Java name_group = parser.add_argument_group( 'Naming arguments', 'Optional arguments passed strictly to Java.') def add_java_name_argument(*args, **kwargs): name_group.add_argument(*args, **kwargs) add_java_name_argument( "-n", "--name", help="Image or plate name to use (**)", metavar="NAME") add_java_name_argument( "-x", "--description", help="Image or plate description to use (**)", metavar="DESCRIPTION") # Deprecated naming arguments add_java_name_argument( "--plate_name", help=SUPPRESS) add_java_name_argument( "--plate_description", help=SUPPRESS) # Feedback options feedback_group = parser.add_argument_group( 'Feedback arguments', 'Optional arguments passed strictly to Java allowing to report' ' errors to the OME team.') def add_feedback_argument(*args, **kwargs): feedback_group.add_argument(*args, **kwargs) add_feedback_argument( "--report", action="store_true", help="Report errors to the OME team (**)") add_feedback_argument( "--upload", action="store_true", help=("Upload broken files and log file (if any) with report." " Required --report (**)")) add_feedback_argument( "--logs", action="store_true", help=("Upload log file (if any) with report." " Required --report (**)")) add_feedback_argument( "--email", help="Email for reported errors. Required --report (**)", metavar="EMAIL") add_feedback_argument( "--qa-baseurl", help=SUPPRESS) # Annotation options annotation_group = parser.add_argument_group( 'Annotation arguments', 'Optional arguments passed strictly to Java allowing to annotate' ' imports.') def add_annotation_argument(*args, **kwargs): annotation_group.add_argument(*args, **kwargs) add_annotation_argument( "--annotation-ns", metavar="ANNOTATION_NS", help="Namespace to use for subsequent annotation (**)") add_annotation_argument( "--annotation-text", metavar="ANNOTATION_TEXT", help="Content for a text annotation (**)") add_annotation_argument( "--annotation-link", metavar="ANNOTATION_LINK", help="Comment annotation ID to link all images to (**)") add_annotation_argument( "--annotation_ns", metavar="ANNOTATION_NS", help=SUPPRESS) add_annotation_argument( "--annotation_text", metavar="ANNOTATION_TEXT", help=SUPPRESS) add_annotation_argument( "--annotation_link", metavar="ANNOTATION_LINK", help=SUPPRESS) java_group = parser.add_argument_group( 'Java arguments', 'Optional arguments passed strictly to Java.') def add_java_argument(*args, **kwargs): java_group.add_argument(*args, **kwargs) add_java_argument( "-f", action="store_true", help="Display the used files and exit (**)") add_java_argument( "-c", action="store_true", help="Continue importing after errors (**)") add_java_argument( "-l", "--readers", help="Use the list of readers rather than the default (**)", metavar="READER_FILE") add_java_argument( "-d", help="OMERO dataset ID to import image into (**)", metavar="DATASET_ID") add_java_argument( "-r", help="OMERO screen ID to import plate into (**)", metavar="SCREEN_ID") add_java_argument( "-T", "--target", help="OMERO target specification (**)", metavar="TARGET") add_java_argument( "--debug", choices=DEBUG_CHOICES, help="Turn debug logging on (**)", metavar="LEVEL", dest="JAVA_DEBUG") add_java_argument( "--output", choices=OUTPUT_CHOICES, help="Set an alternative output style", metavar="TYPE") add_java_argument( "--encrypted", choices=("true", "false"), help="Whether the import should use SSL or not", metavar="TYPE") # Arguments previously *following" `--` advjava_group = parser.add_argument_group( 'Advanced Java arguments', ( 'Optional arguments passed strictly to Java. ' 'For more information, see --advanced-help.')) def add_advjava_argument(*args, **kwargs): advjava_group.add_argument(*args, **kwargs) add_advjava_argument( "--advanced-help", action="store_true", help="Show the advanced help text") add_advjava_argument( "--transfer", nargs="?", metavar="TYPE", help="Transfer methods like in-place import") add_advjava_argument( "--exclude", nargs="?", metavar="TYPE", help="Exclusion filters for preventing re-import") add_advjava_argument( "--checksum-algorithm", nargs="?", metavar="TYPE", help="Alternative hashing mechanisms balancing speed & accuracy") add_advjava_argument( "--no-stats-info", action="store_true", help=SUPPRESS) add_advjava_argument( "--no-thumbnails", action="store_true", help=SUPPRESS) add_advjava_argument( "--no-upgrade-check", action="store_true", help=SUPPRESS) add_advjava_argument( "--parallel-upload", metavar="COUNT", help="Number of file upload threads to run at the same time") add_advjava_argument( "--parallel-fileset", metavar="COUNT", help="Number of fileset candidates to import at the same time") # Unsure on these. add_python_argument( "--depth", default=4, type=int, help="Number of directories to scan down for files") add_python_argument( "--skip", type=str, choices=SKIP_CHOICES, action='append', help="Optional step to skip during import") add_python_argument( "path", nargs="*", help="Path to be passed to the Java process") parser.set_defaults(func=self.importer) def _get_classpath_logback(self, args): lib_client = self.ctx.dir / "lib" / "client" auto_download = False if args.clientdir: client_dir = path(args.clientdir) elif lib_client.exists(): client_dir = lib_client else: auto_download = True omero_java_dir, omero_java_txt = self._userdir_jars() client_dir = omero_java_dir etc_dir = self.ctx.dir / "etc" if args.logback: xml_file = path(args.logback) else: xml_file = etc_dir / "logback-cli.xml" classpath = [] if client_dir and client_dir.exists(): classpath = [f.abspath() for f in client_dir.files("*.jar")] if auto_download: if classpath: self.ctx.err('Using {}'.format(omero_java_txt.text())) if not args.logback: xml_file = client_dir / "logback-cli.xml" else: if not classpath: self.ctx.die( 103, "No JAR files found under '%s'" % client_dir) logback = "-Dlogback.configurationFile=%s" % xml_file return classpath, logback
[docs] def importer(self, args): if args.fetch_jars: if args.path: self.ctx.err('WARNING: Ignoring extra arguments') self.download_omero_java(args.fetch_jars) return classpath, logback = self._get_classpath_logback(args) if not classpath: self.download_omero_java('latest') classpath, logback = self._get_classpath_logback(args) command_args = CommandArguments(self.ctx, args) xargs = [ logback, "-Djava.io.tmpdir=%s" % gettempdir(), "-Xmx1024M", "-cp", os.pathsep.join(classpath)] xargs.append("-Domero.import.depth=%s" % args.depth) if args.bulk and args.path: self.ctx.die(104, "When using bulk import, omit paths") elif args.bulk: self.bulk_import(command_args, xargs) else: self.do_import(command_args, xargs)
def _userdir_jars(self, parentonly=False): user_jars = get_omero_user_cache_dir() / 'jars' # Use this file instead of a symlink so it works on all platform omero_java_txt = user_jars / 'OMERO.java.txt' if parentonly: return user_jars, omero_java_txt omero_java_dir = None if omero_java_txt.exists(): omero_java_dir = omero_java_txt.text().strip() return user_jars / omero_java_dir / 'libs', omero_java_txt else: return None, omero_java_txt
[docs] def download_omero_java(self, version_or_uri): if re.match(r"^\w+://", version_or_uri): omero_java_zip = version_or_uri else: omero_java_zip = OMERO_JAVA_ZIP.format(version=version_or_uri) self.ctx.err("Downloading %s" % omero_java_zip) jars_dir, omero_java_txt = self._userdir_jars(parentonly=True) jars_dir.makedirs_p() with requests.get(omero_java_zip) as resp: with ZipFile(BytesIO(resp.content)) as zipfile: topdirs = set(f.filename.split("/")[0] for f in zipfile.filelist if f.is_dir()) if len(topdirs) != 1: self.ctx.die( 108, 'Expected one top directory in OMERO.java.zip: {}' .format(topdirs)) topdir = topdirs.pop() if os.path.isabs(topdir): self.ctx.die( 108, 'Unexpected absolute paths in OMERO.java.zip: {}' .format(topdir)) zipfile.extractall(jars_dir) omero_java_txt.write_text(topdir)
[docs] def do_import(self, command_args, xargs, mode="w"): out = err = None try: import_command = self.COMMAND + command_args.java_args() out, err = command_args.open_files(mode=mode) p = omero.java.popen( import_command, debug=False, xargs=xargs, stdout=out, stderr=err) self.ctx.rv = p.wait() finally: # Make sure file handles are closed if out: out.close() if err: err.close()
[docs] def bulk_import(self, command_args, xargs): try: from yaml import safe_load except ImportError: self.ctx.die(105, "ERROR: PyYAML is not installed") old_pwd = os.getcwd() try: # Walk the .yml graph looking for includes # and load them all so that the top parent # values can be overwritten. contents = list() bulkfile = command_args.bulk while bulkfile: bulkfile = os.path.abspath(bulkfile) parent = os.path.dirname(bulkfile) with open(bulkfile, "r") as f: data = safe_load(f) contents.append((bulkfile, parent, data)) bulkfile = data.get("include") os.chdir(parent) # TODO: included files are updated based on the including # file but other file paths aren't! bulk = dict() for bulkfile, parent, data in reversed(contents): bulk.update(data) os.chdir(parent) incr = 0 failed = 0 total = 0 for cont in self.parse_bulk(bulk, command_args): incr += 1 if command_args.dry_run: rv = ['"%s"' % x for x in command_args.added_args()] rv = " ".join(rv) if command_args.dry_run.lower() == "true": self.ctx.out(rv) else: with open(command_args.dry_run % incr, "w") as o: # FIXME: this assumes 'omero' print(sys.argv[0], "import", rv, file=o) else: if incr == 1: mode = "w" else: mode = "a" self.do_import(command_args, xargs, mode=mode) if self.ctx.rv: failed += 1 total += self.ctx.rv if cont: msg = "Import failed with error code: %s. Continuing" self.ctx.err(msg % self.ctx.rv) else: msg = "Import failed. Use -c to continue after errors" self.ctx.die(106, msg) # Fail if any import failed self.ctx.rv = total if failed: self.ctx.err("%x failed imports" % failed) finally: os.chdir(old_pwd)
[docs] def parse_bulk(self, bulk, command_args): # Known keys with special handling cont = False command_args.dry_run = False if "dry_run" in bulk: dry_run = str(bulk.pop("dry_run")) # Accept any non-false string since it might be a pattern if dry_run.lower() != "false": command_args.dry_run = dry_run if "continue" in bulk: cont = True c = bulk.pop("continue") if bool(c): command_args.add("c") if "skip" in bulk: command_args.set_skip_values(bulk.pop("skip")) if "path" not in bulk: # Required until @file format is implemented self.ctx.die(107, "No path specified") path = bulk.pop("path") cols = None if "columns" in bulk: cols = bulk.pop("columns") if "include" in bulk: bulk.pop("include") # Now parse all other keys for key in bulk: command_args.add(key, bulk[key]) # All properties are set, yield for each path # to be imported in turn. The value for `cont` # is yielded so that the caller knows whether # or not an error should be fatal. if not cols: # No parsing necessary function = self.parse_text else: function = self.parse_shlex if path.endswith(".tsv"): function = self.parse_tsv elif path.endswith(".csv"): function = self.parse_csv for parts in function(path): if not cols: command_args.set_path(parts) else: for idx, col in enumerate(cols): if col == "path": command_args.set_path([parts[idx]]) else: command_args.add(col, parts[idx]) yield cont
[docs] def parse_text(self, path, parse=False): with open(path, "r") as o: for line in o: line = line.strip() if parse: line = shlex.split(line) yield [line]
[docs] def parse_shlex(self, path): for line in self.parse_text(path, parse=True): yield line
[docs] def parse_tsv(self, path, delimiter="\t"): for line in self.parse_csv(path, delimiter): yield line
[docs] def parse_csv(self, path, delimiter=","): with open(path, "r") as data: for line in csv.reader(data, delimiter=delimiter): yield line
[docs] class TestEngine(ImportControl): COMMAND = [TEST_CLASS]
try: register("import", ImportControl, HELP, epilog=EXAMPLES) register("testengine", TestEngine, TESTHELP) except NameError: if __name__ == "__main__": cli = CLI() cli.register("import", ImportControl, HELP, epilog=EXAMPLES) cli.register("testengine", TestEngine, TESTHELP) cli.invoke(sys.argv[1:])