#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Performs various performance metrics and reports on OMERO.importer log files.
"""
# Copyright (C) 2009 University of Dundee
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
import re
import sys
import mx.DateTime as DateTime
from mx.DateTime import DateTimeDelta
from getopt import getopt, GetoptError
[docs]
def usage(error):
"""Prints usage so that we don't have to. :)"""
cmd = sys.argv[0]
print("""%s
Usage: %s [options...] <importer_log_file>
Generate performance metrics from an OMERO.importer log file.
Options:
--series_report Print a CSV report for each import's series I/O
--help Display this help and exit
Examples:
%s importer.log
%s --series_report importer.log > series_report.csv
Report bugs to ome-devel@lists.openmicroscopy.org.uk""" % \
(error, cmd, cmd, cmd))
sys.exit(2)
[docs]
class ParsingError(Exception):
"""Raised whenever there is an error parsing a log file."""
pass
[docs]
class Import(object):
"""Stores context about a given import."""
def __init__(self, start, name):
self.start = start
self.end = None
self.name = name
self.setid_start = start
self.setid_end = None
self.post_process_start = None
self.post_process_end = None
self.save_to_db_start = None
self.save_to_db_end = None
self.overlays_start = None
self.thumbnailing_start = None
self.series = []
[docs]
class Series(object):
"""Stores context about a given series."""
def __init__(self, start):
self.start = start
self.end = None
self.planes = []
[docs]
class Plane(object):
"""Stores context about a given plane."""
def __init__(self, abs_date_time):
self.abs_date_time = abs_date_time
[docs]
class ImporterLog(object):
"""
Parses and stores context from an OMERO.importer log file. It also has
the capability of producing various reports.
"""
# Regular expression for matching log4j log lines
log_regex = re.compile(
r'^(?P<date_time>\S+\s+\S+)\s+(?P<ms_elapsed>\d+)\s+'
r'(?P<thread>\[.*?\])\s+(?P<level>\S+)\s+(?P<class>\S+)\s+-\s+'
r'(?P<message>.*)$')
# Regular expression for matching possible OMERO.importer status messages
status_regex = re.compile(r'^[A-Z_]*')
# Format string for matching log4j date/time strings
date_time_fmt = '%Y-%m-%d %H:%M:%S'
def __init__(self, log_file):
self.log_file = log_file
self.imports = []
self.parse()
self.last_import = None
self.last_series = None
[docs]
def parse(self):
"""Parses the specified log file."""
line_no = 1
for line in self.log_file:
match = self.log_regex.match(line)
if match:
self.handle_match(match)
line_no += 1
[docs]
def handle_match(self, match):
"""Handles cases where the log_regex is matched."""
message = match.group('message')
if not self.status_regex.match(message):
return
date_time = match.group('date_time')
date_time, ms = date_time.split(',')
date_time = DateTime.strptime(date_time, self.date_time_fmt)
ms = DateTimeDelta(0, 0, 0, int(ms) // 1000.0)
date_time = date_time + ms
if message.startswith('LOADING_IMAGE'):
name = message[message.find(':') + 2:]
self.last_import = Import(date_time, name)
self.imports.append(self.last_import)
elif not hasattr(self, 'last_import') or self.last_import is None:
return
elif message.startswith('LOADED_IMAGE'):
self.last_import.setid_end = date_time
elif message.startswith('BEGIN_POST_PROCESS'):
self.last_import.post_process_start = date_time
elif message.startswith('END_POST_PROCESS'):
self.last_import.post_process_end = date_time
elif message.startswith('BEGIN_SAVE_TO_DB'):
self.last_import.save_to_db_start = date_time
elif message.startswith('END_SAVE_TO_DB'):
self.last_import.save_to_db_end = date_time
elif message.startswith('IMPORT_OVERLAYS'):
self.last_import.overlays_start = date_time
elif message.startswith('IMPORT_THUMBNAILING'):
self.last_import.thumbnailing_start = date_time
elif message.startswith('IMPORT_DONE'):
self.last_import.end = date_time
self.last_import = None
elif message.startswith('DATASET_STORED'):
self.last_series = Series(date_time)
self.last_import.series.append(self.last_series)
elif message.startswith('DATA_STORED'):
self.last_import.series[-1].end = date_time
elif message.startswith('IMPORT_STEP'):
self.last_series.planes.append(Plane(date_time))
[docs]
def elapsed(self, start, end):
if start is not None and end is not None:
return str((end - start).seconds) + "sec"
return 'Unknown'
[docs]
def report(self):
"""
Prints a simple report to STDOUT stating timings for the overall
import and Bio-Formats setId().
"""
for import_n, i in enumerate(self.imports):
elapsed = self.elapsed(i.start, i.end)
print("Import(%s) %d start: %s end: %s elapsed: %s" % \
(i.name, import_n, i.start, i.end, elapsed))
elapsed = self.elapsed(i.setid_start, i.setid_end)
print("setId() start: %s end: %s elapsed: %s" % \
(i.setid_start, i.setid_end, elapsed))
elapsed = self.elapsed(i.post_process_start, i.post_process_end)
print("Post process start: %s end: %s elapsed: %s" % \
(i.post_process_start, i.post_process_end, elapsed))
elapsed = self.elapsed(i.save_to_db_start, i.save_to_db_end)
print("Save to DB start: %s end: %s elapsed: %s" % \
(i.save_to_db_start, i.save_to_db_end, elapsed))
if len(i.series) > 0:
elapsed = self.elapsed(i.series[0].start, i.series[-1].end)
print("Image I/O start: %s end: %s elapsed: %s" % \
(i.series[0].start, i.series[-1].end, elapsed))
elapsed = self.elapsed(i.overlays_start, i.thumbnailing_start)
print("Overlays start: %s end: %s elapsed: %s" % \
(i.overlays_start, i.thumbnailing_start, elapsed))
elapsed = self.elapsed(i.thumbnailing_start, i.end)
print("Thumbnailing start: %s end: %s elapsed: %s" % \
(i.thumbnailing_start, i.end, elapsed))
[docs]
def series_report_csv(self):
"""
Prints a CSV report to STDOUT with timings for the I/O operations
of each import's set of image series.
"""
print(','.join(['import', 'series', 'series_start', 'series_end',
'series_elapsed']))
for import_n, i in enumerate(self.imports):
for series_n, series in enumerate(i.series):
if series.start is None or series.end is None:
continue
elapsed = (series.end - series.start).seconds
values = [import_n, series_n, series.start, series.end,
elapsed * 1000]
print(','.join([str(v) for v in values]))
if __name__ == "__main__":
try:
options, args = getopt(sys.argv[1:], "", ['series_report', 'help'])
except GetoptError as xxx_todo_changeme:
(msg, opt) = xxx_todo_changeme.args
usage(msg)
try:
log_file, = args
except ValueError:
usage('Must specify at least one log file.')
log = ImporterLog(open(log_file))
do_default_report = True
for option, argument in options:
if option == '--help':
usage('')
if option == '--series_report':
do_default_report = False
log.series_report_csv()
if do_default_report:
log.report()