#!/usr/bin/python
"""Given a regtest result tree, prints an HTML summary to a file.

See HTML skeleton in tests/regtest.html.
"""

import os
import re
import sys

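# Typical invocation, inferred from main() below (the script name is
# illustrative, not confirmed by this file):
#
#   ./make_summary.py <regtest result dir> <output HTML file>
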
SUMMARY_ROW = """\
<tfoot style="font-weight: bold; text-align: right">
<tr>
  <td>
    %(name)s
  </td>

  <!-- input params -->
  <td></td>
  <td></td>
  <td></td>
  <td></td>

  <!-- RAPPOR params -->
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>
  <td></td>

  <!-- MAP params -->
  <td></td>
  <td></td>

  <!-- Result metrics -->
  <td></td>
  <td></td>
  <td>%(mean_fpr)s</td>
  <td>%(mean_fnr)s</td>
  <td>%(mean_tv)s</td>
  <td>%(mean_am)s</td>
  <td>%(mean_time)s</td>
</tr>
</tfoot>
"""

# Navigation and links to plot.
DETAILS = """\
<p style="text-align: right">
  <a href="#top">Up</a>
</p>

<a id="%(anchor)s"></a>

<p style="text-align: center">
  <img src="%(instance_dir)s/dist.png"/>
</p>

<p>
  <a href="%(instance_dir)s">%(name)s files</a>
</p>
"""

def FormatFloat(x, percent):
  """Formats a floating-point number."""
  if percent:
    return '{:.1f}%'.format(x * 100.0)
  else:
    return '{:.3f}'.format(x)

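# For instance, FormatFloat(0.1234, percent=True) returns '12.3%', while
# FormatFloat(0.1234, percent=False) returns '0.123'.
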
def FormatMeanWithSem(m_std_error, percent=False):
  """Formats an estimate with its standard error."""
  if m_std_error is None:
    return ''
  m, std_error = m_std_error
  if std_error is None:
    return FormatFloat(m, percent)
  else:
    return '{}±{}'.format(
        FormatFloat(m, percent),
        FormatFloat(std_error, percent))

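# Illustrative values: FormatMeanWithSem((0.05, 0.01), percent=True) returns
# '5.0%±1.0%', and FormatMeanWithSem((12.3, None)) returns '12.300'.
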
def Mean(l):
  """Computes the mean (average) for a list of numbers."""
  if l:
    return float(sum(l)) / len(l)
  else:
    return None

def SampleVar(l):
  """Computes the sample variance for a list of numbers."""
  if len(l) > 1:
    mean = Mean(l)
    var = sum([(x - mean) ** 2 for x in l]) / (len(l) - 1)
    return var
  else:
    return None

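# E.g. SampleVar([0.2, 0.3, 0.4]) is 0.01: the squared deviations from the
# mean (0.3) are 0.01, 0.0 and 0.01, and their sum 0.02 is divided by
# n - 1 = 2.
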
def StandardErrorEstimate(l):
  """Returns the standard error estimate for a list of numbers.

  For a singleton the standard error is assumed to be 10% of its value.
  """
  if len(l) > 1:
    return (SampleVar(l) / len(l)) ** .5
  elif l:
    return l[0] / 10.0
  else:
    return None

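# Continuing the example above, StandardErrorEstimate([0.2, 0.3, 0.4]) is
# sqrt(0.01 / 3), roughly 0.058, while StandardErrorEstimate([0.2]) falls
# back to the 10% rule and returns 0.02.
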
def MeanOfMeans(dict_of_lists):
  """Returns the average of averages with the standard error of the estimate.
  """
  means = [Mean(dict_of_lists[key]) for key in dict_of_lists
           if dict_of_lists[key]]
  if means:
    # Compute the variance of the estimate for each sublist.
    se = [StandardErrorEstimate(dict_of_lists[key]) ** 2 for key
          in dict_of_lists if dict_of_lists[key]]
    return (Mean(means),  # mean over all sublists
            sum(se) ** .5 / len(se))  # standard error of the mean of means
  else:
    return None

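# Illustrative case: MeanOfMeans({'a': [1.0, 3.0], 'b': [5.0]}) returns
# (3.5, ~0.559) -- the mean of the sublist means 2.0 and 5.0, paired with
# sqrt(1.0 + 0.25) / 2, which combines the per-sublist squared standard
# errors.
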
def ParseSpecFile(spec_filename):
  """Parses the spec (parameters) file.

  Returns:
    An integer and a string. The integer is the number of bogus candidates
    and the string is the parameters in HTML format.
  """
  with open(spec_filename) as s:
    spec_row = s.readline().split()

  # The second-to-last column is 'num_additional' -- the number of bogus
  # candidates added.
  num_additional = int(spec_row[-2])

  spec_in_html = ' '.join('<td>%s</td>' % cell for cell in spec_row[1:])

  return num_additional, spec_in_html

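# A made-up spec row illustrates the slicing: for the line 't1 a b 20 x',
# ParseSpecFile returns (20, '<td>a</td> <td>b</td> <td>20</td> <td>x</td>')
# -- the first column (the test name) is dropped from the HTML.
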
def ExtractTime(log_filename):
  """Extracts the elapsed time information from the log file.

  Returns:
    Elapsed time (in seconds) or None in case of failure.
  """
  if os.path.isfile(log_filename):
    with open(log_filename) as log:
      log_str = log.read()
    # Matching a line output by analyze.R.
    match = re.search(r'Inference took ([0-9.]+) seconds', log_str)
    if match:
      return float(match.group(1))
  return None

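# For example, a log containing the line 'Inference took 2.35 seconds'
# makes ExtractTime return 2.35; a missing log, or one without such a line,
# yields None.
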
def ParseMetrics(metrics_file, log_file, num_additional):
  """Processes the metrics file.

  Args:
    metrics_file: name of the metrics file
    log_file: name of the log.txt file
    num_additional: the number of bogus candidates added to the candidate list

  Returns a pair:
    - A dictionary of metrics (some can be []).
    - An HTML-formatted portion of the report row.
  """
  if not os.path.isfile(metrics_file):
    metrics_row_str = ['', '', '', '', '', '']
    metrics_row_dict = {}
  else:
    with open(metrics_file) as m:
      m.readline()  # skip the CSV header
      metrics_row = m.readline().split(',')

    (num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
     allocated_mass) = metrics_row

    num_actual = int(num_actual)
    num_rappor = int(num_rappor)

    num_false_pos = int(num_false_pos)
    num_false_neg = int(num_false_neg)

    total_variation = float(total_variation)
    allocated_mass = float(allocated_mass)

    # E.g. if 20 additional candidates were added and there is 1 false
    # positive, the false positive rate is 5%.
    fp_rate = float(num_false_pos) / num_additional if num_additional else 0
    # E.g. if there are 100 strings in the true input and RAPPOR detects 80
    # of them, we have 20 false negatives and a false negative rate of 20%.
    fn_rate = float(num_false_neg) / num_actual

    metrics_row_str = [
        str(num_actual),
        str(num_rappor),
        '%.1f%% (%d)' % (fp_rate * 100, num_false_pos) if num_additional
        else '',
        '%.1f%% (%d)' % (fn_rate * 100, num_false_neg),
        '%.3f' % total_variation,
        '%.3f' % allocated_mass,
    ]

    metrics_row_dict = {
        'tv': [total_variation],
        'fpr': [fp_rate] if num_additional else [],
        'fnr': [fn_rate],
        'am': [allocated_mass],
    }

  elapsed_time = ExtractTime(log_file)
  if elapsed_time is not None:
    metrics_row_str = metrics_row_str + ['%.2f' % elapsed_time]
    metrics_row_dict['time'] = [elapsed_time]

  # Return metrics formatted as HTML table entries.
  return (metrics_row_dict,
          ' '.join('<td>%s</td>' % cell for cell in metrics_row_str))

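# Expected metrics.csv layout, reconstructed from the parsing above (numbers
# are made up): a header line followed by one data line such as
#
#   100,85,1,15,0.020,0.900
#
# i.e. num_actual, num_rappor, num_false_pos, num_false_neg, total_variation,
# allocated_mass. With num_additional = 20 this gives fp_rate = 1/20 = 5%
# and fn_rate = 15/100 = 15%.
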
def FormatCell1(test_case, test_instance, metrics_file, log_file, plot_file,
                link_to_plots):
  """Outputs an HTML table entry for the first cell of the row.

  The row is filled if the metrics file exists. The first cell contains a
  link: for short tables it points to an inline plot (an anchor within the
  page); for large tables, to an external plot file.

  If the metrics file is missing, the link points to the log file (if one
  exists).
  """
  relpath_report = '{}/{}_report'.format(test_case, test_instance)
  if os.path.isfile(metrics_file):
    external_file = plot_file
    if link_to_plots:
      link = '#{}_{}'.format(test_case, test_instance)  # anchor
    else:
      link = os.path.join(relpath_report, 'dist.png')
  else:  # no results, likely due to an error; link to the log file instead
    external_file = log_file
    link = os.path.join(relpath_report, 'log.txt')

  if os.path.isfile(external_file):
    return '<td><a href="{}">{}</a></td>'.format(link, test_case)
  else:  # no file to link to
    return '<td>{}</td>'.format(test_case)

def FormatSummaryRow(metrics_lists):
  """Outputs an HTML-formatted summary row."""
  means_with_sem = {}  # SEM - standard error of the mean

  for key in metrics_lists:
    means_with_sem[key] = MeanOfMeans(metrics_lists[key])
    # If none of the lists is longer than one element, drop the SEM component.
    if (means_with_sem[key] and
        max([len(l) for l in metrics_lists[key].values()]) < 2):
      means_with_sem[key] = [means_with_sem[key][0], None]

  summary = {
      'name': 'Means',
      'mean_fpr': FormatMeanWithSem(means_with_sem['fpr'], percent=True),
      'mean_fnr': FormatMeanWithSem(means_with_sem['fnr'], percent=True),
      'mean_tv': FormatMeanWithSem(means_with_sem['tv'], percent=True),
      'mean_am': FormatMeanWithSem(means_with_sem['am'], percent=True),
      'mean_time': FormatMeanWithSem(means_with_sem['time']),
  }
  return SUMMARY_ROW % summary

def FormatPlots(base_dir, test_instances):
  """Outputs HTML-formatted plots."""
  result = ''
  for instance in test_instances:
    # A test instance is identified by the test name and the test run.
    test_case, test_instance, _ = instance.split(' ')
    instance_dir = test_case + '/' + test_instance + '_report'
    if os.path.isfile(os.path.join(base_dir, instance_dir, 'dist.png')):
      result += DETAILS % {'anchor': test_case + '_' + test_instance,
                           'name': '{} (instance {})'.format(test_case,
                                                             test_instance),
                           'instance_dir': instance_dir}
  return result

def main(argv):
  base_dir = argv[1]
  output_file = open(argv[2], 'w')

  # This file has the test case names, in the order that they should be
  # displayed.
  instances_file = os.path.join(base_dir, 'test-instances.txt')
  if not os.path.isfile(instances_file):
    raise RuntimeError('{} is missing'.format(instances_file))

  with open(instances_file) as f:
    test_instances = [line.strip() for line in f]

  # Metrics are assembled into a dictionary of dictionaries. The top-level
  # key is the metric name ('tv', 'fpr', etc.), the second-level key is
  # the test case. These keys reference a list of floats, which can be empty.
  metrics = {
      'tv': {},    # total_variation for all test cases
      'fpr': {},   # dictionary of false positive rates
      'fnr': {},   # dictionary of false negative rates
      'am': {},    # dictionary of total allocated masses
      'time': {},  # dictionary of total elapsed time measurements
  }

  # If there are too many tests, the plots are not included in the results
  # file. Instead, rows' names are links to the corresponding .png files.
  include_plots = len(test_instances) < 20

  instances_succeeded = 0
  instances_failed = 0
  instances_running = 0

  for instance in test_instances:
    # A test instance is identified by the test name and the test run.
    test_case, test_instance, _ = instance.split(' ')

    spec_file = os.path.join(base_dir, test_case, 'spec.txt')
    if not os.path.isfile(spec_file):
      raise RuntimeError('{} is missing'.format(spec_file))

    num_additional, spec_html = ParseSpecFile(spec_file)
    metrics_html = ''  # will be filled in later, if metrics exist

    report_dir = os.path.join(base_dir, test_case, test_instance + '_report')

    metrics_file = os.path.join(report_dir, 'metrics.csv')
    log_file = os.path.join(report_dir, 'log.txt')
    plot_file = os.path.join(report_dir, 'dist.png')

    cell1_html = FormatCell1(test_case, test_instance, metrics_file, log_file,
                             plot_file, include_plots)

    # ParseMetrics outputs an HTML table row and also updates lists.
    metrics_dict, metrics_html = ParseMetrics(metrics_file, log_file,
                                              num_additional)

    # Update the metrics structure. Initialize dictionaries if necessary.
    for m in metrics:
      if m in metrics_dict:
        if test_case not in metrics[m]:
          metrics[m][test_case] = metrics_dict[m]
        else:
          metrics[m][test_case] += metrics_dict[m]

    print >>output_file, '<tr>{}{}{}</tr>'.format(cell1_html,
                                                  spec_html, metrics_html)

    # Update counters.
    if 'tv' in metrics_dict:
      instances_succeeded += 1
    elif 'time' in metrics_dict:
      instances_failed += 1
    elif os.path.isfile(log_file):
      instances_running += 1

  print >>output_file, FormatSummaryRow(metrics)

  print >>output_file, '</tbody>'
  print >>output_file, '</table>'
  print >>output_file, '<p style="padding-bottom: 3em"></p>'  # vertical space

  # Plot links.
  if include_plots:
    print >>output_file, FormatPlots(base_dir, test_instances)
  else:
    print >>output_file, ('<p>Too many tests to include plots. '
                          'Click links within rows for details.</p>')

  print ('Instances'
         ' succeeded: {} failed: {} running: {} total: {}'.format(
             instances_succeeded, instances_failed, instances_running,
             len(test_instances)))

if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    print >>sys.stderr, 'FATAL: %s' % e
    sys.exit(1)