# Copyright 2013 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import difflib
import hashlib
import itertools
import json
import os
import sys
import zipfile

from util import build_utils
import action_helpers  # build_utils adds //build to sys.path.
import print_python_deps

# When set and a difference is detected, a diff of what changed is printed.
PRINT_EXPLANATIONS = int(os.environ.get('PRINT_BUILD_EXPLANATIONS', 0))

# An escape hatch that causes all targets to be rebuilt.
_FORCE_REBUILD = int(os.environ.get('FORCE_REBUILD', 0))
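# Both knobs are plain environment variables, so they can be set per-build.
# Example (hypothetical invocation; any ninja-based build of this repo works):
#   PRINT_BUILD_EXPLANATIONS=1 ninja -C out/Debug some_target
#   FORCE_REBUILD=1 ninja -C out/Debug some_target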


def CallAndWriteDepfileIfStale(on_stale_md5,
                               options,
                               record_path=None,
                               input_paths=None,
                               input_strings=None,
                               output_paths=None,
                               force=False,
                               pass_changes=False,
                               track_subpaths_allowlist=None,
                               depfile_deps=None):
  """Wraps CallAndRecordIfStale() and writes a depfile if applicable.

  Depfiles are automatically added to output_paths when present in the
  |options| argument. They are then created after |on_stale_md5| is called.

  By default, only python dependencies are added to the depfile. If there are
  other input paths that are not captured by GN deps, then they should be
  listed in depfile_deps. It's important to write paths to the depfile that
  are already captured by GN deps, since GN args can cause GN deps to change,
  and such changes are not immediately reflected in depfiles
  (http://crbug.com/589311).
  """
  if not output_paths:
    raise Exception('At least one output_path must be specified.')
  input_paths = list(input_paths or [])
  input_strings = list(input_strings or [])
  output_paths = list(output_paths or [])

  input_paths += print_python_deps.ComputePythonDependencies()

  CallAndRecordIfStale(
      on_stale_md5,
      record_path=record_path,
      input_paths=input_paths,
      input_strings=input_strings,
      output_paths=output_paths,
      force=force,
      pass_changes=pass_changes,
      track_subpaths_allowlist=track_subpaths_allowlist)

  # Write the depfile even when inputs have not changed, to ensure build
  # correctness on bots that build with & without a patch, where the patch
  # changes the depfile location.
  if hasattr(options, 'depfile') and options.depfile:
    action_helpers.write_depfile(options.depfile, output_paths[0], depfile_deps)
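# Illustrative sketch of a typical caller (the option names and the
# _OnStaleMd5 helper below are hypothetical, not part of this module):
#
#   def _OnStaleMd5():
#     DoExpensiveWork(options)  # Runs only when inputs/outputs are stale.
#
#   CallAndWriteDepfileIfStale(
#       _OnStaleMd5,
#       options,
#       input_paths=[options.input_jar],
#       input_strings=[options.some_flag],
#       output_paths=[options.output_jar],
#       depfile_deps=paths_not_captured_by_gn_deps)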


def CallAndRecordIfStale(function,
                         record_path=None,
                         input_paths=None,
                         input_strings=None,
                         output_paths=None,
                         force=False,
                         pass_changes=False,
                         track_subpaths_allowlist=None):
  """Calls function if outputs are stale.

  Outputs are considered stale if:
  - any output_paths are missing, or
  - the contents of any file within input_paths has changed, or
  - the contents of input_strings has changed.

  To debug which files are out-of-date, set the environment variable:
  PRINT_BUILD_EXPLANATIONS=1

  Args:
    function: The function to call.
    record_path: Path to record metadata.
      Defaults to output_paths[0] + '.md5.stamp'
    input_paths: List of paths to calculate an md5 sum on.
    input_strings: List of strings to record verbatim.
    output_paths: List of output paths.
    force: Whether to treat outputs as missing regardless of whether they
      actually are.
    pass_changes: Whether to pass a Changes instance to |function|.
    track_subpaths_allowlist: Relevant only when pass_changes=True. List of
      .zip files from |input_paths| to make subpath information available for.
  """
  assert record_path or output_paths
  input_paths = input_paths or []
  input_strings = input_strings or []
  output_paths = output_paths or []
  record_path = record_path or output_paths[0] + '.md5.stamp'

  assert record_path.endswith('.stamp'), (
      'record paths must end in \'.stamp\' so that they are easy to find '
      'and delete')

  new_metadata = _Metadata(track_entries=pass_changes or PRINT_EXPLANATIONS)
  new_metadata.AddStrings(input_strings)

  zip_allowlist = set(track_subpaths_allowlist or [])
  for path in input_paths:
    # It's faster to md5 an entire zip file than it is to locate & hash
    # just its central directory (which is what this used to do).
    if path in zip_allowlist:
      entries = _ExtractZipEntries(path)
      new_metadata.AddZipFile(path, entries)
    else:
      new_metadata.AddFile(path, _ComputeTagForPath(path))

  old_metadata = None
  force = force or _FORCE_REBUILD
  missing_outputs = [x for x in output_paths if force or not os.path.exists(x)]
  too_new = []
  # When outputs are missing, don't bother gathering change information.
  if not missing_outputs and os.path.exists(record_path):
    record_mtime = os.path.getmtime(record_path)
    # Outputs newer than the change information must have been modified outside
    # of the build, and should be considered stale.
    too_new = [x for x in output_paths if os.path.getmtime(x) > record_mtime]
    if not too_new:
      with open(record_path, 'r') as jsonfile:
        try:
          old_metadata = _Metadata.FromFile(jsonfile)
        except:  # pylint: disable=bare-except
          pass  # Not yet using the new file format.

  changes = Changes(old_metadata, new_metadata, force, missing_outputs, too_new)
  if not changes.HasChanges():
    return

  if PRINT_EXPLANATIONS:
    print('=' * 80)
    print('Target is stale: %s' % record_path)
    print(changes.DescribeDifference())
    print('=' * 80)

  args = (changes,) if pass_changes else ()
  function(*args)

  with open(record_path, 'w') as f:
    new_metadata.ToFile(f)
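# Illustrative sketch (the paths and callback below are hypothetical). With
# pass_changes=True, the callback receives a Changes instance describing what
# went stale; track_subpaths_allowlist additionally records per-entry tags for
# the listed zips so subpath queries work:
#
#   def _OnStaleMd5(changes):
#     for path in changes.IterChangedPaths():
#       Rebuild(path)  # Hypothetical helper.
#
#   CallAndRecordIfStale(
#       _OnStaleMd5,
#       input_paths=['a.jar', 'b.jar'],
#       output_paths=['out.stamp'],
#       pass_changes=True,
#       track_subpaths_allowlist=['a.jar'])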


class Changes:
  """Provides an API for querying what changed between runs."""

  def __init__(self, old_metadata, new_metadata, force, missing_outputs,
               too_new):
    self.old_metadata = old_metadata
    self.new_metadata = new_metadata
    self.force = force
    self.missing_outputs = missing_outputs
    self.too_new = too_new

  def _GetOldTag(self, path, subpath=None):
    return self.old_metadata and self.old_metadata.GetTag(path, subpath)

  def HasChanges(self):
    """Returns whether any changes exist."""
    return (self.HasStringChanges()
            or self.old_metadata.FilesMd5() != self.new_metadata.FilesMd5())

  def HasStringChanges(self):
    """Returns whether string metadata changed."""
    return (self.force or not self.old_metadata
            or self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5())

  def AddedOrModifiedOnly(self):
    """Returns whether the only changes were from added or modified (sub)files.

    No missing outputs, no removed paths/subpaths.
    """
    if self.HasStringChanges():
      return False
    if any(self.IterRemovedPaths()):
      return False
    for path in self.IterModifiedPaths():
      if any(self.IterRemovedSubpaths(path)):
        return False
    return True

  def IterAllPaths(self):
    """Generator for paths."""
    return self.new_metadata.IterPaths()

  def IterAllSubpaths(self, path):
    """Generator for subpaths."""
    return self.new_metadata.IterSubpaths(path)

  def IterAddedPaths(self):
    """Generator for paths that were added."""
    for path in self.new_metadata.IterPaths():
      if self._GetOldTag(path) is None:
        yield path

  def IterAddedSubpaths(self, path):
    """Generator for paths that were added within the given zip file."""
    for subpath in self.new_metadata.IterSubpaths(path):
      if self._GetOldTag(path, subpath) is None:
        yield subpath

  def IterRemovedPaths(self):
    """Generator for paths that were removed."""
    if self.old_metadata:
      for path in self.old_metadata.IterPaths():
        if self.new_metadata.GetTag(path) is None:
          yield path

  def IterRemovedSubpaths(self, path):
    """Generator for paths that were removed within the given zip file."""
    if self.old_metadata:
      for subpath in self.old_metadata.IterSubpaths(path):
        if self.new_metadata.GetTag(path, subpath) is None:
          yield subpath

  def IterModifiedPaths(self):
    """Generator for paths whose contents have changed."""
    for path in self.new_metadata.IterPaths():
      old_tag = self._GetOldTag(path)
      new_tag = self.new_metadata.GetTag(path)
      if old_tag is not None and old_tag != new_tag:
        yield path

  def IterModifiedSubpaths(self, path):
    """Generator for paths within a zip file whose contents have changed."""
    for subpath in self.new_metadata.IterSubpaths(path):
      old_tag = self._GetOldTag(path, subpath)
      new_tag = self.new_metadata.GetTag(path, subpath)
      if old_tag is not None and old_tag != new_tag:
        yield subpath

  def IterChangedPaths(self):
    """Generator for all changed paths (added/removed/modified)."""
    return itertools.chain(self.IterRemovedPaths(),
                           self.IterModifiedPaths(),
                           self.IterAddedPaths())

  def IterChangedSubpaths(self, path):
    """Generator for paths within a zip that were added/removed/modified."""
    return itertools.chain(self.IterRemovedSubpaths(path),
                           self.IterModifiedSubpaths(path),
                           self.IterAddedSubpaths(path))

  def DescribeDifference(self):
    """Returns a human-readable description of what changed."""
    if self.force:
      return 'force=True'
    if self.missing_outputs:
      return 'Outputs do not exist:\n  ' + '\n  '.join(self.missing_outputs)
    if self.too_new:
      return 'Outputs newer than stamp file:\n  ' + '\n  '.join(self.too_new)
    if self.old_metadata is None:
      return 'Previous stamp file not found.'

    if self.old_metadata.StringsMd5() != self.new_metadata.StringsMd5():
      ndiff = difflib.ndiff(self.old_metadata.GetStrings(),
                            self.new_metadata.GetStrings())
      changed = [s for s in ndiff if not s.startswith(' ')]
      return 'Input strings changed:\n  ' + '\n  '.join(changed)

    if self.old_metadata.FilesMd5() == self.new_metadata.FilesMd5():
      return "There's no difference."

    lines = []
    lines.extend('Added: ' + p for p in self.IterAddedPaths())
    lines.extend('Removed: ' + p for p in self.IterRemovedPaths())
    for path in self.IterModifiedPaths():
      lines.append('Modified: ' + path)
      lines.extend('  -> Subpath added: ' + p
                   for p in self.IterAddedSubpaths(path))
      lines.extend('  -> Subpath removed: ' + p
                   for p in self.IterRemovedSubpaths(path))
      lines.extend('  -> Subpath modified: ' + p
                   for p in self.IterModifiedSubpaths(path))
    if lines:
      return 'Input files changed:\n  ' + '\n  '.join(lines)
    return 'I have no idea what changed (there is a bug).'
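# Example queries against a Changes instance (sketch; assumes |changes| came
# from a pass_changes=True callback, and the Process*/FullRebuild helpers are
# hypothetical):
#
#   if changes.AddedOrModifiedOnly():
#     # Incremental update is safe: nothing was removed.
#     for path in changes.IterModifiedPaths():
#       for subpath in changes.IterChangedSubpaths(path):
#         ProcessEntry(path, subpath)
#   else:
#     FullRebuild()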


class _Metadata:
  """Data model for tracking change metadata.

  Args:
    track_entries: Enables per-file change tracking. Slower, but required for
      Changes functionality.
  """
  # Schema:
  # {
  #   "files-md5": "VALUE",
  #   "strings-md5": "VALUE",
  #   "input-files": [
  #     {
  #       "path": "path.jar",
  #       "tag": "{MD5 of entries}",
  #       "entries": [
  #         { "path": "org/chromium/base/Foo.class", "tag": "{CRC32}" }, ...
  #       ]
  #     }, {
  #       "path": "path.txt",
  #       "tag": "{MD5}",
  #     }
  #   ],
  #   "input-strings": ["a", "b", ...],
  # }
  def __init__(self, track_entries=False):
    self._track_entries = track_entries
    self._files_md5 = None
    self._strings_md5 = None
    self._files = []
    self._strings = []
    # Map of (path, subpath) -> entry. Created upon first call to _GetEntry().
    self._file_map = None

  @classmethod
  def FromFile(cls, fileobj):
    """Returns a _Metadata initialized from a file object."""
    ret = cls()
    obj = json.load(fileobj)
    ret._files_md5 = obj['files-md5']
    ret._strings_md5 = obj['strings-md5']
    ret._files = obj.get('input-files', [])
    ret._strings = obj.get('input-strings', [])
    return ret

  def ToFile(self, fileobj):
    """Serializes metadata to the given file object."""
    obj = {
        'files-md5': self.FilesMd5(),
        'strings-md5': self.StringsMd5(),
    }
    if self._track_entries:
      obj['input-files'] = sorted(self._files, key=lambda e: e['path'])
      obj['input-strings'] = self._strings

    json.dump(obj, fileobj, indent=2)

  def _AssertNotQueried(self):
    assert self._files_md5 is None
    assert self._strings_md5 is None
    assert self._file_map is None

  def AddStrings(self, values):
    self._AssertNotQueried()
    self._strings.extend(str(v) for v in values)

  def AddFile(self, path, tag):
    """Adds metadata for a non-zip file.

    Args:
      path: Path to the file.
      tag: A short string representative of the file contents.
    """
    self._AssertNotQueried()
    self._files.append({
        'path': path,
        'tag': tag,
    })

  def AddZipFile(self, path, entries):
    """Adds metadata for a zip file.

    Args:
      path: Path to the file.
      entries: List of (subpath, tag) tuples for entries within the zip.
    """
    self._AssertNotQueried()
    tag = _ComputeInlineMd5(itertools.chain((e[0] for e in entries),
                                            (e[1] for e in entries)))
    self._files.append({
        'path': path,
        'tag': tag,
        'entries': [{'path': e[0], 'tag': e[1]} for e in entries],
    })

  def GetStrings(self):
    """Returns the list of input strings."""
    return self._strings

  def FilesMd5(self):
    """Lazily computes and returns the aggregate md5 of input files."""
    if self._files_md5 is None:
      # Omit paths from md5 since temporary files have random names.
      self._files_md5 = _ComputeInlineMd5(
          self.GetTag(p) for p in sorted(self.IterPaths()))
    return self._files_md5

  def StringsMd5(self):
    """Lazily computes and returns the aggregate md5 of input strings."""
    if self._strings_md5 is None:
      self._strings_md5 = _ComputeInlineMd5(self._strings)
    return self._strings_md5

  def _GetEntry(self, path, subpath=None):
    """Returns the JSON entry for the given path / subpath."""
    if self._file_map is None:
      self._file_map = {}
      for entry in self._files:
        self._file_map[(entry['path'], None)] = entry
        for subentry in entry.get('entries', ()):
          self._file_map[(entry['path'], subentry['path'])] = subentry
    return self._file_map.get((path, subpath))

  def GetTag(self, path, subpath=None):
    """Returns the tag for the given path / subpath."""
    ret = self._GetEntry(path, subpath)
    return ret and ret['tag']

  def IterPaths(self):
    """Returns a generator for all top-level paths."""
    return (e['path'] for e in self._files)

  def IterSubpaths(self, path):
    """Returns a generator for all subpaths in the given zip.

    If the given path is not a zip file or doesn't exist, returns an empty
    iterable.
    """
    outer_entry = self._GetEntry(path)
    if not outer_entry:
      return ()
    subentries = outer_entry.get('entries', [])
    return (entry['path'] for entry in subentries)
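# Sketch of the intended usage pattern: record everything first, then query.
# _AssertNotQueried() enforces this ordering, since adding inputs after the
# lazy md5s have been computed would silently produce stale hashes.
# (File names below are hypothetical.)
#
#   metadata = _Metadata(track_entries=True)
#   metadata.AddStrings(['--some-flag'])
#   metadata.AddFile('in.txt', _ComputeTagForPath('in.txt'))
#   combined = metadata.FilesMd5()  # No further Add*() calls allowed.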


def _ComputeTagForPath(path):
  stat = os.stat(path)
  if stat.st_size > 1 * 1024 * 1024:
    # Fall back to mtime for large files so that md5_check does not take too
    # long to run.
    return stat.st_mtime
  md5 = hashlib.md5()
  with open(path, 'rb') as f:
    md5.update(f.read())
  return md5.hexdigest()
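# Illustrative behavior (hypothetical file names): small files are tagged by
# content hash, while files over 1 MiB are tagged by mtime:
#   _ComputeTagForPath('small.txt')  # -> md5 hex digest of the contents.
#   _ComputeTagForPath('huge.bin')   # -> os.stat('huge.bin').st_mtime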


def _ComputeInlineMd5(iterable):
  """Computes the md5 of the concatenated parameters."""
  md5 = hashlib.md5()
  for item in iterable:
    md5.update(str(item).encode('ascii'))
  return md5.hexdigest()
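# Example: equivalent to hashing the concatenated str() of each item. Note
# that item boundaries are not encoded, so ['ab'] and ['a', 'b'] collide:
#   _ComputeInlineMd5(['a', 1]) == hashlib.md5(b'a1').hexdigest()  # -> True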


def _ExtractZipEntries(path):
  """Returns a list of (path, tag) tuples for all files within |path|.

  The tag combines each entry's CRC32 with its compression type.
  """
  entries = []
  with zipfile.ZipFile(path) as zip_file:
    for zip_info in zip_file.infolist():
      # Skip directories and empty files (both have a CRC of 0).
      if zip_info.CRC:
        entries.append(
            (zip_info.filename, zip_info.CRC + zip_info.compress_type))
  return entries
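# Illustrative output (hypothetical archive); the second element of each
# tuple is zip_info.CRC + zip_info.compress_type, so a change to either the
# entry's contents or its compression method alters the tag:
#   _ExtractZipEntries('foo.jar')
#   # -> [('org/chromium/base/Foo.class', <CRC32 + compress_type>), ...]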