314 lines
11 KiB
Python
314 lines
11 KiB
Python
# Copyright 2016 The Chromium Authors
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
import os
|
|
import re
|
|
import shutil
|
|
import sys
|
|
import tempfile
|
|
from xml.etree import ElementTree
|
|
from collections import namedtuple
|
|
from typing import Dict
|
|
|
|
from devil.utils import cmd_helper
|
|
from pylib import constants
|
|
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'gyp'))
|
|
from util import build_utils
|
|
|
|
DEXDUMP_PATH = os.path.join(constants.ANDROID_SDK_TOOLS, 'dexdump')
|
|
|
|
|
|
# Annotations dict format:
|
|
# {
|
|
# 'empty-annotation-class-name': None,
|
|
# 'annotation-class-name': {
|
|
# 'fieldA': 'primitive-value',
|
|
# 'fieldB': [ 'array-item-1', 'array-item-2', ... ],
|
|
# 'fieldC': { # CURRENTLY UNSUPPORTED.
|
|
# /* Object value */
|
|
# 'field': 'primitive-value',
|
|
# 'field': [ 'array-item-1', 'array-item-2', ... ],
|
|
# 'field': { /* Object value */ }
|
|
# }
|
|
# }
|
|
# }
|
|
Annotations = namedtuple('Annotations',
|
|
['classAnnotations', 'methodsAnnotations'])
|
|
|
|
# Finds each space-separated "foo=..." (where ... can contain spaces).
|
|
_ANNOTATION_VALUE_MATCHER = re.compile(r'\w+=.*?(?:$|(?= \w+=))')
|
|
|
|
|
|
def Dump(apk_path):
|
|
"""Dumps class and method information from a APK into a dict via dexdump.
|
|
|
|
Args:
|
|
apk_path: An absolute path to an APK file to dump.
|
|
Returns:
|
|
A dict in the following format:
|
|
{
|
|
<package_name>: {
|
|
'classes': {
|
|
<class_name>: {
|
|
'methods': [<method_1>, <method_2>],
|
|
'superclass': <string>,
|
|
'is_abstract': <boolean>,
|
|
'annotations': <Annotations>
|
|
}
|
|
}
|
|
}
|
|
}
|
|
"""
|
|
try:
|
|
dexfile_dir = tempfile.mkdtemp()
|
|
parsed_dex_files = []
|
|
for dex_file in build_utils.ExtractAll(apk_path,
|
|
dexfile_dir,
|
|
pattern='*classes*.dex'):
|
|
output_xml = cmd_helper.GetCmdOutput(
|
|
[DEXDUMP_PATH, '-a', '-j', '-l', 'xml', dex_file])
|
|
# Dexdump doesn't escape its XML output very well; decode it as utf-8 with
|
|
# invalid sequences replaced, then remove forbidden characters and
|
|
# re-encode it (as etree expects a byte string as input so it can figure
|
|
# out the encoding itself from the XML declaration)
|
|
BAD_XML_CHARS = re.compile(
|
|
u'[\x00-\x08\x0b-\x0c\x0e-\x1f\x7f-\x84\x86-\x9f' +
|
|
u'\ud800-\udfff\ufdd0-\ufddf\ufffe-\uffff]')
|
|
|
|
# Line duplicated to avoid pylint redefined-variable-type error.
|
|
clean_xml = BAD_XML_CHARS.sub(u'\ufffd', output_xml)
|
|
|
|
# Constructors are referenced as "<init>" in our annotations
|
|
# which will result in in the ElementTree failing to parse
|
|
# our xml as it won't find a closing tag for this
|
|
clean_xml = clean_xml.replace('<init>', 'constructor')
|
|
|
|
annotations = _ParseAnnotations(clean_xml)
|
|
|
|
parsed_dex_files.append(
|
|
_ParseRootNode(ElementTree.fromstring(clean_xml.encode('utf-8')),
|
|
annotations))
|
|
return parsed_dex_files
|
|
finally:
|
|
shutil.rmtree(dexfile_dir)
|
|
|
|
|
|
def _ParseAnnotationValues(values_str):
|
|
if not values_str:
|
|
return None
|
|
ret = {}
|
|
for key_value in _ANNOTATION_VALUE_MATCHER.findall(values_str):
|
|
key, value_str = key_value.split('=', 1)
|
|
# TODO: support for dicts if ever needed.
|
|
if value_str.startswith('{ ') and value_str.endswith(' }'):
|
|
value = value_str[2:-2].split()
|
|
else:
|
|
value = value_str
|
|
ret[key] = value
|
|
return ret
|
|
|
|
|
|
def _ParseAnnotations(dexRaw: str) -> Dict[int, Annotations]:
|
|
""" Parse XML strings and return a list of Annotations mapped to
|
|
classes by index.
|
|
|
|
Annotations are written to the dex dump as human readable blocks of text
|
|
The only prescription is that they appear before the class in our xml file
|
|
They are not required to be nested within the package as our classes
|
|
It is simpler to parse for all the annotations and then associate them
|
|
back to the
|
|
classes
|
|
|
|
Example:
|
|
Class #12 annotations:
|
|
Annotations on class
|
|
VISIBILITY_RUNTIME Ldalvik/annotation/EnclosingClass; value=...
|
|
Annotations on method #512 'example'
|
|
VISIBILITY_SYSTEM Ldalvik/annotation/Signature; value=...
|
|
VISIBILITY_RUNTIME Landroidx/test/filters/SmallTest;
|
|
VISIBILITY_RUNTIME Lorg/chromium/base/test/util/Feature; value={ Cronet }
|
|
VISIBILITY_RUNTIME LFoo; key1={ A B } key2=4104 key3=null
|
|
"""
|
|
|
|
# We want to find the lines matching the annotations header pattern
|
|
# Eg: Class #12 annotations -> true
|
|
annotationsBlockMatcher = re.compile(u'^Class #.*annotations:$')
|
|
# We want to retrieve the index of the class
|
|
# Eg: Class #12 annotations -> 12
|
|
classIndexMatcher = re.compile(u'(?<=#)[0-9]*')
|
|
# We want to retrieve the method name from between the quotes
|
|
# of the annotations line
|
|
# Eg: Annotations on method #512 'example' -> example
|
|
methodMatcher = re.compile(u"(?<=')[^']*")
|
|
# We want to match everything after the last slash until before the semi colon
|
|
# Eg: Ldalvik/annotation/Signature; -> Signature
|
|
annotationMatcher = re.compile(u'([^/]+); ?(.*)?')
|
|
|
|
annotations = {}
|
|
currentAnnotationsForClass = None
|
|
currentAnnotationsBlock: Dict[str, None] = None
|
|
|
|
# This loop does four things
|
|
# 1. It looks for a line telling us we are describing annotations for
|
|
# a new class
|
|
# 2. It looks for a line telling us if the annotations we find will be
|
|
# for the class or for any of it's methods; we will keep reference to
|
|
# this
|
|
# 3. It adds the annotations to whatever we are holding reference to
|
|
# 4. It looks for a line to see if we should start looking for a
|
|
# new class again
|
|
for line in dexRaw.splitlines():
|
|
if currentAnnotationsForClass is None:
|
|
# Step 1
|
|
# We keep searching until we find an annotation descriptor
|
|
# This lets us know that we are storing annotations for a new class
|
|
if annotationsBlockMatcher.match(line):
|
|
currentClassIndex = int(classIndexMatcher.findall(line)[0])
|
|
currentAnnotationsForClass = Annotations(classAnnotations={},
|
|
methodsAnnotations={})
|
|
annotations[currentClassIndex] = currentAnnotationsForClass
|
|
else:
|
|
# Step 2
|
|
# If we find a descriptor indicating we are tracking annotations
|
|
# for the class or it's methods, we'll keep a reference of this
|
|
# block for when we start finding annotation references
|
|
if line.startswith(u'Annotations on class'):
|
|
currentAnnotationsBlock = currentAnnotationsForClass.classAnnotations
|
|
elif line.startswith(u'Annotations on method'):
|
|
method = methodMatcher.findall(line)[0]
|
|
currentAnnotationsBlock = {}
|
|
currentAnnotationsForClass.methodsAnnotations[
|
|
method] = currentAnnotationsBlock
|
|
|
|
# If we match against any other type of annotations
|
|
# we will ignore them
|
|
elif line.startswith(u'Annotations on'):
|
|
currentAnnotationsBlock = None
|
|
|
|
# Step 3
|
|
# We are only adding runtime annotations as those are the types
|
|
# that will affect if we should run tests or not (where this is
|
|
# being used)
|
|
elif currentAnnotationsBlock is not None and line.strip().startswith(
|
|
'VISIBILITY_RUNTIME'):
|
|
annotationName, annotationValuesStr = annotationMatcher.findall(line)[0]
|
|
annotationValues = _ParseAnnotationValues(annotationValuesStr)
|
|
|
|
# Our instrumentation tests expect a mapping of "Annotation: Value"
|
|
# We aren't using the value for anything and this would increase
|
|
# the complexity of this parser so just mapping these to None
|
|
currentAnnotationsBlock.update({annotationName: annotationValues})
|
|
|
|
# Step 4
|
|
# Empty lines indicate that the annotation descriptions are complete
|
|
# and we should look for new classes
|
|
elif not line.strip():
|
|
currentAnnotationsForClass = None
|
|
currentAnnotationsBlock = None
|
|
|
|
return annotations
|
|
|
|
|
|
def _ParseRootNode(root, annotations: Dict[int, Annotations]):
|
|
"""Parses the XML output of dexdump. This output is in the following format.
|
|
|
|
This is a subset of the information contained within dexdump output.
|
|
|
|
<api>
|
|
<package name="foo.bar">
|
|
<class name="Class" extends="foo.bar.SuperClass">
|
|
<field name="Field">
|
|
</field>
|
|
<constructor name="Method">
|
|
<parameter name="Param" type="int">
|
|
</parameter>
|
|
</constructor>
|
|
<method name="Method">
|
|
<parameter name="Param" type="int">
|
|
</parameter>
|
|
</method>
|
|
</class>
|
|
</package>
|
|
</api>
|
|
"""
|
|
results = {}
|
|
|
|
# Annotations are referenced by the class order
|
|
# To match them, we need to keep track of the class number and
|
|
# match it to the appropriate annotation at that stage
|
|
classCount = 0
|
|
|
|
for child in root:
|
|
if child.tag == 'package':
|
|
package_name = child.attrib['name']
|
|
parsed_node, classCount = _ParsePackageNode(child, classCount,
|
|
annotations)
|
|
if package_name in results:
|
|
results[package_name]['classes'].update(parsed_node['classes'])
|
|
else:
|
|
results[package_name] = parsed_node
|
|
return results
|
|
|
|
|
|
def _ParsePackageNode(package_node, classCount: int,
|
|
annotations: Dict[int, Annotations]):
|
|
"""Parses a <package> node from the dexdump xml output.
|
|
|
|
Returns:
|
|
A tuple in the format:
|
|
(classes: {
|
|
'classes': {
|
|
<class_1>: {
|
|
'methods': [<method_1>, <method_2>],
|
|
'superclass': <string>,
|
|
'is_abstract': <boolean>,
|
|
'annotations': <Annotations or None>
|
|
},
|
|
<class_2>: {
|
|
'methods': [<method_1>, <method_2>],
|
|
'superclass': <string>,
|
|
'is_abstract': <boolean>,
|
|
'annotations': <Annotations or None>
|
|
},
|
|
}
|
|
}, classCount: number)
|
|
"""
|
|
classes = {}
|
|
for child in package_node:
|
|
if child.tag == 'class':
|
|
classes[child.attrib['name']] = _ParseClassNode(child, classCount,
|
|
annotations)
|
|
classCount += 1
|
|
return ({'classes': classes}, classCount)
|
|
|
|
|
|
def _ParseClassNode(class_node, classIndex: int,
|
|
annotations: Dict[int, Annotations]):
|
|
"""Parses a <class> node from the dexdump xml output.
|
|
|
|
Returns:
|
|
A dict in the format:
|
|
{
|
|
'methods': [<method_1>, <method_2>],
|
|
'superclass': <string>,
|
|
'is_abstract': <boolean>
|
|
}
|
|
"""
|
|
methods = []
|
|
for child in class_node:
|
|
if child.tag == 'method' and child.attrib['visibility'] == 'public':
|
|
methods.append(child.attrib['name'])
|
|
return {
|
|
'methods':
|
|
methods,
|
|
'superclass':
|
|
class_node.attrib['extends'],
|
|
'is_abstract':
|
|
class_node.attrib.get('abstract') == 'true',
|
|
'annotations':
|
|
annotations.get(classIndex,
|
|
Annotations(classAnnotations={}, methodsAnnotations={}))
|
|
}
|