apkcrawler: Store click map.
Each time the crawler clicks on an element that goes to a different
view, store this relationship for both the preceding and resulting
views.
This required a small refactoring: code shared between several call
sites was moved into separate helper functions and methods.
Change-Id: I52729352ad725f9c7cfe03694cb75ec9cf4cee9c
diff --git a/apkcrawler/crawlui.py b/apkcrawler/crawlui.py
index 08dea1b..75bc8a9 100644
--- a/apkcrawler/crawlui.py
+++ b/apkcrawler/crawlui.py
@@ -1,7 +1,6 @@
# Copyright 2016 The Vanadium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.
-
"""A module for installing and crawling the UI of Android application."""
import copy
@@ -12,7 +11,6 @@
from view import View
-
ADB_PATH = None
MAX_HEIGHT = 0
@@ -28,9 +26,13 @@
# activities cannot have spaces, we ensure that no activity will be named this.
EXITED_APP = 'exited app'
+# How many times we should try pressing the back button to return to the app
+# before giving up.
+NUM_BACK_PRESSES = 3
+
def extract_between(text, sub1, sub2, nth=1):
- """Extract a substring from text between two given substrings."""
+ """Extracts a substring from text between two given substrings."""
# Credit to
# https://www.daniweb.com/programming/software-development/code/446964/extract-a-string-between-2-substrings-python-
@@ -41,13 +43,13 @@
def set_adb_path():
- """Define the ADB path based on operating system."""
+ """Defines the ADB path based on operating system."""
try:
global ADB_PATH
# For machines with multiple installations of adb, use the last listed
# version of adb. If this doesn't work for your setup, modify to taste.
- ADB_PATH = (subprocess.check_output(['which -a adb'], shell=True)
- .split('\n')[-2])
+ ADB_PATH = (
+ subprocess.check_output(['which -a adb'], shell=True).split('\n')[-2])
except subprocess.CalledProcessError:
print 'Could not find adb. Please check your PATH.'
@@ -56,42 +58,53 @@
"""Sets global variables to the dimensions of the device."""
global MAX_HEIGHT, MAX_WIDTH, NAVBAR_HEIGHT
vc_dump = vc.dump(window='-1')
+ # Returns a string similar to "Physical size: 1440x2560"
proc = subprocess.Popen([ADB_PATH, 'shell', 'wm size'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
size, _ = proc.communicate()
MAX_HEIGHT = extract_between(size, 'x', '\r')
MAX_WIDTH = extract_between(size, ': ', 'x')
- NAVBAR_HEIGHT = (vc_dump[0].getY() - int(vc_dump[0]
- ['layout:getLocationOnScreen_y()']))
+ NAVBAR_HEIGHT = (
+ vc_dump[0].getY() - int(vc_dump[0]['layout:getLocationOnScreen_y()']))
def perform_press_back():
subprocess.call([ADB_PATH, 'shell', 'input', 'keyevent', '4'])
+def attempt_return_to_app(package_name):
+ """Tries to press back a number of times to return to the app."""
+
+ # Returns whether or not we were successful after NUM_BACK_PRESSES attempts.
+ for _ in range(0, NUM_BACK_PRESSES):
+ perform_press_back()
+ activity = get_activity_name(package_name)
+ if activity != EXITED_APP:
+ return True
+
+ return False
+
+
def get_activity_name(package_name):
"""Gets the current running activity of the package."""
# TODO(afergan): See if we can consolidate this with get_fragment_list, but
# still make sure that the current app has focus.
# TODO(afergan): Check for Windows compatibility.
proc = subprocess.Popen([ADB_PATH, 'shell', 'dumpsys window windows '
- '| grep -E \'mCurrentFocus\''],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ '| grep -E \'mCurrentFocus\''],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
activity_str, _ = proc.communicate()
# If a popup menu has captured the focus, the focus will be in the format
# mCurrentFocus=Window{8f1328e u0 PopupWindow:53a5957}
if 'PopupWindow' in activity_str:
popup_str = extract_between(activity_str, 'PopupWindow', '}')
- return popup_str.replace(':', '')
+ return 'PopupWindow' + popup_str.replace(':', '')
- # We are no longer in the app.
if package_name not in activity_str:
- print 'Exited app'
- # If app opened a different app, try to get back to it.
- perform_press_back()
- if package_name not in activity_str:
- return EXITED_APP
+ return EXITED_APP
# The current focus returns a string in the format
# mCurrentFocus=Window{35f66c3 u0 com.google.zagat/com.google.android.apps.
@@ -103,7 +116,8 @@
def get_frag_list(package_name):
"""Gets the list of fragments in the current view."""
proc = subprocess.Popen([ADB_PATH, 'shell', 'dumpsys activity', package_name],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
adb_dump, _ = proc.communicate()
frag_dump = re.findall('Added Fragments:(.*?)FragmentManager', adb_dump,
re.DOTALL)
@@ -115,34 +129,39 @@
def get_package_name():
- """Get the package name of the current focused window."""
+ """Gets the package name of the current focused window."""
proc = subprocess.Popen([ADB_PATH, 'shell', 'dumpsys window windows '
- '| grep -E \'mCurrentFocus\''],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ '| grep -E \'mCurrentFocus\''],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
activity_str, _ = proc.communicate()
# The current focus returns a string in the format
# mCurrentFocus=Window{35f66c3 u0 com.google.zagat/com.google.android.apps.
# zagat.activities.BrowseListsActivity}
- # We want the text before the /
+ # We want the text before the forward slash (/)
pkg_name = extract_between(activity_str, ' ', '/', -1)
print 'Package name is ' + pkg_name
return pkg_name
def save_view_data(package_name, activity, frag_list, vc_dump):
- """Store the screenshot with a unique filename."""
- directory = (os.path.dirname(os.path.abspath(__file__)) + '/data/'
- + package_name)
+ """Stores the view hierarchy and screenshots with unique filenames."""
+ # Returns the path to the screenshot and the file number.
+
+ first_frag = frag_list[0]
+ directory = (
+ os.path.dirname(os.path.abspath(__file__)) + '/data/' + package_name)
if not os.path.exists(directory):
os.makedirs(directory)
file_num = 0
- dump_file = os.path.join(directory, activity + '-' + frag_list[0] + '-'
- + str(file_num) + '.json')
+ dump_file = os.path.join(
+ directory, activity + '-' + first_frag + '-' + str(file_num) + '.json')
while os.path.exists(dump_file):
file_num += 1
- dump_file = os.path.join(directory, activity + '-' + frag_list[0] + '-'
- + str(file_num) + '.json')
+ dump_file = os.path.join(
+ directory,
+ activity + '-' + first_frag + '-' + str(file_num) + '.json')
view_info = {}
view_info['hierarchy'] = {}
@@ -164,18 +183,30 @@
with open(dump_file, 'w') as out_file:
json.dump(view_info, out_file, indent=2)
- screen_name = (activity + '-' + frag_list[0] + '-' + str(file_num) + '.png')
+ screen_name = activity + '-' + first_frag + '-' + str(file_num) + '.png'
screen_path = os.path.join(directory, screen_name)
subprocess.call([ADB_PATH, 'shell', 'screencap', '/sdcard/' + screen_name])
subprocess.call([ADB_PATH, 'pull', '/sdcard/' + screen_name, screen_path])
-
- # Return the filename & num so that the screenshot can be accessed
+ # Returns the filename & num so that the screenshot can be accessed
# programatically.
return [screen_path, file_num]
-def find_view_idx(vc_dump, activity, frag_list, view_array):
- """Find the index of the current View in the view array (-1 if new view)."""
+def save_ui_flow_relationships(package_name, view_array):
+ """Dumps to file the click dictionary and preceding Views for each View."""
+ directory = (
+ os.path.dirname(os.path.abspath(__file__)) + '/data/' + package_name)
+ for v in view_array:
+ click_file = os.path.join(directory, v.get_name() + '-clicks.json')
+ click_info = {}
+ click_info['click_dict'] = v.click_dict
+ click_info['preceding'] = v.preceding
+ with open(click_file, 'w') as out_file:
+ json.dump(click_info, out_file, indent=2)
+
+
+def find_view_idx(activity, frag_list, vc_dump, view_array):
+ """Finds the index of the current View in the view array (-1 if new View)."""
for i in range(len(view_array)):
if view_array[i].is_duplicate(activity, frag_list, vc_dump):
return i
@@ -183,7 +214,7 @@
def create_view(package_name, vc_dump, activity, frag_list):
- """Store the current view in the View data structure."""
+ """Stores the current view in the View data structure."""
screenshot_info = save_view_data(package_name, activity, frag_list, vc_dump)
v = View(activity, frag_list, vc_dump, screenshot_info[0], screenshot_info[1])
@@ -195,8 +226,7 @@
if (component.isClickable() and component.getVisibility() == VISIBLE and
component.getX() >= 0 and component.getX() <= MAX_WIDTH and
int(component['layout:getWidth()']) > 0 and
- component.getY() >= NAVBAR_HEIGHT and
- component.getY() <= MAX_HEIGHT and
+ component.getY() >= NAVBAR_HEIGHT and component.getY() <= MAX_HEIGHT and
int(component['layout:getHeight()']) > 0):
print component['class'] + '-- will be clicked'
v.clickable.append(component)
@@ -204,20 +234,64 @@
return v
+def link_ui_views(last_view, curr_view, last_clicked, package_name,
+ view_array):
+ """Stores the relationship between last_view and curr_view."""
+
+ # We store in the View information that the last view links to the current
+ # view, and that the current view can be reached from the last view. We use
+ # the id of the last clicked element as the dictionary key so that we know
+ # which element leads from view to view.
+
+ if last_clicked:
+ print 'Last clicked: ' + last_clicked
+ last_view.click_dict[last_clicked] = curr_view.get_name()
+ curr_view.preceding.append(last_view.get_name())
+ else:
+ print 'Lost track of last clicked!'
+ view_array.append(curr_view)
+ # TODO(afergan): Remove this later. For debugging, we print the clicks after
+ # each click to a new view is recorded. However, later we can just do it when
+ # we're done crawling the app.
+ save_ui_flow_relationships(package_name, view_array)
+
+
+def get_activity_and_view(package_name, vc, view_array):
+ """Extracts UI info and returns the current View."""
+
+ # Gets the current UI info. If we have seen this UI before, return the
+ # existing View. If not, create a new View and save it to the view array.
+
+ activity = get_activity_name(package_name)
+ frag_list = get_frag_list(package_name)
+ vc_dump = vc.dump(window='-1')
+ view_idx = find_view_idx(activity, frag_list, vc_dump, view_array)
+
+ if view_idx >= 0:
+ print 'Found duplicate'
+ return activity, view_array[view_idx]
+ else:
+ print 'New view'
+ new_view = create_view(package_name, vc_dump, activity, frag_list)
+ view_array.append(new_view)
+ return activity, new_view
+
+
def crawl_package(apk_dir, vc, device, debug, package_name=None):
- """Main crawler loop. Evaluate views, store new views, and click on items."""
+ """Main crawler loop. Evaluates views, stores new views, and clicks items."""
set_adb_path()
set_device_dimens(vc)
- view_root = []
view_array = []
+ last_clicked = ''
+
if debug or not package_name: # These should be equal
package_name = get_package_name()
else:
# Install the app.
- subprocess.call([ADB_PATH, 'install', '-r', apk_dir + package_name
- + '.apk'])
+ subprocess.call([ADB_PATH, 'install', '-r',
+ apk_dir + package_name + '.apk'])
# Launch the app.
subprocess.call([ADB_PATH, 'shell', 'monkey', '-p', package_name, '-c',
'android.intent.category.LAUNCHER', '1'])
@@ -226,8 +300,6 @@
print 'Storing root'
vc_dump = vc.dump(window='-1')
activity = get_activity_name(package_name)
- if activity == EXITED_APP:
- return
frag_list = get_frag_list(package_name)
view_root = create_view(package_name, vc_dump, activity, frag_list)
view_array.append(view_root)
@@ -242,35 +314,38 @@
if device.isKeyboardShown():
perform_press_back()
else:
- # Determine if this is a View that has already been seen.
- view_idx = find_view_idx(vc_dump, activity, frag_list, view_array)
- if view_idx >= 0:
- print '**FOUND DUPLICATE'
- curr_view = view_array[view_idx]
- else:
- print '**NEW VIEW'
- curr_view = create_view(package_name, vc_dump, activity, frag_list)
- view_array.append(curr_view)
+ last_view = curr_view
+ activity, curr_view = get_activity_and_view(package_name, vc, view_array)
+ if not last_view.is_duplicate_view(curr_view):
+ print 'At a diff view!'
+ link_ui_views(last_view, curr_view, last_clicked, package_name,
+ view_array)
print 'Num clickable: ' + str(len(curr_view.clickable))
if curr_view.clickable:
c = curr_view.clickable[-1]
- print ('Clickable: {} {}, ({},{})'.format(c['uniqueId'], c['class'],
- c.getX(), c.getY()))
+ print('Clickable: {} {}, ({},{})'.format(c['uniqueId'], c['class'],
+ c.getX(), c.getY()))
subprocess.call([ADB_PATH, 'shell', 'input', 'tap', str(c.getX()),
str(c.getY())])
print str(len(curr_view.clickable)) + ' elements left to click'
+ last_clicked = c['uniqueId']
del curr_view.clickable[-1]
else:
- print '!!! Clicking back button'
+ print 'Clicking back button'
perform_press_back()
- if curr_view == view_root:
- return
+ activity, curr_view = get_activity_and_view(package_name, vc, view_array)
+ if last_view.is_duplicate_view(curr_view):
+ # We have nothing left to click, and the back button doesn't change
+ # views.
+ break
+ else:
+ link_ui_views(last_view, curr_view, 'back button', package_name,
+ view_array)
- vc_dump = vc.dump(window='-1')
- activity = get_activity_name(package_name)
if activity == EXITED_APP:
- return
- frag_list = get_frag_list(package_name)
+ break
+
+ save_ui_flow_relationships(package_name, view_array)
diff --git a/apkcrawler/view.py b/apkcrawler/view.py
index 7219a6d..34617a7 100644
--- a/apkcrawler/view.py
+++ b/apkcrawler/view.py
@@ -15,6 +15,7 @@
"""
def __init__(self, activity, frag_list, hierarchy, screenshot, num):
+ """Constructor for View class."""
self.activity = activity
self.frag_list = frag_list
self.hierarchy = hierarchy
@@ -22,17 +23,17 @@
self.num = num
self.clickable = []
self.preceding = []
+ self.click_dict = {}
def get_name(self):
- # Return the identifying name of the View (activity, fragment list, and
- # number).
- return [self.activity, self.frag_list, self.num]
+ """Returns the identifying name of the View."""
+ return self.activity + '-' + self.frag_list[0] + '-' + str(self.num)
def num_components(self):
return len(self.hierarchy)
def is_duplicate(self, cv_activity, cv_frag_list, cv_hierarchy):
- """Determine if the passed-in current view is identical to this View."""
+ """Determines if the passed-in information is identical to this View."""
# Since the fragment names are hashable, this is the most efficient method
# to compare two unordered lists according to
@@ -42,7 +43,7 @@
Counter(self.frag_list) != Counter(cv_frag_list)):
return False
- if len(cv_hierarchy) != self.num_components():
+ if self.num_components() != len(cv_hierarchy):
return False
hierarchy_ids = [h['uniqueId'] for h in self.hierarchy]
@@ -50,8 +51,22 @@
return Counter(hierarchy_ids) == Counter(curr_view_ids)
- def print_info(self):
+ def is_duplicate_view(self, other_view):
+ """Determines if the passed-in View is identical to this View."""
+ if (self.activity != other_view.activity or
+ Counter(self.frag_list) != Counter(other_view.frag_list)):
+ return False
+ if self.num_components() != len(other_view.hierarchy):
+ return False
+
+ hierarchy_ids = [h['uniqueId'] for h in self.hierarchy]
+ other_view_ids = [ov['uniqueId'] for ov in other_view.hierarchy]
+
+ return Counter(hierarchy_ids) == Counter(other_view_ids)
+
+ def print_info(self):
+ """Prints out information about the view."""
print 'Activity: ' + self.activity
print 'Fragment: ' + self.frag_list
print 'Num: " + str(self.num)'