apkcrawler: Store click map.

Each time the crawler clicks on an element that goes to a different
view, store this relationship for both the preceding and resulting
views.

This resulted in a little bit of a refactoring, where I moved
some shared code into separate methods.

Change-Id: I52729352ad725f9c7cfe03694cb75ec9cf4cee9c
diff --git a/apkcrawler/crawlui.py b/apkcrawler/crawlui.py
index 08dea1b..75bc8a9 100644
--- a/apkcrawler/crawlui.py
+++ b/apkcrawler/crawlui.py
@@ -1,7 +1,6 @@
 # Copyright 2016 The Vanadium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style
 # license that can be found in the LICENSE file.
-
 """A module for installing and crawling the UI of Android application."""
 
 import copy
@@ -12,7 +11,6 @@
 
 from view import View
 
-
 ADB_PATH = None
 
 MAX_HEIGHT = 0
@@ -28,9 +26,13 @@
 # activities cannot have spaces, we ensure that no activity will be named this.
 EXITED_APP = 'exited app'
 
+# How many times we should try pressing the back button to return to the app
+# before giving up.
+NUM_BACK_PRESSES = 3
+
 
 def extract_between(text, sub1, sub2, nth=1):
-  """Extract a substring from text between two given substrings."""
+  """Extracts a substring from text between two given substrings."""
   # Credit to
   # https://www.daniweb.com/programming/software-development/code/446964/extract-a-string-between-2-substrings-python-
 
@@ -41,13 +43,13 @@
 
 
 def set_adb_path():
-  """Define the ADB path based on operating system."""
+  """Defines the ADB path based on operating system."""
   try:
     global ADB_PATH
     # For machines with multiple installations of adb, use the last listed
     # version of adb. If this doesn't work for your setup, modify to taste.
-    ADB_PATH = (subprocess.check_output(['which -a adb'], shell=True)
-                .split('\n')[-2])
+    ADB_PATH = (
+        subprocess.check_output(['which -a adb'], shell=True).split('\n')[-2])
   except subprocess.CalledProcessError:
     print 'Could not find adb. Please check your PATH.'
 
@@ -56,42 +58,53 @@
   """Sets global variables to the dimensions of the device."""
   global MAX_HEIGHT, MAX_WIDTH, NAVBAR_HEIGHT
   vc_dump = vc.dump(window='-1')
+  # Returns a string similar to "Physical size: 1440x2560"
   proc = subprocess.Popen([ADB_PATH, 'shell', 'wm size'],
-                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE)
   size, _ = proc.communicate()
   MAX_HEIGHT = extract_between(size, 'x', '\r')
   MAX_WIDTH = extract_between(size, ': ', 'x')
-  NAVBAR_HEIGHT = (vc_dump[0].getY() - int(vc_dump[0]
-                                           ['layout:getLocationOnScreen_y()']))
+  NAVBAR_HEIGHT = (
+      vc_dump[0].getY() - int(vc_dump[0]['layout:getLocationOnScreen_y()']))
 
 
 def perform_press_back():
   subprocess.call([ADB_PATH, 'shell', 'input', 'keyevent', '4'])
 
 
+def attempt_return_to_app(package_name):
+  """Tries to press back a number of times to return to the app."""
+
+  # Returns whether or not we were successful after NUM_PRESSES attempts.
+  for _ in range(0, NUM_BACK_PRESSES):
+    perform_press_back()
+    activity = get_activity_name(package_name)
+    if activity != EXITED_APP:
+      return True
+
+  return False
+
+
 def get_activity_name(package_name):
   """Gets the current running activity of the package."""
   # TODO(afergan): See if we can consolidate this with get_fragment_list, but
   # still make sure that the current app has focus.
   # TODO(afergan): Check for Windows compatibility.
   proc = subprocess.Popen([ADB_PATH, 'shell', 'dumpsys window windows '
-                                              '| grep -E \'mCurrentFocus\''],
-                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                           '| grep -E \'mCurrentFocus\''],
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE)
   activity_str, _ = proc.communicate()
 
   # If a popup menu has captured the focus, the focus will be in the format
   # mCurrentFocus=Window{8f1328e u0 PopupWindow:53a5957}
   if 'PopupWindow' in activity_str:
     popup_str = extract_between(activity_str, 'PopupWindow', '}')
-    return popup_str.replace(':', '')
+    return 'PopupWindow' + popup_str.replace(':', '')
 
-  # We are no longer in the app.
   if package_name not in activity_str:
-    print 'Exited app'
-    # If app opened a different app, try to get back to it.
-    perform_press_back()
-    if package_name not in activity_str:
-      return EXITED_APP
+    return EXITED_APP
 
   # The current focus returns a string in the format
   # mCurrentFocus=Window{35f66c3 u0 com.google.zagat/com.google.android.apps.
@@ -103,7 +116,8 @@
 def get_frag_list(package_name):
   """Gets the list of fragments in the current view."""
   proc = subprocess.Popen([ADB_PATH, 'shell', 'dumpsys activity', package_name],
-                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE)
   adb_dump, _ = proc.communicate()
   frag_dump = re.findall('Added Fragments:(.*?)FragmentManager', adb_dump,
                          re.DOTALL)
@@ -115,34 +129,39 @@
 
 
 def get_package_name():
-  """Get the package name of the current focused window."""
+  """Gets the package name of the current focused window."""
   proc = subprocess.Popen([ADB_PATH, 'shell', 'dumpsys window windows '
-                                              '| grep -E \'mCurrentFocus\''],
-                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                           '| grep -E \'mCurrentFocus\''],
+                          stdout=subprocess.PIPE,
+                          stderr=subprocess.PIPE)
   activity_str, _ = proc.communicate()
 
   # The current focus returns a string in the format
   # mCurrentFocus=Window{35f66c3 u0 com.google.zagat/com.google.android.apps.
   # zagat.activities.BrowseListsActivity}
-  # We want the text before the /
+  # We want the text before the backslash
   pkg_name = extract_between(activity_str, ' ', '/', -1)
   print 'Package name is ' + pkg_name
   return pkg_name
 
 
 def save_view_data(package_name, activity, frag_list, vc_dump):
-  """Store the screenshot with a unique filename."""
-  directory = (os.path.dirname(os.path.abspath(__file__)) + '/data/'
-               + package_name)
+  """Stores the view hierarchy and screenshots with unique filenames."""
+  # Returns the path to the screenshot and the file number.
+
+  first_frag = frag_list[0]
+  directory = (
+      os.path.dirname(os.path.abspath(__file__)) + '/data/' + package_name)
   if not os.path.exists(directory):
     os.makedirs(directory)
   file_num = 0
-  dump_file = os.path.join(directory, activity + '-' + frag_list[0] + '-'
-                           + str(file_num) + '.json')
+  dump_file = os.path.join(
+      directory, activity + '-' + first_frag + '-' + str(file_num) + '.json')
   while os.path.exists(dump_file):
     file_num += 1
-    dump_file = os.path.join(directory, activity + '-' + frag_list[0] + '-'
-                             + str(file_num) + '.json')
+    dump_file = os.path.join(
+        directory,
+        activity + '-' + first_frag + '-' + str(file_num) + '.json')
 
   view_info = {}
   view_info['hierarchy'] = {}
@@ -164,18 +183,30 @@
   with open(dump_file, 'w') as out_file:
     json.dump(view_info, out_file, indent=2)
 
-  screen_name = (activity + '-' + frag_list[0] + '-' + str(file_num) + '.png')
+  screen_name = activity + '-' + first_frag + '-' + str(file_num) + '.png'
   screen_path = os.path.join(directory, screen_name)
   subprocess.call([ADB_PATH, 'shell', 'screencap', '/sdcard/' + screen_name])
   subprocess.call([ADB_PATH, 'pull', '/sdcard/' + screen_name, screen_path])
-
-  # Return the filename & num so that the screenshot can be accessed
+  # Returns the filename & num so that the screenshot can be accessed
   # programatically.
   return [screen_path, file_num]
 
 
-def find_view_idx(vc_dump, activity, frag_list, view_array):
-  """Find the index of the current View in the view array (-1 if new view)."""
+def save_ui_flow_relationships(package_name, view_array):
+  """Dumps to file the click dictionary and preceding Views for each View."""
+  directory = (
+      os.path.dirname(os.path.abspath(__file__)) + '/data/' + package_name)
+  for v in view_array:
+    click_file = os.path.join(directory, v.get_name() + '-clicks.json')
+    click_info = {}
+    click_info['click_dict'] = v.click_dict
+    click_info['preceding'] = v.preceding
+    with open(click_file, 'w') as out_file:
+      json.dump(click_info, out_file, indent=2)
+
+
+def find_view_idx(activity, frag_list, vc_dump, view_array):
+  """Finds the index of the current View in the view array (-1 if new View)."""
   for i in range(len(view_array)):
     if view_array[i].is_duplicate(activity, frag_list, vc_dump):
       return i
@@ -183,7 +214,7 @@
 
 
 def create_view(package_name, vc_dump, activity, frag_list):
-  """Store the current view in the View data structure."""
+  """Stores the current view in the View data structure."""
   screenshot_info = save_view_data(package_name, activity, frag_list, vc_dump)
   v = View(activity, frag_list, vc_dump, screenshot_info[0], screenshot_info[1])
 
@@ -195,8 +226,7 @@
     if (component.isClickable() and component.getVisibility() == VISIBLE and
         component.getX() >= 0 and component.getX() <= MAX_WIDTH and
         int(component['layout:getWidth()']) > 0 and
-        component.getY() >= NAVBAR_HEIGHT and
-        component.getY() <= MAX_HEIGHT and
+        component.getY() >= NAVBAR_HEIGHT and component.getY() <= MAX_HEIGHT and
         int(component['layout:getHeight()']) > 0):
       print component['class'] + '-- will be clicked'
       v.clickable.append(component)
@@ -204,20 +234,64 @@
   return v
 
 
+def link_ui_views(last_view, curr_view, last_clicked, package_name,
+                  view_array):
+  """Stores the relationship between last_view and curr_view."""
+
+  # We store in the View information that the last view links to the current
+  # view, and that the current view can be reached from the last view. We use
+  # the id of the last clicked element as the dictionary key so that we know
+  # which element leads from view to view.
+
+  if last_clicked:
+    print 'Last clicked: ' + last_clicked
+    last_view.click_dict[last_clicked] = curr_view.get_name()
+    curr_view.preceding.append(last_view.get_name())
+  else:
+    print 'Lost track of last clicked!'
+  view_array.append(curr_view)
+  # TODO(afergan): Remove this later. For debugging, we print the clicks after
+  # each click to a new view is recorded. However, later we can just do it when
+  # we're done crawling the app.
+  save_ui_flow_relationships(package_name, view_array)
+
+
+def get_activity_and_view(package_name, vc, view_array):
+  """Extracts UI info and return the current View."""
+
+  # Gets the current UI info. If we have seen this UI before, return the
+  # existing View. If not, create a new View and save it to the view array.
+
+  activity = get_activity_name(package_name)
+  frag_list = get_frag_list(package_name)
+  vc_dump = vc.dump(window='-1')
+  view_idx = find_view_idx(activity, frag_list, vc_dump, view_array)
+
+  if view_idx >= 0:
+    print 'Found duplicate'
+    return activity, view_array[view_idx]
+  else:
+    print 'New view'
+    new_view = create_view(package_name, vc_dump, activity, frag_list)
+    view_array.append(new_view)
+    return activity, new_view
+
+
 def crawl_package(apk_dir, vc, device, debug, package_name=None):
-  """Main crawler loop. Evaluate views, store new views, and click on items."""
+  """Main crawler loop. Evaluates views, store new views, and click on items."""
   set_adb_path()
   set_device_dimens(vc)
 
-  view_root = []
   view_array = []
 
+  last_clicked = ''
+
   if debug or not package_name:  # These should be equal
     package_name = get_package_name()
   else:
     # Install the app.
-    subprocess.call([ADB_PATH, 'install', '-r', apk_dir + package_name
-                     + '.apk'])
+    subprocess.call([ADB_PATH, 'install', '-r',
+                     apk_dir + package_name + '.apk'])
     # Launch the app.
     subprocess.call([ADB_PATH, 'shell', 'monkey', '-p', package_name, '-c',
                      'android.intent.category.LAUNCHER', '1'])
@@ -226,8 +300,6 @@
   print 'Storing root'
   vc_dump = vc.dump(window='-1')
   activity = get_activity_name(package_name)
-  if activity == EXITED_APP:
-    return
   frag_list = get_frag_list(package_name)
   view_root = create_view(package_name, vc_dump, activity, frag_list)
   view_array.append(view_root)
@@ -242,35 +314,38 @@
     if device.isKeyboardShown():
       perform_press_back()
     else:
-      # Determine if this is a View that has already been seen.
-      view_idx = find_view_idx(vc_dump, activity, frag_list, view_array)
-      if view_idx >= 0:
-        print '**FOUND DUPLICATE'
-        curr_view = view_array[view_idx]
-      else:
-        print '**NEW VIEW'
-        curr_view = create_view(package_name, vc_dump, activity, frag_list)
-        view_array.append(curr_view)
+      last_view = curr_view
+      activity, curr_view = get_activity_and_view(package_name, vc, view_array)
+      if not last_view.is_duplicate_view(curr_view):
+        print 'At a diff view!'
+        link_ui_views(last_view, curr_view, last_clicked, package_name,
+                      view_array)
 
     print 'Num clickable: ' + str(len(curr_view.clickable))
 
     if curr_view.clickable:
       c = curr_view.clickable[-1]
-      print ('Clickable: {} {}, ({},{})'.format(c['uniqueId'], c['class'],
-                                                c.getX(), c.getY()))
+      print('Clickable: {} {}, ({},{})'.format(c['uniqueId'], c['class'],
+                                               c.getX(), c.getY()))
       subprocess.call([ADB_PATH, 'shell', 'input', 'tap', str(c.getX()),
                        str(c.getY())])
       print str(len(curr_view.clickable)) + ' elements left to click'
+      last_clicked = c['uniqueId']
       del curr_view.clickable[-1]
 
     else:
-      print '!!! Clicking back button'
+      print 'Clicking back button'
       perform_press_back()
-      if curr_view == view_root:
-        return
+      activity, curr_view = get_activity_and_view(package_name, vc, view_array)
+      if last_view.is_duplicate_view(curr_view):
+        # We have nothing left to click, and the back button doesn't change
+        # views.
+        break
+      else:
+        link_ui_views(last_view, curr_view, 'back button', package_name,
+                      view_array)
 
-    vc_dump = vc.dump(window='-1')
-    activity = get_activity_name(package_name)
     if activity == EXITED_APP:
-      return
-    frag_list = get_frag_list(package_name)
+      break
+
+  save_ui_flow_relationships(package_name, view_array)
diff --git a/apkcrawler/view.py b/apkcrawler/view.py
index 7219a6d..34617a7 100644
--- a/apkcrawler/view.py
+++ b/apkcrawler/view.py
@@ -15,6 +15,7 @@
   """
 
   def __init__(self, activity, frag_list, hierarchy, screenshot, num):
+    """Constructor for View class."""
     self.activity = activity
     self.frag_list = frag_list
     self.hierarchy = hierarchy
@@ -22,17 +23,17 @@
     self.num = num
     self.clickable = []
     self.preceding = []
+    self.click_dict = {}
 
   def get_name(self):
-    # Return the identifying name of the View (activity, fragment list, and
-    # number).
-    return [self.activity, self.frag_list, self.num]
+    """Returns the identifying name of the View."""
+    return self.activity + '-' + self.frag_list[0] + '-' + str(self.num)
 
   def num_components(self):
     return len(self.hierarchy)
 
   def is_duplicate(self, cv_activity, cv_frag_list, cv_hierarchy):
-    """Determine if the passed-in current view is identical to this View."""
+    """Determines if the passed-in information is identical to this View."""
 
     # Since the fragment names are hashable, this is the most efficient method
     # to compare two unordered lists according to
@@ -42,7 +43,7 @@
         Counter(self.frag_list) != Counter(cv_frag_list)):
       return False
 
-    if len(cv_hierarchy) != self.num_components():
+    if self.num_components() != len(cv_hierarchy):
       return False
 
     hierarchy_ids = [h['uniqueId'] for h in self.hierarchy]
@@ -50,8 +51,22 @@
 
     return Counter(hierarchy_ids) == Counter(curr_view_ids)
 
-  def print_info(self):
+  def is_duplicate_view(self, other_view):
+    """Determines if the passed-in View is identical to this View."""
+    if (self.activity != other_view.activity or
+        Counter(self.frag_list) != Counter(other_view.frag_list)):
+      return False
 
+    if self.num_components() != len(other_view.hierarchy):
+      return False
+
+    hierarchy_ids = [h['uniqueId'] for h in self.hierarchy]
+    other_view_ids = [ov['uniqueId'] for ov in other_view.hierarchy]
+
+    return Counter(hierarchy_ids) == Counter(other_view_ids)
+
+  def print_info(self):
+    """Prints out information about the view."""
     print 'Activity: ' + self.activity
     print 'Fragment: ' + self.frag_list
     print 'Num: " + str(self.num)'