From af62e73dce8d522460c38cc0102db2ed2a564a41 Mon Sep 17 00:00:00 2001
From: Chris Sosa <sosa@chromium.org>
Date: Tue, 12 Oct 2010 15:07:54 -0700
Subject: [PATCH] Add ability to download using newest image from zip url.

This changes the behavior of ctest to try to grab the latest zip from the zip web pages if it can't use the latest link (due to an IOError, or because none was provided). It assumes the zip server is an Apache server whose directory index, for each board/channel, lists version directories that contain images named ChromeOS-<version>-.*.zip.

Change-Id: I4ee075db7afd58c8d08f59aca2e69b5bab5ff5e9

BUG=7675
TEST=Ran it with -l "http://Doesnotexist" and also without a -l option. Added new unit tests for the new code and ran them as well.

Review URL: http://codereview.chromium.org/3659002
---
 bin/ctest.py          | 98 ++++++++++++++++++++++++++++++++++++-------
 bin/ctest_unittest.py | 65 +++++++++++++++++++++++++++-
 2 files changed, 147 insertions(+), 16 deletions(-)
diff --git a/bin/ctest.py b/bin/ctest.py
index 2e12293cbc..70dbec8ffe 100755
--- a/bin/ctest.py
+++ b/bin/ctest.py
@@ -9,15 +9,46 @@
 import fileinput
 import optparse
 import os
+import re
 import sys
 import traceback
 import urllib
+import HTMLParser
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../lib'))
-from cros_build_lib import Info, RunCommand, ReinterpretPathForChroot
+from cros_build_lib import Info
+from cros_build_lib import ReinterpretPathForChroot
+from cros_build_lib import RunCommand
+from cros_build_lib import Warning
 
 _IMAGE_TO_EXTRACT = 'chromiumos_test_image.bin'
 
+class HTMLDirectoryParser(HTMLParser.HTMLParser):
+  """HTMLParser for parsing the default apache file index."""
+
+  def __init__(self, regex):
+    HTMLParser.HTMLParser.__init__(self)
+    self.regex_object = re.compile(regex)
+    self.link_list = []
+
+  def handle_starttag(self, tag, attrs):
+    """Overrides from HTMLParser and is called at the start of every tag.
+
+    This implementation grabs attributes from links (i.e. <a ... > </a>
+    and adds the target from href=<target> if the <target> matches the
+    regex given at the start.
+    """
+    if not tag.lower() == 'a':
+      return
+
+    for attr in attrs:
+      if not attr[0].lower() == 'href':
+        continue
+
+      match = self.regex_object.match(attr[1])
+      if match:
+        self.link_list.append(match.group(0).rstrip('/'))
+
 
 def ModifyBootDesc(download_folder, redirect_file=None):
   """Modifies the boot description of a downloaded image to work with path.
@@ -62,6 +93,40 @@ def ModifyBootDesc(download_folder, redirect_file=None):
   fileinput.close()
 
 
+def GetLatestLinkFromPage(url, regex):
+  """Returns the latest link from the given url that matches regex.
+
+  Args:
+    url: Url to download and parse.
+    regex: Regular expression to match links against.
+  """
+  url_file = urllib.urlopen(url)
+  url_html = url_file.read()
+  url_file.close()
+
+  # Parses links with versions embedded.
+  url_parser = HTMLDirectoryParser(regex=regex)
+  url_parser.feed(url_html)
+  return max(url_parser.link_list)
+
+
+def GetNewestLinkFromZipBase(board, channel, zip_server_base):
+  """Returns the url to the newest image from the zip server.
+
+  Args:
+    board: board for the image zip.
+    channel: channel for the image zip.
+    zip_server_base:  base url for zipped images.
+  """
+  zip_base = os.path.join(zip_server_base, channel, board)
+  latest_version = GetLatestLinkFromPage(zip_base, '\d+\.\d+\.\d+\.\d+/')
+
+  zip_dir = os.path.join(zip_base, latest_version)
+  zip_name = GetLatestLinkFromPage(zip_dir,
+                                   'ChromeOS-\d+\.\d+\.\d+\.\d+-.*\.zip')
+  return os.path.join(zip_dir, zip_name)
+
+
 def GetLatestZipUrl(board, channel, latest_url_base, zip_server_base):
   """Returns the url of the latest image zip for the given arguments.
 
@@ -71,18 +136,24 @@ def GetLatestZipUrl(board, channel, latest_url_base, zip_server_base):
     latest_url_base: base url for latest links.
     zip_server_base:  base url for zipped images.
   """
-  # Grab the latest image info.
-  latest_file_url = os.path.join(latest_url_base, channel,
-                                 'LATEST-%s' % board)
-  latest_image_file = urllib.urlopen(latest_file_url)
-  latest_image = latest_image_file.read()
-  latest_image_file.close()
+  if latest_url_base:
+    try:
+      # Grab the latest image info.
+      latest_file_url = os.path.join(latest_url_base, channel,
+                                   'LATEST-%s' % board)
+      latest_image_file = urllib.urlopen(latest_file_url)
+      latest_image = latest_image_file.read()
+      latest_image_file.close()
+      # Convert bin.gz into zip.
+      latest_image = latest_image.replace('.bin.gz', '.zip')
+      version = latest_image.split('-')[1]
+      zip_base = os.path.join(zip_server_base, channel, board)
+      return os.path.join(zip_base, version, latest_image)
+    except IOError:
+      Warning(('Could not use latest link provided, defaulting to parsing'
+               ' latest from zip url base.'))
 
-  # Convert bin.gz into zip.
-  latest_image = latest_image.replace('.bin.gz', '.zip')
-  version = latest_image.split('-')[1]
-  zip_base = os.path.join(zip_server_base, channel, board)
-  return os.path.join(zip_base, version, latest_image)
+  return GetNewestLinkFromZipBase(board, channel, zip_server_base)
 
 
 def GrabZipAndExtractImage(zip_url, download_folder, image_name) :
@@ -201,9 +272,6 @@ def main():
   if not options.channel:
     parser.error('Need channel for image to compare against.')
 
-  if not options.latestbase:
-    parser.error('Need latest url base to get images.')
-
   if not options.zipbase:
     parser.error('Need zip url base to get images.')
 
diff --git a/bin/ctest_unittest.py b/bin/ctest_unittest.py
index c384c1e209..cc00e329ce 100755
--- a/bin/ctest_unittest.py
+++ b/bin/ctest_unittest.py
@@ -6,12 +6,13 @@
 
 """Unit tests for ctest."""
 
-import ctest
 import mox
 import os
 import unittest
 import urllib
 
+import ctest
+
 _TEST_BOOT_DESC = """
   --arch="x86"
   --output_dir="/home/chrome-bot/0.8.70.5-a1"
@@ -35,6 +36,7 @@ class CrosTestTest(mox.MoxTestBase):
     self.image_url = '%s/%s/%s/%s/%s.zip' % (self.zipbase, self.channel,
                                              self.board, self.version,
                                              self.image_name)
+    self.test_regex = 'ChromeOS-\d+\.\d+\.\d+\.\d+-.*\.zip'
 
   def testModifyBootDesc(self):
     """Tests to make sure we correctly modify a boot desc."""
@@ -76,6 +78,23 @@ class CrosTestTest(mox.MoxTestBase):
                       self.image_url)
     self.mox.VerifyAll()
 
+  def testGetLatestZipFromBadUrl(self):
+    """Tests whether GetLatestZipUrl returns correct url given bad link."""
+    self.mox.StubOutWithMock(urllib, 'urlopen')
+    self.mox.StubOutWithMock(ctest, 'GetNewestLinkFromZipBase')
+    m_file = self.mox.CreateMock(file)
+
+    urllib.urlopen('%s/%s/LATEST-%s' % (self.latestbase, self.channel,
+                   self.board)).AndRaise(IOError('Cannot open url.'))
+    ctest.GetNewestLinkFromZipBase(self.board, self.channel,
+                                   self.zipbase).AndReturn(self.image_url)
+
+    self.mox.ReplayAll()
+    self.assertEquals(ctest.GetLatestZipUrl(self.board, self.channel,
+                                            self.latestbase, self.zipbase),
+                                            self.image_url)
+    self.mox.VerifyAll()
+
   def testGrabZipAndExtractImageUseCached(self):
     """Test case where cache holds our image."""
     self.mox.StubOutWithMock(os.path, 'exists')
@@ -160,6 +179,50 @@ class CrosTestTest(mox.MoxTestBase):
 
     self.CommonDownloadAndExtractImage()
 
+  def testGetLatestLinkFromPage(self):
+    """Tests whether we get the latest link from a url given a regex."""
+    test_url = 'test_url'
+    test_html = """
+    <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+    <html>
+    <body>
+    <h1>Test Index</h1>
+    <a href="ZsomeCruft">Cruft</a>
+    <a href="YotherCruft">Cruft</a>
+    <a href="ChromeOS-0.9.12.4-blahblah.zip">testlink1/</a>
+    <a href="ChromeOS-0.9.12.4-blahblah.zip.other/">testlink2/</a>
+    <a href="ChromeOS-Factory-0.9.12.4-blahblah.zip/">testlink3/</a>
+    </body></html>
+    """
+    self.mox.StubOutWithMock(urllib, 'urlopen')
+    m_file = self.mox.CreateMock(file)
+
+    urllib.urlopen(test_url).AndReturn(m_file)
+    m_file.read().AndReturn(test_html)
+    m_file.close()
+
+    self.mox.ReplayAll()
+    latest_link = ctest.GetLatestLinkFromPage(test_url, regex=self.test_regex)
+    self.assertTrue(latest_link == 'ChromeOS-0.9.12.4-blahblah.zip')
+    self.mox.VerifyAll()
+
+
+class HTMLDirectoryParserTest(unittest.TestCase):
+  """Test class for HTMLDirectoryParser."""
+
+  def setUp(self):
+    self.test_regex = '\d+\.\d+\.\d+\.\d+/'
+
+  def testHandleStarttagGood(self):
+    parser = ctest.HTMLDirectoryParser(regex=self.test_regex)
+    parser.handle_starttag('a', [('href', '0.9.74.1/')])
+    self.assertTrue('0.9.74.1' in parser.link_list)
+
+  def testHandleStarttagBad(self):
+    parser = ctest.HTMLDirectoryParser(regex=self.test_regex)
+    parser.handle_starttag('a', [('href', 'ZsomeCruft/')])
+    self.assertTrue('ZsomeCruft' not in parser.link_list)
+
 
 if __name__ == '__main__':
   unittest.main()