devserver: Better accounting of limited update responses.

This is a revision of the dedupe mechanism used for limiting the number
of updates that a client can get from a devserver (a feature that was
originally added to compensate for the lack of a true rule-based
handling of update requests). The way it is currently implemented
sometimes results in a situation where a client receives a "no update"
response instead of the expected "update available" response.

With this change, the devserver only decrements the allowed updates
counter when it receives an event notification from the client that the
download of an update has actually started. Unlike the update request,
whose handling may time out and lead to retransmits, this event is
a unique indication that a client has actually consumed the update. As
before, this accounting is done within a critical section, to prevent
race conditions.

This also takes care of another TODO item: when the devserver receives
a non-update ping, it will correctly return an ack (instead of a "no
update" response, as it currently does).

Finally, this prepends an underscore to internal constant names in
autoupdate_lib.

BUG=chromium:348097
TEST=None

Change-Id: I3cf5a7554e6cbc08df4df4cdadb2706c1747f9ba
Reviewed-on: https://chromium-review.googlesource.com/196314
Tested-by: Gilad Arnold <garnold@chromium.org>
Reviewed-by: Chris Sosa <sosa@chromium.org>
Commit-Queue: Gilad Arnold <garnold@chromium.org>
diff --git a/autoupdate.py b/autoupdate.py
index 58c83fa..803a15e 100644
--- a/autoupdate.py
+++ b/autoupdate.py
@@ -5,9 +5,9 @@
 """Devserver module for handling update client requests."""
 
 import base64
+import collections
 import json
 import os
-import random
 import struct
 import subprocess
 import sys
@@ -203,8 +203,7 @@
     # host, as well as a dictionary of current attributes derived from events.
     self.host_infos = HostInfoTable()
 
-    self.curr_request_id = -1
-    self._update_response_lock = threading.Lock()
+    self._update_count_lock = threading.Lock()
 
   @classmethod
   def _ReadMetadataFromStream(cls, stream):
@@ -613,9 +612,16 @@
     return metadata_obj
 
   def _ProcessUpdateComponents(self, app, event):
-    """Processes the app and event components of an update request.
+    """Processes the components of an update request.
 
-    Returns tuple containing forced_update_label, client_version, and board.
+    Args:
+      app: An app component of an update request.
+      event: An event component of an update request.
+
+    Returns:
+      A named tuple containing attributes of the update requests as the
+      following fields: 'forced_update_label', 'client_version', 'board',
+      'event_result' and 'event_type'.
     """
     # Initialize an empty dictionary for event attributes to log.
     log_message = {}
@@ -638,6 +644,8 @@
       log_message['board'] = board
       curr_host_info.attrs['last_known_version'] = client_version
 
+    event_result = None
+    event_type = None
     if event:
       event_result = int(event[0].getAttribute('eventresult'))
       event_type = int(event[0].getAttribute('eventtype'))
@@ -657,8 +665,14 @@
     if self.host_log:
       curr_host_info.AddLogEntry(log_message)
 
-    return (curr_host_info.attrs.pop('forced_update_label', None),
-            client_version, board)
+    UpdateRequestAttrs = collections.namedtuple(
+        'UpdateRequestAttrs',
+        ('forced_update_label', 'client_version', 'board', 'event_result',
+         'event_type'))
+
+    return UpdateRequestAttrs(
+        curr_host_info.attrs.pop('forced_update_label', None),
+        client_version, board, event_result, event_type)
 
   @classmethod
   def _CheckOmahaRequest(cls, app):
@@ -835,33 +849,40 @@
     protocol, app, event, update_check = autoupdate_lib.ParseUpdateRequest(data)
 
     # Process attributes of the update check.
-    forced_update_label, client_version, board = self._ProcessUpdateComponents(
-        app, event)
+    request_attrs = self._ProcessUpdateComponents(app, event)
 
     if not update_check:
-      # TODO(sosa): Generate correct non-updatecheck payload to better test
-      # update clients.
-      _Log('Non-update check received.  Returning blank payload')
-      return autoupdate_lib.GetNoUpdateResponse(protocol)
+      if ((request_attrs.event_type ==
+           autoupdate_lib.EVENT_TYPE_UPDATE_DOWNLOAD_STARTED) and
+          request_attrs.event_result == autoupdate_lib.EVENT_RESULT_SUCCESS):
+        with self._update_count_lock:
+          if self.max_updates == 0:
+            _Log('Received too many download_started notifications. This '
+                 'probably means a bug in the test environment, such as too '
+                 'many clients running concurrently. Alternatively, it could '
+                 'be a bug in the update client.')
+          elif self.max_updates > 0:
+            self.max_updates -= 1
 
-    if forced_update_label:
+      _Log('A non-update event notification received. Returning an ack.')
+      return autoupdate_lib.GetEventResponse(protocol)
+
+    if request_attrs.forced_update_label:
       if label:
         _Log('Label: %s set but being overwritten to %s by request', label,
-             forced_update_label)
-      label = forced_update_label
+             request_attrs.forced_update_label)
+      label = request_attrs.forced_update_label
 
-    # Make sure that we did not already exceed the max number of allowed
-    # responses; note that this is merely an optimization, as the definitive
-    # check and updating of the counter is done later, right before returning a
-    # response.
+    # Make sure that we did not already exceed the max number of allowed update
+    # responses. Note that the counter is only decremented when the client
+    # reports an actual download, to avoid race conditions between concurrent
+    # update requests from the same client due to a timeout.
     if self.max_updates == 0:
-      _Log('Request received but max number of updates handled')
+      _Log('Request received but max number of updates already served.')
       return autoupdate_lib.GetNoUpdateResponse(protocol)
 
-    request_id = random.randint(0, sys.maxint)
-    self.curr_request_id = request_id
-    _Log('Update Check Received (id=%d). Client is using protocol version: %s',
-         request_id, protocol)
+    _Log('Update Check Received. Client is using protocol version: %s',
+         protocol)
 
     # Finally its time to generate the omaha response to give to client that
     # lets them know where to find the payload and its associated metadata.
@@ -886,7 +907,8 @@
         # Get remote payload attributes.
         metadata_obj = self._GetRemotePayloadAttrs(url)
       else:
-        path_to_payload = self.GetPathToPayload(label, client_version, board)
+        path_to_payload = self.GetPathToPayload(
+            label, request_attrs.client_version, request_attrs.board)
         url = _NonePathJoin(static_urlbase, path_to_payload,
                             constants.UPDATE_FILE)
         local_payload_dir = _NonePathJoin(self.static_dir, path_to_payload)
@@ -913,31 +935,7 @@
         metadata_obj.is_delta_format, metadata_obj.metadata_size,
         signed_metadata_hash, public_key_data, protocol, self.critical_update)
 
-    # Make sure we can proceed with the response (critical section).
-    with self._update_response_lock:
-      # If the number of responses sent already exceeds the max allowed, abort.
-      if self.max_updates == 0:
-        _Log('Max allowed number of update responses already sent, '
-             'aborting this one (id=%d)', request_id)
-        return autoupdate_lib.GetNoUpdateResponse(protocol)
-
-      # If there's been a more recent request, we assume the client timed out
-      # on this request and should not respond to it.
-      # IMPORTANT: we want to do this as close as posible to where we commit to
-      # making a response, i.e. right before we update the response tally. This
-      # is why this check happens in the critical section.
-      curr_request_id = self.curr_request_id
-      if curr_request_id != request_id:
-        _Log('A more recent request was received (id=%d), aborting this one '
-             '(id=%d)', curr_request_id, request_id)
-        return autoupdate_lib.GetNoUpdateResponse(protocol)
-
-      # Update the counter, committing to make the response.
-      self.max_updates -= 1
-
-    # At this point, we're good to go with the response.
-    _Log('Responding to client to use url %s to get image (id=%d)', url,
-         request_id)
+    _Log('Responding to client to use url %s to get image', url)
     return update_response
 
   def HandleHostInfoPing(self, ip):
diff --git a/autoupdate_lib.py b/autoupdate_lib.py
index a61d5e5..2e7a08b 100644
--- a/autoupdate_lib.py
+++ b/autoupdate_lib.py
@@ -10,11 +10,29 @@
 from xml.dom import minidom
 
 
-APP_ID = '87efface-864d-49a5-9bb3-4b050a7c227a'
+# Update events and result codes.
+EVENT_TYPE_UNKNOWN = 0
+EVENT_TYPE_DOWNLOAD_COMPLETE = 1
+EVENT_TYPE_INSTALL_COMPLETE = 2
+EVENT_TYPE_UPDATE_COMPLETE = 3
+EVENT_TYPE_UPDATE_DOWNLOAD_STARTED = 13
+EVENT_TYPE_UPDATE_DOWNLOAD_FINISHED = 14
+
+EVENT_RESULT_ERROR = 0
+EVENT_RESULT_SUCCESS = 1
+EVENT_RESULT_SUCCESS_REBOOT = 2
+EVENT_RESULT_UPDATE_DEFERRED = 9
+
+
+# A default app_id value.
+_APP_ID = '87efface-864d-49a5-9bb3-4b050a7c227a'
+
 
 # Responses for the various Omaha protocols indexed by the protocol version.
-UPDATE_RESPONSE = {}
-UPDATE_RESPONSE['2.0'] = """<?xml version="1.0" encoding="UTF-8"?>
+#
+# Update available responses:
+_UPDATE_RESPONSE = {}
+_UPDATE_RESPONSE['2.0'] = """<?xml version="1.0" encoding="UTF-8"?>
   <gupdate xmlns="http://www.google.com/update2/response" protocol="2.0">
     <daystart elapsed_seconds="%(time_elapsed)s"/>
     <app appid="{%(appid)s}" status="ok">
@@ -30,11 +48,8 @@
         status="ok"
         %(extra_attr)s/>
     </app>
-  </gupdate>
-  """
-
-
-UPDATE_RESPONSE['3.0'] = """<?xml version="1.0" encoding="UTF-8"?>
+  </gupdate>"""
+_UPDATE_RESPONSE['3.0'] = """<?xml version="1.0" encoding="UTF-8"?>
   <response protocol="3.0">
     <daystart elapsed_seconds="%(time_elapsed)s"/>
     <app appid="{%(appid)s}" status="ok">
@@ -59,33 +74,46 @@
         </manifest>
       </updatecheck>
     </app>
-  </response>
-  """
+  </response>"""
 
-
-# Responses for the various Omaha protocols indexed by the protocol version
-# when there's no update to be served.
-NO_UPDATE_RESPONSE = {}
-NO_UPDATE_RESPONSE['2.0'] = """<?xml version="1.0" encoding="UTF-8"?>
+# No update responses:
+_NO_UPDATE_RESPONSE = {}
+_NO_UPDATE_RESPONSE['2.0'] = """<?xml version="1.0" encoding="UTF-8"?>
   <gupdate xmlns="http://www.google.com/update2/response" protocol="2.0">
     <daystart elapsed_seconds="%(time_elapsed)s"/>
     <app appid="{%(appid)s}" status="ok">
       <ping status="ok"/>
       <updatecheck status="noupdate"/>
     </app>
-  </gupdate>
-  """
-
-
-NO_UPDATE_RESPONSE['3.0'] = """<?xml version="1.0" encoding="UTF-8"?>
+  </gupdate>"""
+_NO_UPDATE_RESPONSE['3.0'] = """<?xml version="1.0" encoding="UTF-8"?>
   <response protocol="3.0">
     <daystart elapsed_seconds="%(time_elapsed)s"/>
     <app appid="{%(appid)s}" status="ok">
       <ping status="ok"/>
       <updatecheck status="noupdate"/>
     </app>
-  </response>
-  """
+  </response>"""
+
+
+# Non-update event responses:
+_EVENT_RESPONSE = {}
+_EVENT_RESPONSE['2.0'] = """<?xml version="1.0" encoding="UTF-8"?>
+  <gupdate xmlns="http://www.google.com/update2/response" protocol="2.0">
+    <daystart elapsed_seconds="%(time_elapsed)s"/>
+    <app appid="{%(appid)s}" status="ok">
+      <ping status="ok"/>
+      <event status="ok"/>
+    </app>
+  </gupdate>"""
+_EVENT_RESPONSE['3.0'] = """<?xml version="1.0" encoding="UTF-8"?>
+  <response protocol="3.0">
+    <daystart elapsed_seconds="%(time_elapsed)s"/>
+    <app appid="{%(appid)s}" status="ok">
+      <ping status="ok"/>
+      <event status="ok"/>
+    </app>
+  </response>"""
 
 
 class UnknownProtocolRequestedException(Exception):
@@ -101,7 +129,7 @@
 def GetCommonResponseValues():
   """Returns a dictionary of default values for the response."""
   response_values = {}
-  response_values['appid'] = APP_ID
+  response_values['appid'] = _APP_ID
   response_values['time_elapsed'] = GetSecondsSinceMidnight()
   return response_values
 
@@ -113,6 +141,7 @@
     response_dict: Canned response messages indexed by protocol.
     protocol: client's protocol version from the request Xml.
     response_values: Values to be substituted in the canned response.
+
   Returns:
     Xml string to be passed back to client.
   """
@@ -136,6 +165,7 @@
     public_key: the public key to transmit to the client or None if no key.
     protocol: client's protocol version from the request Xml.
     critical_update: whether this is a critical update.
+
   Returns:
     Xml string to be passed back to client.
   """
@@ -164,7 +194,7 @@
     extra_attributes.append('PublicKeyRsa="%s"' % public_key)
 
   response_values['extra_attr'] = ' '.join(extra_attributes)
-  return GetSubstitutedResponse(UPDATE_RESPONSE, protocol, response_values)
+  return GetSubstitutedResponse(_UPDATE_RESPONSE, protocol, response_values)
 
 
 def GetNoUpdateResponse(protocol):
@@ -172,21 +202,37 @@
 
   Args:
     protocol: client's protocol version from the request Xml.
+
   Returns:
     Xml string to be passed back to client.
   """
   response_values = GetCommonResponseValues()
-  return GetSubstitutedResponse(NO_UPDATE_RESPONSE, protocol, response_values)
+  return GetSubstitutedResponse(_NO_UPDATE_RESPONSE, protocol, response_values)
+
+
+def GetEventResponse(protocol):
+  """Returns a protocol-specific response to a client event notification.
+
+  Args:
+    protocol: client's protocol version from the request Xml.
+
+  Returns:
+    Xml string to be passed back to client.
+  """
+  response_values = GetCommonResponseValues()
+  return GetSubstitutedResponse(_EVENT_RESPONSE, protocol, response_values)
 
 
 def ParseUpdateRequest(request_string):
   """Returns a tuple containing information parsed from an update request.
 
   Args:
-    request_dom: an xml string containing the update request.
+    request_string: an xml string containing the update request.
+
   Returns:
     Tuple consisting of protocol string, app element, event element, and
     update_check element.
+
   Raises UnknownProtocolRequestedException if we do not understand the
     protocol.
   """