Revert "gs_archive_server: add content-length header to extract rpc"

This reverts commit 9dce0f9d6cd825597b76eae4bb1dcad2ba697ebb.

Reason for revert: crbug.com/1158258

Original change's description:
> gs_archive_server: add content-length header to extract rpc
>
> BUG=chromium:1143435
> TEST=Tested manually. Details http://gpaste/5452986865156096
>
> Change-Id: I4eb2f858d6498e6fd3aa67ad413553837631f0d6
> Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/2538137
> Tested-by: Sanika Kulkarni <sanikak@chromium.org>
> Reviewed-by: Allen Li <ayatane@chromium.org>
> Reviewed-by: Congbin Guo <guocb@chromium.org>
> Commit-Queue: Sanika Kulkarni <sanikak@chromium.org>
> Auto-Submit: Sanika Kulkarni <sanikak@chromium.org>

Bug: chromium:1143435
Change-Id: Ie1c3865996149ce32ee564e5da30aa43e9cfc624
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/2589894
Reviewed-by: Gregory Nisbet <gregorynisbet@google.com>
Reviewed-by: Congbin Guo <guocb@chromium.org>
Commit-Queue: Congbin Guo <guocb@chromium.org>
Tested-by: Congbin Guo <guocb@chromium.org>
diff --git a/gs_cache/gs_archive_server.py b/gs_cache/gs_archive_server.py
index 7f207b5..d0326ae 100644
--- a/gs_cache/gs_archive_server.py
+++ b/gs_cache/gs_archive_server.py
@@ -343,57 +343,27 @@
       Extracted file content (Binary data).
     """
     rsp = self._caching_server.download(archive, headers=headers)
-    # --to-command pipes the extracted file contents to the provided command.
-    # See https://www.gnu.org/software/tar/manual/html_node/
-    # Writing-to-an-External-Program.html
-    cmd = ['tar', '-x', target_file, '--to-command',
-           "sh -c 'echo $TAR_SIZE; cat'"]
-    ef = tempfile.TemporaryFile()
-    try:
-      proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=ef)
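+    # tar's -O (--to-stdout) flag writes the extracted member to stdout
+    # instead of creating a file on disk, so the output can be captured in
+    # the spooled temporary file below.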
+    cmd = ['tar', '-O', '-x', target_file]
+
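+    # SpooledTemporaryFile keeps the extracted data in memory and only spills
+    # to an on-disk temporary file once it grows past _SPOOL_FILE_SIZE_BYTES.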
+    with tempfile.SpooledTemporaryFile(max_size=_SPOOL_FILE_SIZE_BYTES) as df:
+      proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=df)
       for chunk in rsp.iter_content(constants.READ_BUFFER_SIZE_BYTES):
         proc.stdin.write(chunk)
       proc.stdin.close()
-      returncode = proc.wait()
-      if returncode:
-        raise cherrypy.HTTPError(
-            httplib.INTERNAL_SERVER_ERROR,
-            'Tar command to extract the %s from %s finished with exit code: '
-            '%d.', target_file, archive, returncode)
-
-      # Go to the beginning of the file. The first line will contain the file
-      # size. Beyond that will be contents of the extracted file.
-      ef.seek(0)
-      extracted_content_length = ef.readline().strip()
-      _log('Extracted content length is %s bytes.', extracted_content_length)
-      cherrypy.response.headers['Content-Length'] = extracted_content_length
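+      # Wait for tar to exit so the spooled file holds the complete output.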
+      proc.wait()
 
       # Update the response's content type to support yielding binary data.
       cherrypy.response.headers['Content-Type'] = 'application/octet-stream'
-    except Exception as e:
-      ef.close()
-      raise cherrypy.HTTPError(
-          httplib.INTERNAL_SERVER_ERROR,
-          'An exception occurred while extracting %s from %s: %s' %
-          (target_file, archive, e))
 
-    def extracted_content():
-      _log('Begin streaming extracted contents of "%s".', target_file)
-      try:
-        # Read the TemporaryFile in chunks and yield the data.
-        while True:
-          data = ef.read(constants.READ_BUFFER_SIZE_BYTES)
-          if not data:
-            break
-          yield data
-        _log('Streaming of "%s" done.', target_file)
-      except Exception as e:
-        raise cherrypy.HTTPError(
-            httplib.INTERNAL_SERVER_ERROR,
-            'An exception occurred while reading extracted data: %s' % e)
-      finally:
-        ef.close()
-    return extracted_content()
+      # Go to the beginning of the file.
+      df.seek(0)
+
+      # Read the spooled file in chunks and yield the data.
+      while True:
+        data = df.read(constants.READ_BUFFER_SIZE_BYTES)
+        if not data:
+          break
+        yield data
 
   @cherrypy.expose
   @cherrypy.config(**{'response.stream': True})
@@ -427,48 +397,37 @@
         '.xz': ['xz', '-d', '-c'],
         '.bz2': ['bzip2', '-d', '-c'],
     }
-    decompressed_file = tempfile.TemporaryFile()
-    try:
-      proc = subprocess.Popen(commands[extname], stdin=subprocess.PIPE,
-                              stdout=decompressed_file)
-      _log('Decompress process id: %s.', proc.pid)
-      for chunk in rsp.iter_content(constants.READ_BUFFER_SIZE_BYTES):
-        proc.stdin.write(chunk)
-      proc.stdin.close()
-      _log('Decompression done.')
-      proc.wait()
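+    # Buffer the decompressed output in memory, rolling over to an on-disk
+    # temporary file only if it exceeds _SPOOL_FILE_SIZE_BYTES.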
+    decompressed_file = tempfile.SpooledTemporaryFile(
+        max_size=_SPOOL_FILE_SIZE_BYTES)
+    proc = subprocess.Popen(commands[extname], stdin=subprocess.PIPE,
+                            stdout=decompressed_file)
+    _log('Decompress process id: %s.', proc.pid)
+    for chunk in rsp.iter_content(constants.READ_BUFFER_SIZE_BYTES):
+      proc.stdin.write(chunk)
+    proc.stdin.close()
+    _log('Decompression done.')
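+    # Wait for the decompressor to exit before measuring the output size.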
+    proc.wait()
 
-      # The header of Content-Length is necessary for supporting range request.
-      # So we have to decompress the file locally to get the size. This may
-      # cause connection timeout issue if the decompression take too long time
-      # (e.g. 90 seconds). As a reference, it takes about 10 seconds to
-      # decompress a 400MB tgz file.
-      decompressed_file.seek(0, os.SEEK_END)
-      content_length = decompressed_file.tell()
-      _log('Decompressed content length is %d bytes.', content_length)
-      cherrypy.response.headers['Content-Length'] = str(content_length)
-      decompressed_file.seek(0)
-    except Exception as e:
-      decompressed_file.close()
-      raise cherrypy.HTTPError(
-          httplib.INTERNAL_SERVER_ERROR,
-          'An exception occurred while decompressing %s: %s' % (zarchive, e))
+    # The Content-Length header is necessary for supporting range requests,
+    # so we have to decompress the file locally to get the size. This may
+    # cause a connection timeout if the decompression takes too long (e.g. 90
+    # seconds). As a reference, it takes about 10 seconds to decompress a
+    # 400MB tgz file.
+    decompressed_file.seek(0, os.SEEK_END)
+    content_length = decompressed_file.tell()
+    _log('Decompressed content length is %d bytes.', content_length)
+    cherrypy.response.headers['Content-Length'] = str(content_length)
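+    # Rewind so the generator below streams from the start of the file.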
+    decompressed_file.seek(0)
 
     def decompressed_content():
-      _log('Begin streaming decompressed content of "%s".', zarchive)
-      try:
-        while True:
-          data = decompressed_file.read(constants.READ_BUFFER_SIZE_BYTES)
-          if not data:
-            break
-          yield data
-        _log('Streaming of "%s" done.', zarchive)
-      except Exception as e:
-        raise cherrypy.HTTPError(
-            httplib.INTERNAL_SERVER_ERROR,
-            'An exception occurred while reading decompressed data: %s' % e)
-      finally:
-        decompressed_file.close()
+      _log('Begin streaming decompressed content of "%s".', zarchive)
+      while True:
+        data = decompressed_file.read(constants.READ_BUFFER_SIZE_BYTES)
+        if not data:
+          break
+        yield data
+      decompressed_file.close()
+      _log('Streaming of "%s" done.', zarchive)
 
     return decompressed_content()