gs_archive_server: Add Content-Length header to the extract RPC

Use tar's --to-command hook to capture the size of the extracted file
and set the Content-Length response header, which is needed to support
range requests.

BUG=chromium:1143435
TEST=Tested manually. Details at http://gpaste/5452986865156096
Change-Id: I4eb2f858d6498e6fd3aa67ad413553837631f0d6
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/dev-util/+/2538137
Tested-by: Sanika Kulkarni <sanikak@chromium.org>
Reviewed-by: Allen Li <ayatane@chromium.org>
Reviewed-by: Congbin Guo <guocb@chromium.org>
Commit-Queue: Sanika Kulkarni <sanikak@chromium.org>
Auto-Submit: Sanika Kulkarni <sanikak@chromium.org>
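Note on the mechanism: GNU tar's --to-command hook runs the given command once
per extracted member, with the member's bytes on stdin and its size exported
in the environment as TAR_SIZE, so "sh -c 'echo $TAR_SIZE; cat'" prints the
size on the first line followed by the raw contents. A minimal standalone
sketch of the technique (the archive and member names are hypothetical, and
GNU tar is assumed, since TAR_SIZE is a GNU extension):

  import subprocess
  import tempfile

  ARCHIVE = 'example.tar'      # hypothetical uncompressed tar archive
  MEMBER = 'payload/file.bin'  # hypothetical member of that archive

  # tar runs the --to-command child once per extracted member; the child
  # inherits tar's stdout, so redirecting tar's stdout captures its output.
  cmd = ['tar', '-x', '-f', ARCHIVE, MEMBER,
         '--to-command', "sh -c 'echo $TAR_SIZE; cat'"]

  with tempfile.TemporaryFile() as buf:
    subprocess.check_call(cmd, stdout=buf)
    buf.seek(0)
    size = int(buf.readline().strip())  # first line: member size in bytes
    content = buf.read()                # remainder: the member's contents
    assert len(content) == size
    print('extracted %d bytes' % size)

With --to-command, nothing is written to disk by tar itself; the member only
flows through the child command, which is what lets the server learn the size
before streaming.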
diff --git a/gs_cache/gs_archive_server.py b/gs_cache/gs_archive_server.py
index d0326ae..7f207b5 100644
--- a/gs_cache/gs_archive_server.py
+++ b/gs_cache/gs_archive_server.py
@@ -343,27 +343,57 @@
       Extracted file content (Binary data).
     """
     rsp = self._caching_server.download(archive, headers=headers)
-    cmd = ['tar', '-O', '-x', target_file]
-
-    with tempfile.SpooledTemporaryFile(max_size=_SPOOL_FILE_SIZE_BYTES) as df:
-      proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=df)
+    # --to-command pipes each extracted file's contents to the given command;
+    # GNU tar exports the file's size to that command as $TAR_SIZE.
+    # See https://www.gnu.org/software/tar/manual/html_node/
+    # Writing-to-an-External-Program.html
+    cmd = ['tar', '-x', target_file, '--to-command',
+           "sh -c 'echo $TAR_SIZE; cat'"]
+    ef = tempfile.TemporaryFile()
+    try:
+      proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=ef)
       for chunk in rsp.iter_content(constants.READ_BUFFER_SIZE_BYTES):
         proc.stdin.write(chunk)
       proc.stdin.close()
-      proc.wait()
+      returncode = proc.wait()
+      if returncode:
+        raise cherrypy.HTTPError(
+            httplib.INTERNAL_SERVER_ERROR,
+            'tar command to extract %s from %s exited with code %d.' %
+            (target_file, archive, returncode))
+
+      # Go to the beginning of the file. The first line holds the extracted
+      # file's size; the rest is the file's contents.
+      ef.seek(0)
+      extracted_content_length = ef.readline().strip()
+      _log('Extracted content length is %s bytes.', extracted_content_length)
+      cherrypy.response.headers['Content-Length'] = extracted_content_length
       # Update the response's content type to support yielding binary data.
       cherrypy.response.headers['Content-Type'] = 'application/octet-stream'
-      # Go to the beginning of the file.
-      df.seek(0)
-
-      # Read the SpooledFile in chunks and yield the data.
-      while True:
-        data = df.read(constants.READ_BUFFER_SIZE_BYTES)
-        if not data:
-          break
-        yield data
+    except Exception as e:
+      ef.close()
+      raise cherrypy.HTTPError(
+          httplib.INTERNAL_SERVER_ERROR,
+          'An exception occurred while extracting %s from %s: %s' %
+          (target_file, archive, e))
+
+    def extracted_content():
+      _log('Begin streaming extracted contents of "%s".', target_file)
+      try:
+        # Read the TemporaryFile in chunks and yield the data.
+        while True:
+          data = ef.read(constants.READ_BUFFER_SIZE_BYTES)
+          if not data:
+            break
+          yield data
+        _log('Streaming of "%s" done.', target_file)
+      except Exception as e:
+        raise cherrypy.HTTPError(
+            httplib.INTERNAL_SERVER_ERROR,
+            'An exception occurred while reading extracted data: %s' % e)
+      finally:
+        ef.close()
+    return extracted_content()

   @cherrypy.expose
   @cherrypy.config(**{'response.stream': True})
@@ -397,37 +427,48 @@
         '.xz': ['xz', '-d', '-c'],
         '.bz2': ['bzip2', '-d', '-c'],
     }
-    decompressed_file = tempfile.SpooledTemporaryFile(
-        max_size=_SPOOL_FILE_SIZE_BYTES)
-    proc = subprocess.Popen(commands[extname], stdin=subprocess.PIPE,
-                            stdout=decompressed_file)
-    _log('Decompress process id: %s.', proc.pid)
-    for chunk in rsp.iter_content(constants.READ_BUFFER_SIZE_BYTES):
-      proc.stdin.write(chunk)
-    proc.stdin.close()
-    _log('Decompression done.')
-    proc.wait()
-    # The header of Content-Length is necessary for supporting range request.
-    # So we have to decompress the file locally to get the size. This may cause
-    # connection timeout issue if the decompression take too long time (e.g. 90
-    # seconds). As a reference, it takes about 10 seconds to decompress a 400MB
-    # tgz file.
-    decompressed_file.seek(0, os.SEEK_END)
-    content_length = decompressed_file.tell()
-    _log('Decompressed content length is %d bytes.', content_length)
-    cherrypy.response.headers['Content-Length'] = str(content_length)
-    decompressed_file.seek(0)
+    decompressed_file = tempfile.TemporaryFile()
+    try:
+      proc = subprocess.Popen(commands[extname], stdin=subprocess.PIPE,
+                              stdout=decompressed_file)
+      _log('Decompress process id: %s.', proc.pid)
+      for chunk in rsp.iter_content(constants.READ_BUFFER_SIZE_BYTES):
+        proc.stdin.write(chunk)
+      proc.stdin.close()
+      _log('Decompression done.')
+      proc.wait()
+
+      # The Content-Length header is necessary to support range requests, so
+      # we have to decompress the file locally to learn the size. This may
+      # cause a connection timeout if the decompression takes too long (e.g.
+      # 90 seconds). As a reference, it takes about 10 seconds to decompress
+      # a 400MB tgz file.
+      decompressed_file.seek(0, os.SEEK_END)
+      content_length = decompressed_file.tell()
+      _log('Decompressed content length is %d bytes.', content_length)
+      cherrypy.response.headers['Content-Length'] = str(content_length)
+      decompressed_file.seek(0)
+    except Exception as e:
+      decompressed_file.close()
+      raise cherrypy.HTTPError(
+          httplib.INTERNAL_SERVER_ERROR,
+          'An exception occurred while decompressing %s: %s' % (zarchive, e))
+
     def decompressed_content():
-      _log('Streaming decompressed content of "%s" begin.', zarchive)
-      while True:
-        data = decompressed_file.read(constants.READ_BUFFER_SIZE_BYTES)
-        if not data:
-          break
-        yield data
-      decompressed_file.close()
-      _log('Streaming of "%s" done.', zarchive)
+      _log('Begin streaming decompressed content of "%s".', zarchive)
+      try:
+        while True:
+          data = decompressed_file.read(constants.READ_BUFFER_SIZE_BYTES)
+          if not data:
+            break
+          yield data
+        _log('Streaming of "%s" done.', zarchive)
+      except Exception as e:
+        raise cherrypy.HTTPError(
+            httplib.INTERNAL_SERVER_ERROR,
+            'An exception occurred while reading decompressed data: %s' % e)
+      finally:
+        decompressed_file.close()
     return decompressed_content()
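A note on the streaming side of both handlers: with CherryPy's
response.stream enabled, the headers set above (including Content-Length) are
sent as soon as the handler returns, and the body is then pulled chunk by
chunk from the returned generator. That is why the size must be known before
the first yield. A minimal sketch of the pattern, assuming only the cherrypy
package (the class and handler names here are invented for illustration):

  import cherrypy

  class StreamDemo(object):
    """Illustrative only; not the gs_archive_server handler."""

    @cherrypy.expose
    @cherrypy.config(**{'response.stream': True})
    def blob(self):
      payload = b'x' * (1 << 20)  # stand-in for extracted/decompressed data
      # Headers must be final before the generator is returned: CherryPy
      # sends them first, then iterates the generator for the body.
      cherrypy.response.headers['Content-Type'] = 'application/octet-stream'
      cherrypy.response.headers['Content-Length'] = str(len(payload))

      def content():
        chunk = 64 * 1024
        for start in range(0, len(payload), chunk):
          yield payload[start:start + chunk]
      return content()

  if __name__ == '__main__':
    cherrypy.quickstart(StreamDemo())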