# blob: 8883003821ed34f96af29d325b7e6846a160525c [file] [log] [blame]
# -*- coding: utf-8 -*-
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""This module provides utils to handle response of "Range Request"."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import constants
class FormatError(Exception):
    """Raised when the response does not follow the expected format."""
class NoFileFoundError(Exception):
    """Raised when no known file matches a byte range of the response."""
class FileIterator(object):
    """The iterator of files in a response of multipart range request.

    An example response is like:

      HTTP/1.1 206 Partial Content
      Content-Type: multipart/byteranges; boundary=magic_string
      Content-Length: 282

      --magic_string
      Content-Type: text/html
      Content-Range: bytes 0-50/1270

      <data>
      --magic_string
      Content-Type: text/html
      Content-Range: bytes 100-150/1270

      <data>
      --magic_string--

    In our application, each part is the content of a file. This class
    iterates the files, yielding one (file name, content) pair per part.

    The body is parsed as a stream with a one-chunk lookahead kept in
    |self._chunk|. All passes pull from the single chunk iterator created in
    the constructor, so iterating an instance a second time continues from
    wherever the previous pass stopped.
    """

    def __init__(self, response, file_info_list):
        """Constructor.

        Args:
          response: An instance of requests.response. Its body is read lazily
              through iter_content().
          file_info_list: A list of tarfile_utils.TarMemberInfo. We use it to
              look up file name by content start offset and size. Both values
              are converted with int(), so digit strings and integers are
              accepted alike.
        """
        self._response_iter = response.iter_content(
            constants.READ_BUFFER_SIZE_BYTES)
        # Unconsumed data of the chunk being parsed. None before iteration
        # starts and again once the stream is exhausted.
        self._chunk = None
        # Keys are (int offset, int size) so they compare equal to the
        # integers parsed from each "Content-Range" header.
        self._file_name_map = {
            (int(f.content_start), int(f.size)): f.filename
            for f in file_info_list
        }

    def __iter__(self):
        # Prime the lookahead through the helper: an empty body then leaves
        # |_chunk| as None and the parser reports FormatError, instead of a
        # StopIteration escaping from iter().
        self._read_next_chunk()
        return self._iter_files()

    @staticmethod
    def _to_text(data):
        """Return |data| as text; only Python 3 bytes need decoding."""
        if isinstance(data, bytes) and not isinstance(data, str):
            # latin-1 maps every byte value, so this can never fail.
            return data.decode('latin-1')
        return data

    def _read_next_chunk(self):
        """Advance to the next chunk of data and return the current chunk.

        Returns:
          The chunk that was current before the call (may be None).
          |self._chunk| becomes None when the response is exhausted.
        """
        buffered = self._chunk
        try:
            self._chunk = next(self._response_iter)
        except StopIteration:
            self._chunk = None
        return buffered

    def _read_line(self):
        """Read one CRLF ended line from the response.

        The data following the line terminator stays in |self._chunk|.

        Returns:
          The line read, as text and without its CRLF. If the stream ends
          before a CRLF is seen, whatever was buffered is returned. Return
          None if nothing to read.
        """
        if self._chunk is None:
            return None

        # Use a separator of the same type as the chunks (bytes or text).
        crlf = b'\r\n' if isinstance(self._chunk, bytes) else u'\r\n'
        buffered = self._chunk
        while True:
            line, sep, rest = buffered.partition(crlf)
            if sep:
                self._chunk = rest
                return self._to_text(line)
            # No CRLF yet (it may straddle two chunks). Read one more.
            self._read_next_chunk()
            if self._chunk is None:
                return self._to_text(buffered)
            buffered += self._chunk

    def _read_bytes(self, max_bytes):
        """Read at most |max_bytes| bytes from the response.

        Args:
          max_bytes: An integer of maximum number of bytes to read.

        Returns:
          The bytes read, in the same type as the response chunks. Fewer than
          |max_bytes| are returned only when the stream ends early. Return
          None if nothing to read.
        """
        if self._chunk is None:
            return None

        empty = self._chunk[:0]  # Empty value of the chunk type, for join().
        pieces = []
        bytes_remaining = max_bytes
        while True:
            bytes_remaining -= len(self._chunk)
            if bytes_remaining < 0:
                # The chunk holds more than needed: |bytes_remaining| is now
                # minus the surplus, so it splits the chunk at the right
                # place.
                pieces.append(self._chunk[:bytes_remaining])
                self._chunk = self._chunk[bytes_remaining:]
                break
            pieces.append(self._read_next_chunk())
            if self._chunk is None:
                break
        return empty.join(pieces)

    def _read_empty_line(self):
        """Read one line and assert it is empty.

        Raises:
          FormatError: Raised when the stream ended or the line has content.
        """
        line = self._read_line()
        if line is None:
            raise FormatError('Expect an empty line, but got EOF.')
        if line:
            raise FormatError('Expect an empty line, but got "%s".' % line)

    def _iter_files(self):
        """Iterate the files in the response.

        Yields:
          A pair of (name, content) of the file.

        Raises:
          FormatError: Raised when response content interrupted.
          NoFileFoundError: Raised when we cannot get a file matches the
              range.
        """
        self._read_empty_line()  # The first line is empty.
        while True:
            self._read_line()  # The second line is the boundary.
            self._read_line()  # The line sub content type.
            sub_range_header = self._read_line()  # The sub content range.
            if sub_range_header is None:
                break
            self._read_empty_line()  # Another empty line.

            # The header format is: "Content-Range: bytes START-END/TOTAL"
            try:
                byte_range = sub_range_header.split(' ')[2].split('/')[0]
                start, end = (int(v) for v in byte_range.split('-'))
            except (IndexError, ValueError):
                raise FormatError(
                    'Wrong format of sub content range header: %s' %
                    sub_range_header)
            size = end - start + 1

            try:
                filename = self._file_name_map[(start, size)]
            except KeyError:
                raise NoFileFoundError(
                    'Cannot find a file matches the range %s' %
                    sub_range_header)

            content = self._read_bytes(size)
            # Check the size before consuming the trailing CRLF: a short read
            # only happens at end of stream, where reading the trailing line
            # would fail first and hide this more specific message.
            bytes_read = 0 if content is None else len(content)
            if bytes_read != size:
                raise FormatError(
                    '%s: Error in reading content (read %d B, expect %d B)' %
                    (filename, bytes_read, size)
                )
            self._read_empty_line()  # Every content has a trailing '\r\n'.
            yield filename, content