gs_cache/range_response.py - mirrors/cros/chromiumos/platform/dev-util - Git at Google

 # -*- coding: utf-8 -*-
 # Copyright 2018 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 """This module provides utils to handle response of "Range Request"."""

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import collections
 import itertools
 import json
 import re

 import constants

 _RANGE_HEADER_SEPARATORS = re.compile('[-/ ]')
 _ONE_LINE = object()  # Special object to indicate data reader to read one line.

 _ContentRangeHeader = collections.namedtuple('_ContentRangeHeader',
                                              ('bytes', 'start', 'end', 'total'))


 class FormatError(Exception):
   """Raised when a response or header is not in the format we can parse."""


 class NoFileFoundError(Exception):
   """Raised when no known file matches the byte range of a response."""


 class ResponseQueueError(Exception):
   """Raised when a response is queued in a way that is not allowed."""


 def _get_file_by_range_header(range_header_str, file_name_map):
   """Get file name and size by the Content-Range header.

   The format of Content-Range header is like:
     Content-Range: bytes <start>-<end>/<total>
   The leading 'Content-Range:' name is optional. <start> and <end> are both
   inclusive, so the size is <end> - <start> + 1. The file name is then
   retrieved from |file_name_map| by the key (<start>, size).

   Args:
     range_header_str: A string of range header, with or without the leading
       'Content-Range:' name.
     file_name_map: A dict of {(<start:str>, <size:int>): filename, ...}.

   Returns:
     A tuple of (filename, size).

   Raises:
     FormatError: Raised when the header cannot be parsed.
     NoFileFoundError: Raised when we cannot get a file matches the range.
   """
   # Drop the 'Content-Range:' name if present. Split on ':' alone rather than
   # ': ' so that a header without a space after the colon is accepted too
   # instead of failing with an IndexError.
   if range_header_str.lower().startswith('content-range:'):
     range_header_str = range_header_str.split(':', 1)[1]
   # Surrounding whitespace would otherwise turn into extra empty fields.
   range_header_str = range_header_str.strip()

   try:
     # _make() raises TypeError unless there are exactly four fields; int()
     # raises ValueError when an offset is not a number.
     range_header = _ContentRangeHeader._make(
         _RANGE_HEADER_SEPARATORS.split(range_header_str)
     )
     size = int(range_header.end) - int(range_header.start) + 1
   except (TypeError, ValueError):
     raise FormatError('Wrong format of content range header: %s' %
                       range_header_str)

   try:
     # The start offset stays a string because that is how the map is keyed.
     filename = file_name_map[(range_header.start, size)]
   except KeyError:
     raise NoFileFoundError('Cannot find a file matches the range %s' %
                            range_header_str)

   return filename, size


 class JsonStreamer(object):
   """A class to stream the responses for range requests.

   Responses are queued with queue_response(); stream() then formats the file
   content in all of them as one JSON object. The format:
     '{"<filename>": "<content>", "<filename>": "<content>", ...}'
   """

   def __init__(self):
     # One iterator of (filename, content) pairs per queued response.
     self._files_iter_list = []
     # Set to False once a single-part response has been queued.
     self._can_add_more_response = True

   def queue_response(self, response, file_info_list):
     """Add a response to the queue to be streamed as JSON.

     We can add either:
       1. one and only one response for single-part range requests, or
       2. a series of responses for multi-part range requests.

     Args:
       response: An instance of requests.Response, which may be the response of a
         single range request, or a multi-part range request.
       file_info_list: A list of tarfile_utils.TarMemberInfo. We use it to look
         up file name by content start offset and size.

     Raises:
       FormatError: Raised when response to be queued isn't for a range request.
       NoFileFoundError: Raised when the range of a single-part response
         matches no entry of |file_info_list|.
       ResponseQueueError: Raised when either queuing more than one response for
         single-part range request, or mixed responses for single-part and
         multi-part range request.
     """
     if not self._can_add_more_response:
       raise ResponseQueueError(
           'No more reponses can be added when there was a response for '
           'single-part range request in the queue!')

     file_name_map = {(f.content_start, int(f.size)): f.filename
                      for f in file_info_list}

     # Check if the response is for single range, or multi-part range. For a
     # single range request, the response must have header 'Content-Range'. For a
     # multi-part ranges request, the Content-Type header must be like
     # 'multipart/byteranges; ......'.
     content_range = response.headers.get('Content-Range', None)
     content_type = response.headers.get('Content-Type', '')

     if content_range:
       if self._files_iter_list:
         raise ResponseQueueError(
             'Cannot queue more than one responses for single-part range '
             'request, or mix responses for single-part and multi-part.')
       filename, _ = _get_file_by_range_header(content_range, file_name_map)
       # The whole body is the content of the one and only file.
       self._files_iter_list = [iter([(filename, response.content)])]
       self._can_add_more_response = False

     # Media type names are case-insensitive, so compare in lower case.
     elif content_type.lower().startswith('multipart/byteranges;'):
       # _file_iterator is a generator: the body is only read while stream()
       # is being consumed.
       self._files_iter_list.append(
           _file_iterator(response, file_name_map))

     else:
       raise FormatError('The response is not for a range request.')

   def stream(self):
     """Yield the series of responses content as a JSON stream.

     The first file is yielded together with the opening '{', every later file
     with a leading ', ', and the closing '}' comes last. When there is no file
     at all a single '{}' is yielded, so that the concatenated output is still
     a JSON object instead of an empty string (or, with PEP 479, a
     RuntimeError caused by StopIteration escaping this generator).

     Yields:
       Strings which, once concatenated, form the JSON described above.
     """
     json_encoder = json.JSONEncoder()

     prefix = '{'
     for filename, content in itertools.chain(*self._files_iter_list):
       yield '%s%s: %s' % (prefix,
                           json_encoder.encode(filename),
                           json_encoder.encode(content))
       prefix = ', '

     # |prefix| is still '{' only when the loop above yielded nothing.
     yield '{}' if prefix == '{' else '}'


 def _data_reader(data_iter):
   """A coroutine to read data from |data_iter|.

   After it has been primed with next(), every send() tells it what to read:
     1. _ONE_LINE: Read one CRLF ended line (the CRLF itself is dropped).
     2. An integer N: Read at most N bytes; N <= 0 reads nothing.

   When |data_iter| runs out before a request can be served in full, whatever
   is still buffered is yielded as the answer and the coroutine finishes, so
   the send() after that raises StopIteration. When nothing is buffered at
   that point, the pending send() itself raises StopIteration.

   Args:
     data_iter: An iterator of data source.

   Yields:
     None once (for the priming next()), then the data read for each request.
   """
   try:
     buffered = next(data_iter)
   except StopIteration:
     # Empty source. Finish explicitly: a StopIteration escaping a generator
     # body is turned into a RuntimeError by PEP 479.
     return

   # Get what to be read in runtime by passing value into the generator. See
   # https://docs.python.org/2.5/whatsnew/pep-342.html for syntax details.
   to_be_read = yield

   while True:
     if to_be_read is _ONE_LINE:
       line, separator, rest = buffered.partition('\r\n')
       if separator:
         buffered = rest
         to_be_read = (yield line)
         continue

     else:  # Read at most |to_be_read| bytes of data.
       size = max(to_be_read, 0)
       # Serve the request as soon as enough is buffered; do not fetch another
       # chunk first when exactly |size| bytes are already available.
       if len(buffered) >= size:
         read_bytes, buffered = buffered[:size], buffered[size:]
         to_be_read = (yield read_bytes)
         continue

     # Not enough buffered for the pending request: fetch one more chunk.
     try:
       buffered += next(data_iter)
     except StopIteration:
       break

   # End of data: hand out the (possibly short) remainder, if any.
   if buffered:
     yield buffered


 def _read_line(reader):
   """Ask the reader coroutine for the next CRLF ended line.

   Args:
     reader: A primed coroutine, normally created by _data_reader().

   Returns:
     Whatever the coroutine yields in answer to a _ONE_LINE request.

   Raises:
     StopIteration: Raised by reader.send() when the coroutine has finished,
       i.e. there is nothing left to read.
   """
   line = reader.send(_ONE_LINE)
   return line


 def _read_empty_line(reader):
   """Consume one line from the reader and require it to be empty.

   Args:
     reader: The coroutine handed on to _read_line().

   Raises:
     FormatError: Raised when the reader is exhausted, or the line has content.
   """
   try:
     line = _read_line(reader)
   except StopIteration:
     raise FormatError('Expect an empty line, but got EOF.')
   else:
     if line:
       raise FormatError('Expect an empty line, but got "%s".' % line)


 def _read_bytes(reader, max_bytes):
   """Read at most |max_bytes| bytes from the reader.

   Args:
     reader:
     max_bytes: An integer of maximum bytes of bytes to read.

   Returns:
     The bytes read. Return None if nothing to read.
   """
   return reader.send(max_bytes)


 def _file_iterator(response, file_name_map):
   """The iterator of files in a response of multi-part range request.

   An example response is like:

     HTTP/1.1 206 Partial Content
     Content-Type: multipart/byteranges; boundary=magic_string
     Content-Length: 282

     --magic_string
     Content-Type: text/html
     Content-Range: bytes 0-50/1270

     <data>
     --magic_string
     Content-Type: text/html
     Content-Range: bytes 100-150/1270

     <data>
     --magic_string--

   In our application, each part is the content of a file. This generator
   iterates the files. Iteration ends when the body runs out while the header
   lines of a next part are being read, which is what happens after the
   closing boundary.

   Args:
     response: An instance of requests.response.
     file_name_map: A dict of {(<start:str>, <size:int>): filename, ...}.

   Yields:
     A pair of (name, content) of the file.

   Raises:
     FormatError: Raised when response content interrupted, or an expected
       empty line is not empty.
     NoFileFoundError: Raised when the range of a part matches no entry of
       |file_name_map|.
   """
   reader = _data_reader(
       response.iter_content(constants.READ_BUFFER_SIZE_BYTES))
   try:
     next(reader)  # initialize the coroutine
   except StopIteration:
     # The body is empty, so there is no file. Return explicitly: a
     # StopIteration escaping a generator is a RuntimeError under PEP 479.
     return

   _read_empty_line(reader)  # The first line is empty.
   while True:
     try:
       _read_line(reader)  # The second line is the boundary.
       _read_line(reader)  # The line sub content type.
       sub_range_header = _read_line(reader)  # The line of sub content range.
     except StopIteration:
       # The reader signals end of data with StopIteration (never with None),
       # so this is where the iteration normally ends.
       return
     _read_empty_line(reader)  # Another empty line.

     filename, size = _get_file_by_range_header(sub_range_header,
                                                file_name_map)
     try:
       content = _read_bytes(reader, size)
     except StopIteration:
       content = None  # The body ended before any byte of the content.

     # Check the size before the trailing line: at a truncated body the read
     # of that line would hit EOF first and hide this more precise error.
     bytes_read = 0 if content is None else len(content)
     if bytes_read != size:
       raise FormatError(
           '%s: Error in reading content (read %d B, expect %d B)' %
           (filename, bytes_read, size)
       )

     _read_empty_line(reader)  # Every content has a trailing '\r\n'.

     yield filename, content
	# -*- coding: utf-8 -*-
	# Copyright 2018 The Chromium OS Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.
	"""This module provides utils to handle response of "Range Request"."""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import collections
	import itertools
	import json
	import re

	import constants

	_RANGE_HEADER_SEPARATORS = re.compile('[-/ ]')
	_ONE_LINE = object() # Special object to indicate data reader to read one line.

	_ContentRangeHeader = collections.namedtuple('_ContentRangeHeader',
	('bytes', 'start', 'end', 'total'))


	class FormatError(Exception):
	  """Raised when a response or header is not in the format we can parse."""


	class NoFileFoundError(Exception):
	  """Raised when no known file matches the byte range of a response."""


	class ResponseQueueError(Exception):
	  """Raised when a response is queued in a way that is not allowed."""


	def _get_file_by_range_header(range_header_str, file_name_map):
	  """Get file name and size by the Content-Range header.

	  The format of Content-Range header is like:
	    Content-Range: bytes <start>-<end>/<total>
	  The leading 'Content-Range:' name is optional. <start> and <end> are both
	  inclusive, so the size is <end> - <start> + 1. The file name is then
	  retrieved from |file_name_map| by the key (<start>, size).

	  Args:
	    range_header_str: A string of range header, with or without the leading
	      'Content-Range:' name.
	    file_name_map: A dict of {(<start:str>, <size:int>): filename, ...}.

	  Returns:
	    A tuple of (filename, size).

	  Raises:
	    FormatError: Raised when the header cannot be parsed.
	    NoFileFoundError: Raised when we cannot get a file matches the range.
	  """
	  # Drop the 'Content-Range:' name if present. Split on ':' alone rather than
	  # ': ' so that a header without a space after the colon is accepted too
	  # instead of failing with an IndexError.
	  if range_header_str.lower().startswith('content-range:'):
	    range_header_str = range_header_str.split(':', 1)[1]
	  # Surrounding whitespace would otherwise turn into extra empty fields.
	  range_header_str = range_header_str.strip()

	  try:
	    # _make() raises TypeError unless there are exactly four fields; int()
	    # raises ValueError when an offset is not a number.
	    range_header = _ContentRangeHeader._make(
	        _RANGE_HEADER_SEPARATORS.split(range_header_str)
	    )
	    size = int(range_header.end) - int(range_header.start) + 1
	  except (TypeError, ValueError):
	    raise FormatError('Wrong format of content range header: %s' %
	                      range_header_str)

	  try:
	    # The start offset stays a string because that is how the map is keyed.
	    filename = file_name_map[(range_header.start, size)]
	  except KeyError:
	    raise NoFileFoundError('Cannot find a file matches the range %s' %
	                           range_header_str)

	  return filename, size


	class JsonStreamer(object):
	  """A class to stream the responses for range requests.

	  Responses are queued with queue_response(); stream() then formats the file
	  content in all of them as one JSON object. The format:
	    '{"<filename>": "<content>", "<filename>": "<content>", ...}'
	  """

	  def __init__(self):
	    # One iterator of (filename, content) pairs per queued response.
	    self._files_iter_list = []
	    # Set to False once a single-part response has been queued.
	    self._can_add_more_response = True

	  def queue_response(self, response, file_info_list):
	    """Add a response to the queue to be streamed as JSON.

	    We can add either:
	      1. one and only one response for single-part range requests, or
	      2. a series of responses for multi-part range requests.

	    Args:
	      response: An instance of requests.Response, which may be the response
	        of a single range request, or a multi-part range request.
	      file_info_list: A list of tarfile_utils.TarMemberInfo. We use it to
	        look up file name by content start offset and size.

	    Raises:
	      FormatError: Raised when response to be queued isn't for a range
	        request.
	      NoFileFoundError: Raised when the range of a single-part response
	        matches no entry of |file_info_list|.
	      ResponseQueueError: Raised when either queuing more than one response
	        for single-part range request, or mixed responses for single-part
	        and multi-part range request.
	    """
	    if not self._can_add_more_response:
	      raise ResponseQueueError(
	          'No more reponses can be added when there was a response for '
	          'single-part range request in the queue!')

	    file_name_map = {(f.content_start, int(f.size)): f.filename
	                     for f in file_info_list}

	    # Check if the response is for single range, or multi-part range. For a
	    # single range request, the response must have header 'Content-Range'.
	    # For a multi-part ranges request, the Content-Type header must be like
	    # 'multipart/byteranges; ......'.
	    content_range = response.headers.get('Content-Range', None)
	    content_type = response.headers.get('Content-Type', '')

	    if content_range:
	      if self._files_iter_list:
	        raise ResponseQueueError(
	            'Cannot queue more than one responses for single-part range '
	            'request, or mix responses for single-part and multi-part.')
	      filename, _ = _get_file_by_range_header(content_range, file_name_map)
	      # The whole body is the content of the one and only file.
	      self._files_iter_list = [iter([(filename, response.content)])]
	      self._can_add_more_response = False

	    # Media type names are case-insensitive, so compare in lower case.
	    elif content_type.lower().startswith('multipart/byteranges;'):
	      # _file_iterator is a generator: the body is only read while stream()
	      # is being consumed.
	      self._files_iter_list.append(
	          _file_iterator(response, file_name_map))

	    else:
	      raise FormatError('The response is not for a range request.')

	  def stream(self):
	    """Yield the series of responses content as a JSON stream.

	    The first file is yielded together with the opening '{', every later
	    file with a leading ', ', and the closing '}' comes last. When there is
	    no file at all a single '{}' is yielded, so that the concatenated output
	    is still a JSON object instead of an empty string (or, with PEP 479, a
	    RuntimeError caused by StopIteration escaping this generator).

	    Yields:
	      Strings which, once concatenated, form the JSON described above.
	    """
	    json_encoder = json.JSONEncoder()

	    prefix = '{'
	    for filename, content in itertools.chain(*self._files_iter_list):
	      yield '%s%s: %s' % (prefix,
	                          json_encoder.encode(filename),
	                          json_encoder.encode(content))
	      prefix = ', '

	    # |prefix| is still '{' only when the loop above yielded nothing.
	    yield '{}' if prefix == '{' else '}'


	def _data_reader(data_iter):
	  """A coroutine to read data from |data_iter|.

	  After it has been primed with next(), every send() tells it what to read:
	    1. _ONE_LINE: Read one CRLF ended line (the CRLF itself is dropped).
	    2. An integer N: Read at most N bytes; N <= 0 reads nothing.

	  When |data_iter| runs out before a request can be served in full, whatever
	  is still buffered is yielded as the answer and the coroutine finishes, so
	  the send() after that raises StopIteration. When nothing is buffered at
	  that point, the pending send() itself raises StopIteration.

	  Args:
	    data_iter: An iterator of data source.

	  Yields:
	    None once (for the priming next()), then the data read for each request.
	  """
	  try:
	    buffered = next(data_iter)
	  except StopIteration:
	    # Empty source. Finish explicitly: a StopIteration escaping a generator
	    # body is turned into a RuntimeError by PEP 479.
	    return

	  # Get what to be read in runtime by passing value into the generator. See
	  # https://docs.python.org/2.5/whatsnew/pep-342.html for syntax details.
	  to_be_read = yield

	  while True:
	    if to_be_read is _ONE_LINE:
	      line, separator, rest = buffered.partition('\r\n')
	      if separator:
	        buffered = rest
	        to_be_read = (yield line)
	        continue

	    else:  # Read at most |to_be_read| bytes of data.
	      size = max(to_be_read, 0)
	      # Serve the request as soon as enough is buffered; do not fetch
	      # another chunk first when exactly |size| bytes are already available.
	      if len(buffered) >= size:
	        read_bytes, buffered = buffered[:size], buffered[size:]
	        to_be_read = (yield read_bytes)
	        continue

	    # Not enough buffered for the pending request: fetch one more chunk.
	    try:
	      buffered += next(data_iter)
	    except StopIteration:
	      break

	  # End of data: hand out the (possibly short) remainder, if any.
	  if buffered:
	    yield buffered


	def _read_line(reader):
	  """Ask the reader coroutine for the next CRLF ended line.

	  Args:
	    reader: A primed coroutine, normally created by _data_reader().

	  Returns:
	    Whatever the coroutine yields in answer to a _ONE_LINE request.

	  Raises:
	    StopIteration: Raised by reader.send() when the coroutine has finished,
	      i.e. there is nothing left to read.
	  """
	  return reader.send(_ONE_LINE)


	def _read_empty_line(reader):
	  """Read one line and assert it is empty.

	  Args:
	    reader: The coroutine handed on to _read_line().

	  Raises:
	    FormatError: Raised when the reader is exhausted, or the line has
	      content.
	  """
	  try:
	    line = _read_line(reader)
	  except StopIteration:
	    raise FormatError('Expect an empty line, but got EOF.')
	  if line:
	    raise FormatError('Expect an empty line, but got "%s".' % line)


	def _read_bytes(reader, max_bytes):
	"""Read at most \|max_bytes\| bytes from the reader.

	Args:
	reader:
	max_bytes: An integer of maximum bytes of bytes to read.

	Returns:
	The bytes read. Return None if nothing to read.
	"""
	return reader.send(max_bytes)


	def _file_iterator(response, file_name_map):
	  """The iterator of files in a response of multi-part range request.

	  An example response is like:

	    HTTP/1.1 206 Partial Content
	    Content-Type: multipart/byteranges; boundary=magic_string
	    Content-Length: 282

	    --magic_string
	    Content-Type: text/html
	    Content-Range: bytes 0-50/1270

	    <data>
	    --magic_string
	    Content-Type: text/html
	    Content-Range: bytes 100-150/1270

	    <data>
	    --magic_string--

	  In our application, each part is the content of a file. This generator
	  iterates the files. Iteration ends when the body runs out while the header
	  lines of a next part are being read, which is what happens after the
	  closing boundary.

	  Args:
	    response: An instance of requests.response.
	    file_name_map: A dict of {(<start:str>, <size:int>): filename, ...}.

	  Yields:
	    A pair of (name, content) of the file.

	  Raises:
	    FormatError: Raised when response content interrupted, or an expected
	      empty line is not empty.
	    NoFileFoundError: Raised when the range of a part matches no entry of
	      |file_name_map|.
	  """
	  reader = _data_reader(
	      response.iter_content(constants.READ_BUFFER_SIZE_BYTES))
	  try:
	    next(reader)  # initialize the coroutine
	  except StopIteration:
	    # The body is empty, so there is no file. Return explicitly: a
	    # StopIteration escaping a generator is a RuntimeError under PEP 479.
	    return

	  _read_empty_line(reader)  # The first line is empty.
	  while True:
	    try:
	      _read_line(reader)  # The second line is the boundary.
	      _read_line(reader)  # The line sub content type.
	      sub_range_header = _read_line(reader)  # The line of sub content range.
	    except StopIteration:
	      # The reader signals end of data with StopIteration (never with None),
	      # so this is where the iteration normally ends.
	      return
	    _read_empty_line(reader)  # Another empty line.

	    filename, size = _get_file_by_range_header(sub_range_header,
	                                               file_name_map)
	    try:
	      content = _read_bytes(reader, size)
	    except StopIteration:
	      content = None  # The body ended before any byte of the content.

	    # Check the size before the trailing line: at a truncated body the read
	    # of that line would hit EOF first and hide this more precise error.
	    bytes_read = 0 if content is None else len(content)
	    if bytes_read != size:
	      raise FormatError(
	          '%s: Error in reading content (read %d B, expect %d B)' %
	          (filename, bytes_read, size)
	      )

	    _read_empty_line(reader)  # Every content has a trailing '\r\n'.

	    yield filename, content