|  | # Copyright 2018 The Chromium OS Authors. All rights reserved. | 
|  | # Use of this source code is governed by a BSD-style license that can be | 
|  | # found in the LICENSE file. | 
|  |  | 
|  | """Utils for manipulating tar format archives. | 
|  |  | 
|  | We use the tar command rather than the Python tarfile module to manipulate tar | 
|  | files, because that module is very slow on large files. | 
|  | """ | 
|  |  | 
|  | from __future__ import absolute_import | 
|  | from __future__ import division | 
|  | from __future__ import print_function | 
|  |  | 
|  | import collections | 
|  | import re | 
|  |  | 
|  | from chromite.lib import cros_logging as logging | 
|  |  | 
|  | _logger = logging.getLogger(__name__) | 
|  |  | 
|  |  | 
|  | def _round_up_to_512(number): | 
|  | """Up round the given |number| to smallest multiple of 512. | 
|  |  | 
|  | Examples: | 
|  | >>> for n in (0, 1, 512, 1025): | 
|  | ...   _round_up_to_512(n) | 
|  | 0 | 
|  | 512 | 
|  | 512 | 
|  | 1536 | 
|  |  | 
|  | Args: | 
|  | number: Zero or positive integer. | 
|  |  | 
|  | Returns: | 
|  | The smallest multiple of 512. | 
|  | """ | 
|  | return (number + 511) & -512 | 
|  |  | 
|  |  | 
def _get_command_result_from_tar_tvR(an_output_line):
  """Parse one line of `tar tvR` output into a _TarListCommandResult.

  The line is split on runs of spaces, tabs and colons into at most as many
  pieces as _TarListCommandResult has fields, so everything after the last
  separator consumed stays together in the final piece.

  Args:
    an_output_line: One line of `tar tvR` output. A trailing newline is
      acceptable. The last line of `tar tvR` is acceptable.

  Returns:
    An object of _TarListCommandResult. When the line yields fewer pieces than
    there are fields, each missing trailing field is filled with its own field
    name as a placeholder value.
  """
  field_names = _TarListCommandResult._fields
  splitter = re.compile('[ \t:]+')
  pieces = splitter.split(an_output_line.rstrip('\n'),
                          maxsplit=len(field_names) - 1)

  # maxsplit caps the number of pieces at the number of fields, so the only
  # possible mismatch is having too few of them.
  if len(pieces) == len(field_names):
    return _TarListCommandResult._make(pieces)

  # The last line of `tar tvR` hasn't enough fields. Pad it with fake data.
  _logger.debug('This should be the last line of `tar tvR`: %s',
                an_output_line)
  padded = pieces + list(field_names[len(pieces):])
  return _TarListCommandResult._make(padded)
|  |  | 
|  |  | 
|  | def _block_to_bytes(block_num): | 
|  | """Get offset of the block |block_num| in bytes, i.e. times 512""" | 
|  | return block_num << 9  # * 512 | 
|  |  | 
|  |  | 
# Information about one tar member, as returned to the caller.
# All offsets are zero-based and, like the sizes, measured in bytes.
# Fields:
#   filename: Name of the member file.
#   record_start: Offset at which the file record begins.
#   record_size: Size of the whole file record.
#   content_start: Offset at which the file content begins.
#   size: Size of the file content.
TarMemberInfo = collections.namedtuple(
    'TarMemberInfo',
    ['filename', 'record_start', 'record_size', 'content_start', 'size'])
|  |  | 
|  |  | 
|  | class _TarListCommandResult(collections.namedtuple( | 
|  | '_TarListCommandResult', ('block', 'block_num', 'mode', 'ownership', | 
|  | 'size_str', 'date', 'hour', 'min', 'filename'))): | 
|  | """Information of each member in a Tar archive. | 
|  |  | 
|  | This class using the output of command `tar tvR` to compute more information | 
|  | we need, e.g. file content start offset, etc. | 
|  |  | 
|  | The output of `tar tvR` is like: | 
|  | block 0: -rw-r--r-- user/group <size> <date> <time> <file name> | 
|  | ... | 
|  | block 7: ** Block of NULs ** | 
|  | """ | 
|  |  | 
|  | @property | 
|  | def record_start(self): | 
|  | """Start offset of the file record, in bytes.""" | 
|  | return _block_to_bytes(int(self.block_num)) | 
|  |  | 
|  | @property | 
|  | def size(self): | 
|  | return int(self.size_str) | 
|  |  | 
|  |  | 
def _get_prev_content_start(cur_record_start, prev_file):
  """Deduce where the previous file's content starts.

  In tar format, each file record is a header followed by the file content,
  and both are padded up to a multiple of 512 bytes. The header length is
  variable, but the padded content ends exactly where the next file record
  begins, so the content start is found by stepping back from that point:

    prev_content_start = cur_record_start - round_up_to_512(prev_size)

    |********|************************.......|********|****
    | header |            content            | header |
    |        |<----- prev_size ----->|
    |        |<- prev_size round up to 512 ->|
             ^prev_content_start             ^cur_record_start

  Args:
    cur_record_start: The zero-based start position of current file record, in
      bytes.
    prev_file: An instance of _TarListCommandResult which has size of the
      previous file.

  Returns:
    The zero-based start position of previous file content, in bytes.
  """
  padded_prev_size = _round_up_to_512(prev_file.size)
  return cur_record_start - padded_prev_size
|  |  | 
|  |  | 
def list_tar_members(tar_tvR_output):
  """Yield a TarMemberInfo for each member listed in `tar tvR` output.

  Each member is described using the line that follows it: the next line's
  record start marks where the member's record ends. Consequently the final
  line of the output only serves as an end marker and yields no member of its
  own.

  Args:
    tar_tvR_output: The output of command 'tar tvR', as an object providing
      readline() and line iteration. Option 'R' print out the starting block
      number of the file record.

  Yields:
    TarMemberInfo tuples: (filename, record_start, record_size, content_start,
    size), one per member.
  """
  previous = _get_command_result_from_tar_tvR(tar_tvR_output.readline())

  for raw_line in tar_tvR_output:
    current = _get_command_result_from_tar_tvR(raw_line)

    # The start of the current record is also the end of the previous one.
    next_record_start = current.record_start
    content_start = _get_prev_content_start(next_record_start, previous)
    record_start = previous.record_start

    yield TarMemberInfo(filename=previous.filename,
                        record_start=record_start,
                        record_size=next_record_start - record_start,
                        content_start=content_start,
                        size=previous.size)

    previous = current