gconv_strip: Remove script and call it from chromite.
The script was moved to chromite in CL:212261.
BUG=chromium:403050
TEST=`./build_image` still works.
CQ-DEPEND=CL:212261
Change-Id: I67cc860f429a64173bc52d5f25895a66143cffcf
Reviewed-on: https://chromium-review.googlesource.com/212252
Tested-by: Alex Deymo <deymo@chromium.org>
Reviewed-by: Alex Deymo <deymo@chromium.org>
Commit-Queue: Alex Deymo <deymo@chromium.org>
diff --git a/build_library/base_image_util.sh b/build_library/base_image_util.sh
index 47903b9..19d2144 100755
--- a/build_library/base_image_util.sh
+++ b/build_library/base_image_util.sh
@@ -131,8 +131,7 @@
# and we don't known which ones will be used until all the applications are
# installed. This script looks for the charset names on all the binaries
# installed on the the ${root_fs_dir} and removes the unreferenced ones.
- sudo python "${SRC_ROOT}/scripts/build_library/gconv_strip.py" \
- "${root_fs_dir}"
+ sudo "${GCLIENT_ROOT}/chromite/bin/gconv_strip" "${root_fs_dir}"
# Run ldconfig to create /etc/ld.so.cache.
run_ldconfig "${root_fs_dir}"
diff --git a/build_library/gconv_strip.py b/build_library/gconv_strip.py
deleted file mode 100755
index 07d9156..0000000
--- a/build_library/gconv_strip.py
+++ /dev/null
@@ -1,306 +0,0 @@
-#!/usr/bin/python
-# Copyright 2014 The Chromium OS Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""Script to remove unused gconv charset modules from a build."""
-
-import logging.handlers
-
-import ahocorasick
-import argparse
-import glob
-import operator
-import os
-import stat
-import subprocess
-import sys
-
-
-# Path pattern to search for the gconv-modules file.
-GCONV_MODULES_PATH = 'usr/*/gconv/gconv-modules'
-
-# Sticky modules. These charsets modules are always included even if they
-# aren't used. You can specify any charset name as supported by 'iconv_open',
-# for example, 'LATIN1' or 'ISO-8859-1'.
-STICKY_MODULES = ('UTF-16', 'UTF-32', 'UNICODE')
-
-# List of function names (symbols) known to use a charset as a parameter.
-GCONV_SYMBOLS = (
- # glibc
- 'iconv_open',
- 'iconv',
- # glib
- 'g_convert',
- 'g_convert_with_fallback',
- 'g_iconv',
- 'g_locale_to_utf8',
- 'g_get_charset',
- )
-
-class GconvModules(object):
- """Class to manipulate the gconv/gconv-modules file and referenced modules.
-
- This class parses the contents of the gconv-modules file installed by glibc
- which provides the definition of the charsets supported by iconv_open(3). It
- allows to load the current gconv-modules file and rewrite it to include only
- a subset of the supported modules, removing the other modules.
-
- Each charset is involved on some transformation between that charset and an
- internal representation. This transformation is defined on a .so file loaded
- dynamically with dlopen(3) when the charset defined in this file is requested
- to iconv_open(3).
-
- See the comments on gconv-modules file for syntax details.
- """
-
- def __init__(self, gconv_modules_fn):
- """Initialize the class.
-
- Args:
- gconv_modules_fn: Path to gconv/gconv-modules file.
- """
- self._fn = gconv_modules_fn
-
- # An alias map of charsets. The key (fromcharset) is the alias name and
- # the value (tocharset) is the real charset name. We also support a value
- # that is an alias for another charset.
- self._alias = {}
-
- # The modules dict goes from charset to module names (the filenames without
- # the .so extension). Since several transformations involving the same
- # charset could be defined in different files, the values of this dict are
- # a set of module names.
- self._modules = {}
-
- def Load(self):
- """Load the charsets from gconv-modules."""
- for l in open(self._fn):
- l = l.rstrip('\n')
- if not l or l[0] == '#': # Comment
- continue
-
- lst = l.split()
- if not lst:
- continue
- elif lst[0] == 'module':
- _, fromset, toset, filename = lst[:4]
- for charset in (fromset, toset):
- charset = charset.rstrip('/')
- mods = self._modules.get(charset, set())
- mods.add(filename)
- self._modules[charset] = mods
- elif lst[0] == 'alias':
- _, fromset, toset = lst
- fromset = fromset.rstrip('/')
- toset = toset.rstrip('/')
- # Warn if the same charset is defined as two different aliases
- if self._alias.get(fromset, toset) != toset:
- logging.error('charset "%s" already defined as "%s".',
- fromset, self._alias[fromset])
- self._alias[fromset] = toset
- else:
- logging.error('Unknown line: %s', l)
-
- logging.debug('Found %d modules and %d alias on %s',
- len(self._modules), len(self._alias), self._fn)
- charsets = sorted(self._alias.keys() + self._modules.keys())
- # Remove the 'INTERNAL' charset from the list, since it is not a charset
- # but an internal representation used to convert to and from other charsets.
- if 'INTERNAL' in charsets:
- charsets.remove('INTERNAL')
- return charsets
-
- def Rewrite(self, used_charsets, dry_run=False):
- """Rewrite gconv-modules file with only the used charsets.
-
- Args:
- used_charsets: A list of used charsets. This should be a subset of the
- list returned by Load().
- dry_run: Whether this function should not change any file.
- """
-
- # Compute the used modules.
- used_modules = set()
- for charset in used_charsets:
- while charset in self._alias:
- charset = self._alias[charset]
- used_modules.update(self._modules[charset])
- unused_modules = reduce(set.union, self._modules.values()) - used_modules
-
- logging.debug('Used modules: %s', ', '.join(sorted(used_modules)))
-
- modules_dir = os.path.dirname(self._fn)
- unused_size = 0
- for module in sorted(unused_modules):
- module_path = os.path.join(modules_dir, '%s.so' % module)
- unused_size += os.lstat(module_path).st_size
- logging.debug('rm %s', module_path)
- if not dry_run:
- os.unlink(module_path)
- logging.info('Done. Using %d gconv modules. Removed %d unused modules'
- ' (%.1f KiB)',
- len(used_modules), len(unused_modules), unused_size / 1024.)
-
- # Recompute the gconv-modules file with only the included gconv modules.
- result = []
- for ln in open(self._fn):
- l = ln.rstrip('\n')
- lst = l.split()
-
- if not l or l[0] == '#' or not lst:
- result.append(ln) # Keep comments and copyright headers.
- elif lst[0] == 'module':
- _, _, _, filename = lst[:4]
- if filename in used_modules:
- result.append(ln) # Used module
- elif lst[0] == 'alias':
- _, charset, _ = lst
- charset = charset.rstrip('/')
- while charset in self._alias:
- charset = self._alias[charset]
- if used_modules.intersection(self._modules[charset]):
- result.append(ln) # Alias to an used module
- else:
- logging.error('Unknown line: %s', l)
-
- if not dry_run:
- with open(self._fn, 'w') as f:
- f.write(''.join(result))
-
-
-def MultipleStringMatch(patterns, corpus):
- """Search a list of strings in a corpus string.
-
- Args:
- patterns: A list of strings.
- corpus: The text where to search for the strings.
-
- Result:
- A list of Booleans stating whether each pattern string was found on the
- corpus or not.
- """
- tree = ahocorasick.KeywordTree()
- for word in patterns:
- tree.add(word)
- tree.make()
-
- result = [False] * len(patterns)
- for i, j in tree.findall(corpus):
- match = corpus[i:j]
- result[patterns.index(match)] = True
-
- return result
-
-
-def GconvStrip(args):
- """Process gconv-modules and remove unused modules.
-
- Args:
- args: The command-line args passed to the script.
-
- Returns:
- The exit code number indicating whether the process succeeded.
- """
- root_st = os.lstat(args.root)
- if not stat.S_ISDIR(root_st.st_mode):
- raise Exception("root (%s) must be a directory.")
-
- # Detect the possible locations of the gconv-modules file.
- gconv_modules_files = glob.glob(os.path.join(args.root, GCONV_MODULES_PATH))
-
- if not gconv_modules_files:
- logging.error('gconv-modules file not found.')
- return 1
-
- # Only one gconv-modules files should be present, either on /usr/lib or
- # /usr/lib64, but not both.
- if len(gconv_modules_files) > 1:
- logging.error('Found several gconv-modules files.')
- return 1
-
- gconv_modules_fn = gconv_modules_files[0]
- logging.info('Searching for unused gconv files defined in %s',
- gconv_modules_fn)
-
- gmods = GconvModules(gconv_modules_fn)
- charsets = gmods.Load()
-
- # Use scanelf to search for all the binary files on the rootfs that require
- # or define the symbol iconv_open. We also include the binaries that define
- # it since there could be internal calls to it from other functions.
- files = set()
- for symbol in GCONV_SYMBOLS:
- output = subprocess.check_output([
- 'scanelf', '--mount', '--quiet', '--recursive', '--format', '#s%F',
- '--symbol', symbol, args.root])
- symbol_files = output.splitlines()
- logging.debug('Symbol %s found on %d files.', symbol, len(symbol_files))
- files.update(symbol_files)
-
- # The charsets are represented as null-terminated strings on the binary files,
- # so we append the '\0' to each string. This prevents some false positives
- # when the name of the charset is a substring of some other string. It doesn't
- # prevent false positives when the charset name is the suffix of another
- # string, for example a binary with the string "DON'T DO IT\0" will match the
- # 'IT' charset. Empirical test on ChromeOS images suggests that only 4
- # charsets could fall in category.
- strings = [s + '\0' for s in charsets]
- logging.info('Will search for %d strings in %d files',
- len(strings), len(files))
-
- # Charsets listed in STICKY_MOUDLES are initialized as used. Note that those
- # strings should be listed in the gconv-modules file.
- unknown_sticky_modules = set(STICKY_MODULES) - set(charsets)
- if unknown_sticky_modules:
- logging.warning('The following charsets were explicitly requested in '
- 'STICKY_MODULES even though they don\'t exist: %s',
- ', '.join(unknown_sticky_modules))
- global_used = [charset in STICKY_MODULES for charset in charsets]
-
- for fn in files:
- with open(fn, 'rb') as f:
- used_fn = MultipleStringMatch(strings, f.read())
-
- global_used = map(operator.or_, global_used, used_fn)
- # Check the verbose flag to avoid running an useless loop.
- if args.verbose and any(used_fn):
- logging.debug('File %s:', fn)
- for i in range(len(used_fn)):
- if used_fn[i]:
- logging.debug(' - %s:', strings[i])
-
- used_charsets = [charsets[i] for i in range(len(charsets)) if global_used[i]]
- gmods.Rewrite(used_charsets, args.dry_run)
- return 0
-
-
-def main():
- """Main function to start the script."""
- parser = argparse.ArgumentParser()
-
- parser.add_argument(
- '-V', '--verbose', dest='verbose', action='store_true', default=False,
- help='Verbose',)
- parser.add_argument(
- '--dry-run', dest='dry_run', action='store_true', default=False,
- help='process but don\'t modify any file.',)
- parser.add_argument(
- 'root', help='path to the directory where the rootfs is mounted.',)
-
- logging_format = '%(asctime)s - %(filename)s - %(levelname)-8s: %(message)s'
- date_format = '%Y/%m/%d %H:%M:%S'
- logging.basicConfig(level=logging.INFO, format=logging_format,
- datefmt=date_format)
-
- args = parser.parse_args()
- if args.verbose:
- logging.getLogger().setLevel(logging.DEBUG)
-
- logging.debug('Options are %s ', args)
-
- return GconvStrip(args)
-
-
-if __name__ == '__main__':
- sys.exit(main())