Add a repohook to check source headers

Check for the Apache 2.0 license and "Google LLC" copyright in the
source headers. Repositories such as cos/repohooks, which was forked
from chromiumos/repohooks, already have ChromiumOS license and copyright
headers in their source files. This repohook therefore allows ChromiumOS
license and copyright headers in pre-existing files, but enforces that
added files use the Apache 2.0 license and a "Google LLC" copyright
header with the current year.

This repohook only applies to cos sources that are not
ebuild files. So at the moment, the only repositories that will use this
repohook are cos/manifest and cos/repohooks. We will also need to turn
off the cros_license_check for these repos.

BUG=b/149121748
TEST=./pre-upload_unittest.py and ran `repo upload` for this change and
a change in cos/manifest repo

Change-Id: If556d8d45ed78f7e51b402fabf51dbc46a3ce594
diff --git a/PRESUBMIT.cfg b/PRESUBMIT.cfg
index 6dd237a..15d0057 100644
--- a/PRESUBMIT.cfg
+++ b/PRESUBMIT.cfg
@@ -6,13 +6,12 @@
 pre-upload_unittest = ./pre-upload_unittest.py
 
 [Hook Overrides]
-cros_license_check: true
+cros_license_check: false
 long_line_check: true
 tab_check: true
 stray_whitespace_check: true
 
 [Hook Overrides Options]
-cros_license_check: --exclude_regex=\b(checkpatch\.pl|kernel-doc)$
 long_line_check: --exclude_regex=\b(checkpatch\.pl|kernel-doc)$
 tab_check: --exclude_regex=\b(checkpatch\.pl|kernel-doc)$
 stray_whitespace_check: --exclude_regex=\b(checkpatch\.pl|kernel-doc)$
diff --git a/pre-upload.py b/pre-upload.py
index d71753c..b0c3f30 100755
--- a/pre-upload.py
+++ b/pre-upload.py
@@ -75,7 +75,7 @@
     # Other
     r'.*\.java$', r'.*\.mk$', r'.*\.am$',
     r'.*\.policy$', r'.*\.conf$', r'.*\.go$',
-    r'(^OWNERS|/OWNERS)',
+    r'.*\.xml', r'(^OWNERS|/OWNERS)',
 ]
 
 
@@ -1433,6 +1433,109 @@
   return errors
 
 
+def _check_cos_license(_project, commit, options=()):
+  """Verifies the license/copyright header for COS sources.
+
+  New files must carry the Google LLC copyright (with the current year)
+  and the Apache 2.0 license. Files that already exist may instead keep
+  the ChromiumOS copyright/license header inherited from forks such as
+  cos/repohooks (forked from chromiumos/repohooks). Empty files and
+  paths removed by the include/exclude filters are skipped. Returns a
+  list of HookFailure objects, which is empty when every header is OK.
+  """
+  COS_LICENSE_HEADER = (
+      r"""^[<!\-#/\*]*
+[#/\*]* ?Copyright (20[0-9]{2}) Google LLC
+[#/\*]* ?
+[#/\*]* ?Licensed under the Apache License, Version 2.0 \(the "License"\);
+[#/\*]* ?you may not use this file except in compliance with the License\.
+[#/\*]* ?You may obtain a copy of the License at
+[#/\*]* ?
+[#/\*]* ?      http://www\.apache\.org/licenses/LICENSE-2\.0
+[#/\*]* ?
+[#/\*]* ?Unless required by applicable law or agreed to in writing, software
+[#/\*]* ?distributed under the License is distributed on an "AS IS" BASIS,
+[#/\*]* ?WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or """
+      r"""implied\.
+[#/\*]* ?See the License for the specific language governing permissions and
+[#/\*]* ?limitations under the License\.
+[\->#/\*]*$
+"""
+  )
+  cos_license_re = re.compile(COS_LICENSE_HEADER, re.MULTILINE)
+
+  CROS_LICENSE_HEADER = (
+      r'.*Copyright(?: \(c\))? (20[0-9]{2})(?:-20[0-9]{2})? The Chromium OS '
+      r'Authors\. All rights reserved\.\n'
+      r'.*Use of this source code is governed by a BSD-style license that can '
+      r'be\n'
+      r'.*found in the LICENSE file\.'
+      r'\n'
+  )
+  cros_license_re = re.compile(CROS_LICENSE_HEADER, re.MULTILINE)
+
+  included, excluded = _parse_common_inclusion_options(options)
+
+  bad_files = []
+  new_bad_files = []
+  bad_year_files = []
+
+  files = _filter_files(
+      _get_affected_files(commit, relative=True),
+      included + COMMON_INCLUDED_PATHS,
+      excluded + COMMON_EXCLUDED_PATHS + LICENSE_EXCLUDED_PATHS)
+  existing_files = set(_get_affected_files(commit, relative=True,
+                                           include_adds=False))
+
+  current_year = str(datetime.datetime.now().year)
+  for f in files:
+    contents = _get_file_content(f, commit)
+    if not contents:
+      # Ignore empty files; there is no header to validate.
+      continue
+
+    m = cos_license_re.search(contents)
+    if not m:
+      if f not in existing_files:
+        # New files must include the Google LLC copyright and the
+        # Apache 2.0 license; no fallback header is accepted.
+        new_bad_files.append(f)
+      else:
+        # This is not a new file, so fall back to checking for the
+        # Chromium OS license and copyright headers.
+        cros_m = cros_license_re.search(contents)
+        if cros_m:
+          # Allow Chromium OS license and copyright headers for older
+          # files.
+          continue
+        else:
+          bad_files.append(f)
+
+    if m and f not in existing_files:  # Year is enforced for new files only.
+      year = m.group(1)
+      if year != current_year:
+        bad_year_files.append(f)
+
+  errors = []
+  if new_bad_files:
+    msg = '%s:\n%s\n%s' % (
+        'License must match', cos_license_re.pattern,
+        'Include Google copyright and Apache 2.0 license in new files:')
+    errors.append(HookFailure(msg, new_bad_files))
+  if bad_files:
+    msg = '%s:\n%s\n%s' % (
+        'Unless the file was previously using ChromiumOS license and '
+        'copyright, license must match', cos_license_re.pattern,
+        'Found a bad header in these files:')
+    errors.append(HookFailure(msg, bad_files))
+  if bad_year_files:
+    msg = 'Use current year (%s) in copyright headers in new files:' % (
+        current_year)
+    errors.append(HookFailure(msg, bad_year_files))
+
+  return errors
+
+
 def _check_aosp_license(_project, commit, options=()):
   """Verifies the AOSP license/copyright header.
 
@@ -1967,6 +2070,8 @@
 _PROJECT_SPECIFIC_HOOKS = {
     'chromiumos/third_party/kernel': [_kernel_configcheck],
     'chromiumos/third_party/kernel-next': [_kernel_configcheck],
+    'cos/manifest': [_check_cos_license],
+    'cos/repohooks': [_check_cos_license],
 }
 
 
diff --git a/pre-upload_unittest.py b/pre-upload_unittest.py
index b68967d..489f4ba 100755
--- a/pre-upload_unittest.py
+++ b/pre-upload_unittest.py
@@ -954,6 +954,193 @@
     self.assertIsNone(pre_upload._check_aosp_license('proj', 'sha1'))
 
 
+class CheckCOSLicenseCopyrightHeader(PreUploadTestCase):
+  """Tests for _check_cos_license."""
+
+  def setUp(self):
+    self.file_mock = self.PatchObject(pre_upload, '_get_affected_files')
+    self.content_mock = self.PatchObject(pre_upload, '_get_file_content')
+
+  def testHeaders(self):
+    """Accept COS headers in //, # and <!-- --> comment styles."""
+    HEADERS = (
+        u"""//
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+""",
+        u"""#
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+""",
+        u"""<!--
+ Copyright 2020 Google LLC
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+""",
+    )
+    self.file_mock.return_value = ['file']
+    for header in HEADERS:
+      self.content_mock.return_value = header
+      self.assertFalse(pre_upload._check_cos_license('proj', 'sha1'))
+
+  def testRejectNoLinesAround(self):
+    """Reject headers missing the empty lines before/after the license."""
+    HEADERS = (
+        u"""# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""",
+    )
+    self.file_mock.return_value = ['file']
+    for header in HEADERS:
+      self.content_mock.return_value = header
+      self.assertTrue(pre_upload._check_cos_license('proj', 'sha1'))
+
+  def testNewFileYear(self):
+    """Added files should have the current year in license header."""
+    year = datetime.datetime.now().year
+    HEADERS = (
+        u"""//
+// Copyright 2015 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+""",
+        u"""//
+// Copyright {} Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+""".format(year),
+    )
+    want_error = (True, False)
+    def fake_get_affected_files(_, relative, include_adds=True):
+      _ = relative
+      if include_adds:
+        return ['file']
+      else:
+        return []
+
+    self.file_mock.side_effect = fake_get_affected_files
+    for i, header in enumerate(HEADERS):
+      self.content_mock.return_value = header
+      if want_error[i]:
+        self.assertTrue(pre_upload._check_cos_license('proj', 'sha1'))
+      else:
+        self.assertFalse(pre_upload._check_cos_license('proj', 'sha1'))
+
+  def testAcceptsCrosLicenseForOlderFiles(self):
+    """Older files with ChromiumOS license/copyright are accepted."""
+    header = (
+        u'#!/bin/sh\n'
+        u'# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.\n'
+        u'# Use of this source code is governed by a BSD-style license that'
+        u' can be\n'
+        u'# found in the LICENSE file.\n'
+    )
+    self.file_mock.return_value = ['file']
+    self.content_mock.return_value = header
+    self.assertFalse(pre_upload._check_cos_license('proj', 'sha1'))
+
+  def testRejectsCrosLicenseForAddedFiles(self):
+    """Added files with ChromiumOS license/copyright are rejected."""
+    header = (
+        u'#!/bin/sh\n'
+        u'# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.\n'
+        u'# Use of this source code is governed by a BSD-style license that'
+        u' can be\n'
+        u'# found in the LICENSE file.\n'
+    )
+
+    def fake_get_affected_files(_, relative, include_adds=True):
+      _ = relative
+      if include_adds:
+        return ['file']
+      else:
+        return []
+
+    self.file_mock.side_effect = fake_get_affected_files
+    self.content_mock.return_value = header
+    self.assertTrue(pre_upload._check_cos_license('proj', 'sha1'))
+
+  def testIgnoreExcludedPaths(self):
+    """Ignores excluded paths in subdirectories for license checks."""
+    self.file_mock.return_value = ['foo/OWNERS']
+    self.content_mock.return_value = u'owner@chromium.org'
+    self.assertFalse(pre_upload._check_cos_license('proj', 'sha1'))
+
+  def testIgnoreTopLevelExcludedPaths(self):
+    """Ignores top-level excluded paths for license checks."""
+    self.file_mock.return_value = ['OWNERS']
+    self.content_mock.return_value = u'owner@chromium.org'
+    self.assertFalse(pre_upload._check_cos_license('proj', 'sha1'))
+
 class CheckLayoutConfTestCase(PreUploadTestCase):
   """Tests for _check_layout_conf."""