Fix locking

Remove race conditions that cause referenced bug.

The test script does not reproduce said bug, but it provides some
psychological comfort at least that the locking code works no worse
than previously.

See the docstring in the flock.py for more rationale on design
choices.

BUG=chromium:756001
TEST=test script
TEST=unit tests
TEST=Run turtle script

Change-Id: I74070ca9f4fa8118233a8ebd91880521cb47d2a8
Reviewed-on: https://chromium-review.googlesource.com/622246
Commit-Ready: Allen Li <ayatane@chromium.org>
Tested-by: Allen Li <ayatane@chromium.org>
Reviewed-by: Allen Li <ayatane@chromium.org>
diff --git a/bin/run_tests b/bin/run_tests
index 967e7df..54b5eae 100755
--- a/bin/run_tests
+++ b/bin/run_tests
@@ -11,11 +11,5 @@
 cd "${bin_dir}/../venv"
 
 find . -name "*.pyc" -print0 | xargs -0 rm -f
-while getopts v name; do
-    case $name in
-        v) verbose=1;;
-        ?) printf "Usage: %s: [-v]\n" "$0" >&2; exit 2;;
-    esac
-done
 
-exec "${bin_dir}/python_venv" -m unittest discover "${verbose:+-v}" -s cros_venv
+exec "${bin_dir}/python_venv" -m pytest "$@"
diff --git a/bin/test_locking b/bin/test_locking
new file mode 100755
index 0000000..8166685
--- /dev/null
+++ b/bin/test_locking
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Copyright 2017 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+#
+# Test locking.
+set -eu
+basedir=$(readlink -f -- "$(dirname -- "${BASH_SOURCE[0]}")")
+cd "${basedir}"
+
+venv_path=$(./create_venv ../venv/requirements.txt)
+case "${venv_path}" in
+  ~/.cache/cros_venv/*)
+    rm -rf "${venv_path}"
+    ;;
+  *)
+    printf "\
+test_locking: create_venv returned a suspicious path: %s
+test_locking: refusing to run rm -rf
+" "${venv_path}" >&2
+    exit 1
+    ;;
+esac
+
+for _ in {1..5}; do
+  ./create_venv ../venv/requirements.txt >/dev/null &
+done
+for _ in {1..30}; do
+  ./create_venv ../venv/requirements.txt >/dev/null &
+  sleep 0.01
+done
+
+for pid in $(jobs -p); do
+  if wait "$pid"; then
+    printf "test_locking: %d okay\n" "${pid}" >&2
+  else
+    printf "test_locking: %d failed\n" "${pid}" >&2
+    echo FAIL
+    exit 1
+  fi
+done
+
+echo OK
diff --git a/venv/.coveragerc b/venv/.coveragerc
new file mode 100644
index 0000000..d3a41e9
--- /dev/null
+++ b/venv/.coveragerc
@@ -0,0 +1,12 @@
+[run]
+branch = True
+
+[report]
+exclude_lines =
+  pragma: no cover
+  raise NotImplementedError
+  if __name__ == .__main__.:
+  def main\(args\):
+  def main\(\):
+show_missing = True
+skip_covered = True
diff --git a/venv/.gitignore b/venv/.gitignore
index 126a1a9..a33841e 100644
--- a/venv/.gitignore
+++ b/venv/.gitignore
@@ -1 +1,4 @@
+/.cache/
+/.coverage
+
 /.venv*
diff --git a/venv/cros_venv/flock.py b/venv/cros_venv/flock.py
index 0543285..8b97cc2 100644
--- a/venv/cros_venv/flock.py
+++ b/venv/cros_venv/flock.py
@@ -2,7 +2,23 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
-"""Lock files"""
+"""Lock files.
+
+Please read:
+
+fcntl(2)
+flock(2)
+lockf(3)
+http://pubs.opengroup.org/onlinepubs/9699919799/functions/lockf.html
+http://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html
+https://github.com/python/cpython/blob/2.7/Modules/fcntlmodule.c
+
+Informal references:
+
+http://0pointer.de/blog/projects/locking.html
+https://www.samba.org/samba/news/articles/low_point/tale_two_stds_os2.html
+http://chris.improbable.org/2010/12/16/everything-you-never-wanted-to-know-about-file-locking/
+"""
 
 from __future__ import absolute_import
 from __future__ import print_function
@@ -19,6 +35,118 @@
 logger = logging.getLogger(__name__)
 
 
+class FileLock(object):
+    """Context manager for an exclusive file lock.
+
+    This uses fcntl() locks for portability.  Refer to the module
+    docstring and the links therein for details that informed this
+    choice.
+
+    fcntl() locks have a number of important quirks:
+
+    This may or may not work if the lock is on a remote filesystem.
+
+    Locks are per-process and therefore not thread-safe.
+
+    The lock is bound to the file inode:
+
+    >>> tmp = getfixture('tmpdir')
+    >>> f1 = str(tmp.join('lock'))
+    >>> f2 = str(tmp.join('lock2'))
+    >>> with FileLock(f1):
+    ...     os.link(f1, f2)
+    ...     f = open(f2)  # Same inode
+    ...     f.close()     # This will release the lock
+    ...     # Stuff here will run without the lock
+    """
+
+    def __init__(self, lockfile, timeout=180):
+        """Initialize instance.
+
+        Args:
+            lockfile: Path to lockfile
+            timeout: Timeout for grabbing lock, in seconds
+        """
+        self._lockfile = lockfile
+        self._timeout = timeout
+        self._fd = None
+
+    def __enter__(self):
+        timeout = time.time() + self._timeout
+        # Wait for possible symlink locks and open lock file.
+        while True:
+            if not self._symlink_lock_present():
+                try:
+                    self._fd = os.open(self._lockfile,
+                                       os.O_RDWR | os.O_CREAT,
+                                       0666)
+                    break
+                except OSError as e:  # pragma: no cover
+                    logger.debug('Error opening lock file: %s', e)
+            if time.time() > timeout:
+                raise TimeoutError(self._lockfile, self._timeout)
+            time.sleep(random.random())
+
+        # Grab lock on file.
+        while True:
+            try:
+                fcntl.lockf(self._fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+            except IOError as e:
+                logger.debug('Error acquiring lock: %s', e)
+            else:
+                return
+            if time.time() > timeout:
+                raise TimeoutError(self._lockfile, self._timeout)
+            time.sleep(random.random())
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        os.close(self._fd)
+
+    def _symlink_lock_present(self):
+        """Check if an old style symlink lock is in place."""
+        # TODO(ayatane): 2017-08-21 Old symlink lock file handling can
+        # be removed after the code has existed for Long Enough that
+        # there aren't reasonably any instances of it left on developer
+        # or prod machines.
+        try:
+            target = os.readlink(self._lockfile)
+        except OSError as e:
+            if e.errno == errno.EINVAL:
+                # Not a symlink.
+                return False
+            elif e.errno == errno.ENOENT:
+                # Lock is gone.
+                return False
+            else:  # pragma: no cover
+                raise
+        pid = _procdir.getpid(target)
+        if _is_pid_running(pid):
+            return True
+        try:
+            os.unlink(self._lockfile)
+        except OSError as e:  # pragma: no cover
+            if e.errno != errno.ENOENT:
+                raise
+        return False
+
+
+class TimeoutError(Exception):
+    """Grabbing lock timed out."""
+
+    def __init__(self, filename, timeout):
+        super(TimeoutError, self).__init__(filename, timeout)
+        self.filename = filename
+        self.timeout = timeout
+
+    def __str__(self):
+        return ('Grabbing lock %(filename)s'
+                ' timed out after %(timeout)s seconds'
+                % {
+                    'filename': self.filename,
+                    'timeout': self.timeout,
+                })
+
+
 class _ProcDir(object):
     """Class wrapping /proc related functions."""
 
@@ -46,88 +174,3 @@
         return e.errno != errno.ESRCH
     else:
         return True
-
-
-class FileLock(object):
-    """Context manager for an exclusive file lock."""
-
-    def __init__(self, lockfile, timeout=180):
-        """Initialize instance.
-
-        Args:
-            lockfile: Path to lockfile
-            timeout: Timeout for grabbing lock, in seconds.
-        """
-        self._lockfile = lockfile
-        self._timeout = timeout
-
-    @property
-    def _proc_path(self):
-        return _procdir.getpath(os.getpid())
-
-    def __enter__(self):
-        self._acquire_lock()
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self._release_lock()
-
-    def _acquire_lock(self):
-        """Try to acquire lock within timeout."""
-        timeout = time.time() + self._timeout
-        self._clean_stale_lock()
-        while True:
-            try:
-                os.symlink(self._proc_path, self._lockfile)
-            except OSError as e:
-                logger.warning('Error acquiring lock: %s', e)
-            else:
-                return
-            if time.time() > timeout:
-                break
-            time.sleep(random.random())
-        raise Exception('Trying to acquire lock %r timed out.'
-                        % (self._lockfile,))
-
-    def _release_lock(self):
-        """Release own lock."""
-        if os.readlink(self._lockfile) != self._proc_path:
-            logger.warning('Lockfile %s does not belong to this process',
-                           self._lockfile)
-        else:
-            os.unlink(self._lockfile)
-
-    def _clean_stale_lock(self):
-        """Clean up stale locks."""
-        # TODO(ayatane): Old flock(2) lock file handling can be removed
-        # after the code has existed for Long Enough that there aren't
-        # reasonably any instances of it left on developer or prod
-        # machines.
-        self._clean_flock_lock()
-        self._clean_orphaned_lock()
-
-    def _clean_flock_lock(self):
-        """Clean up old flock(2)-based lock."""
-        if self._is_lock_flock():
-            logger.warning('Removing old style lock file')
-            os.unlink(self._lockfile)
-
-    def _clean_orphaned_lock(self):
-        """Clean up orphaned lock."""
-        if self._is_lock_orphaned():
-            logger.warning('Removing orphaned lock')
-            os.unlink(self._lockfile)
-
-    def _is_lock_flock(self):
-        """Return True if old flock(2) lock exists."""
-        return (os.path.isfile(self._lockfile)
-                and not os.path.islink(self._lockfile))
-
-    def _is_lock_orphaned(self):
-        """Return True if lock is orphaned."""
-        return (os.path.islink(self._lockfile)
-                and not _is_pid_running(self._get_lock_pid()))
-
-    def _get_lock_pid(self):
-        """Return pid of current lock file as int."""
-        return _procdir.getpid(os.readlink(self._lockfile))
diff --git a/venv/cros_venv/test_flock.py b/venv/cros_venv/test_flock.py
index ad4e87e..928a891 100644
--- a/venv/cros_venv/test_flock.py
+++ b/venv/cros_venv/test_flock.py
@@ -6,26 +6,37 @@
 from __future__ import print_function
 from __future__ import unicode_literals
 
+import contextlib
 import errno
 import fcntl
 import logging
-import mock
 import os
 import unittest
 
+import mock
+import subprocess32
+
 from cros_venv import testcases
 from cros_venv import flock
 
-
-_nullhandler = logging.NullHandler()
+logger = logging.getLogger(__name__)
+_log_handler = None
 
 
 def setUpModule():
-    flock.logger.addHandler(_nullhandler)
+    global _log_handler
+    root = logging.getLogger()
+    root.setLevel(logging.DEBUG)
+    _log_handler = logging.StreamHandler()
+    _log_handler.setFormatter(logging.Formatter(
+        '%(asctime)s:%(levelname)s:%(message)s'))
+    root.addHandler(_log_handler)
 
 
 def tearDownModule():
-    flock.logger.removeHandler(_nullhandler)
+    root = logging.getLogger()
+    root.removeHandler(_log_handler)
+    root.setLevel(logging.WARNING)
 
 
 class ProcDirTestCase(unittest.TestCase):
@@ -64,120 +75,83 @@
 
 
 class FileLockTestCase(testcases.TmpdirTestCase):
-    """Tests for flock."""
+    """Tests for flock (provide code coverage, not full functional testing)."""
 
-    def test_symlink_fail(self):
-        """Test that symlink raises OSError when it already exists."""
-        os.symlink('tmp', 'foo')
-        with self.assertRaises(OSError):
-            os.symlink('tmp', 'foo')
-
-    def test_FileLock(self):
-        """Test that FileLock acquires and releases lock."""
-        self.assertFalse(os.path.lexists('lockfile'))
+    def test_happy_path(self):
+        """Test happy path."""
         with flock.FileLock('lockfile', timeout=0):
-            self.assertTrue(os.path.lexists('lockfile'))
-        self.assertFalse(os.path.lexists('lockfile'))
+            pass
 
-    def test_FileLock_proc_target(self):
-        """Test that FileLock lock points to process /proc."""
-        with flock.FileLock('lockfile', timeout=0):
-            self.assertEqual(os.readlink('lockfile'),
-                             '/proc/' + str(os.getpid()))
+    def test_other_process_has_lock(self):
+        """Test with other process holding lock."""
+        logger.debug('Starting test')
+        with _lock('lockfile'):
+            with self.assertRaises(flock.TimeoutError):
+                with flock.FileLock('lockfile', timeout=0):
+                    pass  # pragma: no cover
 
-    def test_FileLock_timeout(self):
-        """Test that FileLock raises exception on timeout."""
-        os.symlink('tmp', 'lockfile')
-        with self.assertRaises(Exception):
+    def test_with_stale_symlink_lock(self):
+        """Test with existing symlink lock.
+
+        TODO(ayatane): Old symlink lock file handling can be removed,
+        see flock.py
+        """
+        os.symlink('/proc/99999', 'lockfile')
+        with mock.patch.object(flock, '_is_pid_running',
+                               autospec=True) as running:
+            running.return_value = False
             with flock.FileLock('lockfile', timeout=0):
                 pass
+        self.assertFalse(os.path.islink('lockfile'))
 
-    def test__release_lock(self):
-        """Test _release_lock()."""
-        file_lock = flock.FileLock('lockfile')
-        os.symlink(file_lock._proc_path, 'lockfile')
-        file_lock._release_lock()
-        self.assertFalse(os.path.lexists('lockfile'))
+    def test_with_active_symlink_lock(self):
+        """Test with existing symlink lock.
 
-    def test__release_lock_other_process(self):
-        """Test _release_lock() ignores other process's lock."""
-        file_lock = flock.FileLock('lockfile')
-        os.symlink('/proc/1', 'lockfile')
-        file_lock._release_lock()
-        self.assertTrue(os.path.lexists('lockfile'))
+        TODO(ayatane): Old symlink lock file handling can be removed,
+        see flock.py
+        """
+        os.symlink('/proc/99999', 'lockfile')
+        with mock.patch.object(flock, '_is_pid_running',
+                               autospec=True) as running:
+            running.return_value = True
+            with self.assertRaises(flock.TimeoutError):
+                with flock.FileLock('lockfile', timeout=0):
+                    pass  # pragma: no cover
 
-    def test__get_lock_pid(self):
-        """Test _get_lock_pid."""
-        file_lock = flock.FileLock('lockfile')
-        os.symlink('/proc/1', 'lockfile')
-        self.assertEqual(file_lock._get_lock_pid(), 1)
 
-    def test__is_lock_orphaned_own_lock(self):
-        """Test _is_lock_orphaned() with own lock."""
-        file_lock = flock.FileLock('lockfile')
-        os.symlink(file_lock._proc_path, 'lockfile')
-        self.assertFalse(file_lock._is_lock_orphaned())
+class TimeoutErrorTestCase(unittest.TestCase):
+    """Tests for Timeout Error"""
 
-    def test__is_lock_orphaned_missing(self):
-        """Test _is_lock_orphaned() with missing lock."""
-        file_lock = flock.FileLock('lockfile')
-        self.assertFalse(file_lock._is_lock_orphaned())
+    def test_str(self):
+        """Test basic error string contents."""
+        e = flock.TimeoutError(filename='foo', timeout=123)
+        self.assertIn('foo', str(e))
+        self.assertIn('123', str(e))
 
-    @mock.patch.object(flock, '_is_pid_running')
-    def test__is_lock_orphaned(self, is_running):
-        """Test _is_lock_orphaned()."""
-        is_running.return_value = False
-        file_lock = flock.FileLock('lockfile')
-        os.symlink(file_lock._proc_path, 'lockfile')
-        self.assertTrue(file_lock._is_lock_orphaned())
 
-    def test__is_lock_flock(self):
-        """Test _is_lock_flock()."""
-        file_lock = flock.FileLock('lockfile')
-        with open('lockfile', 'w') as _f:
-            pass
-        self.assertTrue(file_lock._is_lock_flock())
+_LOCK_SCRIPT = """\
+import fcntl
+import os
+import time
 
-    def test__is_lock_flock_symlink(self):
-        """Test _is_lock_flock() with symlink lock."""
-        file_lock = flock.FileLock('lockfile')
-        os.symlink(file_lock._proc_path, 'lockfile')
-        self.assertFalse(file_lock._is_lock_flock())
+fd = os.open(%(filename)r, os.O_CREAT | os.O_RDWR)
+fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+print('hi')
+while True:
+    time.sleep(10)
+"""
 
-    def test__is_lock_flock_missing(self):
-        """Test _is_lock_flock() with missing lock."""
-        file_lock = flock.FileLock('lockfile')
-        self.assertFalse(file_lock._is_lock_flock())
 
-    @mock.patch.object(flock, '_is_pid_running')
-    def test__clean_orphaned_lock(self, is_running):
-        """Test _clean_orphaned_lock()."""
-        is_running.return_value = False
-        file_lock = flock.FileLock('lockfile')
-        os.symlink('/proc/1', 'lockfile')
-        file_lock._clean_orphaned_lock()
-        self.assertFalse(os.path.lexists('lockfile'))
-
-    @mock.patch.object(flock, '_is_pid_running')
-    def test__clean_orphaned_lock_owned(self, is_running):
-        """Test _clean_orphaned_lock() ignores owned lock."""
-        is_running.return_value = True
-        file_lock = flock.FileLock('lockfile')
-        os.symlink('/proc/1', 'lockfile')
-        file_lock._clean_orphaned_lock()
-        self.assertTrue(os.path.lexists('lockfile'))
-
-    def test__clean_flock_lock(self):
-        """Test _clean_flock_lock()."""
-        file_lock = flock.FileLock('lockfile')
-        with open('lockfile', 'w') as _f:
-            pass
-        file_lock._clean_flock_lock()
-        self.assertFalse(os.path.exists('lockfile'))
-
-    def test__clean_flock_lock_symlink(self):
-        """Test _clean_flock_lock() skips symlink lock."""
-        file_lock = flock.FileLock('lockfile')
-        os.symlink('/proc/1', 'lockfile')
-        file_lock._clean_flock_lock()
-        self.assertTrue(os.path.exists('lockfile'))
+@contextlib.contextmanager
+def _lock(filename):
+    """Lock a file in another process for testing."""
+    logger.debug('Grabbing external lock on %s', filename)
+    with subprocess32.Popen(
+            ['python', '-u', '-c', _LOCK_SCRIPT % {'filename': filename}],
+            stdout=subprocess32.PIPE) as proc:
+        logger.debug('Started external lock process for %s', filename)
+        proc.stdout.readline()
+        logger.debug('Finished grabbing external lock on %s', filename)
+        yield proc
+        logger.debug('Releasing external lock on %s', filename)
+        proc.terminate()
diff --git a/venv/cros_venv/test_venvlib.py b/venv/cros_venv/test_venvlib.py
index 66315d1..b203fe4 100644
--- a/venv/cros_venv/test_venvlib.py
+++ b/venv/cros_venv/test_venvlib.py
@@ -125,7 +125,7 @@
         os.mkdir('foo')
         try:
             venvlib._makedirs_exist_ok('foo')
-        except OSError:
+        except OSError:  # pragma: no cover
             self.fail('OSError raised')
 
     def test__log_check_call(self):
@@ -236,7 +236,7 @@
         venv = venvlib.VersionedVenv(spec)
         try:
             venv._check(spec)
-        except venvlib.SpecMismatchError:
+        except venvlib.SpecMismatchError:  # pragma: no cover
             self.fail('Check did not pass')
 
     def test__check_should_fail_with_different_spec(self):
diff --git a/venv/pytest.ini b/venv/pytest.ini
new file mode 100644
index 0000000..060586e
--- /dev/null
+++ b/venv/pytest.ini
@@ -0,0 +1,6 @@
+[pytest]
+addopts =
+  --doctest-modules
+  --durations=5
+  --ignore setup.py
+  --cov cros_venv
diff --git a/venv/requirements.txt b/venv/requirements.txt
index 14fe0d3..e680622 100644
--- a/venv/requirements.txt
+++ b/venv/requirements.txt
@@ -1 +1,4 @@
 mock==2.0.0
+pytest==3.1.3
+pytest-cov==2.5.1
+subprocess32==3.2.7