Add a tail_until_writer_finished script.
This is useful for tailing Apache logs in a rigorous way. We don't want
to use 'tail -f' because it never terminates, even if the writer closes
the file. However, we can't just repeatedly reopen and read the file
without risking losing messages during log rotation. This script uses
inotify to detect when a writer to the tailed file closes the file.
BUG=chromium:621745
TEST=unit tests
Change-Id: I5b060b38e8c3ec316ff6f467b317f895cbf5c394
Reviewed-on: https://chromium-review.googlesource.com/357953
Commit-Ready: Paul Hobbs <phobbs@google.com>
Tested-by: Paul Hobbs <phobbs@google.com>
Reviewed-by: Dan Shi <dshi@google.com>
diff --git a/tail_until_writer_finished.py b/tail_until_writer_finished.py
new file mode 100755
index 0000000..394f6cf
--- /dev/null
+++ b/tail_until_writer_finished.py
@@ -0,0 +1,78 @@
+#!/usr/bin/python2
+
+# Copyright 2016 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tails a file, and quits when inotify detects that it has been closed."""
+
+from __future__ import print_function
+
+import argparse
+import select
+import subprocess
+import sys
+import time
+
+
+def TailFile(path, sleep_interval, chunk_size,
+             outfile=sys.stdout,
+             seek_to_end=True):
+  """Tails a file, and quits when there are no writers on the file.
+
+  Args:
+    path: The path to the file to open
+    sleep_interval: The amount to sleep in between reads to reduce wasted IO
+    chunk_size: The amount of bytes to read in between print() calls
+    outfile: A file handle to write to. Defaults to sys.stdout
+    seek_to_end: Whether to start at the end of the file at |path| when reading.
+  """
+  # inotifywait prints a line and exits on the first close_write event.
+  writer_closed = subprocess.Popen(['inotifywait', '-qe', 'close_write', path],
+                                   stdout=subprocess.PIPE)
+
+  # stdout.read is blocking, so poll it with a zero-timeout select.select.
+  def WriterClosedFile():
+    read_list, _, _ = select.select([writer_closed.stdout], [], [], 0)
+    return bool(read_list)
+
+  def ReadChunks(fh):
+    for chunk in iter(lambda: fh.read(chunk_size), b''):
+      print(chunk, end='', file=outfile)
+
+  try:
+    with open(path) as fh:
+      if seek_to_end:
+        fh.seek(0, 2)
+      while True:
+        ReadChunks(fh)
+        if WriterClosedFile():
+          # Read again to avoid a race where the writer finishes writing some
+          # output in between the ReadChunks() and WriterClosedFile() calls.
+          ReadChunks(fh)
+          break
+        # Sleep a bit to limit the number of wasted reads.
+        time.sleep(sleep_interval)
+  finally:
+    # Always stop inotifywait, drain and close its pipe, and reap the process.
+    writer_closed.kill()
+    writer_closed.communicate()
+
+
+def main():
+  """Parses the command line and tails the requested file."""
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('file', help='The file to tail')
+  parser.add_argument('--sleep_interval', type=float, default=0.1,
+                      help='Time sleeping between file reads')
+  parser.add_argument('--chunk_size', type=int, default=64 * 2**10,
+                      help='Bytes to read before yielding')
+  parser.add_argument('--from_beginning', action='store_true',
+                      help='If given, read from the beginning of the file.')
+  opts = parser.parse_args()
+  TailFile(opts.file, opts.sleep_interval, opts.chunk_size,
+           seek_to_end=not opts.from_beginning)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/tail_until_writer_finished_unittest.py b/tail_until_writer_finished_unittest.py
new file mode 100755
index 0000000..919d82c
--- /dev/null
+++ b/tail_until_writer_finished_unittest.py
@@ -0,0 +1,63 @@
+#!/usr/bin/python2
+
+# Copyright 2016 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unit tests for tail_until_writer_finished.py"""
+
+from __future__ import print_function
+
+import StringIO
+import tempfile
+import threading
+import time
+import unittest
+
+import tail_until_writer_finished
+
+
+class TestTailUntilWriterFinished(unittest.TestCase):
+ """Tests tail_until_writer_finished."""
+
+ def testTail(self):
+ self.GetsEntireInput(seek_to_end=True)
+
+ def testRead(self):
+ self.GetsEntireInput(seek_to_end=False)
+
+ def GetsEntireInput(self, seek_to_end):
+ """Tails a temp file in a thread while this test writes to it.
+
+ Checks that the tailed output matches the data it was expected to read.
+ """
+
+ f = tempfile.NamedTemporaryFile()
+ output = StringIO.StringIO()
+ # Written before tailing starts; only expected when not seeking to end.
+ f.write('This line will not get read if we seek to end.\n')
+ f.flush()
+
+ def Tail():
+ tail_until_writer_finished.TailFile(f.name, 0.1, 64000, outfile=output,
+ seek_to_end=seek_to_end)
+
+ thread = threading.Thread(target=Tail)
+ thread.start()
+
+ time.sleep(0.1)  # The inotify process must start before we close the file.
+ # Write the lines to be tailed, then close the file and wait for the tail.
+ for i in range(100):
+ f.write(str(i) + '\n')
+ f.flush()
+ f.close()
+ thread.join()
+
+ expected = ''.join([str(i) + '\n' for i in range(100)])
+ if not seek_to_end:
+ expected = 'This line will not get read if we seek to end.\n' + expected
+ self.assertEqual(output.getvalue(), expected)
+
+
+if __name__ == '__main__':
+ unittest.main()