blob: 8e825d3d6093593733ffdb27b313de8e3a077907 [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging, sys, time
from autotest_lib.client.common_lib import error
from autotest_lib.server import autotest
from autotest_lib.server import hosts
from autotest_lib.server import test
class hardware_StorageStress(test.test):
    """
    Integrity stress test for storage device.

    Server-side test that repeatedly runs a storage workload on the client
    (through the hardware_StorageFio client test). In every iteration a power
    command (reboot / suspend / nothing) is executed first, then the storage
    workload is run and its result is checked.
    """

    version = 1
    _HOURS_IN_SEC = 3600

    # Define default value for the test case
    _TEST_GAP = 60  # 1 min
    _TEST_DURATION = 12 * _HOURS_IN_SEC
    _SUSPEND_DURATION = _HOURS_IN_SEC
    _FIO_REQUIREMENT_FILE = '8k_async_randwrite'
    # These lists are only read, never mutated, so sharing them at class
    # level is safe.
    _FIO_WRITE_FLAGS = []
    _FIO_VERIFY_FLAGS = ['--verifyonly']

    def run_once(self, client_ip, gap=_TEST_GAP, duration=_TEST_DURATION,
                 power_command='reboot', storage_test_command='integrity',
                 suspend_duration=_SUSPEND_DURATION, storage_test_argument=''):
        """
        Run the Storage stress test

        Use hardwareStorageFio to run some test_command repeatedly for a long
        time. Between each iteration of test command, run power command such as
        reboot or suspend.

        @param client_ip: string of client's ip address (required)
        @param gap: gap between each test (second) default = 1 min
        @param duration: duration to run test (second) default = 12 hours
        @param power_command: command to do between each test Command
                              possible command: reboot / suspend / nothing
        @param storage_test_command: FIO command to run
                              - integrity: Check data integrity
                              - full_write: Check performance consistency
                                            for full disk write. Use argument
                                            to determine which disk to write
        @param suspend_duration: if power_command is suspend, how long the DUT
                              is suspended.
        @param storage_test_argument: accepted for interface compatibility;
                              not used by the code in this class.

        @raise error.TestError: if client_ip is empty.
        @raise error.TestFail: if power_command or storage_test_command is not
                              one of the supported values, or if a client
                              test run reports a failure.
        """
        # init test
        if not client_ip:
            # The exception must be raised, not merely constructed, otherwise
            # the test carries on with an unusable client address.
            raise error.TestError("Must provide client's IP address to test")

        self._client = hosts.create_host(client_ip)
        self._client_at = autotest.Autotest(self._client)
        self._results = {}
        self._suspend_duration = suspend_duration

        # parse power command
        power_funcs = {
            'nothing': self._do_nothing,
            'reboot': self._do_reboot,
            'suspend': self._do_suspend,
        }
        if power_command not in power_funcs:
            raise error.TestFail(
                'Test failed with error: Invalid power command')
        power_func = power_funcs[power_command]

        # parse test command
        if storage_test_command == 'integrity':
            setup_func = self._write_data
            loop_func = self._verify_data
        elif storage_test_command == 'full_write':
            setup_func = self._do_nothing
            loop_func = self._full_disk_write
            # Do at least 2 soak runs. Given the absolute minimum of a loop is
            # around 1h, duration should be at least 1h.
            self._soak_time = min(self._TEST_DURATION, duration / 4)
        else:
            raise error.TestFail('Test failed with error: Invalid test command')

        # init statistic variable
        min_time_per_loop = sys.maxsize
        max_time_per_loop = 0
        all_loop_time = 0
        avr_time_per_loop = 0
        self._loop_count = 0

        setup_func()

        start_time = time.time()
        while time.time() - start_time < duration:
            # sleep
            time.sleep(gap)
            self._loop_count += 1

            # do power command & verify data & calculate time
            loop_start_time = time.time()
            power_func()
            loop_func()
            loop_time = time.time() - loop_start_time

            # update statistic
            all_loop_time += loop_time
            min_time_per_loop = min(loop_time, min_time_per_loop)
            max_time_per_loop = max(loop_time, max_time_per_loop)
            logging.info('check data count: %d', self._loop_count)

        if self._loop_count > 0:
            avr_time_per_loop = all_loop_time / self._loop_count
        else:
            # No iteration ran (e.g. duration <= 0): do not leak the
            # sys.maxsize sentinel into the reported results.
            min_time_per_loop = 0

        # report result
        # Each value is written with its own call, as before, so the layout
        # of the generated keyval output is unchanged.
        self.write_perf_keyval({'loop_count': self._loop_count})
        self.write_perf_keyval({'min_time_per_loop': min_time_per_loop})
        self.write_perf_keyval({'max_time_per_loop': max_time_per_loop})
        self.write_perf_keyval({'avr_time_per_loop': avr_time_per_loop})

    def _do_nothing(self):
        """
        No-op, used when no power command or no setup step is requested
        """
        pass

    def _do_reboot(self):
        """
        Reboot host machine
        """
        self._client.reboot()

    def _do_suspend(self):
        """
        Suspend host machine
        """
        self._client.suspend(suspend_time=self._suspend_duration)

    @classmethod
    def _check_client_test_result(cls, client):
        """
        Check result of the client test.

        Auto test will store results in the file named status.
        We check that the second to last line in that file begin with 'END GOOD'

        @param client: host object the client test was run on.

        @raise error.TestFail: if the client test did not end with 'END GOOD'.
        """
        client_result_dir = '%s/results/default' % client.autodir
        # tail -2 | head -1 extracts the second to last line of the file.
        command = 'tail -2 %s/status | head -1' % client_result_dir
        status = client.run(command).stdout.strip()
        logging.info(status)
        if not status.startswith('END GOOD'):
            raise error.TestFail('client in StorageStress failed.')

    def _write_data(self):
        """
        Write test data to host using hardware_StorageFio
        """
        logging.info('_write_data')
        self._client_at.run_test('hardware_StorageFio', wait=0,
            tag='%s_%d' % ('write_data', self._loop_count),
            requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_WRITE_FLAGS)])
        self._check_client_test_result(self._client)

    def _verify_data(self):
        """
        Verify test data using hardware_StorageFio
        """
        logging.info('_verify_data #%d', self._loop_count)
        self._client_at.run_test('hardware_StorageFio', wait=0,
            tag='%s_%d' % ('verify_data', self._loop_count),
            requirements=[(self._FIO_REQUIREMENT_FILE, self._FIO_VERIFY_FLAGS)])
        self._check_client_test_result(self._client)

    def _full_disk_write(self):
        """
        Do the root device full area write and report performance

        Write random pattern for few hours, then do a write and a verify,
        noting the latency.

        Runs, in order: a '64k_stress' soak, a 'surfing' run, an integrity
        run, and finally hardware_StorageWearoutDetect. The result of every
        step except the last one is checked.
        """
        logging.info('_full_disk_write #%d', self._loop_count)

        # use the default requirement that write different pattern around.
        self._client_at.run_test('hardware_StorageFio',
            tag='%s_%d' % ('soak', self._loop_count),
            requirements=[('64k_stress', [])],
            time_length=self._soak_time)
        self._check_client_test_result(self._client)

        self._client_at.run_test('hardware_StorageFio',
            tag='%s_%d' % ('surf', self._loop_count),
            requirements=[('surfing', [])],
            time_length=self._soak_time)
        self._check_client_test_result(self._client)

        self._client_at.run_test('hardware_StorageFio',
            tag='%s_%d' % ('integrity', self._loop_count),
            wait=0, integrity=True)
        self._check_client_test_result(self._client)

        self._client_at.run_test('hardware_StorageWearoutDetect',
            tag='%s_%d' % ('wearout', self._loop_count),
            wait=0, use_cached_result=False)
        # No checkout for wearout, to test device pass their limits.