// Copyright 2020 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "ml/benchmark.h"
#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <base/bind.h>
#include <base/containers/flat_map.h>
#include <base/files/file_path.h>
#include <base/files/file_util.h>
#include <base/run_loop.h>
#include <base/task/current_thread.h>
#include <base/threading/thread_task_runner_handle.h>
#include <brillo/message_loops/base_message_loop.h>
#include <google/protobuf/text_format.h>
#include <mojo/core/core.h>
#include <mojo/core/embedder/embedder.h>
#include <mojo/public/cpp/bindings/remote.h>
#include "ml/benchmark.pb.h"
#include "ml/model_impl.h"
#include "ml/mojom/graph_executor.mojom.h"
#include "ml/mojom/machine_learning_service.mojom.h"
#include "ml/mojom/model.mojom.h"
#include "ml/test_utils.h"
#include "proto/benchmark_config.pb.h"
using ::chrome::ml_benchmark::BenchmarkResults;
using ::chrome::ml_benchmark::BenchmarkReturnStatus;
using ::chrome::ml_benchmark::CrOSBenchmarkConfig;
using ::chromeos::machine_learning::mojom::CreateGraphExecutorResult;
using ::chromeos::machine_learning::mojom::ExecuteResult;
using ::chromeos::machine_learning::mojom::GraphExecutor;
using ::chromeos::machine_learning::mojom::LoadModelResult;
using ::chromeos::machine_learning::mojom::Model;
using ::chromeos::machine_learning::mojom::TensorPtr;
using ::chromeos::machine_learning::mojom::ValueList;
using ::google::protobuf::Map;
using ::google::protobuf::TextFormat;
using Example = ml::ExpectedInputOutput::Example;
using Feature = ml::ExpectedInputOutput::Example::Feature;
using NodeSpec = ml::FlatBufferModelSpecProto::NodeSpec;
namespace ml {
namespace {
// Percentiles for latency.
constexpr int kLatencyPercentile[] = {50, 90, 95, 99};
// Use a fake model name for benchmark runs.
constexpr char kMlBenchmarkMetricsName[] = "benchmark_model";
// The accumulative result of an inference test run.
struct AccumulativeResult {
// Set to true if any inference fails.
bool has_failure = false;
// Total error across all inference runs.
float total_error = 0.0;
// Time of each run.
std::vector<int64_t> times_in_us;
// Error message.
std::string error_message;
};
// Serializes `results` into `results_data` and returns `results.status()`.
int32_t SerializeResults(const BenchmarkResults& results,
void** results_data,
int32_t* results_size) {
if (results.status() != BenchmarkReturnStatus::OK) {
LOG(ERROR) << "result with error: " << results.DebugString();
}
const std::string result_pb = results.SerializeAsString();
CHECK(!result_pb.empty());
const int size = result_pb.size();
// Will be released by the caller.
char* const data = new char[size];
result_pb.copy(data, size);
*results_data = data;
*results_size = size;
return results.status();
}
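// Lazily sets up a message loop and the Mojo core. Both are required before
// the mojo::Remote endpoints below can be created or serviced.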
void InitializeOnce() {
if (!base::CurrentThread::IsSet()) {
(new brillo::BaseMessageLoop())->SetAsCurrent();
}
if (!mojo::core::Core::Get()) {
mojo::core::Init();
}
}
// Constructs `model` based on `model_proto`; returns whether the construction
// succeeded.
bool ConstructModel(const FlatBufferModelSpecProto& model_proto,
mojo::Remote<Model>* const model) {
auto model_data =
std::make_unique<AlignedModelData>(model_proto.model_string());
// Step 1 builds the FlatBufferModel.
std::unique_ptr<tflite::FlatBufferModel> flat_buffer_model =
tflite::FlatBufferModel::VerifyAndBuildFromBuffer(model_data->data(),
model_data->size());
if (flat_buffer_model == nullptr) {
return false;
}
// Step 2 constructs the ModelImpl.
std::map<std::string, int> required_inputs, required_outputs;
for (const auto& pair : model_proto.required_inputs()) {
required_inputs[pair.first] = pair.second.index();
}
for (const auto& pair : model_proto.required_outputs()) {
required_outputs[pair.first] = pair.second.index();
}
ModelImpl::Create(required_inputs, required_outputs,
std::move(flat_buffer_model), std::move(model_data),
model->BindNewPipeAndPassReceiver(),
kMlBenchmarkMetricsName);
return true;
}
// Constructs `graph_executor`; returns whether the construction is successful.
bool ConstructGraphExecutor(const mojo::Remote<Model>& model,
mojo::Remote<GraphExecutor>* const graph_executor) {
bool succeeded = false;
model->CreateGraphExecutor(
graph_executor->BindNewPipeAndPassReceiver(),
base::Bind(
[](bool* succeeded, const CreateGraphExecutorResult result) {
*succeeded = result == CreateGraphExecutorResult::OK;
},
&succeeded));
// Block until CreateGraphExecutor finishes.
base::RunLoop().RunUntilIdle();
return succeeded;
}
// Converts an ExpectedInputOutput::Example into a tensor map.
base::flat_map<std::string, TensorPtr> TensorMapFromExample(
const Example& input, const Map<std::string, NodeSpec>& node_spec_map) {
base::flat_map<std::string, TensorPtr> input_map;
// Loop over each feature.
for (const auto& pair : input.features().feature()) {
const NodeSpec& node_spec = node_spec_map.at(pair.first);
std::vector<int64_t> dims(node_spec.dims().begin(), node_spec.dims().end());
switch (pair.second.kind_case()) {
case Feature::kFloatList: {
// For FloatList, build a double tensor with the spec's dims and the values.
const auto& float_values = pair.second.float_list().value();
input_map[pair.first] = NewTensor<double>(
dims,
std::vector<double>(float_values.begin(), float_values.end()));
} break;
case Feature::kInt64List: {
// For Int64List, build an int64 tensor with the spec's dims and the values.
const auto& int_values = pair.second.int64_list().value();
input_map[pair.first] = NewTensor<int64_t>(
dims, std::vector<int64_t>(int_values.begin(), int_values.end()));
} break;
default:
LOG(ERROR) << "InputType not supported.";
NOTREACHED();
break;
}
}
return input_map;
}
// Converts the `accumulative_result` into BenchmarkResults.
BenchmarkResults ToBenchmarkResults(AccumulativeResult* accumulative_result) {
BenchmarkResults benchmark_result;
benchmark_result.set_status(BenchmarkReturnStatus::OK);
// Sets the accuracy field to the average error across runs.
benchmark_result.set_total_accuracy(accumulative_result->total_error /
accumulative_result->times_in_us.size());
// Sorts times_in_us across all the successful runs.
std::sort(accumulative_result->times_in_us.begin(),
accumulative_result->times_in_us.end());
// Gets percentile for times_in_us.
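// E.g., with 100 sorted samples, the 90th percentile maps to index 90.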
for (const int i : kLatencyPercentile) {
const int pos = i * accumulative_result->times_in_us.size() / 100;
CHECK(pos < accumulative_result->times_in_us.size())
<< "percentile can't be 100";
(*benchmark_result.mutable_percentile_latencies_in_us())[i] =
accumulative_result->times_in_us[pos];
}
return benchmark_result;
}
// Checks that two tensors have the same type, shape, and size; then computes
// the L1 distance between them and adds it to `accumulative_result`.
template <class T>
void AccumulateDistance(const TensorPtr& tensor1,
const TensorPtr& tensor2,
AccumulativeResult* const accumulative_result) {
if (tensor1->data->which() != tensor2->data->which()) {
accumulative_result->error_message = "Tensor has different data type.";
accumulative_result->has_failure = true;
return;
}
const TensorView<T> tensor_view1(tensor1);
const TensorView<T> tensor_view2(tensor2);
if (!tensor_view1.IsValidType() || !tensor_view1.IsValidFormat() ||
!tensor_view2.IsValidType() || !tensor_view2.IsValidFormat()) {
accumulative_result->error_message = "Tensor type or format is invalid.";
accumulative_result->has_failure = true;
return;
}
if (tensor_view1.GetShape() != tensor_view2.GetShape() ||
tensor_view1.GetValues().size() != tensor_view2.GetValues().size()) {
accumulative_result->error_message = "Tensor has different shape or size.";
accumulative_result->has_failure = true;
return;
}
for (int j = 0; j < tensor_view1.GetValues().size(); ++j) {
// Accumulates the absolute difference between corresponding elements.
accumulative_result->total_error +=
std::abs(tensor_view1.GetValues()[j] - tensor_view2.GetValues()[j]);
}
}
// Dispatches to the typed AccumulateDistance() above based on the tensor's data type.
void AccumulateDistance(const TensorPtr& tensor1,
const TensorPtr& tensor2,
AccumulativeResult* const accumulative_result) {
switch (tensor1->data->which()) {
case ValueList::Tag::INT64_LIST:
AccumulateDistance<int64_t>(tensor1, tensor2, accumulative_result);
return;
case ValueList::Tag::FLOAT_LIST:
AccumulateDistance<double>(tensor1, tensor2, accumulative_result);
return;
default:
accumulative_result->error_message = "Tensor type is not supported.";
accumulative_result->has_failure = true;
LOG(ERROR)
<< "Not supported tensor type for calculating AccumulateDistance.";
NOTREACHED();
return;
}
}
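// Runs `tflite_config.num_runs()` inferences of the model described by
// `model_proto` against `input_output`, and reports latency percentiles and
// the accumulated error as BenchmarkResults.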
BenchmarkResults InferenceForTfliteModel(
const TfliteBenchmarkConfig& tflite_config,
const FlatBufferModelSpecProto& model_proto,
const ExpectedInputOutput& input_output) {
// Performs one-time initialization of the message loop and Mojo.
InitializeOnce();
BenchmarkResults benchmark_result;
// Step 1: construct the model.
mojo::Remote<Model> model;
if (!ConstructModel(model_proto, &model)) {
benchmark_result.set_status(BenchmarkReturnStatus::INITIALIZATION_FAILED);
benchmark_result.set_results_message(
"Can't construct the Model from the model file.");
return benchmark_result;
}
// Step 2: construct the graph executor.
mojo::Remote<GraphExecutor> graph_executor;
if (!ConstructGraphExecutor(model, &graph_executor)) {
benchmark_result.set_status(BenchmarkReturnStatus::INITIALIZATION_FAILED);
benchmark_result.set_results_message(
"Can't construct the GraphExecutor from the model.");
return benchmark_result;
}
// Step 3: run inference multiple times.
std::vector<std::string> output_name;
for (const auto& pair : model_proto.required_outputs()) {
output_name.push_back(pair.first);
}
AccumulativeResult accumulative_result;
const base::flat_map<std::string, TensorPtr> expected_output =
TensorMapFromExample(input_output.expected_output(),
model_proto.required_outputs());
for (int i = 0; i < tflite_config.num_runs(); ++i) {
// Starts the timer; std::clock() measures CPU time, not wall-clock time.
const std::clock_t start_time = std::clock();
// Runs inference.
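// The callback below captures raw pointers to locals on this stack frame;
// the code relies on the RunUntilIdle() call below to service the request
// and run the callback before those locals go out of scope.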
graph_executor->Execute(
TensorMapFromExample(input_output.input(),
model_proto.required_inputs()),
output_name,
base::Bind(
[](AccumulativeResult* accumulative_result,
const std::vector<std::string>* const output_name,
const base::flat_map<std::string, TensorPtr>* const
expected_output,
ExecuteResult result,
base::Optional<std::vector<TensorPtr>> outputs) {
// Checks that the inference ran successfully.
if (result != ExecuteResult::OK || !outputs.has_value()) {
accumulative_result->error_message = "Inference not OK";
accumulative_result->has_failure = true;
return;
}
// Compares each output tensor with the expected tensor; adds their
// distance to the accumulative_result if the two tensors have the
// same type and shape.
for (int i = 0; i < output_name->size(); ++i) {
AccumulateDistance(outputs->at(i),
expected_output->at(output_name->at(i)),
accumulative_result);
if (accumulative_result->has_failure) {
return;
}
}
},
&accumulative_result, &output_name, &expected_output));
base::RunLoop().RunUntilIdle();
// Inference should always succeed; return error otherwise.
if (accumulative_result.has_failure) {
benchmark_result.set_status(BenchmarkReturnStatus::RUNTIME_ERROR);
benchmark_result.set_results_message(accumulative_result.error_message);
return benchmark_result;
}
// Records the elapsed CPU time in microseconds.
const int64_t cpu_time_us = static_cast<int64_t>(
(std::clock() - start_time) * 1000000.0 / CLOCKS_PER_SEC);
accumulative_result.times_in_us.push_back(cpu_time_us);
}
// Converts accumulative_result into BenchmarkResults.
return ToBenchmarkResults(&accumulative_result);
}
} // namespace
} // namespace ml
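// Entry point of the benchmark shared library: deserializes the
// CrOSBenchmarkConfig passed in `config_bytes`, runs the TFLite benchmark it
// describes, and returns a serialized BenchmarkResults buffer via
// `results_bytes` / `results_bytes_size`. The returned buffer must be released
// with free_benchmark_results().
//
// Illustrative caller sketch (variable names are hypothetical; assumes
// `config` is a std::string holding a serialized CrOSBenchmarkConfig):
//
//   void* results = nullptr;
//   int32_t results_size = 0;
//   const int32_t status = benchmark_start(
//       config.data(), static_cast<int32_t>(config.size()),
//       &results, &results_size);
//   chrome::ml_benchmark::BenchmarkResults parsed;
//   CHECK(parsed.ParseFromArray(results, results_size));
//   free_benchmark_results(results);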
int32_t benchmark_start(const void* config_bytes,
int32_t config_bytes_size,
void** results_bytes,
int32_t* results_bytes_size) {
CHECK(config_bytes);
CHECK(results_bytes);
CHECK(results_bytes_size);
BenchmarkResults result;
// Step 1: deserializes the CrOSBenchmarkConfig.
CrOSBenchmarkConfig benchmark_config;
if (!benchmark_config.ParseFromArray(config_bytes, config_bytes_size)) {
result.set_status(BenchmarkReturnStatus::INCORRECT_CONFIGURATION);
result.set_results_message("Can't parse CrOSBenchmarkConfig.");
return ml::SerializeResults(result, results_bytes, results_bytes_size);
}
// Step 2: parses the TfliteBenchmarkConfig.
ml::TfliteBenchmarkConfig tflite_config;
if (!TextFormat::ParseFromString(benchmark_config.driver_config(),
&tflite_config)) {
result.set_status(BenchmarkReturnStatus::INCORRECT_CONFIGURATION);
result.set_results_message("Can't parse TfliteBenchmarkConfig.");
return ml::SerializeResults(result, results_bytes, results_bytes_size);
}
// Step 3: parses the FlatBufferModelSpecProto.
ml::FlatBufferModelSpecProto model_proto;
std::string model_buf;
if (!base::ReadFileToString(
base::FilePath(tflite_config.tflite_model_filepath()), &model_buf)) {
result.set_status(BenchmarkReturnStatus::INITIALIZATION_FAILED);
result.set_results_message(tflite_config.tflite_model_filepath() +
" can't be read.");
return ml::SerializeResults(result, results_bytes, results_bytes_size);
}
if (!model_proto.ParseFromString(model_buf)) {
result.set_status(BenchmarkReturnStatus::INITIALIZATION_FAILED);
result.set_results_message("Can't parse FlatBufferModelSpecProto");
return ml::SerializeResults(result, results_bytes, results_bytes_size);
}
// Step 4: parses the ExpectedInputOutput.
ml::ExpectedInputOutput input_output;
std::string input_buf;
if (!base::ReadFileToString(
base::FilePath(tflite_config.input_output_filepath()), &input_buf)) {
result.set_status(BenchmarkReturnStatus::INITIALIZATION_FAILED);
result.set_results_message(tflite_config.input_output_filepath() +
" can't be read.");
return ml::SerializeResults(result, results_bytes, results_bytes_size);
}
if (!input_output.ParseFromString(input_buf)) {
result.set_status(BenchmarkReturnStatus::INITIALIZATION_FAILED);
result.set_results_message("Can't parse ExpectedInputOutput");
return ml::SerializeResults(result, results_bytes, results_bytes_size);
}
// Step 5: runs InferenceForTfliteModel with the tflite_config, model_proto,
// and input_output.
result =
ml::InferenceForTfliteModel(tflite_config, model_proto, input_output);
return ml::SerializeResults(result, results_bytes, results_bytes_size);
}
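// Releases a results buffer previously returned by benchmark_start(); the
// buffer was allocated with new char[] in SerializeResults().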
void free_benchmark_results(void* results_bytes) {
delete[] static_cast<char*>(results_bytes);
}