/*
* Copyright 2021 The ChromiumOS Authors
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "cros-camera/camera_face_detection.h"
#include <optional>
#include <string>
#include <base/files/file_path.h>
#include <base/files/file_util.h>
#include <base/memory/ptr_util.h>
#include <base/posix/safe_strerror.h>
#include <libyuv.h>
#include "common/common_tracing.h"
#include "cros-camera/common.h"
#include "cros-camera/future.h"
#include "cros-camera/tracing.h"

namespace cros {

namespace {

// This class only supports gray type model. See go/facessd for more details.
constexpr char kFaceModelPath[] =
    "/usr/share/cros-camera/ml_models/fssd_small_8bit_gray_4orient_v4.tflite";
constexpr char kFaceAnchorPath[] =
    "/usr/share/cros-camera/ml_models/fssd_anchors_v4.pb";
constexpr float kScoreThreshold = 0.5;
constexpr int kImageSizeForDetection = 160;
constexpr int kDetectTimeoutMs = 1000;

}  // namespace

const char* FaceDetectResultToString(FaceDetectResult detect_result) {
  switch (detect_result) {
    case FaceDetectResult::kDetectOk:
      return "DetectOk";
    case FaceDetectResult::kDetectError:
      return "DetectError";
    case FaceDetectResult::kBufferError:
      return "BufferError";
    case FaceDetectResult::kTransformError:
      return "TransformError";
    case FaceDetectResult::kTimeoutError:
      return "TimeoutError";
  }
}
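
// Example usage (a sketch; error handling is elided, and |buffer| is assumed
// to be a camera buffer whose plane 0 holds 8-bit gray or Y data):
//
//   std::unique_ptr<FaceDetector> detector = FaceDetector::Create();
//   std::vector<human_sensing::CrosFace> faces;
//   FaceDetectResult result =
//       detector->Detect(buffer, &faces,
//                        /*active_sensor_array_size=*/std::nullopt);
//   if (result == FaceDetectResult::kDetectOk) {
//     // |faces| holds boxes and landmarks in |buffer|'s pixel coordinates.
//   }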

// static
std::unique_ptr<FaceDetector> FaceDetector::Create() {
  if (!base::PathExists(base::FilePath(kFaceModelPath)) ||
      !base::PathExists(base::FilePath(kFaceAnchorPath))) {
    LOGF(ERROR) << "Cannot find face detection model file or anchor file";
    return nullptr;
  }
  auto wrapper =
      std::make_unique<human_sensing::FaceDetectorClientCrosWrapper>();
  if (!wrapper->Initialize(std::string(kFaceModelPath),
                           std::string(kFaceAnchorPath), kScoreThreshold)) {
    return nullptr;
  }
  return base::WrapUnique(new FaceDetector(std::move(wrapper)));
}

FaceDetector::FaceDetector(
    std::unique_ptr<human_sensing::FaceDetectorClientCrosWrapper> wrapper)
    : buffer_manager_(CameraBufferManager::GetInstance()),
      wrapper_(std::move(wrapper)),
      thread_("FaceDetectorThread") {
  CHECK(thread_.Start());
}

FaceDetector::~FaceDetector() {
  thread_.Stop();
}
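
// The two synchronous Detect() overloads below wrap DetectAsync() with a
// future and block the calling thread for at most |kDetectTimeoutMs|.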
FaceDetectResult FaceDetector::Detect(
    buffer_handle_t buffer,
    std::vector<human_sensing::CrosFace>* faces,
    std::optional<Size> active_sensor_array_size) {
  auto future = Future<FaceDetectResult>::Create(nullptr);
  DetectAsync(buffer, active_sensor_array_size,
              base::BindOnce(
                  [](base::OnceCallback<void(FaceDetectResult)> callback,
                     std::vector<human_sensing::CrosFace>* out_faces,
                     FaceDetectResult detect_result,
                     std::vector<human_sensing::CrosFace> faces) {
                    *out_faces = std::move(faces);
                    std::move(callback).Run(detect_result);
                  },
                  GetFutureCallback(future), faces));
  if (!future->Wait(kDetectTimeoutMs)) {
    LOGF(ERROR) << "Face detection timeout";
    return FaceDetectResult::kTimeoutError;
  }
  return future->Get();
}

FaceDetectResult FaceDetector::Detect(
    const uint8_t* buffer_addr,
    int input_stride,
    Size input_size,
    std::vector<human_sensing::CrosFace>* faces,
    std::optional<Size> active_sensor_array_size) {
  auto future = Future<FaceDetectResult>::Create(nullptr);
  DetectAsync(buffer_addr, input_stride, input_size, active_sensor_array_size,
              base::BindOnce(
                  [](base::OnceCallback<void(FaceDetectResult)> callback,
                     std::vector<human_sensing::CrosFace>* out_faces,
                     FaceDetectResult detect_result,
                     std::vector<human_sensing::CrosFace> faces) {
                    *out_faces = std::move(faces);
                    std::move(callback).Run(detect_result);
                  },
                  GetFutureCallback(future), faces));
  if (!future->Wait(kDetectTimeoutMs)) {
    LOGF(ERROR) << "Face detection timeout";
    return FaceDetectResult::kTimeoutError;
  }
  return future->Get();
}
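
// Maps the buffer and forwards the plane 0 address (the gray/Y plane) to the
// address-based overload below.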
void FaceDetector::DetectAsync(buffer_handle_t buffer,
                               std::optional<Size> active_sensor_array_size,
                               ResultCallback result_callback) {
  TRACE_COMMON(
      perfetto::Flow::ProcessScoped(reinterpret_cast<uintptr_t>(buffer)));
  ScopedMapping mapping(buffer);
  if (!mapping.is_valid()) {
    LOGF(ERROR) << "Failed to map buffer";
    std::move(result_callback).Run(FaceDetectResult::kBufferError, {});
    return;
  }
  int input_stride = mapping.plane(0).stride;
  Size input_size = Size(buffer_manager_->GetWidth(buffer),
                         buffer_manager_->GetHeight(buffer));
  const uint8_t* buffer_addr = static_cast<uint8_t*>(mapping.plane(0).addr);
  DetectAsync(buffer_addr, input_stride, input_size, active_sensor_array_size,
              std::move(result_callback));
}

void FaceDetector::DetectAsync(const uint8_t* buffer_addr,
                               int input_stride,
                               Size input_size,
                               std::optional<Size> active_sensor_array_size,
                               ResultCallback result_callback) {
  auto buffer_released = Future<void>::Create(nullptr);
  thread_.task_runner()->PostTask(
      FROM_HERE,
      base::BindOnce(&FaceDetector::DetectOnThread, base::Unretained(this),
                     buffer_addr, input_stride, input_size,
                     active_sensor_array_size, std::move(result_callback),
                     GetFutureCallback(buffer_released)));
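  // Block until DetectOnThread() has finished reading |buffer_addr|, so that
  // the caller may release or reuse the input buffer once this returns.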
  if (!buffer_released->Wait(kDetectTimeoutMs)) {
    LOGF(ERROR) << "Face detection timeout in releasing input buffer";
  }
}

void FaceDetector::DetectOnThread(const uint8_t* buffer_addr,
                                  int input_stride,
                                  Size input_size,
                                  std::optional<Size> active_sensor_array_size,
                                  ResultCallback result_callback,
                                  base::OnceClosure buffer_release_callback) {
  DCHECK(thread_.task_runner()->BelongsToCurrentThread());
  CHECK(buffer_addr);
  TRACE_COMMON("width", input_size.width, "height", input_size.height);
  Size scaled_size =
      (input_size.width > input_size.height)
          ? Size(kImageSizeForDetection,
                 kImageSizeForDetection * input_size.height / input_size.width)
          : Size(kImageSizeForDetection * input_size.width / input_size.height,
                 kImageSizeForDetection);
  PrepareBuffer(scaled_size);
  libyuv::ScalePlane(buffer_addr, input_stride, input_size.width,
                     input_size.height, scaled_buffer_.data(),
                     scaled_size.width, scaled_size.width, scaled_size.height,
                     libyuv::FilterMode::kFilterNone);
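  // The input has been copied into |scaled_buffer_|; signal the caller so it
  // can release the input buffer while detection runs.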
  std::move(buffer_release_callback).Run();

  std::vector<human_sensing::CrosFace> faces;
  {
    TRACE_EVENT_BEGIN(kCameraTraceCategoryCommon, "FaceDetector::Detect::Run");
    if (!wrapper_->Detect(scaled_buffer_.data(), scaled_size.width,
                          scaled_size.height, &faces)) {
      std::move(result_callback).Run(FaceDetectResult::kDetectError, {});
      return;
    }
    TRACE_EVENT_END(kCameraTraceCategoryCommon, "num_faces", faces.size());
  }
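
  // Map the results from the scaled image back to input-image coordinates.
  // One ratio works for both axes because the downscale above preserved the
  // aspect ratio.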
  if (!faces.empty()) {
    float ratio = static_cast<float>(input_size.width) /
                  static_cast<float>(scaled_size.width);
    for (auto& f : faces) {
      f.bounding_box.x1 *= ratio;
      f.bounding_box.y1 *= ratio;
      f.bounding_box.x2 *= ratio;
      f.bounding_box.y2 *= ratio;
      for (auto& l : f.landmarks) {
        l.x *= ratio;
        l.y *= ratio;
      }
    }
  }
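
  // When an active sensor array size is provided, additionally map the
  // results from input-frame coordinates to active-array coordinates (the
  // input frame is assumed to be a centered, uniformly scaled view of the
  // active array).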
  if (active_sensor_array_size) {
    std::optional<std::tuple<float, float, float>> transform =
        GetCoordinateTransform(input_size, *active_sensor_array_size);
    if (!transform) {
      std::move(result_callback).Run(FaceDetectResult::kTransformError, {});
      return;
    }
    const float scale = std::get<0>(*transform);
    const float offset_x = std::get<1>(*transform);
    const float offset_y = std::get<2>(*transform);
    for (auto& f : faces) {
      f.bounding_box.x1 = scale * f.bounding_box.x1 + offset_x;
      f.bounding_box.y1 = scale * f.bounding_box.y1 + offset_y;
      f.bounding_box.x2 = scale * f.bounding_box.x2 + offset_x;
      f.bounding_box.y2 = scale * f.bounding_box.y2 + offset_y;
      for (auto& l : f.landmarks) {
        l.x = scale * l.x + offset_x;
        l.y = scale * l.y + offset_y;
      }
    }
  }

  std::move(result_callback).Run(FaceDetectResult::kDetectOk, std::move(faces));
}
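
// Computes (scale, offset_x, offset_y) mapping |src| coordinates into |dst|
// coordinates, where |src| is scaled uniformly and centered within |dst|.
// For example, mapping a 640x360 frame into a 648x486 active array gives
// scale = 648 / 640 = 1.0125, offset_x = 0, and
// offset_y = (486 - 360 * 1.0125) / 2 = 60.75.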
// static
std::optional<std::tuple<float, float, float>>
FaceDetector::GetCoordinateTransform(const Size src, const Size dst) {
  if (src.width > dst.width || src.height > dst.height) {
    return std::nullopt;
  }
  const float width_ratio = static_cast<float>(dst.width) / src.width;
  const float height_ratio = static_cast<float>(dst.height) / src.height;
  const float scaling = std::min(width_ratio, height_ratio);
  float offset_x = 0.0f, offset_y = 0.0f;
  if (width_ratio < height_ratio) {
    // |dst| has larger height than |src| * scaling.
    offset_y = (dst.height - (src.height * scaling)) / 2;
  } else {
    // |dst| has larger width than |src| * scaling.
    offset_x = (dst.width - (src.width * scaling)) / 2;
  }
  return std::make_tuple(scaling, offset_x, offset_y);
}
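
// Ensures |scaled_buffer_| can hold an 8-bit gray image of |img_size|. The
// buffer only grows, so repeated detections do not reallocate.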
void FaceDetector::PrepareBuffer(Size img_size) {
  size_t new_size = img_size.width * img_size.height;
  if (new_size > scaled_buffer_.size()) {
    scaled_buffer_.resize(new_size);
  }
}

std::string LandmarkTypeToString(human_sensing::Landmark::Type type) {
  switch (type) {
    case human_sensing::Landmark::Type::kLeftEye:
      return "LeftEye";
    case human_sensing::Landmark::Type::kRightEye:
      return "RightEye";
    case human_sensing::Landmark::Type::kNoseTip:
      return "NoseTip";
    case human_sensing::Landmark::Type::kMouthCenter:
      return "MouthCenter";
    case human_sensing::Landmark::Type::kLeftEarTragion:
      return "LeftEarTragion";
    case human_sensing::Landmark::Type::kRightEarTragion:
      return "RightEarTragion";
    case human_sensing::Landmark::Type::kLandmarkUnknown:
      return "Unknown";
  }
  return base::StringPrintf("Undefined landmark type %d",
                            static_cast<int>(type));
}

}  // namespace cros