/*
* Copyright 2021 The ChromiumOS Authors
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "cros-camera/camera_face_detection.h"
#include <optional>
#include <string>
#include <base/files/file_path.h>
#include <base/files/file_util.h>
#include <base/memory/ptr_util.h>
#include <base/posix/safe_strerror.h>
#include <libyuv.h>
#include "common/common_tracing.h"
#include "cros-camera/common.h"
#include "cros-camera/future.h"
#include "cros-camera/tracing.h"

namespace cros {

namespace {

// This class only supports gray type model. See go/facessd for more details.
constexpr char kFaceModelPath[] =
    "/usr/share/cros-camera/ml_models/fssd_small_8bit_gray_4orient_v4.tflite";
constexpr char kFaceAnchorPath[] =
    "/usr/share/cros-camera/ml_models/fssd_anchors_v4.pb";
constexpr float kScoreThreshold = 0.5;
constexpr int kImageSizeForDetection = 160;
constexpr int kDetectTimeoutMs = 1000;

}  // namespace

const char* FaceDetectResultToString(FaceDetectResult detect_result) {
  switch (detect_result) {
    case FaceDetectResult::kDetectOk:
      return "DetectOk";
    case FaceDetectResult::kDetectError:
      return "DetectError";
    case FaceDetectResult::kBufferError:
      return "BufferError";
    case FaceDetectResult::kTransformError:
      return "TransformError";
    case FaceDetectResult::kTimeoutError:
      return "TimeoutError";
  }
}
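
// Example usage (a sketch; error handling is elided, and |buffer| is assumed
// to be a camera buffer whose plane 0 holds 8-bit gray or Y data):
//
//   std::unique_ptr<FaceDetector> detector = FaceDetector::Create();
//   std::vector<human_sensing::CrosFace> faces;
//   FaceDetectResult result =
//       detector->Detect(buffer, &faces,
//                        /*active_sensor_array_size=*/std::nullopt);
//   if (result == FaceDetectResult::kDetectOk) {
//     // |faces| holds boxes and landmarks in |buffer|'s pixel coordinates.
//   }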

// static
std::unique_ptr<FaceDetector> FaceDetector::Create() {
  if (!base::PathExists(base::FilePath(kFaceModelPath)) ||
      !base::PathExists(base::FilePath(kFaceAnchorPath))) {
    LOGF(ERROR) << "Cannot find face detection model file or anchor file";
    return nullptr;
  }
  auto wrapper =
      std::make_unique<human_sensing::FaceDetectorClientCrosWrapper>();
  if (!wrapper->Initialize(std::string(kFaceModelPath),
                           std::string(kFaceAnchorPath), kScoreThreshold)) {
    return nullptr;
  }
  return base::WrapUnique(new FaceDetector(std::move(wrapper)));
}

FaceDetector::FaceDetector(
    std::unique_ptr<human_sensing::FaceDetectorClientCrosWrapper> wrapper)
    : buffer_manager_(CameraBufferManager::GetInstance()),
      wrapper_(std::move(wrapper)),
      thread_("FaceDetectorThread") {
  CHECK(thread_.Start());
}

FaceDetector::~FaceDetector() {
  thread_.Stop();
}
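
// The two synchronous Detect() overloads below wrap DetectAsync() with a
// future and block the calling thread for at most |kDetectTimeoutMs|.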
FaceDetectResult FaceDetector::Detect(
    buffer_handle_t buffer,
    std::vector<human_sensing::CrosFace>* faces,
    std::optional<Size> active_sensor_array_size) {
  auto future = Future<FaceDetectResult>::Create(nullptr);
  DetectAsync(buffer, active_sensor_array_size,
              base::BindOnce(
                  [](base::OnceCallback<void(FaceDetectResult)> callback,
                     std::vector<human_sensing::CrosFace>* out_faces,
                     FaceDetectResult detect_result,
                     std::vector<human_sensing::CrosFace> faces) {
                    *out_faces = std::move(faces);
                    std::move(callback).Run(detect_result);
                  },
                  GetFutureCallback(future), faces));
  if (!future->Wait(kDetectTimeoutMs)) {
    LOGF(ERROR) << "Face detection timeout";
    return FaceDetectResult::kTimeoutError;
  }
  return future->Get();
}

FaceDetectResult FaceDetector::Detect(
    const uint8_t* buffer_addr,
    int input_stride,
    Size input_size,
    std::vector<human_sensing::CrosFace>* faces,
    std::optional<Size> active_sensor_array_size) {
  auto future = Future<FaceDetectResult>::Create(nullptr);
  DetectAsync(buffer_addr, input_stride, input_size, active_sensor_array_size,
              base::BindOnce(
                  [](base::OnceCallback<void(FaceDetectResult)> callback,
                     std::vector<human_sensing::CrosFace>* out_faces,
                     FaceDetectResult detect_result,
                     std::vector<human_sensing::CrosFace> faces) {
                    *out_faces = std::move(faces);
                    std::move(callback).Run(detect_result);
                  },
                  GetFutureCallback(future), faces));
  if (!future->Wait(kDetectTimeoutMs)) {
    LOGF(ERROR) << "Face detection timeout";
    return FaceDetectResult::kTimeoutError;
  }
  return future->Get();
}
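
// Maps the buffer and forwards the plane 0 address (the gray/Y plane) to the
// address-based overload below.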
void FaceDetector::DetectAsync(buffer_handle_t buffer,
                               std::optional<Size> active_sensor_array_size,
                               ResultCallback result_callback) {
  TRACE_COMMON(
      perfetto::Flow::ProcessScoped(reinterpret_cast<uintptr_t>(buffer)));
  ScopedMapping mapping(buffer);
  if (!mapping.is_valid()) {
    LOGF(ERROR) << "Failed to map buffer";
    std::move(result_callback).Run(FaceDetectResult::kBufferError, {});
    return;
  }
  int input_stride = mapping.plane(0).stride;
  Size input_size = Size(buffer_manager_->GetWidth(buffer),
                         buffer_manager_->GetHeight(buffer));
  const uint8_t* buffer_addr = static_cast<uint8_t*>(mapping.plane(0).addr);
  DetectAsync(buffer_addr, input_stride, input_size, active_sensor_array_size,
              std::move(result_callback));
}

void FaceDetector::DetectAsync(const uint8_t* buffer_addr,
                               int input_stride,
                               Size input_size,
                               std::optional<Size> active_sensor_array_size,
                               ResultCallback result_callback) {
  auto buffer_released = Future<void>::Create(nullptr);
  thread_.task_runner()->PostTask(
      FROM_HERE,
      base::BindOnce(&FaceDetector::DetectOnThread, base::Unretained(this),
                     buffer_addr, input_stride, input_size,
                     active_sensor_array_size, std::move(result_callback),
                     GetFutureCallback(buffer_released)));
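  // Block until DetectOnThread() has finished reading |buffer_addr|, so that
  // the caller may release or reuse the input buffer once this returns.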
  if (!buffer_released->Wait(kDetectTimeoutMs)) {
    LOGF(ERROR) << "Face detection timeout in releasing input buffer";
  }
}

void FaceDetector::DetectOnThread(const uint8_t* buffer_addr,
                                  int input_stride,
                                  Size input_size,
                                  std::optional<Size> active_sensor_array_size,
                                  ResultCallback result_callback,
                                  base::OnceClosure buffer_release_callback) {
  DCHECK(thread_.task_runner()->BelongsToCurrentThread());
  CHECK(buffer_addr);
  TRACE_COMMON("width", input_size.width, "height", input_size.height);
  Size scaled_size =
      (input_size.width > input_size.height)
          ? Size(kImageSizeForDetection,
                 kImageSizeForDetection * input_size.height / input_size.width)
          : Size(kImageSizeForDetection * input_size.width / input_size.height,
                 kImageSizeForDetection);
  PrepareBuffer(scaled_size);
  libyuv::ScalePlane(buffer_addr, input_stride, input_size.width,
                     input_size.height, scaled_buffer_.data(),
                     scaled_size.width, scaled_size.width, scaled_size.height,
                     libyuv::FilterMode::kFilterNone);
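  // The input has been copied into |scaled_buffer_|; signal the caller so it
  // can release the input buffer while detection runs.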
  std::move(buffer_release_callback).Run();

  std::vector<human_sensing::CrosFace> faces;
  {
    TRACE_EVENT_BEGIN(kCameraTraceCategoryCommon, "FaceDetector::Detect::Run");
    if (!wrapper_->Detect(scaled_buffer_.data(), scaled_size.width,
                          scaled_size.height, &faces)) {
      std::move(result_callback).Run(FaceDetectResult::kDetectError, {});
      return;
    }
    TRACE_EVENT_END(kCameraTraceCategoryCommon, "num_faces", faces.size());
  }
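
  // Map the results from the scaled image back to input-image coordinates.
  // One ratio works for both axes because the downscale above preserved the
  // aspect ratio.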
  if (!faces.empty()) {
    float ratio = static_cast<float>(input_size.width) /
                  static_cast<float>(scaled_size.width);
    for (auto& f : faces) {
      f.bounding_box.x1 *= ratio;
      f.bounding_box.y1 *= ratio;
      f.bounding_box.x2 *= ratio;
      f.bounding_box.y2 *= ratio;
      for (auto& l : f.landmarks) {
        l.x *= ratio;
        l.y *= ratio;
      }
    }
  }
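
  // When an active sensor array size is provided, additionally map the
  // results from input-frame coordinates to active-array coordinates (the
  // input frame is assumed to be a centered, uniformly scaled view of the
  // active array).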
  if (active_sensor_array_size) {
    std::optional<std::tuple<float, float, float>> transform =
        GetCoordinateTransform(input_size, *active_sensor_array_size);
    if (!transform) {
      std::move(result_callback).Run(FaceDetectResult::kTransformError, {});
      return;
    }
    const float scale = std::get<0>(*transform);
    const float offset_x = std::get<1>(*transform);
    const float offset_y = std::get<2>(*transform);
    for (auto& f : faces) {
      f.bounding_box.x1 = scale * f.bounding_box.x1 + offset_x;
      f.bounding_box.y1 = scale * f.bounding_box.y1 + offset_y;
      f.bounding_box.x2 = scale * f.bounding_box.x2 + offset_x;
      f.bounding_box.y2 = scale * f.bounding_box.y2 + offset_y;
      for (auto& l : f.landmarks) {
        l.x = scale * l.x + offset_x;
        l.y = scale * l.y + offset_y;
      }
    }
  }

  std::move(result_callback).Run(FaceDetectResult::kDetectOk, std::move(faces));
}
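
// Computes (scale, offset_x, offset_y) mapping |src| coordinates into |dst|
// coordinates, where |src| is scaled uniformly and centered within |dst|.
// For example, mapping a 640x360 frame into a 648x486 active array gives
// scale = 648 / 640 = 1.0125, offset_x = 0, and
// offset_y = (486 - 360 * 1.0125) / 2 = 60.75.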
// static
std::optional<std::tuple<float, float, float>>
FaceDetector::GetCoordinateTransform(const Size src, const Size dst) {
  if (src.width > dst.width || src.height > dst.height) {
    return std::nullopt;
  }
  const float width_ratio = static_cast<float>(dst.width) / src.width;
  const float height_ratio = static_cast<float>(dst.height) / src.height;
  const float scaling = std::min(width_ratio, height_ratio);
  float offset_x = 0.0f, offset_y = 0.0f;
  if (width_ratio < height_ratio) {
    // |dst| has larger height than |src| * scaling.
    offset_y = (dst.height - (src.height * scaling)) / 2;
  } else {
    // |dst| has larger width than |src| * scaling.
    offset_x = (dst.width - (src.width * scaling)) / 2;
  }
  return std::make_tuple(scaling, offset_x, offset_y);
}
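
// Ensures |scaled_buffer_| can hold an 8-bit gray image of |img_size|. The
// buffer only grows, so repeated detections do not reallocate.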
void FaceDetector::PrepareBuffer(Size img_size) {
  size_t new_size = img_size.width * img_size.height;
  if (new_size > scaled_buffer_.size()) {
    scaled_buffer_.resize(new_size);
  }
}

std::string LandmarkTypeToString(human_sensing::Landmark::Type type) {
  switch (type) {
    case human_sensing::Landmark::Type::kLeftEye:
      return "LeftEye";
    case human_sensing::Landmark::Type::kRightEye:
      return "RightEye";
    case human_sensing::Landmark::Type::kNoseTip:
      return "NoseTip";
    case human_sensing::Landmark::Type::kMouthCenter:
      return "MouthCenter";
    case human_sensing::Landmark::Type::kLeftEarTragion:
      return "LeftEarTragion";
    case human_sensing::Landmark::Type::kRightEarTragion:
      return "RightEarTragion";
    case human_sensing::Landmark::Type::kLandmarkUnknown:
      return "Unknown";
  }
  return base::StringPrintf("Undefined landmark type %d",
                            static_cast<int>(type));
}

}  // namespace cros