| // Copyright 2021 The Chromium OS Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // This file is mostly copied from chromium repo: |
| // //components/assist_ranker/example_preprocessing_unittest.cc |
| |
| #include "ml/example_preprocessor/example_preprocessing.h" |
| |
| #include <vector> |
| |
| #include <base/strings/string_number_conversions.h> |
| #include <google/protobuf/map.h> |
| #include <google/protobuf/repeated_field.h> |
| #include <gtest/gtest.h> |
| |
| namespace assist_ranker { |
| namespace { |
| |
| using ::google::protobuf::Map; |
| using ::google::protobuf::RepeatedField; |
| |
| void EXPECT_EQUALS_EXAMPLE(const RankerExample& example1, |
| const RankerExample& example2) { |
| EXPECT_EQ(example1.features_size(), example2.features_size()); |
| for (const auto& pair : example1.features()) { |
| const Feature& feature1 = pair.second; |
| const Feature& feature2 = example2.features().at(pair.first); |
| EXPECT_EQ(feature1.feature_type_case(), feature2.feature_type_case()); |
| EXPECT_EQ(feature1.bool_value(), feature2.bool_value()); |
| EXPECT_EQ(feature1.int32_value(), feature2.int32_value()); |
| EXPECT_EQ(feature1.float_value(), feature2.float_value()); |
| EXPECT_EQ(feature1.string_value(), feature2.string_value()); |
| EXPECT_EQ(feature1.string_list().string_value_size(), |
| feature2.string_list().string_value_size()); |
| for (int i = 0; i < feature1.string_list().string_value_size(); ++i) { |
| EXPECT_EQ(feature1.string_list().string_value(i), |
| feature2.string_list().string_value(i)); |
| } |
| } |
| } |
| |
| } // namespace |
| |
| class ExamplePreprocessorTest : public ::testing::Test { |
| protected: |
| void SetUp() override { |
| auto& features = *example_.mutable_features(); |
| features[bool_name_].set_bool_value(bool_value_); |
| features[int32_name_].set_int32_value(int32_value_); |
| features[float_name_].set_float_value(float_value_); |
| features[one_hot_name_].set_string_value(one_hot_value_); |
| *features[sparse_name_].mutable_string_list()->mutable_string_value() = { |
| sparse_values_.begin(), sparse_values_.end()}; |
| } |
| |
| RankerExample example_; |
| const std::string bool_name_ = "bool_feature"; |
| const bool bool_value_ = true; |
| const std::string int32_name_ = "int32_feature"; |
| const int int32_value_ = 2; |
| const std::string float_name_ = "float_feature"; |
| const float float_value_ = 3.0; |
| const std::string one_hot_name_ = "one_hot_feature"; |
| const std::string elem1_ = "elem1"; |
| const std::string elem2_ = "elem2"; |
| const std::string one_hot_value_ = elem1_; |
| const std::string sparse_name_ = "sparse_feature"; |
| const std::vector<std::string> sparse_values_ = {elem1_, elem2_}; |
| }; |
| |
| TEST_F(ExamplePreprocessorTest, AddMissingFeatures) { |
| RankerExample expected = example_; |
| ExamplePreprocessorConfig config; |
| |
| // Adding missing feature label to an existing feature has no effect. |
| config.add_missing_features(bool_name_); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kSuccess); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| config.Clear(); |
| |
| // Adding missing feature label to non-existing feature returns a |
| // "_MissingFeature" feature with a list of feature names. |
| const std::string foo = "foo"; |
| config.add_missing_features(foo); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kSuccess); |
| (*expected |
| .mutable_features())[ExamplePreprocessor::kMissingFeatureDefaultName] |
| .mutable_string_list() |
| ->add_string_value(foo); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| config.Clear(); |
| } |
| |
| TEST_F(ExamplePreprocessorTest, AddBucketizeFeatures) { |
| RankerExample expected = example_; |
| ExamplePreprocessorConfig config; |
| Map<std::string, ExamplePreprocessorConfig::Boundaries>& bucketizers = |
| *config.mutable_bucketizers(); |
| |
| // Adding bucketized feature to non-existing feature returns the same example. |
| const std::string foo = "foo"; |
| bucketizers[foo].add_boundaries(0.5); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kSuccess); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| config.Clear(); |
| |
| // Bucketizing a bool feature returns same proto. |
| bucketizers[bool_name_].add_boundaries(0.5); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kNonbucketizableFeatureType); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| config.Clear(); |
| |
| // Bucketizing a string feature returns same proto. |
| bucketizers[one_hot_name_].add_boundaries(0.5); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kNonbucketizableFeatureType); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| config.Clear(); |
| |
| // Bucketizing an int32 feature with 3 boundary. |
| bucketizers[int32_name_].add_boundaries(int32_value_ - 2); |
| bucketizers[int32_name_].add_boundaries(int32_value_ - 1); |
| bucketizers[int32_name_].add_boundaries(int32_value_ + 1); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kSuccess); |
| (*expected.mutable_features())[int32_name_].set_string_value("2"); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| config.Clear(); |
| |
| // Bucketizing a float feature with 3 boundary. |
| bucketizers[float_name_].add_boundaries(float_value_ - 0.2); |
| bucketizers[float_name_].add_boundaries(float_value_ - 0.1); |
| bucketizers[float_name_].add_boundaries(float_value_ + 0.1); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kSuccess); |
| (*expected.mutable_features())[float_name_].set_string_value("2"); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| config.Clear(); |
| |
| // Bucketizing a float feature with value equal to a boundary. |
| (*example_.mutable_features())[float_name_].set_float_value(float_value_); |
| bucketizers[float_name_].add_boundaries(float_value_ - 0.2); |
| bucketizers[float_name_].add_boundaries(float_value_ - 0.1); |
| bucketizers[float_name_].add_boundaries(float_value_); |
| bucketizers[float_name_].add_boundaries(float_value_ + 0.1); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kSuccess); |
| (*expected.mutable_features())[float_name_].set_string_value("3"); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| config.Clear(); |
| } |
| |
| // Tests normalization of float and int32 features. |
| TEST_F(ExamplePreprocessorTest, NormalizeFeatures) { |
| RankerExample expected = example_; |
| ExamplePreprocessorConfig config; |
| Map<std::string, float>& normalizers = *config.mutable_normalizers(); |
| normalizers[int32_name_] = int32_value_ - 1.0f; |
| normalizers[float_name_] = float_value_ + 1.0f; |
| |
| (*expected.mutable_features())[int32_name_].set_float_value(1.0f); |
| (*expected.mutable_features())[float_name_].set_float_value( |
| float_value_ / (float_value_ + 1.0f)); |
| |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kSuccess); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| |
| // Zero normalizer returns an error. |
| normalizers[float_name_] = 0.0f; |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kNormalizerIsZero); |
| } |
| |
| // Zero normalizer returns an error. |
| TEST_F(ExamplePreprocessorTest, ZeroNormalizerReturnsError) { |
| ExamplePreprocessorConfig config; |
| (*config.mutable_normalizers())[float_name_] = 0.0f; |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kNormalizerIsZero); |
| } |
| |
| // Tests converts a bool or int32 feature to a string feature. |
| TEST_F(ExamplePreprocessorTest, ConvertToStringFeatures) { |
| RankerExample expected = example_; |
| ExamplePreprocessorConfig config; |
| auto& features_list = *config.mutable_convert_to_string_features(); |
| *features_list.Add() = bool_name_; |
| *features_list.Add() = int32_name_; |
| *features_list.Add() = one_hot_name_; |
| |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kSuccess); |
| |
| (*expected.mutable_features())[bool_name_].set_string_value( |
| base::NumberToString(static_cast<int>(bool_value_))); |
| (*expected.mutable_features())[int32_name_].set_string_value( |
| base::NumberToString(int32_value_)); |
| EXPECT_EQUALS_EXAMPLE(example_, expected); |
| } |
| |
| // Float features can't be convert to string features. |
| TEST_F(ExamplePreprocessorTest, |
| ConvertFloatFeatureToStringFeatureReturnsError) { |
| ExamplePreprocessorConfig config; |
| config.add_convert_to_string_features(float_name_); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_), |
| ExamplePreprocessor::kNonConvertibleToStringFeatureType); |
| } |
| |
| TEST_F(ExamplePreprocessorTest, Vectorization) { |
| ExamplePreprocessorConfig config; |
| Map<std::string, int32_t>& feature_indices = |
| *config.mutable_feature_indices(); |
| |
| RankerExample example_vec_expected = example_; |
| RepeatedField<float>& feature_vector = |
| *(*example_vec_expected.mutable_features()) |
| [ExamplePreprocessor::kVectorizedFeatureDefaultName] |
| .mutable_float_list() |
| ->mutable_float_value(); |
| |
| // bool feature puts the value to the corresponding place. |
| feature_indices[bool_name_] = 0; |
| feature_vector.Add(1.0); |
| |
| // int32 feature puts the value to the corresponding place. |
| feature_indices[int32_name_] = 1; |
| feature_vector.Add(int32_value_); |
| |
| // float feature puts the value to the corresponding place. |
| feature_indices[float_name_] = 2; |
| feature_vector.Add(float_value_); |
| |
| // string value is vectorized as 1.0. |
| feature_indices[ExamplePreprocessor::FeatureFullname(one_hot_name_, |
| one_hot_value_)] = 3; |
| feature_vector.Add(1.0); |
| |
| // string list value is vectorized as 1.0. |
| feature_indices[ExamplePreprocessor::FeatureFullname(sparse_name_, elem1_)] = |
| 4; |
| feature_indices[ExamplePreprocessor::FeatureFullname(sparse_name_, elem2_)] = |
| 5; |
| feature_vector.Add(1.0); |
| feature_vector.Add(1.0); |
| |
| // string list value with element not in the example sets the corresponding |
| // place as 0.0; |
| feature_indices[ExamplePreprocessor::FeatureFullname(sparse_name_, "foo")] = |
| 5; |
| feature_vector.Add(0.0); |
| |
| // Non-existing feature puts 0 to the corresponding place. |
| feature_indices["bar"] = 6; |
| feature_vector.Add(0.0); |
| |
| // Verify the propressing result. |
| RankerExample example = example_; |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example), |
| ExamplePreprocessor::kSuccess); |
| EXPECT_EQUALS_EXAMPLE(example, example_vec_expected); |
| |
| // Example with extra numeric feature gets kNoFeatureIndexFound error; |
| RankerExample example_with_extra_numeric = example_; |
| (*example_with_extra_numeric.mutable_features())["foo"].set_float_value(1.0); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_with_extra_numeric), |
| ExamplePreprocessor::ExamplePreprocessor::kNoFeatureIndexFound); |
| |
| // Example with extra one-hot feature gets kNoFeatureIndexFound error; |
| RankerExample example_with_extra_one_hot = example_; |
| (*example_with_extra_one_hot.mutable_features())["foo"].set_string_value( |
| "bar"); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_with_extra_one_hot), |
| ExamplePreprocessor::ExamplePreprocessor::kNoFeatureIndexFound); |
| |
| // Example with extra sparse feature value gets kNoFeatureIndexFound error; |
| RankerExample example_with_extra_sparse = example_; |
| (*example_with_extra_sparse.mutable_features())[sparse_name_] |
| .mutable_string_list() |
| ->add_string_value("bar"); |
| EXPECT_EQ(ExamplePreprocessor::Process(config, &example_with_extra_sparse), |
| ExamplePreprocessor::ExamplePreprocessor::kNoFeatureIndexFound); |
| } |
| |
| TEST_F(ExamplePreprocessorTest, MultipleErrorCode) { |
| ExamplePreprocessorConfig config; |
| |
| (*config.mutable_feature_indices())[int32_name_] = 0; |
| (*config.mutable_feature_indices())[float_name_] = 1; |
| (*config.mutable_bucketizers())[one_hot_name_].add_boundaries(0.5); |
| RankerExample example_vec_expected = example_; |
| RepeatedField<float>& feature_vector = |
| *(*example_vec_expected.mutable_features()) |
| [ExamplePreprocessor::kVectorizedFeatureDefaultName] |
| .mutable_float_list() |
| ->mutable_float_value(); |
| |
| feature_vector.Add(int32_value_); |
| feature_vector.Add(float_value_); |
| |
| const int error_code = ExamplePreprocessor::Process(config, &example_); |
| // Error code contains features in example_ but not in feature_indices. |
| EXPECT_TRUE(error_code & ExamplePreprocessor::kNoFeatureIndexFound); |
| // Error code contains features that are not bucketizable. |
| EXPECT_TRUE(error_code & ExamplePreprocessor::kNonbucketizableFeatureType); |
| // No kInvalidFeatureType error. |
| EXPECT_FALSE(error_code & ExamplePreprocessor::kInvalidFeatureType); |
| // Only two elements is correctly vectorized. |
| EXPECT_EQUALS_EXAMPLE(example_, example_vec_expected); |
| } |
| |
| TEST_F(ExamplePreprocessorTest, ExampleFloatIterator) { |
| RankerExample float_example; |
| for (const auto& field : ExampleFloatIterator(example_)) { |
| EXPECT_EQ(field.error, ExamplePreprocessor::kSuccess); |
| (*float_example.mutable_features())[field.fullname].set_float_value( |
| field.value); |
| } |
| |
| RankerExample float_example_expected; |
| auto& feature_map = *float_example_expected.mutable_features(); |
| |
| feature_map[bool_name_].set_float_value(bool_value_); |
| feature_map[int32_name_].set_float_value(int32_value_); |
| feature_map[float_name_].set_float_value(float_value_); |
| feature_map[ExamplePreprocessor::FeatureFullname(one_hot_name_, |
| one_hot_value_)] |
| .set_float_value(1.0); |
| feature_map[ExamplePreprocessor::FeatureFullname(sparse_name_, elem1_)] |
| .set_float_value(1.0); |
| feature_map[ExamplePreprocessor::FeatureFullname(sparse_name_, elem2_)] |
| .set_float_value(1.0); |
| |
| EXPECT_EQUALS_EXAMPLE(float_example, float_example_expected); |
| } |
| |
| TEST_F(ExamplePreprocessorTest, ExampleFloatIteratorError) { |
| RankerExample example; |
| example.mutable_features()->insert({"foo", Feature::default_instance()}); |
| (*example.mutable_features())["bar"] |
| .mutable_string_list() |
| ->mutable_string_value(); |
| int num_of_fields = 0; |
| for (const auto& field : ExampleFloatIterator(example)) { |
| if (field.fullname == "foo") { |
| EXPECT_EQ(field.error, ExamplePreprocessor::kInvalidFeatureType); |
| } |
| if (field.fullname == "bar") { |
| EXPECT_EQ(field.error, ExamplePreprocessor::kInvalidFeatureListIndex); |
| } |
| ++num_of_fields; |
| } |
| // Check the iterator indeed found the two fields. |
| EXPECT_EQ(num_of_fields, 2); |
| } |
| |
| } // namespace assist_ranker |