| // Copyright 2020 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // API of the text classifier (libtextclassifier). |
| |
| // NOTE: This mojom exists in two places and must be kept in sync: |
| // Chromium: //chromeos/services/machine_learning/public/mojom/ |
| // Chrome OS: src/platform2/ml/mojom/ |
| // Example: A backwards-compatible mojom change (and corresponding |
| // implementation change) can be made in Chrome OS first, then replicated to the |
| // client (Chromium) later. |
| |
| module chromeos.machine_learning.mojom; |
| |
| // NOTE: The base directory for 'import' statements is expected to differ |
| // between the Chromium and Chrome OS versions of this file. |
| // In particular, the Chromium version must import "time.mojom" from the mojo |
| // folder, i.e. "mojo/public/mojom/base/time.mojom". |
| import "ml/mojom/time.mojom"; |
| |
| // Result codes for text annotation (per the enum name). |
| // NOTE(review): nothing in the visible part of this file references this |
| // enum (the Annotate response carries only the annotations); presumably it is |
| // consumed by the implementation's logging - confirm. |
| // These values are persisted to logs. Entries should not be renumbered and |
| // numeric values should never be reused. |
| enum TextAnnotationResult { |
| OK = 0, |
| ERROR = 1, |
| }; |
| |
| // Result codes for selection suggestion (per the enum name). |
| // NOTE(review): nothing in the visible part of this file references this |
| // enum (the SuggestSelection response carries only a CodepointSpan); |
| // presumably it is consumed by the implementation's logging - confirm. |
| // These values are persisted to logs. Entries should not be renumbered and |
| // numeric values should never be reused. |
| enum SuggestSelectionResult { |
| OK = 0, |
| ERROR = 1, |
| }; |
| |
| // Enum for specifying the annotation usecase. |
| // Must be consistent with |AnnotationUsecase| in model.fb in libtextclassifier, |
| // so the numeric values below must not be changed independently of it. |
| // ANNOTATION_USECASE_SMART is the default for both |TextAnnotationRequest| and |
| // |TextSuggestSelectionRequest|. |
| enum AnnotationUsecase { |
| // Results are optimized for Smart{Select,Share,Linkify}. |
| ANNOTATION_USECASE_SMART = 0, |
| // Results are optimized for using TextClassifier as an infrastructure that |
| // annotates as much as possible. |
| ANNOTATION_USECASE_RAW = 1, |
| }; |
| |
| // Stores data extracted from each text entity. |
| // Currently, only for the "number" type, the real number value is stored in |
| // |numeric_value|. For the other types, the annotated substring is stored |
| // in |string_value|. |
| // The values come from the class |ClassificationResult| of tclib. (See |
| // "tclib/annotator/types.h"). |
| // TODO(honglinyu): add data extraction for more types when needed and |
| // available. For example, when "date" data is needed, probably we should add |
| // a new struct "Date" and add a new member "Date date_value" to the following |
| // union. |
| union TextEntityData { |
| // A numeric value. |
| // - For the "number" entity type, it is the parsed value, |
| // e.g. it is 34.3 for the input string "34.3". |
| double numeric_value@0; |
| // The annotated substring, for every entity type other than "number" |
| // (e.g. "url", "address" etc.). |
| string string_value@1; |
| }; |
| |
| // One entity type detected for a piece of text (can be phone numbers, |
| // addresses, emails and urls etc.), together with its confidence and any |
| // extracted data. |
| // This struct is a distillation of the |ClassificationResult| of tclib. (See |
| // "tclib/annotator/types.h"). |
| struct TextEntity { |
| // The name of the type (e.g. "phone", "address", "email" and "url" etc.). |
| string name@0; |
| // The confidence score of the entity annotation, and the range is 0-1. |
| float confidence_score@1; |
| // Additional data extracted from the text; which union member is set |
| // depends on the entity type (see |TextEntityData|). |
| TextEntityData data@2; |
| }; |
| |
| // A substring of the annotated text and possible associated entities. |
| // This struct is a simplification of the |AnnotatedSpan| class of tclib. (See |
| // "tclib/annotator/types.h"). |
| // NOTE(review): the unit of the offsets (presumably codepoints, as for |
| // |CodepointSpan|) is not stated here - confirm. |
| struct TextAnnotation { |
| // The offset of the first character of the annotation. |
| uint32 start_offset@0; |
| // The offset of the last character of the annotation. |
| // NOTE(review): confirm whether this offset is inclusive or exclusive |
| // (i.e. whether the span is [start, end] or [start, end)). |
| uint32 end_offset@1; |
| // The set of entity types associated with the substring. |
| array<TextEntity> entities@2; |
| }; |
| |
| // Contains the input and parameters used to annotate the text. |
| // This is a combination of the input string and |AnnotationOptions| in tclib |
| // (see "tclib/annotator/types.h"). |
| // Only |text| is mandatory; every other field is either nullable or has a |
| // default value. |
| struct TextAnnotationRequest { |
| // The text to be annotated. |
| string text@0; |
| // Comma-delimited locales (e.g., "en", "en,es"). |
| string? default_locales@1; |
| // Comma-separated list of language tags (BCP 47, per the same-named field |
| // of |TextSuggestSelectionRequest|). |
| string? detected_text_language_tags@2; |
| // Tailors the output annotations according to the specified use-case. |
| AnnotationUsecase annotation_usecase@3 = ANNOTATION_USECASE_SMART; |
| // For parsing relative datetimes: the reference ("now") time against which |
| // the relative datetimes get resolved. |
| mojo_base.mojom.Time? reference_time@4; |
| // Timezone in which the input text was written (format as accepted by ICU). |
| // If empty (default), will use the system's timezone. |
| string? reference_timezone@5; |
| // Enabled entities. If empty (default), all types of entities will be |
| // enabled. |
| array<string>? enabled_entities@6; |
| }; |
| |
| // Marks a span in a sequence of codepoints. |
| // This struct is consistent with the type |CodepointSpan| of tclib. (See |
| // "tclib/annotator/types.h"). |
| // Offsets count UTF8 codepoints, not bytes (see the note on |
| // TextClassifier.SuggestSelection). |
| struct CodepointSpan { |
| // The offset of the first character of the span. |
| uint32 start_offset@0; |
| // The offset of the last character of the span. |
| // NOTE(review): confirm whether this offset is inclusive or exclusive |
| // (i.e. whether the span is [start, end] or [start, end)). |
| uint32 end_offset@1; |
| }; |
| |
| // Contains the input and parameters used to suggest selection. |
| // This is a combination of the inputs of the |SuggestSelection| function |
| // of tclib. (See "tclib/annotator/annotate.h"). |
| struct TextSuggestSelectionRequest { |
| // The candidate text. |
| string text@0; |
| // The span of |text| that the user selected, in codepoint offsets. |
| CodepointSpan user_selection@1; |
| // Comma-delimited locales (e.g., "en", "en,es"). |
| string? default_locales@2; |
| // Comma-separated list of BCP 47 language tags. |
| string? detected_text_language_tags@3; |
| // Tailors the output annotations according to the specified use-case. |
| AnnotationUsecase annotation_usecase@4 = ANNOTATION_USECASE_SMART; |
| }; |
| |
| // Used to annotate entities within text strings and to suggest selections. |
| interface TextClassifier { |
| // Annotates a text string and returns the detected substrings and their |
| // possible entities. |
| Annotate@0(TextAnnotationRequest request) => |
| (array<TextAnnotation> outputs); |
| // Suggests a selection based on the user's selection. |
| // If the inputs are invalid (e.g., the user selection's start offset is |
| // after its end offset), the input user selection will be returned unchanged. |
| // NOTE: The selection indices are passed in and returned in terms of |
| // UTF8 codepoints (not bytes). |
| SuggestSelection@1(TextSuggestSelectionRequest request) => |
| (CodepointSpan outputs); |
| }; |