// Copyright 2022 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file needs to be kept in sync between:
// - "components/ml/mojom/web_platform_model.mojom" in the chromium repo
// - "src/platform2/ml/mojom/web_platform_model.mojom" in the chromiumos repo
// The modification needed is:
// - "big_buffer.mojom" is included from mojo's base folder in the chromium
//   repo, whereas it is included from "ml/mojom" in the chromiumos repo.

module ml.model_loader.mojom;

import "ml/mojom/big_buffer.mojom";

// Model formats that can be loaded. Currently only the TfLite flatbuffer
// format is supported.
// TODO(honglinyu): we may also need to add the versions of the formats,
// and/or add feature query interfaces that report the supported formats and
// versions. How to best version the API is still an open question; see
// https://github.com/webmachinelearning/model-loader/issues/25.
[Stable, Extensible]
enum ModelFormat {
  // Means the backend should decide the model format automatically.
  [Default] kAuto = 0,
  kTfLite = 1,
};

// Types of devices used to do model inference.
// Currently only CPU is supported.
[Stable, Extensible]
enum DevicePreference {
  // Means the backend can select the device arbitrarily.
  [Default] kAuto = 0,
  // Means model inference must be done on the CPU.
  kCpu = 1,
  // Prefers running the model inference on the GPU. If some of the operators
  // are not supported on the GPU, the backend can select the best fallback
  // strategy: it may run the whole graph on the CPU, or still run part of the
  // ops on the GPU. We do not return an error in this case to avoid directly
  // exposing the user's GPU information.
  kGpu = 2,
};

[Stable, Extensible]
struct CreateModelLoaderOptions {
  // The number of threads used in model inference.
  // 0 means the backend can decide it automatically (e.g. equal to the number
  // of physical cores). Backends normally have an upper cap on the number of
  // threads a model can use, so setting a number bigger than that cap makes
  // no difference.
  uint32 num_threads@0 = 0;
  // The format of the model (e.g. `kTfLite`).
  ModelFormat model_format@1;
  // The device preference (e.g. `kAuto`, `kCpu` or `kGpu`).
  DevicePreference device_preference@2;
};
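
// For illustration, a client might populate these options via the generated
// C++ bindings roughly as follows (a hedged sketch, not part of this file;
// the variable name `options` is hypothetical):
//
//   auto options = ml::model_loader::mojom::CreateModelLoaderOptions::New();
//   options->num_threads = 0;  // Let the backend decide.
//   options->model_format = ml::model_loader::mojom::ModelFormat::kTfLite;
//   options->device_preference =
//       ml::model_loader::mojom::DevicePreference::kAuto;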

// Currently, the tensor type is only used to inform the user and is not
// meaningful to `Compute()`. The `kUnknown` type just means the type is not
// exposed in the mojo interface; it does not mean the type is "unsupported".
// We define this type just to avoid exposing unnecessary TfLite details from
// the beginning. So even if the type is `kUnknown`, the user can still call
// `Compute()` by feeding in appropriate input binary buffers.
[Stable, Extensible]
enum DataType {
  [Default] kUnknown = 0,
  kInt64 = 1,
  kUint64 = 2,
  kFloat64 = 3,
  kInt32 = 4,
  kUint32 = 5,
  kFloat32 = 6,
  kInt16 = 7,
  kUint16 = 8,
  kFloat16 = 9,
  kInt8 = 10,
  kUint8 = 11,
  kBool = 12,
};

// Represents the information of a tensor. The tensor infos of the input and
// output tensors are sent from ml-service to the client as a result of model
// loading.
[Stable, Extensible]
struct TensorInfo {
  // The total size of the tensor buffer in bytes. This is the most important
  // information: ml-service uses it to check whether an input tensor is
  // valid.
  uint32 byte_size@0;
  // The type of the tensor data.
  DataType data_type@1;
  // The dimensions of the tensor.
  array<uint32> dimensions@2;
};
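
// For example (illustrative values only), a `kFloat32` tensor with
// `dimensions` [1, 224, 224, 3] occupies 4 bytes per element, so its
// `byte_size` is 1 * 224 * 224 * 3 * 4 = 602112 bytes.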

// Represents the model information. Currently it contains the tensor info of
// the input and output tensors.
[Stable, Extensible]
struct ModelInfo {
  // Information about the model's input and output tensors, keyed by tensor
  // name.
  map<string, TensorInfo> input_tensor_info@0;
  map<string, TensorInfo> output_tensor_info@1;
};
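
// For illustration, a client holding a `ModelInfoPtr` from the generated C++
// bindings could look up the expected buffer size of an input tensor like
// this (the tensor name "input" is hypothetical):
//
//   uint32_t expected_size =
//       model_info->input_tensor_info["input"]->byte_size;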

// Used by `Load()` of the `ModelLoader` interface.
[Stable, Extensible]
enum LoadModelResult {
  kOk,
  kUnknownError,
  // The model is invalid (e.g. the interpreter fails to parse the model).
  kInvalidModel,
  // The model is not supported, e.g. some ops are not supported.
  kNotSupported,
};

// Used when creating a `ModelLoader`.
[Stable, Extensible]
enum CreateModelLoaderResult {
  kOk,
  kUnknownError,
  // The input configuration is not supported.
  kNotSupported,
};

// Used by `Compute()` of the `Model` interface.
[Stable, Extensible]
enum ComputeResult {
  kOk,
  kUnknownError,
  // No model has been loaded yet.
  kModelNotLoaded,
  // The number of inputs differs from what the model requires.
  kIncorrectNumberOfInputs,
  // Some required inputs are missing.
  kMissingInput,
  // The size of some input buffer is incorrect.
  kInvalidInputBufferSize,
};

// Corresponds to the `MLModelLoader` object in the WebIDL definition.
[Stable]
interface ModelLoader {
  // We define this function in the `ModelLoader` interface rather than in
  // `MLService` to avoid unnecessary IPC hops of the model content.
  Load@0(mojo_base.mojom.BigBuffer model_content)
      => (LoadModelResult result, pending_remote<Model>? remote,
          ModelInfo? model_info);
};
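
// For illustration, the client side could call `Load()` via the generated
// C++ bindings roughly as follows (a hedged sketch; `model_loader` is a bound
// `mojo::Remote<ModelLoader>` and `model_bytes` is a hypothetical
// `std::vector<uint8_t>` holding the flatbuffer):
//
//   model_loader->Load(
//       mojo_base::BigBuffer(model_bytes),
//       base::BindOnce([](LoadModelResult result,
//                         mojo::PendingRemote<Model> model,
//                         ModelInfoPtr model_info) {
//         // On `kOk`, bind `model` to a `mojo::Remote<Model>` and keep
//         // `model_info` to size the input buffers for `Compute()`.
//       }));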

// Represents a model instance. The user can use its `Compute()` function to
// run model inference.
// Corresponds to the `MLModelLoaded` object in the WebIDL definition.
[Stable]
interface Model {
  // The backend/frontend should already know the dimensions needed for each
  // input/output tensor, so we only need the buffer contents of the
  // input/output tensors here.
  Compute@0(map<string, array<uint8>> input_tensors)
      => (ComputeResult result,
          map<string, array<uint8>>? output_tensors);
};
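
// For illustration, a `Compute()` call via the generated C++ bindings could
// look roughly like this (a hedged sketch; `model` is a bound
// `mojo::Remote<Model>`, `expected_size` comes from the corresponding
// `TensorInfo.byte_size`, and the tensor name "input" is hypothetical). Each
// input buffer must contain exactly `byte_size` bytes:
//
//   base::flat_map<std::string, std::vector<uint8_t>> inputs;
//   inputs["input"] = std::vector<uint8_t>(expected_size);  // Fill with data.
//   model->Compute(
//       std::move(inputs),
//       base::BindOnce(
//           [](ComputeResult result,
//              const absl::optional<base::flat_map<
//                  std::string, std::vector<uint8_t>>>& outputs) {
//             // On `kOk`, `outputs` holds one buffer per output tensor,
//             // keyed by tensor name.
//           }));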