blob: ff77c909113e0ba0e4c64df3185698b82ec82f3e [file] [log] [blame]
// Copyright 2020 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "foomatic_shell/scanner.h"
#include <string>
#include <vector>
#include <base/logging.h>
namespace foomatic_shell {
// This class encapsulates an iterator representing the current position in the
// input string.
class Scanner::Input {
 public:
  // Binds this object to |data| and places the cursor on its first character.
  // Only a reference to |data| is stored, so the string has to stay alive and
  // unmodified for as long as this object is in use.
  explicit Input(const std::string& data)
      : data_(data), current_(data_.begin()) {}
  Input(const Input&) = delete;
  Input(Input&&) = delete;

  // Iterator pointing at the cursor. It belongs to the string passed to the
  // constructor and may be equal to that string's end().
  std::string::const_iterator GetCurrentPosition() const { return current_; }

  // Character under the cursor, or '\0' when the cursor is at the end of the
  // input.
  char GetCurrentCharacter() const {
    return CurrentCharIsEOF() ? '\0' : *current_;
  }

  // True iff the cursor is not at the end of the input and the character
  // under it equals |c|.
  bool CurrentCharIs(char c) const {
    if (CurrentCharIsEOF())
      return false;
    return *current_ == c;
  }

  // True iff the character under the cursor is a ByteNative (see grammar.h for
  // details): a character in A-Z, a-z, 0-9 or one of ./_+-@%. Always false at
  // the end of the input.
  bool CurrentCharIsByteNative() const {
    if (CurrentCharIsEOF())
      return false;
    const char c = *current_;
    if ('A' <= c && c <= 'Z')
      return true;
    if ('a' <= c && c <= 'z')
      return true;
    if ('0' <= c && c <= '9')
      return true;
    switch (c) {
      case '.':
      case '/':
      case '_':
      case '+':
      case '-':
      case '@':
      case '%':
        return true;
      default:
        return false;
    }
  }

  // True iff the character under the cursor occurs in |chars|. Always false at
  // the end of the input.
  bool CurrentCharIsOneOf(const std::string& chars) const {
    return !CurrentCharIsEOF() &&
           chars.find(*current_) != std::string::npos;
  }

  // True iff the cursor is at the end of the input.
  bool CurrentCharIsEOF() const { return current_ == data_.end(); }

  // Advances the cursor by one character; a no-op at the end of the input.
  void MoveToNext() {
    if (!CurrentCharIsEOF())
      ++current_;
  }

 private:
  // The scanned string (not owned) and the cursor into it.
  const std::string& data_;
  std::string::const_iterator current_;
};
Scanner::Scanner(const std::string& data)
: data_(std::make_unique<Input>(data)) {}
// Nothing is released by hand here; the members clean up after themselves.
Scanner::~Scanner() = default;
// Parses the following (see grammar.h for details):
// LiteralString = "'" , { ByteCommon | '"' | "`" | "\" } , "'" ;
// The current position must be on the opening '. It moves cursor to the first
// character after the closing '. The resultant token is added to |tokens|.
// |tokens| must not be nullptr. Returns false in case of an error.
bool Scanner::ParseLiteralString(std::vector<Token>* tokens) {
DCHECK(tokens != nullptr);
DCHECK(data_->CurrentCharIs('\''));
// Skip the opening '.
data_->MoveToNext();
// Create a new token.
tokens->resize(tokens->size() + 1);
Token* out = &(tokens->back());
out->type = Token::Type::kLiteralString;
out->begin = data_->GetCurrentPosition();
// Move forward until we find EOF or the closing '.
while (!data_->CurrentCharIsEOF()) {
if (data_->CurrentCharIs('\'')) {
// The closing ' was found.
out->end = data_->GetCurrentPosition();
out->value.assign(out->begin, out->end);
// Skip the closing '.
data_->MoveToNext();
// Success!
return true;
}
data_->MoveToNext();
}
// There is no closing '.
out->end = data_->GetCurrentPosition();
message_ = "Unexpected EOF when parsing '...' (literal string)";
return false;
}
// Parses the following (see grammar.h for details):
// ExecutedString = "`" , { ByteCommon | "'" | '"' | ("\",ByteAny) } , "`" ;
// The current position must be on the opening `. It moves cursor to the first
// character after the closing `. The resultant token is added to |tokens|.
// |tokens| must not be nullptr. Returns false in case of an error.
bool Scanner::ParseExecutedString(std::vector<Token>* tokens) {
DCHECK(tokens != nullptr);
DCHECK(data_->CurrentCharIs('`'));
// Skip the opening `.
data_->MoveToNext();
// Create a new token.
tokens->resize(tokens->size() + 1);
Token* out = &(tokens->back());
out->type = Token::Type::kExecutedString;
out->begin = data_->GetCurrentPosition();
// Move forward until we find EOF or the closing `.
while (!data_->CurrentCharIsEOF()) {
if (data_->CurrentCharIs('`')) {
// The closing ` was found.
out->end = data_->GetCurrentPosition();
// Skip the closing `.
data_->MoveToNext();
// Success!
return true;
}
// The escape character (\) works in ExecutedString for ByteAny.
if (data_->CurrentCharIs('\\')) {
data_->MoveToNext();
if (data_->CurrentCharIsEOF())
break;
}
// Save the current character and move to the next element.
out->value.push_back(data_->GetCurrentCharacter());
data_->MoveToNext();
}
// There is no closing `.
out->end = data_->GetCurrentPosition();
message_ = "Unexpected EOF when parsing `...` (executed string)";
return false;
}
// Parses the following (see grammar.h for details):
// InterpretedString = '"' , { ByteCommon | "'" | "\" | ("\",'"') | ("\","`")
// | ("\","\") | ExecutedString } , '"' ;
// The current position must be on the opening ". It moves cursor to the first
// character after the closing ". If the string contains one or more
// ExecutedString, it is split into a sequence of consecutive tokens of types
// InterpretedString and ExecutedString. The resultant tokens are added to
// |tokens|. |tokens| must not be nullptr. Returns false in case of an error.
bool Scanner::ParseInterpretedString(std::vector<Token>* tokens) {
DCHECK(tokens != nullptr);
DCHECK(data_->CurrentCharIs('"'));
// Skip the opening ".
data_->MoveToNext();
// Create a sequence of alternating InterpretedString and ExecutedString
// tokens.
while (true) {
// Create a new InterpretedString token.
tokens->resize(tokens->size() + 1);
Token* out = &(tokens->back());
out->type = Token::Type::kInterpretedString;
out->begin = data_->GetCurrentPosition();
// Move forward until we find EOF, the closing " or the opening `.
while (true) {
if (data_->CurrentCharIs('"')) {
// The closing " was found.
out->end = data_->GetCurrentPosition();
data_->MoveToNext();
return true;
}
if (data_->CurrentCharIs('`')) {
// The opening ` was found. We finish the current token and
// add a new ExecutedString token.
out->end = data_->GetCurrentPosition();
if (!ParseExecutedString(tokens))
return false;
// We break the internal loop to create a new InterpretedString
// token.
break;
}
if (data_->CurrentCharIs('\\')) {
// It may be an escape character for " or `.
data_->MoveToNext();
if (data_->CurrentCharIsOneOf("\"`\\")) {
// The next character is " or `. Just skip \ and go ahead.
} else {
// It was not an escape character. We have to add a skipped \.
out->value.push_back('\\');
}
}
if (data_->CurrentCharIsEOF()) {
// There is no closing ".
out->end = data_->GetCurrentPosition();
message_ = "Unexpected EOF when parsing \"...\" (interpreted string)";
return false;
}
// Save the current character and move to the next element.
out->value.push_back(data_->GetCurrentCharacter());
data_->MoveToNext();
}
}
}
// Parses the following (see grammar.h for details):
// NativeString = { ByteNative | ("\",ByteAny) }- ;
// The current position must be on the first character of NativeString. It
// moves cursor to the first character after the end of the string. The
// resultant token is added to |tokens|. |tokens| must not be nullptr. Returns
// false in case of an error.
bool Scanner::ParseNativeString(std::vector<Token>* tokens) {
DCHECK(tokens != nullptr);
DCHECK(data_->CurrentCharIsByteNative() || data_->CurrentCharIs('\\'));
// Create a new token.
tokens->resize(tokens->size() + 1);
Token* out = &(tokens->back());
out->type = Token::Type::kNativeString;
out->begin = data_->GetCurrentPosition();
// Move forward until we find EOF or the end of the string.
while (!data_->CurrentCharIsEOF()) {
if (data_->CurrentCharIs('\\')) {
// This is an escape character.
data_->MoveToNext();
if (data_->CurrentCharIsEOF()) {
// It is an error: EOF after the escape character.
out->end = data_->GetCurrentPosition();
message_ = "Unexpected EOF after escape character (\\)";
return false;
}
// Add the escaped character to the string.
out->value.push_back(data_->GetCurrentCharacter());
// Go to the next character.
data_->MoveToNext();
continue;
}
// If the current character is not a ByteNative, we found the end of the
// string.
if (!data_->CurrentCharIsByteNative())
break;
// Save the current character and move to the next element.
out->value.push_back(data_->GetCurrentCharacter());
data_->MoveToNext();
}
// We are at EOF or at the first character not being part of the string.
out->end = data_->GetCurrentPosition();
return true;
}
bool Scanner::ParseWholeInput(std::vector<Token>* tokens) {
DCHECK(tokens != nullptr);
while (!data_->CurrentCharIsEOF()) {
// Check for different types of string.
if (data_->CurrentCharIs('\'')) {
if (!ParseLiteralString(tokens))
return false;
continue;
}
if (data_->CurrentCharIs('"')) {
if (!ParseInterpretedString(tokens))
return false;
continue;
}
if (data_->CurrentCharIs('`')) {
if (!ParseExecutedString(tokens))
return false;
continue;
}
if (data_->CurrentCharIsByteNative() || data_->CurrentCharIs('\\')) {
if (!ParseNativeString(tokens))
return false;
continue;
}
// Create a new token.
tokens->resize(tokens->size() + 1);
Token& token = tokens->back();
if (data_->CurrentCharIsOneOf(" \t")) {
// It is a Space token.
// Space = { " " | Tabulator }- ;
token.type = Token::Type::kSpace;
token.begin = data_->GetCurrentPosition();
// Move forward until we find the first character not being part of
// the Space token. It stops also at EOF.
while (data_->CurrentCharIsOneOf(" \t"))
data_->MoveToNext();
token.end = data_->GetCurrentPosition();
continue;
}
// Add a single character as a token.
token.type = Token::Type::kByte;
token.begin = data_->GetCurrentPosition();
data_->MoveToNext();
token.end = data_->GetCurrentPosition();
token.value.assign(token.begin, token.end);
}
// Add a special EOF token at the end.
tokens->resize(tokens->size() + 1);
tokens->back().type = Token::Type::kEOF;
tokens->back().begin = tokens->back().end = data_->GetCurrentPosition();
return true;
}
std::string::const_iterator Scanner::GetPosition() const {
return data_->GetCurrentPosition();
}
} // namespace foomatic_shell