// blob: 533d483abd3c0593b07480bc2501bf9219356ef8 [file] [log] [blame]
// Copyright 2020 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "foomatic_shell/parser.h"

#include <memory>
#include <string>
#include <vector>

#include <base/logging.h>
namespace foomatic_shell {
namespace {
// Returns true if |token| may be the first token of a PipeSegment.
// Corresponding grammar rules:
//  PipeSegment = ("(",Script,")") | Command ;
//  Command = {Variable,"=",StringAtom,Space}, Application, {Space,Parameter} ;
//  Variable = NativeString ;
//  Application = NativeString ;
//  Parameter = StringAtom ;
// Only the single token is inspected, so a true result does not guarantee
// that a complete PipeSegment follows.
bool MatchAPrefixOfPipeSegment(const Token& token) {
  // A "(" byte opens the sub-shell form: "(", Script, ")".
  if (token.type == Token::Type::kByte && *token.begin == '(')
    return true;
  // A Command starts with a Variable or an Application; both are
  // NativeString. Any other token cannot start a PipeSegment.
  return token.type == Token::Type::kNativeString;
}
} // namespace
// A cursor over the input token sequence. It keeps track of the current
// position (token) and offers predicates describing the token at that
// position.
class Parser::InputTokens {
 public:
  // Binds the cursor to |tokens| and places it on the first element. Only a
  // reference is stored, so |tokens| must stay valid and unchanged for the
  // whole lifetime of this object. |tokens| must not be empty and its last
  // element must be of type EOF.
  explicit InputTokens(const std::vector<Token>& tokens)
      : tokens_(tokens), current_(tokens_.begin()) {
    DCHECK(!tokens.empty());
    DCHECK(tokens.back().type == Token::Type::kEOF);
  }
  InputTokens(const InputTokens&) = delete;
  InputTokens(InputTokens&&) = delete;

  // Gives access to the token at the current position. The position never
  // moves past the final EOF token, so the reference is always valid.
  const Token& GetCurrentToken() const { return *current_; }

  // Returns true <=> the current token is of type EOF.
  bool CurrentTokenIsEOF() const {
    return GetCurrentToken().type == Token::Type::kEOF;
  }

  // Returns true <=> the current token is of type Space.
  bool CurrentTokenIsSpace() const {
    return GetCurrentToken().type == Token::Type::kSpace;
  }

  // Returns true <=> the current token is of type NativeString.
  bool CurrentTokenIsNativeString() const {
    return GetCurrentToken().type == Token::Type::kNativeString;
  }

  // Returns true <=> the current token is any kind of string: LiteralString,
  // ExecutedString, InterpretedString or NativeString.
  bool CurrentTokenIsAnyString() const {
    switch (GetCurrentToken().type) {
      case Token::Type::kExecutedString:
      case Token::Type::kInterpretedString:
      case Token::Type::kLiteralString:
      case Token::Type::kNativeString:
        return true;
      default:
        return false;
    }
  }

  // Returns true <=> the current token is of type Byte and the byte it points
  // to equals |c|.
  bool CurrentTokenIsByte(char c) const {
    const Token& token = GetCurrentToken();
    // The byte is dereferenced only for tokens of type Byte.
    if (token.type != Token::Type::kByte)
      return false;
    return *token.begin == c;
  }

  // Advances the current position by one token. Does nothing when the current
  // token is of type EOF.
  void MoveToNext() {
    if (CurrentTokenIsEOF())
      return;
    ++current_;
  }

  // Steps the current position back by one token. If the current position
  // already points to the first token in the sequence, it does nothing.
  void ReturnToPrevious() {
    if (current_ == tokens_.begin())
      return;
    --current_;
  }

 private:
  // The input sequence (not owned).
  const std::vector<Token>& tokens_;
  // The current position inside |tokens_|.
  std::vector<Token>::const_iterator current_;
};
Parser::Parser(const std::vector<Token>& tokens)
: tokens_(std::make_unique<InputTokens>(tokens)) {}
// Defined in this file, where InputTokens is a complete type (presumably
// required to destroy |tokens_| - confirm against the header).
Parser::~Parser() = default;
// Parses the whole input as a single Script and stores it in |out|, which
// must not be nullptr. Returns true only if the Script was parsed correctly
// and the parser stopped at the EOF token. When the Script itself is valid
// but some tokens are left over, an error message is set and false is
// returned.
bool Parser::ParseWholeInput(Script* out) {
  DCHECK(out != nullptr);
  const bool script_parsed = ParseScript(out);
  // Leftover tokens after a valid Script mean the input as a whole is invalid.
  if (script_parsed && !tokens_->CurrentTokenIsEOF()) {
    message_ = "Not everything was parsed";
    return false;
  }
  return script_parsed;
}
std::string::const_iterator Parser::GetPosition() const {
return tokens_->GetCurrentToken().begin;
}
// Wraps ParseScriptImpl(...) to bound the nesting of (...) operators
// (sub-shell invocations). A call made while |script_recursion_level_| is
// already greater than 4 fails with an error message. Otherwise the level is
// raised by one for the duration of ParseScriptImpl(...) and restored
// afterwards, whatever the parsing result.
bool Parser::ParseScript(Script* out) {
  if (script_recursion_level_ <= 4) {
    ++script_recursion_level_;
    const bool parsed = ParseScriptImpl(out);
    --script_recursion_level_;
    return parsed;
  }
  message_ = "Too many recursive shells executions";
  return false;
}
// Parses the following (see grammar.h for details):
//  Script = OptSpace, {SepP,OptSpace}, Pipeline,
//           { {SepP,OptSpace}-, Pipeline }, {SepP,OptSpace} ;
// or:
//  Script = OptSpace , { SepP , OptSpace } ;
// where the separators accepted here are the bytes '\n' and ';'.
// On success the current position is moved to the first token after the end
// of the whole Script and the parsed Pipelines are stored in |out|. |out| must
// point to an empty Script structure. Returns false in case of an error.
bool Parser::ParseScriptImpl(Script* out) {
  DCHECK(out != nullptr);

  // True when the current token is a pipeline separator.
  const auto at_separator = [this]() {
    return tokens_->CurrentTokenIsByte('\n') ||
           tokens_->CurrentTokenIsByte(';');
  };
  // Consumes a single Space token, if there is one (OptSpace).
  const auto skip_optional_space = [this]() {
    if (tokens_->CurrentTokenIsSpace())
      tokens_->MoveToNext();
  };
  // Consumes a run of separators, each optionally followed by a Space:
  // {SepP,OptSpace}.
  const auto skip_separators = [&]() {
    while (at_separator()) {
      tokens_->MoveToNext();
      skip_optional_space();
    }
  };
  // True when the current token may start a Pipeline.
  const auto at_pipeline_start = [this]() {
    return MatchAPrefixOfPipeSegment(tokens_->GetCurrentToken());
  };

  // Leading part shared by both definitions: OptSpace, {SepP,OptSpace}
  skip_optional_space();
  skip_separators();

  // No Pipeline ahead: this is the second (shorter) definition and the Script
  // is complete.
  if (!at_pipeline_start())
    return true;

  // The first Pipeline.
  out->pipelines.resize(1);
  if (!ParsePipeline(&out->pipelines.back()))
    return false;

  // Remaining part: { {SepP,OptSpace}-, Pipeline }, {SepP,OptSpace}
  for (;;) {
    // Another Pipeline must be preceded by at least one separator.
    if (!at_separator())
      return true;
    skip_separators();
    // Separators not followed by a Pipeline were the trailing
    // {SepP,OptSpace}; the Script ends here.
    if (!at_pipeline_start())
      return true;
    out->pipelines.emplace_back();
    if (!ParsePipeline(&out->pipelines.back()))
      return false;
  }
}
// Parses the following (see grammar.h for details):
//  Pipeline = PipeSegment, OptSpace, {"|",OptSpace,PipeSegment,OptSpace} ;
// The current token must be the first token of a Pipeline. On success the
// current position is moved to the first token after the end of the whole
// Pipeline and the parsed segments are stored in |out|. |out| must point to
// an empty Pipeline structure. Returns false in case of an error.
bool Parser::ParsePipeline(Pipeline* out) {
  DCHECK(out != nullptr);
  DCHECK(MatchAPrefixOfPipeSegment(tokens_->GetCurrentToken()));

  // Consumes a single Space token, if there is one (OptSpace).
  const auto skip_optional_space = [this]() {
    if (tokens_->CurrentTokenIsSpace())
      tokens_->MoveToNext();
  };

  // The mandatory first segment: PipeSegment, OptSpace
  out->segments.resize(1);
  if (!ParsePipeSegment(&out->segments.back()))
    return false;
  skip_optional_space();

  // Every "|" must be followed by another segment:
  // {"|",OptSpace,PipeSegment,OptSpace}
  while (tokens_->CurrentTokenIsByte('|')) {
    tokens_->MoveToNext();
    skip_optional_space();
    const bool segment_follows =
        MatchAPrefixOfPipeSegment(tokens_->GetCurrentToken());
    if (!segment_follows) {
      message_ = "Missing Pipe Segment after |";
      return false;
    }
    out->segments.emplace_back();
    if (!ParsePipeSegment(&out->segments.back()))
      return false;
    skip_optional_space();
  }
  return true;
}
// Parses the following (see grammar.h for details):
//  PipeSegment = ("(",Script,")") | Command ;
// The current token must be the first token of a PipeSegment. On success the
// current position is moved to the first token after the end of the whole
// PipeSegment. Depending on the matched alternative, the result is stored in
// |out->script| or in |out->command|. |out| must point to an empty
// PipeSegment structure. Returns false in case of an error.
bool Parser::ParsePipeSegment(PipeSegment* out) {
  DCHECK(out != nullptr);
  DCHECK(MatchAPrefixOfPipeSegment(tokens_->GetCurrentToken()));

  // Anything that does not start with "(" is parsed as a Command.
  if (!tokens_->CurrentTokenIsByte('(')) {
    out->command = std::make_unique<Command>();
    return ParseCommand(out->command.get());
  }

  // Sub-shell: "(", Script, ")"
  tokens_->MoveToNext();
  out->script = std::make_unique<Script>();
  if (!ParseScript(out->script.get()))
    return false;
  if (tokens_->CurrentTokenIsByte(')')) {
    tokens_->MoveToNext();
    return true;
  }
  message_ = "Missing closing parenthesis )";
  return false;
}
// Parses the following (see grammar.h for details):
//  Command = {Variable,"=",StringAtom,Space}, Application, {Space,Parameter} ;
// The current token must be of type NativeString. On success the current
// position is moved to the first token after the end of the whole command
// statement and the parsed command is stored in |out|. |out| must point to an
// empty Command structure. Returns false in case of an error.
bool Parser::ParseCommand(Command* out) {
  DCHECK(out != nullptr);
  DCHECK(tokens_->CurrentTokenIsNativeString());

  // Leading part: {Variable,"=",StringAtom,Space}, Application
  for (;;) {
    // The current token is a NativeString. The token after it decides its
    // role: followed by "=" it is a Variable, otherwise it is the Application.
    const Token& name = tokens_->GetCurrentToken();
    tokens_->MoveToNext();
    if (!tokens_->CurrentTokenIsByte('=')) {
      // The current position is already right after the Application; the
      // parameters are parsed below.
      out->application = name;
      break;
    }

    // Variable assignment; the remaining part is: "=",StringAtom,Space
    tokens_->MoveToNext();
    // NOTE(review): a parameter may start with "=" (see below) but an
    // assigned value may not - confirm this asymmetry is intended.
    if (!tokens_->CurrentTokenIsAnyString()) {
      message_ = "Variable assignment with missing value";
      return false;
    }
    out->variables_with_values.emplace_back();
    auto& assignment = out->variables_with_values.back();
    assignment.variable = name;
    ParseString(&assignment.new_value);

    // The assignment must be terminated by a Space...
    if (!tokens_->CurrentTokenIsSpace()) {
      message_ = "Unexpected token after variable assignment";
      return false;
    }
    tokens_->MoveToNext();
    // ...and followed by the next Variable or by the Application. Both are
    // NativeString.
    if (!tokens_->CurrentTokenIsNativeString()) {
      message_ = "Missing command";
      return false;
    }
  }

  // Trailing part: {Space,Parameter}
  while (tokens_->CurrentTokenIsSpace()) {
    tokens_->MoveToNext();
    const bool parameter_follows = tokens_->CurrentTokenIsAnyString() ||
                                   tokens_->CurrentTokenIsByte('=');
    if (!parameter_follows) {
      // The Space does not belong to the Command; give it back to the caller.
      tokens_->ReturnToPrevious();
      break;
    }
    out->parameters.emplace_back();
    ParseString(&out->parameters.back());
  }
  return true;
}
// Parses the following (see grammar.h for details):
//  StringAtom = { LiteralString | ExecutedString | InterpretedString
//               | NativeString | "=" }- ;
// The current token must be the first token of the string. Consecutive string
// tokens and "=" bytes are appended to |out->components| and the current
// position is moved to the first token that is neither of them. |out| must
// point to an empty StringAtom structure.
void Parser::ParseString(StringAtom* out) {
  DCHECK(out != nullptr);
  DCHECK(tokens_->CurrentTokenIsAnyString() ||
         tokens_->CurrentTokenIsByte('='));
  for (;;) {
    const bool is_component = tokens_->CurrentTokenIsAnyString() ||
                              tokens_->CurrentTokenIsByte('=');
    if (!is_component)
      return;
    out->components.push_back(tokens_->GetCurrentToken());
    tokens_->MoveToNext();
  }
}
} // namespace foomatic_shell