| // Copyright 2020 The Chromium OS Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "foomatic_shell/parser.h" |
| |
| #include <string> |
| #include <vector> |
| |
| #include <base/logging.h> |
| |
| namespace foomatic_shell { |
| |
| namespace { |
| |
| // Returns true if |token| may be a prefix of PipeSegment. |
| // Corresponding grammar rules: |
| // Command = {Variable,"=",StringAtom,Space}, Application, {Space,Parameter} ; |
| // Variable = NativeString ; |
| // Application = NativeString ; |
| // Parameter = StringAtom ; |
| bool MatchAPrefixOfPipeSegment(const Token& token) { |
| // Check if the token may be a prefix of: "(", Script, ")" |
| if (token.type == Token::kByte && *token.begin == '(') |
| return true; |
| // Check if the token may be a prefix of a Command. |
| if (token.type == Token::kNativeString) |
| return true; |
| // It cannot be a PipeSegment. |
| return false; |
| } |
| |
| } // namespace |
| |
| // This class encapsulates an iterator representing the current position |
| // (token) in the input sequence. |
| class Parser::InputTokens { |
| public: |
| // Constructor. |tokens| is a reference to the input sequence. The input |
| // sequence must remain constant and valid during the lifetime of the object. |
| // The current position is set to the first element in the |tokens|. The last |
| // token in the sequence must be of type EOF. |
| explicit InputTokens(const std::vector<Token>& tokens) |
| : tokens_(tokens), current_(tokens_.begin()) { |
| DCHECK(!tokens.empty()); |
| DCHECK(tokens.back().type == Token::Type::kEOF); |
| } |
| |
| InputTokens(const InputTokens&) = delete; |
| InputTokens(InputTokens&&) = delete; |
| |
| // Returns the reference to the current token. It is always valid. |
| const Token& GetCurrentToken() const { return *current_; } |
| |
| // Returns true <=> the current token is of type EOF. |
| bool CurrentTokenIsEOF() const { |
| return (current_->type == Token::Type::kEOF); |
| } |
| |
| // Returns true <=> the current token is of type Space. |
| bool CurrentTokenIsSpace() const { |
| return (current_->type == Token::Type::kSpace); |
| } |
| |
| // Returns true <=> the current token is of type NativeString. |
| bool CurrentTokenIsNativeString() const { |
| return (current_->type == Token::Type::kNativeString); |
| } |
| |
| // Returns true <=> the current token is one of the types: LiteralString, |
| // ExecutedString, InterpretedString, NativeString. |
| bool CurrentTokenIsAnyString() const { |
| return (current_->type == Token::Type::kExecutedString || |
| current_->type == Token::Type::kInterpretedString || |
| current_->type == Token::Type::kLiteralString || |
| current_->type == Token::Type::kNativeString); |
| } |
| |
| // Returns true <=> the current token is of type Byte and its value equals |
| // |c|. |
| bool CurrentTokenIsByte(char c) const { |
| return (current_->type == Token::Type::kByte && *(current_->begin) == c); |
| } |
| |
| // Moves the current position to the next token. If the current token is of |
| // type EOF, it does nothing. |
| void MoveToNext() { |
| if (current_->type != Token::Type::kEOF) |
| ++current_; |
| } |
| |
| // Moves the current position to the previous token. It the current position |
| // points to the first token in the sequence, it does nothing. |
| void ReturnToPrevious() { |
| if (current_ != tokens_.begin()) |
| --current_; |
| } |
| |
| private: |
| const std::vector<Token>& tokens_; |
| std::vector<Token>::const_iterator current_; |
| }; |
| |
| Parser::Parser(const std::vector<Token>& tokens) |
| : tokens_(std::make_unique<InputTokens>(tokens)) {} |
| |
| Parser::~Parser() {} |
| |
| bool Parser::ParseWholeInput(Script* out) { |
| DCHECK(out != nullptr); |
| |
| if (!ParseScript(out)) |
| return false; |
| if (!tokens_->CurrentTokenIsEOF()) { |
| message_ = "Not everything was parsed"; |
| return false; |
| } |
| return true; |
| } |
| |
| std::string::const_iterator Parser::GetPosition() const { |
| return tokens_->GetCurrentToken().begin; |
| } |
| |
| // This is a wrapper around ParseScriptImpl(...) to limit the number of |
| // recursive (...) operators (sub-shells invocations). |
| bool Parser::ParseScript(Script* out) { |
| if (script_recursion_level_ > 4) { |
| message_ = "Too many recursive shells executions"; |
| return false; |
| } |
| |
| ++script_recursion_level_; |
| const bool result = ParseScriptImpl(out); |
| --script_recursion_level_; |
| return result; |
| } |
| |
| // Parses the following (see grammar.h for details): |
| // Script = OptSpace, {SepP,OptSpace}, Pipeline, |
| // { {SepP,OptSpace}-, Pipeline }, {SepP,OptSpace} ; |
| // or: |
| // Script = OptSpace , { SepP , OptSpace } ; |
| // If succeed, the method shifts the current position to the first token after |
| // the end of a whole Script. The resultant Script is saved in |out|. |out| must |
| // contain a pointer to an empty Script structure. Returns false in case of an |
| // error. |
| bool Parser::ParseScriptImpl(Script* out) { |
| DCHECK(out != nullptr); |
| |
| // Parsing: OptSpace |
| if (tokens_->CurrentTokenIsSpace()) |
| tokens_->MoveToNext(); |
| |
| // Parsing: { SepP , OptSpace } |
| while (tokens_->CurrentTokenIsByte('\n') || |
| tokens_->CurrentTokenIsByte(';')) { |
| tokens_->MoveToNext(); |
| if (tokens_->CurrentTokenIsSpace()) |
| tokens_->MoveToNext(); |
| } |
| |
| // If the next token matches a Pipeline prefix, we go forward with the first |
| // Script definition. Otherwise, we match the second Script definition (the |
| // shorter one) and finish here with success. |
| if (!MatchAPrefixOfPipeSegment(tokens_->GetCurrentToken())) |
| return true; |
| |
| // Parsing: Pipeline |
| out->pipelines.resize(1); |
| if (!ParsePipeline(&out->pipelines.back())) |
| return false; |
| |
| // Parsing: { {SepP,OptSpace}-, Pipeline }, {SepP,OptSpace} |
| while (tokens_->CurrentTokenIsByte('\n') || |
| tokens_->CurrentTokenIsByte(';')) { |
| // Parsing: {SepP,OptSpace}- or {SepP,OptSpace} |
| do { |
| tokens_->MoveToNext(); |
| if (tokens_->CurrentTokenIsSpace()) |
| tokens_->MoveToNext(); |
| } while (tokens_->CurrentTokenIsByte('\n') || |
| tokens_->CurrentTokenIsByte(';')); |
| |
| // If the next token is not a prefix of a Pipeline, we reach the end of |
| // the Script. |
| if (!MatchAPrefixOfPipeSegment(tokens_->GetCurrentToken())) |
| break; |
| |
| // Parsing: Pipeline |
| out->pipelines.emplace_back(); |
| if (!ParsePipeline(&out->pipelines.back())) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // Parses the following (see grammar.h for details): |
| // Pipeline = PipeSegment, OptSpace, {"|",OptSpace,PipeSegment,OptSpace} ; |
| // The current token must be a first token of a Pipeline. If succeed, the |
| // method shifts the current position to the first token after the end of a |
| // whole Pipeline. The resultant Pipeline is saved in |out|. |out| must |
| // contain a pointer to an empty Pipeline structure. Returns false in case |
| // of an error. |
| bool Parser::ParsePipeline(Pipeline* out) { |
| DCHECK(out != nullptr); |
| DCHECK(MatchAPrefixOfPipeSegment(tokens_->GetCurrentToken())); |
| |
| // Parsing: PipeSegment |
| out->segments.resize(1); |
| if (!ParsePipeSegment(&out->segments.back())) |
| return false; |
| |
| // Parsing: OptSpace |
| if (tokens_->CurrentTokenIsSpace()) |
| tokens_->MoveToNext(); |
| |
| // Parsing: {"|",OptSpace,PipeSegment,OptSpace} |
| while (tokens_->CurrentTokenIsByte('|')) { |
| tokens_->MoveToNext(); |
| // Parsing: OptSpace |
| if (tokens_->CurrentTokenIsSpace()) |
| tokens_->MoveToNext(); |
| // Parsing: PipeSegment |
| if (!MatchAPrefixOfPipeSegment(tokens_->GetCurrentToken())) { |
| message_ = "Missing Pipe Segment after |"; |
| return false; |
| } |
| out->segments.emplace_back(); |
| if (!ParsePipeSegment(&out->segments.back())) |
| return false; |
| // Parsing: OptSpace |
| if (tokens_->CurrentTokenIsSpace()) |
| tokens_->MoveToNext(); |
| } |
| |
| return true; |
| } |
| |
| // Parses the following (see grammar.h for details): |
| // PipeSegment = ("(",Script,")") | Command ; |
| // The current token must be a first token of a PipeSegment. If succeed, the |
| // method shifts the current position to the first token after the end of a |
| // whole PipeSegment. The resultant PipeSegment is saved in |out|. |out| must |
| // contain a pointer to an empty PipeSegment structure. Returns false in case |
| // of an error. |
| bool Parser::ParsePipeSegment(PipeSegment* out) { |
| DCHECK(out != nullptr); |
| DCHECK(MatchAPrefixOfPipeSegment(tokens_->GetCurrentToken())); |
| |
| if (tokens_->CurrentTokenIsByte('(')) { |
| // Parsing: "(", Script, ")" |
| tokens_->MoveToNext(); |
| out->script = std::make_unique<Script>(); |
| if (!ParseScript(out->script.get())) |
| return false; |
| if (!tokens_->CurrentTokenIsByte(')')) { |
| message_ = "Missing closing parenthesis )"; |
| return false; |
| } |
| tokens_->MoveToNext(); |
| return true; |
| } |
| |
| // Parsing: Command |
| out->command = std::make_unique<Command>(); |
| return ParseCommand(out->command.get()); |
| } |
| |
| // Parses the following (see grammar.h for details): |
| // Command = {Variable,"=",StringAtom,Space}, Application, {Space,Parameter} ; |
| // The current token must be of type NativeString. If succeed, the method |
| // shifts the current position to the first token after the end of a whole |
| // command statement. The resultant command is saved in |out|. |out| must |
| // contain a pointer to an empty Command structure. Returns false in case of |
| // an error. |
| bool Parser::ParseCommand(Command* out) { |
| DCHECK(out != nullptr); |
| DCHECK(tokens_->CurrentTokenIsNativeString()); |
| |
| // Parsing: {Variable,"=",StringAtom,Space}, Application |
| while (true) { |
| // Save the current token (NativeString) and check the next one. |
| // If the next token is "=", we are inside variable definition: |
| // Variable,"=",StringAtom,Space |
| // Otherwise, we just parsed Application. |
| const Token& first = tokens_->GetCurrentToken(); |
| tokens_->MoveToNext(); |
| if (tokens_->CurrentTokenIsByte('=')) { |
| // The token |first| is a Variable. |
| // Parsing: "=",StringAtom,Space |
| tokens_->MoveToNext(); |
| if (!tokens_->CurrentTokenIsAnyString()) { |
| message_ = "Variable assignment with missing value"; |
| return false; |
| } |
| // Save the variable and parse its value. |
| out->variables_with_values.emplace_back(); |
| out->variables_with_values.back().variable = first; |
| ParseString(&out->variables_with_values.back().new_value); |
| // Now we expect Space. |
| if (!tokens_->CurrentTokenIsSpace()) { |
| message_ = "Unexpected token after variable assignment"; |
| return false; |
| } |
| tokens_->MoveToNext(); |
| // The next token must be a Variable or an Application. |
| // Both are NativeString. |
| if (!tokens_->CurrentTokenIsNativeString()) { |
| message_ = "Missing command"; |
| return false; |
| } |
| } else { |
| // The token |first| is an Application. |
| out->application = first; |
| // The current token is the first token after the Application. |
| // Exit the loop and parse parameters. |
| break; |
| } |
| } |
| |
| // Parsing: {Space,Parameter} |
| while (true) { |
| // If the current token is not a Space, it does not match. |
| if (!tokens_->CurrentTokenIsSpace()) |
| break; |
| // It is a Space, check the next token. |
| tokens_->MoveToNext(); |
| // If the next token is a beginning of StringAtom, we have next parameter. |
| // If not, we have to move back (to return a Space token) and exit. |
| if (!tokens_->CurrentTokenIsAnyString() && |
| !tokens_->CurrentTokenIsByte('=')) { |
| tokens_->ReturnToPrevious(); |
| break; |
| } |
| // It is a parameter, let's parse it. |
| out->parameters.emplace_back(); |
| ParseString(&out->parameters.back()); |
| } |
| |
| return true; |
| } |
| |
| // Parses the following (see grammar.h for details): |
| // StringAtom = { LiteralString | ExecutedString | InterpretedString |
| // | NativeString | "=" }- ; |
| // The current token must be the first token of the string. The method shifts |
| // the current position to the first token after the end of the string. The |
| // resultant string is saved in |out|. |out| must contain pointer to the empty |
| // StringAtom structure. |
| void Parser::ParseString(StringAtom* out) { |
| DCHECK(out != nullptr); |
| DCHECK(tokens_->CurrentTokenIsAnyString() || |
| tokens_->CurrentTokenIsByte('=')); |
| |
| while (tokens_->CurrentTokenIsAnyString() || |
| tokens_->CurrentTokenIsByte('=')) { |
| out->components.push_back(tokens_->GetCurrentToken()); |
| tokens_->MoveToNext(); |
| } |
| } |
| |
| } // namespace foomatic_shell |