// Copyright 2020 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "foomatic_shell/scanner.h"

#include <base/logging.h>
#include <string>
#include <vector>

namespace foomatic_shell {

// A read-only cursor over the input string: it pairs a reference to the
// string with an iterator marking the current position. Every query treats
// the end of the string as EOF instead of dereferencing the end iterator.
class Scanner::Input {
 public:
  // Constructor. |data| is stored by reference, so the input string must
  // remain constant and valid during the lifetime of the object. The current
  // position is set to the first element in |data|.
  explicit Input(const std::string& data)
      : data_(data), current_(data_.begin()) {}

  Input(const Input&) = delete;
  Input(Input&&) = delete;

  // Returns the iterator to the current position. The iterator is from the
  // input string given in the constructor and is always valid (but may be
  // equal to |data.end()|).
  std::string::const_iterator GetCurrentPosition() const { return current_; }

  // Returns the value of the current character. If the current position is
  // set to |data.end()|, this method returns '\0'.
  char GetCurrentCharacter() const {
    return CurrentCharIsEOF() ? '\0' : *current_;
  }

  // Returns true <=> the character at the current position equals |c|. If the
  // current position is set to |data.end()|, it returns false.
  bool CurrentCharIs(char c) const {
    return !CurrentCharIsEOF() && *current_ == c;
  }

  // Returns true <=> the character at the current position is ByteNative (see
  // grammar.h for details), i.e. an ASCII letter, an ASCII digit or one of
  // the characters ./_+-@%. If the current position is set to |data.end()|,
  // it returns false.
  bool CurrentCharIsByteNative() const {
    if (CurrentCharIsEOF())
      return false;
    const char c = *current_;
    const bool is_alphanumeric = (c >= 'A' && c <= 'Z') ||
                                 (c >= 'a' && c <= 'z') ||
                                 (c >= '0' && c <= '9');
    if (is_alphanumeric)
      return true;
    // A switch avoids constructing a temporary std::string for every scanned
    // character just to look up the handful of allowed punctuation marks.
    switch (c) {
      case '.':
      case '/':
      case '_':
      case '+':
      case '-':
      case '@':
      case '%':
        return true;
      default:
        return false;
    }
  }

  // Returns true <=> the character at the current position equals one of the
  // elements of |chars|. If the current position is set to |data.end()|,
  // it returns false.
  bool CurrentCharIsOneOf(const std::string& chars) const {
    return !CurrentCharIsEOF() && chars.find(*current_) != std::string::npos;
  }

  // Returns true <=> the current position is set to |data.end()|.
  bool CurrentCharIsEOF() const { return current_ == data_.end(); }

  // Moves the current position to the next element. If the current position
  // is set to |data.end()|, it does nothing.
  void MoveToNext() {
    if (!CurrentCharIsEOF())
      ++current_;
  }

 private:
  // The scanned string; not owned.
  const std::string& data_;
  // Current position inside |data_|; always in the range [begin, end].
  std::string::const_iterator current_;
};

// Creates a scanner positioned at the first character of |data|. The internal
// Input object keeps a reference to |data|, so the string must remain valid
// and unchanged for as long as this Scanner is in use.
Scanner::Scanner(const std::string& data)
    : data_(std::make_unique<Input>(data)) {}

// Defined out-of-line in this file, where Input is a complete type.
Scanner::~Scanner() = default;

// Parses the following (see grammar.h for details):
//   LiteralString = c' *( ByteCommon | c" | c` | c\ ) c'
// The current position must be one the opening '. It moves cursor to the first
// character after the closing '. The resultant token is added to |tokens|.
// |tokens| must not be nullptr. Returns false in case of an error.
bool Scanner::ParseLiteralString(std::vector<Token>* tokens) {
  DCHECK(tokens != nullptr);
  DCHECK(data_->CurrentCharIs('\''));

  // Skip the opening '.
  data_->MoveToNext();

  // Create a new token.
  tokens->resize(tokens->size() + 1);
  Token* out = &(tokens->back());
  out->type = Token::Type::kLiteralString;
  out->begin = data_->GetCurrentPosition();

  // Move forward until we find EOF or the closing '.
  while (!data_->CurrentCharIsEOF()) {
    if (data_->CurrentCharIs('\'')) {
      // The closing ' was found.
      out->end = data_->GetCurrentPosition();
      out->value.assign(out->begin, out->end);
      // Skip the closing '.
      data_->MoveToNext();
      // Success!
      return true;
    }
    data_->MoveToNext();
  }

  // There is no closing '.
  out->end = data_->GetCurrentPosition();
  message_ = "Unexpected EOF when parsing '...' (literal string)";
  return false;
}

// Parses the following (see grammar.h for details):
//   ExecutedString = c` *( ByteCommon | c' | c" | c\ ByteAny ) c`
// The current position must be one the opening `. It moves cursor to the first
// character after the closing `. The resultant token is added to |tokens|.
// |tokens| must not be nullptr. Returns false in case of an error.
bool Scanner::ParseExecutedString(std::vector<Token>* tokens) {
  DCHECK(tokens != nullptr);
  DCHECK(data_->CurrentCharIs('`'));

  // Skip the opening `.
  data_->MoveToNext();

  // Create a new token.
  tokens->resize(tokens->size() + 1);
  Token* out = &(tokens->back());
  out->type = Token::Type::kExecutedString;
  out->begin = data_->GetCurrentPosition();

  // Move forward until we find EOF or the closing `.
  while (!data_->CurrentCharIsEOF()) {
    if (data_->CurrentCharIs('`')) {
      // The closing ` was found.
      out->end = data_->GetCurrentPosition();
      // Skip the closing `.
      data_->MoveToNext();
      // Success!
      return true;
    }
    // The escape character (\) works in ExecutedString for ByteAny.
    if (data_->CurrentCharIs('\\')) {
      data_->MoveToNext();
      if (data_->CurrentCharIsEOF())
        break;
    }
    // Save the current character and move to the next element.
    out->value.push_back(data_->GetCurrentCharacter());
    data_->MoveToNext();
  }

  // There is no closing `.
  out->end = data_->GetCurrentPosition();
  message_ = "Unexpected EOF when parsing `...` (executed string)";
  return false;
}

// Parses the following (see grammar.h for details):
//   InterpretedString = c" *( ByteCommon | c' | c\ | c\ c" | c\ c` | c\ c\ |
//                           | ExecutedString ) c"
// The current position must be one the opening ". It moves cursor to the first
// character after the closing ". If the string contains one or more
// ExecutedString, it is split into a sequence of consecutive tokens of types
// InterpretedString and ExecutedString. The resultant tokens are added to
// |tokens|. |tokens| must not be nullptr. Returns false in case of an error.
bool Scanner::ParseInterpretedString(std::vector<Token>* tokens) {
  DCHECK(tokens != nullptr);
  DCHECK(data_->CurrentCharIs('"'));

  // Skip the opening ".
  data_->MoveToNext();

  // Create a sequence of alternating InterpretedString and ExecutedString
  // tokens.
  while (true) {
    // Create a new InterpretedString token.
    tokens->resize(tokens->size() + 1);
    Token* out = &(tokens->back());
    out->type = Token::Type::kInterpretedString;
    out->begin = data_->GetCurrentPosition();

    // Move forward until we find EOF, the closing " or the opening `.
    while (true) {
      if (data_->CurrentCharIs('"')) {
        // The closing " was found.
        out->end = data_->GetCurrentPosition();
        data_->MoveToNext();
        return true;
      }
      if (data_->CurrentCharIs('`')) {
        // The opening ` was found. We finish the current token and
        // add a new ExecutedString token.
        out->end = data_->GetCurrentPosition();
        if (!ParseExecutedString(tokens))
          return false;
        // We break the internal loop to create a new InterpretedString
        // token.
        break;
      }
      if (data_->CurrentCharIs('\\')) {
        // It may be an escape character for " or `.
        data_->MoveToNext();
        if (data_->CurrentCharIsOneOf("\"`\\")) {
          // The next character is " or `. Just skip \ and go ahead.
        } else {
          // It was not an escape character. We have to add a skipped \.
          out->value.push_back('\\');
        }
      }
      if (data_->CurrentCharIsEOF()) {
        // There is no closing ".
        out->end = data_->GetCurrentPosition();
        message_ = "Unexpected EOF when parsing \"...\" (interpreted string)";
        return false;
      }
      // Save the current character and move to the next element.
      out->value.push_back(data_->GetCurrentCharacter());
      data_->MoveToNext();
    }
  }
}

// Parses the following (see grammar.h for details):
//   NativeString = +( ByteNative | c\ ByteAny )
// The current position must be one the first character of NativeString. It
// moves cursor to the first character after the end of the string. The
// resultant token is added to |tokens|. |tokens| must not be nullptr. Returns
// false in case of an error.
bool Scanner::ParseNativeString(std::vector<Token>* tokens) {
  DCHECK(tokens != nullptr);
  DCHECK(data_->CurrentCharIsByteNative() || data_->CurrentCharIs('\\'));

  // Create a new token.
  tokens->resize(tokens->size() + 1);
  Token* out = &(tokens->back());
  out->type = Token::Type::kNativeString;
  out->begin = data_->GetCurrentPosition();

  // Move forward until we find EOF or the end of the string.
  while (!data_->CurrentCharIsEOF()) {
    if (data_->CurrentCharIs('\\')) {
      // This is an escape character.
      data_->MoveToNext();
      if (data_->CurrentCharIsEOF()) {
        // It is an error: EOF after the escape character.
        out->end = data_->GetCurrentPosition();
        message_ = "Unexpected EOF after escape character (\\)";
        return false;
      }
      // Add the escaped character to the string.
      out->value.push_back(data_->GetCurrentCharacter());
      // Go to the next character.
      data_->MoveToNext();
      continue;
    }

    // If the current character is not a ByteNative, we found the end of the
    // string.
    if (!data_->CurrentCharIsByteNative())
      break;

    // Save the current character and move to the next element.
    out->value.push_back(data_->GetCurrentCharacter());
    data_->MoveToNext();
  }

  // We are at EOF or at the first character not being part of the string.
  out->end = data_->GetCurrentPosition();
  return true;
}

bool Scanner::ParseWholeInput(std::vector<Token>* tokens) {
  DCHECK(tokens != nullptr);

  while (!data_->CurrentCharIsEOF()) {
    // Check for different types of string.
    if (data_->CurrentCharIs('\'')) {
      if (!ParseLiteralString(tokens))
        return false;
      continue;
    }
    if (data_->CurrentCharIs('"')) {
      if (!ParseInterpretedString(tokens))
        return false;
      continue;
    }
    if (data_->CurrentCharIs('`')) {
      if (!ParseExecutedString(tokens))
        return false;
      continue;
    }
    if (data_->CurrentCharIsByteNative() || data_->CurrentCharIs('\\')) {
      if (!ParseNativeString(tokens))
        return false;
      continue;
    }

    // Create a new token.
    tokens->resize(tokens->size() + 1);
    Token& token = tokens->back();

    if (data_->CurrentCharIsOneOf(" \t")) {
      // It is a Space token.
      //  Space = +( ByteSpace )
      token.type = Token::Type::kSpace;
      token.begin = data_->GetCurrentPosition();
      // Move forward until we find the first character not being part of
      // the Space token. It stops also at EOF.
      while (data_->CurrentCharIsOneOf(" \t"))
        data_->MoveToNext();
      token.end = data_->GetCurrentPosition();
      continue;
    }

    // Add a single character as a token.
    token.type = Token::Type::kByte;
    token.begin = data_->GetCurrentPosition();
    data_->MoveToNext();
    token.end = data_->GetCurrentPosition();
    token.value.assign(token.begin, token.end);
  }

  // Add a special EOF token at the end.
  tokens->resize(tokens->size() + 1);
  tokens->back().type = Token::Type::kEOF;
  tokens->back().begin = tokens->back().end = data_->GetCurrentPosition();
  return true;
}

// Returns the scanner's current position as an iterator into the input string
// given to the constructor. It may be equal to the end of that string.
std::string::const_iterator Scanner::GetPosition() const {
  std::string::const_iterator position = data_->GetCurrentPosition();
  return position;
}

}  // namespace foomatic_shell
