# blob: 7e6ceee45080adb9025e0de774d6ffc3cc836bc7 [file] [log] [blame] [edit]
# Copyright 2022 The ChromiumOS Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Parser for script shebangs."""
import re
from typing import NamedTuple, Optional, Union
# Extract a script's shebang: group 1 is the interpreter and group 2 is
# whatever follows it on the same line (trailing whitespace included).
# The gaps are matched with [^\S\n] (whitespace other than a newline) rather
# than \s so that, when given multi-line data, the match can never run past
# the shebang line and pick up text from the line after it.
RE = re.compile(rb"^#![^\S\n]*(\S+)[^\S\n]*(.*)$", flags=re.M)
class Shebang(NamedTuple):
    """The interpreter and argument string taken from a script's #! line."""

    # Program named on the #! line.
    command: str

    # Text following the program on the #! line, if any.
    args: Optional[str]

    @property
    def real_command(self) -> str:
        """Return the program being run, looking through env wrappers.

        When the command is /bin/env or /usr/bin/env and arguments are
        present, the argument string is returned in its entirety.  In every
        other case the command itself is returned.
        """
        wrapped = self.args
        if not wrapped or self.command not in {"/bin/env", "/usr/bin/env"}:
            return self.command
        return wrapped
def parse(data: Union[bytes, str]) -> Shebang:
    r"""Splits a shebang (#!) into command and arguments.

    We only support UTF-8 programs & arguments.  The OS usually doesn't have
    such restrictions, but until such a use case comes up where non-UTF-8 data
    is needed, assume it to simplify the code (strings are easier to handle).

    Only the first line of the data is examined, so passing more of the file
    than its first line is safe: later lines never end up in the result.

    Args:
        data: The first line of a shebang file, for example
            "#!/usr/bin/env -uPWD python foo.py\n".  The referenced command
            must be an absolute path with optionally some arguments.

    Returns:
        A Shebang with the command & arguments broken out.  The arguments are
        an empty string when the line has none.

    Raises:
        ValueError: The passed data is not a valid shebang line: it does not
            start with "#!" followed by a program, it is not valid UTF-8, or
            the program is not an absolute path.
    """
    # We convert strings to bytes and then scan the bytes so we don't have to
    # worry about being given non-UTF-8 binary data.  Anything that cannot be
    # decoded back into UTF-8 is rejected below; there's no situation that we
    # care to support that would matter here.
    if isinstance(data, str):
        data = data.encode("utf-8")

    # Restrict the scan to the first line so that text on later lines can
    # never be picked up as the command or its arguments.
    first_line = data.partition(b"\n")[0]

    m = RE.match(first_line)
    if not m:
        raise ValueError("shebang (#!) line expected")

    try:
        command = m.group(1).decode("utf-8")
        args = m.group(2).strip().decode("utf-8")
    except UnicodeDecodeError as e:
        # Chain explicitly so the offending bytes stay visible in tracebacks.
        raise ValueError("shebang (#!) line is not valid UTF-8") from e

    if not command.startswith("/"):
        raise ValueError("shebang (#!) program must be absolute path")

    return Shebang(command=command, args=args)