"""Development Utility Functions: Script vs. Library Checker
Author, Copyright, and License
------------------------------
Copyright (c) 2023-2025 Hauke Daempfling (haukex@zero-g.net)
at the Leibniz Institute of Freshwater Ecology and Inland Fisheries (IGB),
Berlin, Germany, https://www.igb-berlin.de/
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see https://www.gnu.org/licenses/
"""
import os
import re
import sys
import ast
import enum
import argparse
import subprocess
from stat import S_IXUSR
from pathlib import Path
from collections.abc import Sequence
from typing import NamedTuple, Union
from igbpyutils.file import Filename, cmdline_rglob, autoglob
# spell-checker: ignore scriptlike
[docs]
class ResultLevel(enum.IntEnum):
    """Severity levels used in the ``level`` field of :class:`ScriptLibResult`.

    Being an :class:`enum.IntEnum`, the members can be compared with ``<``, ``>=`` etc.
    (Note the numeric values are mostly borrowed from :mod:`logging`.)
    """
    #: Lowest severity
    INFO = 20
    #: Between :attr:`INFO` and :attr:`WARNING`
    NOTICE = 25
    #: Between :attr:`NOTICE` and :attr:`ERROR`
    WARNING = 30
    #: Highest severity
    ERROR = 40
[docs]
class ScriptLibFlags(enum.Flag):
    """The individual findings that can be combined in the ``flags`` field of :class:`ScriptLibResult`.

    .. warning::

        Always refer to the flags by name. Their integer values are generated
        automatically via :func:`enum.auto` and should not be relied upon to stay constant.
    """
    #: Set when the file's execute bit is set
    EXEC_BIT = enum.auto()
    #: Set when the file begins with a shebang line
    SHEBANG = enum.auto()
    #: Set when the file contains ``if __name__=='__main__': ...``
    NAME_MAIN = enum.auto()
    #: Set when the file contains statements that make it look like a script
    #: (i.e. anything that's not a ``def``, ``class``, etc.)
    SCRIPT_LIKE = enum.auto()
[docs]
class ScriptLibResult(NamedTuple):
    """The outcome of analyzing a single file with :func:`check_script_vs_lib`."""
    #: Path of the analyzed file
    path: Path
    #: How severe the finding is, see :class:`ResultLevel`
    level: ResultLevel
    #: Human-readable description of the finding, including details
    message: str
    #: The individual properties detected in the file, see :class:`ScriptLibFlags`
    flags: ScriptLibFlags
# Whether we are running on Windows; check_script_vs_lib does not read the execute bit from the file system there.
_IS_WINDOWS = sys.platform.startswith('win32')
# Matches a single entry of ``git ls-files --stage`` output, i.e. ``<mode> <object> <stage>\t<file>``.
# Group 1 is the octal file mode, group 2 is the file name.
# The object name is 40 hex digits in SHA-1 repositories and 64 hex digits in SHA-256 repositories.
_git_ls_files_re = re.compile(r'''\A([0-7]+) (?:[a-fA-F0-9]{40}|[a-fA-F0-9]{64}) \d+\t(.+?)(?:\r?\n|\Z)''')
#: The shebang lines recognized by default: ``python`` or ``python3`` in ``/usr/bin`` or ``/usr/local/bin``,
#: optionally invoked via ``env``, with optional trailing whitespace.
DEFAULT_SHEBANG_RE = re.compile(r'''\A\#\!/usr(?:/local)?/bin/(?:env +)?python3?\s*\Z''')
[docs]
def check_script_vs_lib(path :Filename,  # pylint: disable=too-many-return-statements
                        *, known_shebangs :Union[Sequence[str],re.Pattern] = DEFAULT_SHEBANG_RE,
                        exec_from_git :bool = False) -> ScriptLibResult:
    """This function analyzes a Python file to see whether it looks like a library or a script,
    and checks several features of the file for consistency.

    It checks the following points, each of which on their own would indicate the file is a script,
    but in certain combinations don't make sense. It checks whether the file...

    - has its execute bit set (ignored on Windows, unless ``exec_from_git`` is set)
    - has a shebang line (e.g. ``#!/usr/bin/env python3``, see also the ``known_shebangs`` parameter)
    - contains a ``if __name__=='__main__':`` line
    - contains statements other than ``class``, ``def``, etc. in the main body

    :param path: The name of the file to analyze.
    :param known_shebangs: You may provide your own list of shebang lines that this function will recognize here,
        either as a list of strings (without trailing newlines) or a regular expression.
    :param exec_from_git: If you set this to :obj:`True`, then instead of looking at the file's actual mode bits
        to determine whether the exec bit is set, the function will ask ``git`` for the mode bits of the file
        and use those.
    :return: A :class:`ScriptLibResult` object that indicates what was found and whether there are any inconsistencies.
    :raises RuntimeError: If ``exec_from_git`` is set and the output of ``git ls-files`` can't be parsed
        or names a different file.
    :raises subprocess.CalledProcessError: If ``exec_from_git`` is set and ``git`` exits with a nonzero status.
    :raises SyntaxError: If the file can't be parsed as Python source code.
    """
    pth = Path(path)
    flags = ScriptLibFlags(0)
    with pth.open(encoding='UTF-8') as fh:
        if not _IS_WINDOWS and os.stat(fh.fileno()).st_mode & S_IXUSR:  # cover-not-win32
            flags |= ScriptLibFlags.EXEC_BIT
        source = fh.read()
    # On Windows the exec bit isn't read from the file system, so by default its absence is not reported
    ignore_exec_bit = _IS_WINDOWS
    if exec_from_git:
        # discard whatever the file system said and use only the mode recorded by git
        flags &= ~ScriptLibFlags.EXEC_BIT
        # the `--` makes sure a file name beginning with a dash isn't taken for an option
        res = subprocess.run(['git','ls-files','--stage','--',pth.name], cwd=pth.parent,
                             encoding='UTF-8', check=True, capture_output=True)
        assert not res.returncode and not res.stderr
        if m := _git_ls_files_re.fullmatch(res.stdout):
            if m.group(2) != pth.name:
                raise RuntimeError(f"Unexpected git output, filename mismatch {res.stdout!r}")
            if int(m.group(1), 8) & S_IXUSR:
                flags |= ScriptLibFlags.EXEC_BIT
        else:
            raise RuntimeError(f"Failed to parse git output {res.stdout!r} for {pth.name!r}")
        # the exec bit came from git, so it is meaningful even on Windows
        ignore_exec_bit = False
    shebang_line :str = ''
    if source.startswith('#!'):
        # partition (instead of index) also handles a file that consists of only a shebang without a newline
        shebang_line = source.partition('\n')[0]
        flags |= ScriptLibFlags.SHEBANG
    # collect the top-level statements that make the file look like a script
    why_scriptlike :list[str] = []
    for node in ast.iter_child_nodes(ast.parse(source, filename=str(pth))):
        # If(test=Compare(left=Name(id='__name__', ctx=Load()), ops=[Eq()], comparators=[Constant(value='__main__')])
        if (isinstance(node, ast.If) and isinstance(node.test, ast.Compare)  # pylint: disable=too-many-boolean-expressions
                and isinstance(node.test.left, ast.Name) and node.test.left.id=='__name__' and len(node.test.ops)==1
                and isinstance(node.test.ops[0], ast.Eq) and len(node.test.comparators)==1
                and isinstance(node.test.comparators[0], ast.Constant) and node.test.comparators[0].value=='__main__'):
            flags |= ScriptLibFlags.NAME_MAIN
        elif (not isinstance(node, (ast.Import, ast.ImportFrom, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef,
                                    ast.Assign, ast.AnnAssign, ast.Assert))
                # docstring:
                and not (isinstance(node, ast.Expr) and isinstance(node.value, ast.Constant) and isinstance(node.value.value, str))):
            why_scriptlike.append(f"{type(node).__name__}@L{node.lineno}")  # type: ignore[attr-defined]
    if why_scriptlike:
        flags |= ScriptLibFlags.SCRIPT_LIKE
    # an unrecognized shebang is reported first, regardless of the other findings
    if flags&ScriptLibFlags.SHEBANG and not ( known_shebangs.fullmatch(shebang_line) if isinstance(known_shebangs, re.Pattern)
                                              else shebang_line in known_shebangs ):
        return ScriptLibResult(pth, ResultLevel.WARNING, f"File has unrecognized shebang {shebang_line!r}", flags)
    if flags&ScriptLibFlags.NAME_MAIN and flags&ScriptLibFlags.SCRIPT_LIKE:
        return ScriptLibResult(pth, ResultLevel.ERROR, "File has `if __name__=='__main__'` and looks like a script due to "
                               f"{', '.join(why_scriptlike)}", flags)
    if not flags&ScriptLibFlags.SHEBANG and not flags&ScriptLibFlags.NAME_MAIN and not flags&ScriptLibFlags.SCRIPT_LIKE:
        # looks like a normal library
        if flags&ScriptLibFlags.EXEC_BIT:
            return ScriptLibResult(pth, ResultLevel.ERROR, "File looks like a library but exec bit is set", flags)
        return ScriptLibResult(pth, ResultLevel.INFO, "File looks like a normal library", flags)
    if not flags&ScriptLibFlags.NAME_MAIN and not flags&ScriptLibFlags.SCRIPT_LIKE:
        assert flags&ScriptLibFlags.SHEBANG
        return ScriptLibResult(pth, ResultLevel.ERROR, f"File has shebang{' and exec bit' if flags&ScriptLibFlags.EXEC_BIT else ''} "
                               "but seems to be missing anything script-like", flags)
    # from here on, exactly one of NAME_MAIN and SCRIPT_LIKE is set
    assert (flags&ScriptLibFlags.NAME_MAIN or flags&ScriptLibFlags.SCRIPT_LIKE
            ) and not (flags&ScriptLibFlags.NAME_MAIN and flags&ScriptLibFlags.SCRIPT_LIKE)  # xor
    if (flags & ScriptLibFlags.EXEC_BIT or ignore_exec_bit) and flags&ScriptLibFlags.SHEBANG:
        if flags&ScriptLibFlags.SCRIPT_LIKE:
            return ScriptLibResult(pth, ResultLevel.NOTICE, "File looks like a normal script (but could use `if __name__=='__main__'`)", flags)
        return ScriptLibResult(pth, ResultLevel.INFO, "File looks like a normal script", flags)
    # the file looks like a script, but the exec bit and/or the shebang is missing
    missing = ([] if flags & ScriptLibFlags.EXEC_BIT or ignore_exec_bit else ['exec bit']) + ([] if flags & ScriptLibFlags.SHEBANG else ['shebang'])
    assert missing
    why :str = ', '.join(why_scriptlike) if flags&ScriptLibFlags.SCRIPT_LIKE else "`if __name__=='__main__'`"
    return ScriptLibResult(pth, ResultLevel.ERROR, f"File looks like a script (due to {why}) but is missing {' and '.join(missing)}", flags)
[docs]
def main() -> None:
    """Command-line interface for :func:`check_script_vs_lib`.

    Every ``.py`` file found under the given paths is checked. Results with a level of
    :attr:`ResultLevel.WARNING` or higher are always printed and counted as issues; with ``--notice``, this
    also applies to :attr:`ResultLevel.NOTICE`; with ``--verbose``, all results are printed.
    The process exits with the number of issues as its exit status, capped at 255.

    If the module and script have been installed correctly, you should be able to run ``py-check-script-vs-lib -h`` for help."""
    parser = argparse.ArgumentParser(description='Check Python Scripts vs. Libraries')
    parser.add_argument('-v', '--verbose', help="be verbose", action="store_true")
    parser.add_argument('-n', '--notice', help="show notices and include in issue count", action="store_true")
    parser.add_argument('-g', '--exec-git', help="get the exec bit from git", action="store_true")
    parser.add_argument('paths', help="the paths to check (directories searched recursively)", nargs='*')
    #TODO Later: Add an option to add known shebang lines
    args = parser.parse_args()
    issues :int = 0
    for path in cmdline_rglob(autoglob(args.paths)):
        if not path.is_file() or path.suffix.lower() != '.py':
            continue
        result = check_script_vs_lib(path, exec_from_git=args.exec_git)
        is_issue = result.level >= ResultLevel.WARNING or (args.notice and result.level >= ResultLevel.NOTICE)
        if is_issue or args.verbose:
            print(f"{result.level.name} {result.path}: {result.message}")
        if is_issue:
            issues += 1
    # Only the low 8 bits of the exit status are reported on POSIX systems, so without the cap
    # e.g. 256 issues would turn into exit status 0, which signals success.
    parser.exit(min(issues, 255))