From 6f50423eaae06421aa6e9bd1976491f29b1466e3 Mon Sep 17 00:00:00 2001 From: Loek Le Blansch Date: Tue, 6 Jan 2026 07:50:30 +0100 Subject: 1.0 release --- .readthedocs.yaml | 6 +- doc/api/index.rst | 48 +++++- doc/dev/index.rst | 129 ++++++++++++--- doc/dev/processor.rst | 100 ++++++----- doc/user/index.rst | 35 ++-- patchtree/__init__.py | 3 +- patchtree/cli.py | 39 +++-- patchtree/config.py | 121 +++----------- patchtree/context.py | 447 ++++++++++++++++++++++++++++---------------------- patchtree/diff.py | 47 +++++- patchtree/fs.py | 171 +++++++++++++++++++ patchtree/header.py | 89 ++++++++++ patchtree/patch.py | 75 --------- patchtree/process.py | 256 ++++++++++++++++------------- patchtree/spec.py | 38 +++++ patchtree/target.py | 84 ++++++++++ pyproject.toml | 5 +- 17 files changed, 1102 insertions(+), 591 deletions(-) create mode 100644 patchtree/fs.py create mode 100644 patchtree/header.py delete mode 100644 patchtree/patch.py create mode 100644 patchtree/spec.py create mode 100644 patchtree/target.py diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 99f1990..3316308 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,5 +7,7 @@ build: sphinx: configuration: doc/conf.py python: - install: - - requirements: doc/requirements.txt + install: + - method: pip + path: . + - requirements: doc/requirements.txt diff --git a/doc/api/index.rst b/doc/api/index.rst index feb1740..d200f6f 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -1,19 +1,61 @@ .. _api: +============= API reference ============= +--------------------------- +``patchtree.config`` module +--------------------------- + .. automodule:: patchtree.config :members: -.. automodule:: patchtree.process +---------------------------- +``patchtree.context`` module +---------------------------- + +.. automodule:: patchtree.context :members: +------------------------- +``patchtree.diff`` module +------------------------- + .. automodule:: patchtree.diff :members: -.. 
automodule:: patchtree.patch +------------------------------ +``patchtree.fs`` module +------------------------------ + +.. automodule:: patchtree.fs :members: -.. automodule:: patchtree.context +------------------------------ +``patchtree.header`` module +------------------------------ + +.. automodule:: patchtree.header + :members: + +---------------------------- +``patchtree.process`` module +---------------------------- + +.. automodule:: patchtree.process + :members: + +------------------------------ +``patchtree.spec`` module +------------------------------ + +.. automodule:: patchtree.spec + :members: + +--------------------------- +``patchtree.target`` module +--------------------------- + +.. automodule:: patchtree.target :members: diff --git a/doc/dev/index.rst b/doc/dev/index.rst index 523e6a9..de8b996 100644 --- a/doc/dev/index.rst +++ b/doc/dev/index.rst @@ -1,3 +1,13 @@ +.. raw:: html + + + + ############## Developer docs ############## @@ -46,6 +56,9 @@ Nomenclature patchset A set of files that describe how to change the target source tree(s), placed in the same structure as the original source tree. + patchspec + A YAML specification that configures how patchtree should handle (other) inputs. + patch A single file (usually ``.patch`` or ``.diff``) that lists changes to make to one specific target directory. @@ -56,7 +69,7 @@ Building clean patches In order to generate clean patches files, patchtree needs * the (original) :term:`target` source tree (either as a folder or .zip file) -* a set of :term:`inputs` +* a set of :term:`inputs` (i.e. the patchset) The basic syntax of the patchtree CLI is as follows: @@ -67,36 +80,114 @@ The basic syntax of the patchtree CLI is as follows: By default, the resulting patch is written to the standard output. This behavior, along with many other default behaviors can be changed through the command-line arguments (see ``--help``) or the `configuration file `_. 
-************************ -Writing patchset sources -************************ +.. _patchspec: + +*********************** +Writing patchset inputs +*********************** -Each patchset source file is compared to the target source file of the same name, and the resulting diff is output in the clean patch. +Each patchset input file is compared using difflib's unified_diff algorithm to the target source file of the same name, and the resulting diff is output in the clean patch. This means that the default behavior of files placed in the patchset is to add or replace any file in the target source tree. +Because most of the time only small adjustments have to be made to the target sources, patchtree implements mechanisms for semantically describing what changes should be applied. +This is achieved by writing a :term:`patchspec`, which is located either at the start of a patchset input file or as a standalone .yml file in the patchset tree. -Because most of the time only small adjustments have to be made to the target sources, patchtree uses so-called processors. -Every patchset source is first processed by 0 or more processors, which transform the input's content before it is compared to the target file's content. -This mechanism allows you to describe changes *semantically* so they can apply to multiple versions of-- or variations in the target. +.. code-block:: yaml + :caption: Patchspec header + + --- !patchspec + foo: bar + ... + + remaining file content + +.. note:: + + While patchspec headers look similar to `front matter `_, they are actually regular YAML documents (and must therefore be delimited from the remaining file content using ``...`` instead of ``---``). + Any valid YAML document at the beginning of a file which explicitly defines a patchspec (using the ``!patchspec`` tag) is processed. + If a YAML file is present in the patchset but does not define a patchspec, it will be treated as any other text file! + +.. 
code-block:: yaml + :caption: Standalone patchspec + + !patchspec + foo: bar + +The output from standalone patchspecs is compared against the file in the target of the same name **after the .yml/.yaml extension is removed**. +The file extensions trimmed from standalone patchspecs can be configured using the `configuration file `_'s :any:`patchspec_extensions ` value. +Note that a file must both have an extension from the aforementioned list as well as contain a valid patchspec in order to be handled as non-literal input. + +========= +YAML tags +========= + +In addition to the default tags supported by `PyYAML `_, patchtree supports the following tags: +.. list-table:: + :header-rows: 1 + + * - Name + - YAML type + - Description + * - ``!patchspec`` + - mapping + - Provides patchspec type and value validation and is required in order to make patchtree use the patchspec. + * - ``!target`` + - scalar + - Input specification for a file in the target. + If no filename is passed to this tag, the current filename is used. + Creates a :any:`TargetFileInputSpec`. + * - ``!input`` + - scalar + - Input specification for a file in the patchset (relative to current file). + If no filename is passed to this tag, the current filename is used. + Creates a :any:`PatchsetFileInputSpec`. + + .. note:: + + Files specified using ``!input`` are no longer treated as a literal (unprocessed) input and will not be compared to a target file of the same name. + + +================ +Patchspec format +================ + +The patchspec format currently takes the following keys: + +.. code-block:: yaml + + !patchspec + # List of processors to apply (in order defined) + processors: list[Processor]() + + +.. _patchspec_tags: + +========== Processors ========== -Processors are indicated using the hash symbol (``#``) in the filename, and each process the input's contents in a chain.
-Processors may optionally take argument(s) separated by a comma (``,``), and arguments may optionally take a value delimited by an equal sign (``=``) -After processing, the resulting file content is compared to the target source's content using difflib's unified_diff algorithm. +Every patchset source is first processed by 0 or more processors, which transform the input's content before it is compared to the target file's content. +This mechanism allows you to describe changes *semantically* so they can apply to multiple versions of-- or variations in the target. + +Each processor takes an input and output similar to the standard input/output of command-line processes. +The only difference with patchtree is that the input/output is file-based, and carries metadata about the file's mode and whether it is binary or not. +The processors can be thought of as piped commands in the sense that (by default) each processor's input is either (a) the content of the patchset file for the first processor, or (b) the output received from the previous processor. +Some processors also take secondary input(s), usually under the name *target*. -For example: +For each process, the input and/or target can be manually specified using an *input spec*. +The input spec's default value is to use the output of the previous processor. +It can also be set to a constant string (inside the YAML patchspec) or an existing file inside the patchset or target using YAML tags (see :ref:`patchspec_tags`). -.. code:: none +As an example, the following patchspec will set the mode of the file it is named as (with an added .yml extension) to 755 (executable): - sdk/middleware/adapters/src/ad_crypto.c#cocci#jinja - \_____________________________________/\__________/ - target source file path processors +.. 
code-block:: yaml -In the above example, the input is first processed by :ref:`jinja `, and the resulting file content is piped into :ref:`Coccinelle ` as if a file with the output from jinja existed under the name ``ad_crypto.c#cocci``. -Coccinelle will in this case output a modified copy of ``ad_crypto.c``, which will be compared to the original to produce the diff for this file. + !patchspec + processors: + - id: touch + mode: 0755 -The processors included with patchtree are documented on the :ref:`processors` page. +The processors included with patchtree, including any configuration options they take, are documented on the :ref:`processors` page. Custom processors can be created by inheriting from the base :any:`Process` class and registering through the `configuration file `_'s :any:`processors ` value. .. _ptconfig: diff --git a/doc/dev/processor.rst b/doc/dev/processor.rst index 018d216..640f44c 100644 --- a/doc/dev/processor.rst +++ b/doc/dev/processor.rst @@ -5,25 +5,28 @@ Processors ########## This page lists all built-in processor types along with descriptions of what they do and which options they take. -On this page, **output** refers to what the processor returns, while **input** refers to how the processor treats its input. -This input is either (a) the content of the patchset file for the first processor, or (b) the output received from the previous processor. -**Arguments** are any options explicitly given to the processor through the filename, e.g. ``filename#processor,arg,arg2=value,arg3#processor2``. -Note that some processors may take positional arguments, while others may use key/value based options instead. -.. _process_id: +.. _process_touch: -******** -Identity -******** +***** +Touch +***** -The identity processor is used to "touch" files, change the mode of existing files, or add arbitrary identifiers to patchset source filenames by passing arbitrary arguments. 
+The touch processor is used to create files (similar to ``touch``) or change the mode of existing files (similar to ``chmod``). -:Class: :any:`ProcessIdentity` -:Identifier: ``id`` -:Input: Ignored. +:Class: :any:`TouchProcess` +:Input: Content of new file. :Output: - A file with the *content* of the target file and *mode* of the patchset input. -:Arguments: Any arguments passed to this processor are ignored. + The input file as-is, with the following exceptions: + + - If the input is empty, it will be set to an empty (but existent) file. + - If the ``mode`` option is set, it will be used instead of the input mode. +:Options: + .. code-block:: yaml + + - id: "touch" + input: ProcessInputSpec(required=False) + mode: int(required=False, default=) .. _process_cocci: @@ -37,11 +40,17 @@ The Coccinelle processor uses Coccinelle to apply patch(es) in the SmPL (Semanti In order to use this processor, Coccinelle must be installed and ``spatch`` must be available in ``$PATH``. -:Class: :any:`ProcessCoccinelle` -:Identifier: ``cocci`` +:Class: :any:`CoccinelleProcess` :Input: Coccinelle's SmPL input. -:Output: The contents of the target file after being processed by Coccinelle (not the diff returned by Coccinelle). -:Arguments: Reserved. +:Target: File content to apply patch to (current patchspec's target file by default). +:Output: The content of the target file after being processed by Coccinelle (not the diff returned by Coccinelle). +:Options: + .. code-block:: yaml + + - id: "cocci" + input: ProcessInputSpec(required=False) + target: ProcessInputSpec(required=False) + .. _process_jinja: @@ -51,16 +60,19 @@ Jinja template The Jinja processor passes the input through the Jinja2 templating engine. -:Class: :any:`ProcessJinja2` -:Identifier: ``jinja`` +:Class: :any:`Jinja2Process` :Input: Jinja template code. :Output: The input after being processed by Jinja. -:Arguments: Reserved. +:Options: + .. 
code-block:: yaml + + - id: "jinja" + input: ProcessInputSpec(required=False) .. note:: - Template variables are generated through the :any:`get_template_vars ` method. - This method returns an empty dict by default, and is meant to be implemented by implementing a custom class that derives from ProcessJinja2 and registering it through the :ref:`configuration file `. + Template variables are generated through the :any:`get_template_vars ` method. + This method returns an empty dict by default, and is meant to be implemented by implementing a custom class that derives from :any:`Jinja2Process` and registering it through the :ref:`configuration file `. .. _process_exe: @@ -68,19 +80,21 @@ The Jinja processor passes the input through the Jinja2 templating engine. Executable ********** -The executable processor runs the input as an executable, passes the target file to its standard input, and returns its standard output. +The executable processor passes its input to an executable and returns its standard output. -:Class: :any:`ProcessExec` -:Identifier: ``exec`` -:Input: - Executable script. +:Class: :any:`ExecProcess` +:Input: Input passed to the standard input of the command. +:Output: Any content written to the standard output by the executable. +:Options: + .. code-block:: yaml - .. important:: + - id: "exec" + cmd: str() | list[str]() + input: ProcessInputSpec(required=False) - The executable *must* contain a shebang line to specify what interpreter to use. + .. note:: -:Output: Any content written to the standard output by the executable. -:Arguments: Reserved. + If the ``cmd`` option is a string, it is split **using shell syntax rules**. .. _process_merge: @@ -90,13 +104,21 @@ Merge The merge processor merges the input with the target file, such that changes are combined with the target instead of replacing the target. -:Class: :any:`ProcessMerge` -:Identifier: ``merge`` -:Input: Content to merge. +:Class: :any:`MergeProcess` +:Input: Content to merge (A). 
+:Target: Content to merge (B). :Output: Merged changes. -:Arguments: Positional. +:Options: + .. code-block:: yaml + + - id: "merge" + strategy: enum() # see below + input: ProcessInputSpec(required=False) + target: ProcessInputSpec(required=False) - 1. Merge strategy: +========== +Strategies +========== - ``ignore`` - Appends all lines from the input to the output excluding any lines already present in the output. +``ignore`` + Appends all lines from *input* to *target*, excluding any lines already present in *target*. diff --git a/doc/user/index.rst b/doc/user/index.rst index fdca7bd..13e61bf 100644 --- a/doc/user/index.rst +++ b/doc/user/index.rst @@ -4,26 +4,28 @@ User docs This page contains information useful to people who want to use the .patch files generated by patchtree. -.. note:: - - By convention, the patch file should be placed in the root of the target directory under the filename ``.patchtree.diff``. - This allows you to easily revert and/or upgrade the patch later. - -.. important:: - - If you keep the target directory under version control, make sure the repository root is the same as the root of the patch's target directory. - Patches produced by patchtree contain *extended header lines* which are be interpreted by ``git apply``. - Because these header lines must include the path to each modified file relative to the repository root, any files which don't exist at the expected location will be skipped silently by ``git``. +.. _user_apply: **************** Applying a patch **************** -To apply patches output by patchtree, download the ``.patch`` file and place it in the directory where it should apply the changes under the name ``.patchtree.diff``. +#. Download the ``.patch`` file for your source tree and place it in the directory where it should apply +#. Rename the downloaded file to ``.patchtree.diff`` +#. Open Git bash (on Windows) or any shell (if on \*NIX) in the directory with the patch +#. 
Mark the patch as executable by running:: + + $ chmod +x .patchtree.diff + +#. Apply the patch by running:: + + $ ./.patchtree.diff -To apply the patch, run the following command in the target directory:: +If all goes well, this command should produce no output and the source files will have been updated. +In case the patch fails to apply without producing any output, the above command can be run with the ``-v`` option to produce verbose output. +Any command-line options recognized by git-apply(1) can also be used. - $ git apply .patchtree.diff +.. _user_revert: ***************** Reverting a patch @@ -31,7 +33,7 @@ Reverting a patch To revert the changes of a patch, run the following command in the target directory:: - $ git apply --reverse .patchtree.diff + $ ./.patchtree.diff --reverse ***************** Upgrading a patch @@ -39,6 +41,5 @@ Upgrading a patch Upgrading a patch consists of -#. reverting the current (old) patch -#. downloading and replacing the ``.patchtree.diff`` file with the new patch -#. reapplying the patch file +#. `Reverting the current (old) patch `_ +#.
Repeating the steps from :ref:`user_apply` using the new patch file diff --git a/patchtree/__init__.py b/patchtree/__init__.py index 2e304a6..68d2078 100644 --- a/patchtree/__init__.py +++ b/patchtree/__init__.py @@ -1,4 +1,5 @@ from .config import Config, Header from .diff import Diff, File from .context import Context -from .process import ProcessCoccinelle, ProcessJinja2, ProcessIdentity, ProcessExec, ProcessMerge +from .header import Header +from .process import * diff --git a/patchtree/cli.py b/patchtree/cli.py index 1c38d30..7938974 100644 --- a/patchtree/cli.py +++ b/patchtree/cli.py @@ -2,6 +2,8 @@ from dataclasses import fields from sys import stderr from pathlib import Path from argparse import ArgumentTypeError +from logging import basicConfig as log_config, addLevelName as log_add_level_name +from logging import DEBUG, INFO, WARNING, ERROR, CRITICAL from .context import Context from .config import Config @@ -62,8 +64,8 @@ def parse_arguments(config: Config) -> Context: type=int, ) parser.add_argument( - "-s", - "--shebang", + "-S", + "--no-shebang", help="output shebang in resulting patch", action="store_true", ) @@ -100,8 +102,8 @@ def parse_arguments(config: Config) -> Context: if options.context is not None: config.diff_context = options.context - if options.shebang: - config.output_shebang = True + if options.no_shebang: + config.no_shebang = True if len(options.patch) == 0: options.patch = config.default_patch_sources @@ -109,25 +111,30 @@ def parse_arguments(config: Config) -> Context: try: return config.context(config, options) except Exception as e: - parser.error(str(e)) + raise e def main(): + log_add_level_name(DEBUG, "DBG") + log_add_level_name(INFO, "INF") + log_add_level_name(WARNING, "WRN") + log_add_level_name(ERROR, "ERR") + log_add_level_name(CRITICAL, "CRT") + log_config( + stream=stderr, + level=WARNING, + format="[%(levelname)s] %(message)s", + ) + config = load_config() context = parse_arguments(config) - if len(context.inputs) == 
0: - print("no files to patch!", file=stderr) - return 0 - - config.header(config, context) - - for file in context.inputs: - patch = config.patch(config, file) - patch.write(context) - - context.close() + if context.in_place: + context.apply(True) + context.apply(False) + else: + context.write() return 0 diff --git a/patchtree/config.py b/patchtree/config.py index a4d0912..9c46740 100644 --- a/patchtree/config.py +++ b/patchtree/config.py @@ -2,120 +2,46 @@ from __future__ import annotations from dataclasses import dataclass, field from argparse import ArgumentParser -from importlib import metadata from pathlib import Path from .context import Context -from .patch import Patch +from .target import Target +from .header import Header from .process import * from .diff import * DEFAULT_PROCESSORS: dict[str, type[Process]] = { - "id": ProcessIdentity, - "cocci": ProcessCoccinelle, - "jinja": ProcessJinja2, - "exec": ProcessExec, - "merge": ProcessMerge, + "touch": TouchProcess, + "cocci": CoccinelleProcess, + "jinja": Jinja2Process, + "exec": ExecProcess, + "merge": MergeProcess, } -class Header: - """ - Patch output header generator. - - The header is formatted as - - * shebang (optional) - * patchtree version info - * extra version info (empty by default) - * license (empty by default) - """ - - config: Config - context: Context - - name = "patchtree" - """Program name shown in version info.""" - - license = None - """License text (optional).""" - - def __init__(self, config: Config, context: Context): - self.config = config - self.context = context - - self.write_shebang() - self.write_version() - self.write_version_extra() - self.write_license() - - def write_shebang(self): - """ - Write a shebang line to apply the output patch if the --shebang option was passed. 
- """ - - if not self.config.output_shebang: - return - - cmd = ["/usr/bin/env", "-S", *self.context.get_apply_cmd()] - cmdline = " ".join(cmd) - self.context.output.write(f"#!{cmdline}\n") - - def write_version(self): - """ - Write the patchtree name and version number. - """ - - version = metadata.version("patchtree") - self.context.output.write(f"{self.name} output (version {version})\n") - - def write_version_extra(self): - """ - Write extra version information (empty). - - This method is meant to be implemented by subclasses of Header defined in the ptconfig.py of - patchsets. - """ - - pass - - def write_license(self): - """ - Write a license if it is defined. - """ - - if self.license is None: - return - self.context.output.write(f"{self.license}\n") - - @dataclass class Config: """ Configuration dataclass. - This class contains all configuration options read from the :ref: `configuration file `. + This class contains all configuration options read from the :ref:`configuration file `. """ context: type[Context] = Context - """Context class type. Override this to add custom context variables.""" + """Context class type.""" - patch: type[Patch] = Patch - """Patch class type.""" + target: type[Target] = Target + """Target class type.""" argument_parser: type[ArgumentParser] = ArgumentParser - """ArgumentParser class type. Override this to add custom arguments.""" + """ArgumentParser class type.""" - process_delimiter: str = "#" - """ - String used to delimit processors in patch source filenames. - - See: :ref:`processors`. - """ + header: type[Header] = Header + """Header class type.""" processors: dict[str, type[Process]] = field(default_factory=lambda: DEFAULT_PROCESSORS) """ - Maps processor specification string to :type:`Process` class type. + Maps processor IDs to :type:`Process` class type (see :ref:`processors`). .. note:: @@ -123,20 +49,23 @@ class Config: with the configuration file keys taking priority. 
""" - header: type[Header] = Header - """Header class type. Override this to modify the patch header format.""" - diff_context: int = 3 """Lines of context to include in the diffs.""" - output_shebang: bool = False - """Whether to output a shebang line with the ``git patch`` command to apply the patch.""" + no_shebang: bool = False + """Whether to suppress the shebang line with the ``git patch`` command to apply the patch.""" default_patch_sources: list[Path] = field(default_factory=list) - """List of default sources.""" + """List of default sources (empty by default).""" - default_root: str | None = None + default_root: Path | None = None """Default value of the ``-C``/``--root`` argument.""" + patchspec_extensions: tuple[str, ...] = ( + ".yaml", + ".yml", + ) + """File extensions removed for standalone patch specifications (see :ref:`patchspec`).""" + def __post_init__(self): self.processors = {**DEFAULT_PROCESSORS, **self.processors} diff --git a/patchtree/context.py b/patchtree/context.py index c2cf33b..e7fb6c2 100644 --- a/patchtree/context.py +++ b/patchtree/context.py @@ -1,174 +1,112 @@ from __future__ import annotations -from typing import TYPE_CHECKING, IO, cast +from typing import TYPE_CHECKING, IO, TextIO, cast + +import yaml from argparse import Namespace from pathlib import Path -from zipfile import ZipInfo, is_zipfile -from tempfile import TemporaryFile -from os import path -from sys import stdout, stderr +from zipfile import is_zipfile +from os import path, environ +from sys import stdout from subprocess import run -from zipfile import ZipFile -from stat import S_IFDIR, S_IFREG - -if TYPE_CHECKING: - from .config import Config - -ZIP_CREATE_SYSTEM_UNX = 3 - - -class FS: - """Target filesystem interface.""" - - target: Path - - def __init__(self, target: Path): - self.target = target - - def get_dir(self, dir: str) -> list[str]: - """ - List all items in a subdirectory of the target. - - :returns: A list of all item names. 
- """ - - raise NotImplementedError() - - def get_content(self, file: str) -> bytes | str | None: - """ - Get the content of a file relative to the target. - - :returns: - * The file content if it exists. - * None if the file does not exist. - """ - - raise NotImplementedError() - - def get_mode(self, file: str) -> int: - """ - Get the mode of a file relative to the target. - - :returns: - * The mode as returned by stat(3)'s ``stat.st_mode`` - * 0 if the file does not exist - """ +from logging import Logger, getLogger as get_logger - raise NotImplementedError() +from yaml.events import DocumentEndEvent +from yaml.nodes import MappingNode, ScalarNode +from .diff import File +from .spec import ( + LiteralInputSpec, + PatchsetFileInputSpec, + ProcessInputSpec, + TargetFileInputSpec, +) +from .target import Target +from .fs import FS, PERM_EXEC, DiskFS, ZipFS, MODE_FILE -class DiskFS(FS): - """Implementation of :any:`FS` for a regular directory. Reads directly from the disk.""" - - def __init__(self, target): - super(DiskFS, self).__init__(target) - - def get_dir(self, dir): - here = self.target.joinpath(dir) - return [path.name for path in here.iterdir()] - - def get_content(self, file): - here = self.target.joinpath(file) - if not here.exists(): - return None - bytes = here.read_bytes() - try: - return bytes.decode() - except: - return bytes - - def get_mode(self, file): - here = self.target.joinpath(file) - if not here.exists(): - return 0 - return here.stat().st_mode - - -class ZipFS(FS): - """Implementation of :any:`FS` for zip files. 
Reads directly from the archive.""" - - zip: ZipFile - """Underlying zip file.""" +if TYPE_CHECKING: + from .config import Config, Header - files: dict[Path, ZipInfo] = {} - """Map of path -> ZipInfo for all files in the archive.""" - def __init__(self, target): - super(ZipFS, self).__init__(target) - self.zip = ZipFile(str(target)) - for info in self.zip.infolist(): - self.files[Path(info.filename)] = info - # todo: index implicit directories in tree +class PatchspecLoader(yaml.Loader): + """ + YAML loader for patch specifications. + """ - def get_info(self, path: str) -> ZipInfo | None: + patch: File + """File content to parse.""" + + input: Path + """Path to file content.""" + + context: Context + """Parent context.""" + + input_specs: list[ProcessInputSpec] = [] + """List of inputs used by this patch specification.""" + + @staticmethod + def tag_file_target(loader: PatchspecLoader, node: ScalarNode) -> TargetFileInputSpec: + target_root = loader.input.parent.relative_to(loader.context.root) + path = loader.construct_scalar(node) + if len(path) == 0: + path = loader.input.name + spec = TargetFileInputSpec(path=target_root.joinpath(Path(path))) + loader.input_specs.append(spec) + return spec + + @staticmethod + def tag_file_input(loader: PatchspecLoader, node: ScalarNode) -> PatchsetFileInputSpec: + input_root = loader.input.parent + path = loader.construct_scalar(node) + if len(path) == 0: + path = loader.input.name + spec = PatchsetFileInputSpec(path=input_root.joinpath(Path(path))) + loader.input_specs.append(spec) + return spec + + @staticmethod + def tag_patchspec(loader: PatchspecLoader, node: MappingNode) -> Target: + data = loader.construct_mapping(node, deep=True) + target_cls = loader.context.config.target + target = target_cls( + loader.context, loader.input.relative_to(loader.context.root), data + ) + target.inputs += loader.input_specs + return target + + def __init__(self, context: Context, patch: File, input: Path): + self.context = context + 
self.patch = patch + self.input = input + + super().__init__(self.patch.get_str()) + + self.add_constructor("!patchspec", self.tag_patchspec) + self.add_constructor("!target", self.tag_file_target) + self.add_constructor("!input", self.tag_file_input) + + def parse(self) -> Target: """ - Get the ZipInfo for a file in the archive + Read the provided patch content and return a valid Target. - :returns: - * The ZipInfo for the file at ``path`` - * None if the file does not exist + This method will raise an exception if the provided input is not a patchspec. + This method also removes the patchspec YAML header from the input patch file's content + if a valid patchspec was read. """ - - return self.files.get(Path(path), None) - - def get_dir(self, dir): - items: set[str] = set() - dir = path.normpath("/" + dir) - for zip_dir in self.zip.namelist(): - zip_dir = path.normpath("/" + zip_dir) - if not zip_dir.startswith(dir): - continue - if zip_dir == dir: - continue - relative = path.relpath(zip_dir, dir) - top_level = relative.split("/")[0] - items.add(top_level) - return list(items) - - def get_content(self, file): - info = self.get_info(file) - if info is None: - return None - bytes = self.zip.read(info) try: - return bytes.decode() - except: - return bytes - - def is_implicit_dir(self, file: str) -> bool: - """ - Check if there is an implicit directory at ``file``. - - Some zip files may not include entries for all directories if they already define entries for files or - subdirectories within. This function checks if any path that is a subdirectory of ``file`` exists. - - :returns: ``True`` if there is a directory at ``file``, else ``False``. 
- """ + data = self.get_data() + if not isinstance(data, Target): + raise Exception("provided yaml is not a patchspec") - parent = Path(file) - for child in self.files: - if parent in child.parents: - return True - return False + # strip frontmatter from input content if it exists + events = yaml.parse(self.patch.get_str()) + end = next(ev for ev in events if isinstance(ev, DocumentEndEvent)) + self.patch.content = self.patch.get_str()[end.end_mark.index :].lstrip() - def get_mode(self, file): - MODE_NONEXISTANT = 0 - MODE_FILE = 0o644 | S_IFREG - MODE_DIR = 0o755 | S_IFDIR - - info = self.get_info(file) - if info is None: - # if self.is_implicit_dir(file): - # return MODE_DIR - return MODE_NONEXISTANT - - if info.create_system == ZIP_CREATE_SYSTEM_UNX: - return (info.external_attr >> 16) & 0xFFFF - - if info.is_dir(): - return MODE_DIR - - return MODE_FILE + return data + finally: + self.dispose() class Context: @@ -186,45 +124,51 @@ class Context: The ``root`` member only changes the appearance of paths. All internal logic uses the "real" paths. """ - target: Path - """Path to target.""" + header: Header + """Patch header instance.""" + + content: list[Target] = [] + """Patch targets (content).""" - fs: FS + target_fs: FS """Target file system interface.""" - output: IO - """Output stream for writing the clean patch.""" + patchset_fs: FS + """Target file system interface.""" in_place: bool """Whether to apply the changes directly to the target instead of outputting the .patch file.""" config: Config + """Configuration class instance.""" + + log: Logger + """Global log instance reference.""" + + is_empty: bool = False + """Whether the output patch delta does not include any changes. 
Updated by :any:`make_patch`.""" + + output: IO + """Output IO stream used to write output patch to.""" def __init__(self, config: Config, options: Namespace): self.config = config + self.log = get_logger(self.__class__.__name__) self.root = options.root - self.target = options.target self.in_place = options.in_place - self.inputs = self.collect_inputs(options) - self.fs = self.get_fs() - self.output = self.get_output(options) + # NOTE: this should NOT be options.root because input filenames are treated as relative + # to the working directory by default (i.e. --root applies *after* the inputs are + # collected) + self.patchset_fs = DiskFS(Path(".")) + self.target_fs = self._get_target_fs(options.target) - if self.in_place: - self.apply(True) - - def close(self): - """Finish writing the clean patch file and close it.""" - - # patch must have a trailing newline - self.output.write("\n") - self.output.flush() - - if self.in_place: - self.apply(False) + self.inputs = self.collect_inputs(options) + self.content = self.collect_targets(self.inputs) - self.output.close() + self.output = self._get_output(options) + self.header = config.header(config, self) def collect_inputs(self, options: Namespace) -> list[Path]: """ @@ -253,26 +197,101 @@ class Context: inputs.add(path) return list(inputs) - def get_dir(self, dir: str) -> list[str]: - """Get a target directory's content (see :any:`FS.get_dir()`)""" - return self.fs.get_dir(dir) + def create_target(self, input: Path, meta_inputs: list[Path] = []) -> Target: + """Create a target instance from an input path.""" + file = input.relative_to(self.root) + target: Target | None = None + patch = self.patchset_fs.get_file(PatchsetFileInputSpec(path=input)) + target_cls = self.config.target - def get_content(self, file: str) -> bytes | str | None: - """Get a target file's content (see :any:`FS.get_content()`)""" - return self.fs.get_content(file) + try: + patch.get_str() + except: + # binary files can't be patchspecs + target = 
target_cls(self, file) + + # if the input is a yaml file, try to load it + if target is None and input.suffix in self.config.patchspec_extensions: + try: + loader = PatchspecLoader(self, patch, input.parent.joinpath(input.stem)) + target = loader.parse() + self.log.info(f"found direct yaml patchspec: {input}") + except Exception as e: + self.log.error(f"while parsing patchspec for {input}: {e}") + raise e + + # try to load any frontmatter if we still don't have a target + if target is None: + try: + loader = PatchspecLoader(self, patch, input) + target = loader.parse() + self.log.info(f"found frontmatter patchspec: {input}") + except Exception as e: + # exceptions while parsing frontmatter can be ignored silently since not all + # files will have them + target = None + + if target is None: + self.log.info(f"treating as literal input: {input}") + target = target_cls(self, file) + + target.patch = patch + + for input in (i.path for i in target.inputs if isinstance(i, PatchsetFileInputSpec)): + meta_inputs.append(input) + + return target + + def collect_targets(self, inputs: list[Path]) -> list[Target]: + """ + Create a list of targets and automatically resolve any patchspec naming conflicts. - def get_mode(self, file: str) -> int: - """Get a target file's mode (see :any:`FS.get_mode()`)""" - return self.fs.get_mode(file) + This function creates a list of targets from the input paths, and ensures no standalone + patchspecs or files referenced as inputs by any patchspecs are still treated as literal + inputs. - def get_fs(self) -> FS: + :returns: + List of targets to process for final clean patch. 
+ """ + + meta_inputs: set[Path] = set() + targets: dict[Path, Target] = {} + + for input in inputs: + meta = [] + targets[input] = self.create_target(input, meta) + meta_inputs.update(meta) + + missing = meta_inputs - set(inputs) + if len(missing) > 0: + for input in missing: + self.log.error(f"{str(input)} referenced by patchspec but not in inputs") + raise Exception("missing files") + + # files referenced as meta inputs shouldn't be treated as verbatim files + for key in meta_inputs: + if key not in targets: + continue + del targets[key] + + return sorted(targets.values(), key=lambda target: target.file) + + def get_file(self, spec: ProcessInputSpec) -> File: + if isinstance(spec, LiteralInputSpec): + return File(content=spec.content, mode=MODE_FILE) + elif isinstance(spec, TargetFileInputSpec): + return self.target_fs.get_file(spec) + elif isinstance(spec, PatchsetFileInputSpec): + return self.patchset_fs.get_file(spec) + + raise Exception(f"unable to read file: {spec}") + + def _get_target_fs(self, target: Path) -> FS: """ Open the selected target, taking into account the --in-place option. :returns: Target filesystem interface. """ - target = self.target - if not target.exists(): raise Exception(f"cannot open `{target}'") @@ -284,18 +303,18 @@ class Context: raise Exception("cannot edit zip in-place!") return ZipFS(target) - raise Exception("cannot read `{target}'") + raise Exception(f"cannot read `{target}'") - def get_output(self, options: Namespace) -> IO: + def _get_output(self, options: Namespace) -> IO: """ Open the output stream, taking into account the --in-place and --out options. :returns: Output stream. 
""" - if self.in_place: + if options.in_place: if options.out is not None: - print("warning: --out is ignored when using --in-place", file=stderr) - return TemporaryFile("w+") + self.log.warning("--out is ignored when using --in-place") + return TextIO() if options.out is not None: if options.out == "-": @@ -312,28 +331,58 @@ class Context: :returns: Command argument vector. """ - cmd = ["git", "apply", "--allow-empty"] + cmd = ["git", "apply"] + if self.is_empty: + cmd.append("--allow-empty") if self.config.diff_context == 0: cmd.append("--unidiff-zero") return cmd + def make_patch(self) -> str: + """ + Generate a clean patch using the header configuration and deltas from all targets. + + :returns: + Clean patch contents + """ + patch = "" + for target in self.content: + patch += target.write() + + self.is_empty = len(patch) == 0 + + patch = self.header.write() + patch + + # patch must have a trailing newline + patch += "\n" + return patch + def apply(self, reverse: bool) -> None: """ Apply the patch in ``self.output`` and update the cache or reverse the patch in the cache. """ - location = cast(DiskFS, self.fs).target + location = cast(DiskFS, self.target_fs).target cache = location.joinpath(".patchtree.diff") - cmd = self.get_apply_cmd() + cmd = [str(cache.absolute())] if reverse: if not cache.exists(): return cmd.append("--reverse") else: - self.output.seek(0) - patch = self.output.read() + patch = self.make_patch() cache.write_text(patch) + cache.chmod(MODE_FILE | PERM_EXEC) - cmd.append(str(cache.absolute())) run(cmd, cwd=str(location.absolute())) + if reverse: + cache.unlink() + + def write(self) -> None: + """ + Write the clean patch to the selected output and close the output stream. 
+ """ + patch = self.make_patch() + self.output.write(patch) + self.output.close() diff --git a/patchtree/diff.py b/patchtree/diff.py index 4d4a998..f8444e7 100644 --- a/patchtree/diff.py +++ b/patchtree/diff.py @@ -10,10 +10,10 @@ if TYPE_CHECKING: @dataclass class File: - content: str | bytes | None + content: str | bytes | None = None """The file's contents, or ``None`` if it does not exist.""" - mode: int + mode: int = 0 """The file's mode as returned by stat(3)'s ``stat.st_mode``.""" def is_binary(self) -> bool: @@ -22,6 +22,26 @@ class File: """ return isinstance(self.content, bytes) + def get_str(self) -> str: + """ + Get the file content as a string. + + This function raises an Exception if the file is binary. + + :returns: + * An empty string if the file does not exist or is empty. + * The contents if the file is already open in text mode. + * The UTF-8 decoded representation of the file content. + """ + if self.content is None: + return "" + if isinstance(self.content, bytes): + try: + self.content = self.content.decode() + except Exception: + raise Exception("expected text file instead of binary") + return self.content + def lines(self) -> list[str]: """ Get a list of lines in this file. @@ -34,16 +54,20 @@ class File: This function only works for text files. Use :any:`is_binary` to check this safely. """ - assert not isinstance(self.content, bytes) - return (self.content or "").splitlines() + return self.get_str().splitlines() + + def __repr__(self): + return f"{self.__class__.__name__}(mode={self.mode:06o}, content={repr(self.content)})" class Diff: """ - Produce a regular diff from the (possibly absent) original file to the file in the patch input tree. + Produce a diff between two files. + Either file may be absent, in which case extended header lines understood by ``git apply`` are generated. 
""" config: Config + file: str """Path to file relative to target dir.""" @@ -60,7 +84,7 @@ class Diff: def compare(self) -> str: """ Generate delta in "git-diff-files -p" format (see - ``_) + ``_). """ a = self.a b = self.b @@ -86,12 +110,17 @@ class Diff: delta += f"new mode {b.mode:06o}\n" if a.content != b.content: - # make sure a file doesn't switch from text to binary or vice versa - assert a.is_binary() == b.is_binary() + binary = False + lines_a = [] + lines_b = [] - if not b.is_binary(): + try: lines_a = a.lines() lines_b = b.lines() + except Exception: + binary = True + + if not binary: diff = unified_diff( lines_a, lines_b, fromfile, tofile, lineterm="", n=self.config.diff_context ) diff --git a/patchtree/fs.py b/patchtree/fs.py new file mode 100644 index 0000000..3563104 --- /dev/null +++ b/patchtree/fs.py @@ -0,0 +1,171 @@ +from pathlib import Path +from zipfile import ZipFile, ZipInfo +from stat import S_IFDIR, S_IFREG +from os import path + +from .spec import FileInputSpec +from .diff import File + +ZIP_CREATE_SYSTEM_UNX = 3 + +MODE_NONEXISTANT = 0 +MODE_FILE = 0o644 | S_IFREG +MODE_DIR = 0o755 | S_IFDIR + +PERM_EXEC = 0o111 + + +class FS: + """Filesystem interface.""" + + target: Path + root: Path = Path() + + def __init__(self, target: Path): + self.target = target + + def get_dir(self, dir: Path) -> list[Path]: + """ + List all items (i.e. file and directories) in a subdirectory. + + :returns: A list of all item names. + """ + + raise NotImplementedError() + + def get_file(self, spec: FileInputSpec) -> File: + """ + Get a :ref:`File` object with the content and mode of a file. + + :returns: A :ref:`File` object representing the content at ``spec`` + """ + return File( + content=self.get_content(spec.path), + mode=self.get_mode(spec.path), + ) + + def get_content(self, file: Path) -> bytes | str | None: + """ + Get the content of a file. + + :returns: + * The file content if it exists. + * None if the file does not exist. 
+ """ + + raise NotImplementedError() + + def get_mode(self, file: Path) -> int: + """ + Get the mode of a file. + + :returns: + * The mode as returned by stat(3)'s ``stat.st_mode`` + * 0 if the file does not exist + """ + + raise NotImplementedError() + + +class DiskFS(FS): + """Implementation of :any:`FS` for a regular directory. Reads directly from the disk.""" + + def __init__(self, target): + super().__init__(target) + + def get_dir(self, dir): + here = self.target.joinpath(self.root, dir) + return list(here.iterdir()) + + def get_content(self, file): + here = self.target.joinpath(self.root, file) + if not here.exists(): + return None + bytes = here.read_bytes() + try: + return bytes.decode() + except: + return bytes + + def get_mode(self, file): + here = self.target.joinpath(self.root, file) + if not here.exists(): + return 0 + return here.stat().st_mode + + +class ZipFS(FS): + """Implementation of :any:`FS` for zip files. Reads directly from the archive.""" + + zip: ZipFile + """Underlying zip file.""" + + files: dict[Path, ZipInfo] = {} + """Map of path -> ZipInfo for all files in the archive.""" + + dirs: set[Path] = set() + """ + List of directories in this archive. + + Some zip files may not include entries for all directories if they already define entries + for files or subdirectories within. This set keeps all known directories. 
+ """ + + def __init__(self, target): + super().__init__(target) + self.zip = ZipFile(str(target)) + for info in self.zip.infolist(): + path = Path(info.filename) + self.files[path] = info + while path.parent != path: + self.dirs.add(path.parent) + path = path.parent + + def get_info(self, path: Path) -> ZipInfo | None: + """ + Get the ZipInfo for a file in the archive + + :returns: + * The ZipInfo for the file at ``path`` + * None if the file does not exist + """ + + path = self.root.joinpath(path) + return self.files.get(path, None) + + def get_dir(self, dir): + items: set[Path] = set() + dir = self.root.joinpath(dir) + for zip_dir in (Path(name) for name in self.zip.namelist()): + if not zip_dir.is_relative_to(dir): + continue + if zip_dir == dir: + continue + relative = zip_dir.relative_to(dir) + items.add(dir.joinpath(relative.parts[0])) + return list(items) + + def get_content(self, file): + info = self.get_info(file) + if info is None: + return None + bytes = self.zip.read(info) + try: + return bytes.decode() + except: + return bytes + + def get_mode(self, file): + info = self.get_info(file) + if info is None: + if file in self.dirs: + return MODE_DIR + return MODE_NONEXISTANT + + if info.create_system == ZIP_CREATE_SYSTEM_UNX: + return (info.external_attr >> 16) & 0xFFFF + + if info.is_dir(): + return MODE_DIR + + return MODE_FILE diff --git a/patchtree/header.py b/patchtree/header.py new file mode 100644 index 0000000..d797ff0 --- /dev/null +++ b/patchtree/header.py @@ -0,0 +1,89 @@ +from __future__ import annotations +from typing import TYPE_CHECKING + +from importlib import metadata +import shlex + +if TYPE_CHECKING: + from .context import Context + from .config import Config + + +class Header: + """ + Patch output header generator. + + The header is formatted as + + #. shebang + #. patchtree version info + #. extra version info (empty by default) + #. 
license (empty by default) + """ + + config: Config + context: Context + + name = "patchtree" + """Program name shown in version info.""" + + license = None + """License text (optional).""" + + def __init__(self, config: Config, context: Context): + self.config = config + self.context = context + + def write(self) -> str: + return "".join( + ( + self.write_shebang(), + self.write_version(), + self.write_version_extra(), + self.write_license(), + ) + ) + + def write_shebang(self) -> str: + """ + Write a shebang line to apply the output patch unless the ``--no-shebang`` option was passed. + """ + + if self.config.no_shebang: + return "" + + # NOTE: the GIT_DIR environment variable is set in order to allow users to apply the + # .patch file to a target tree already tracked using git (which may be under a + # different directory relative to the repository root, causing the patch to be skipped + # silently). This effectively makes git-apply always behave as if it is outside a git + # tree, while still applying changes described using the extended git diff format. + cmd = ["/usr/bin/env", "-S", "GIT_DIR="] + cmd.append(shlex.join(self.context.get_apply_cmd())) + cmdline = " ".join(cmd) + return f"#!{cmdline}\n" + + def write_version(self) -> str: + """ + Write the patchtree name and version number. + """ + + version = metadata.version("patchtree") + return f"{self.name} output (version {version})\n" + + def write_version_extra(self) -> str: + """ + Write extra version information (empty). + + This method is meant to be implemented by subclasses of Header defined in the :ref:`configuration file `. + """ + + return "" + + def write_license(self) -> str: + """ + Write a license if it is defined. 
+ """ + + if self.license is None: + return "" + return f"{self.license}\n" diff --git a/patchtree/patch.py b/patchtree/patch.py deleted file mode 100644 index 85d056d..0000000 --- a/patchtree/patch.py +++ /dev/null @@ -1,75 +0,0 @@ -from __future__ import annotations -from typing import TYPE_CHECKING - -from pathlib import Path - -from .diff import Diff, File -from .process import Process - -if TYPE_CHECKING: - from .context import Context - from .config import Config - - -class Patch: - """A single patched file.""" - - config: Config - - patch: Path - """The patchset input location.""" - - file: str - """The name of the patched file in the target.""" - - processors: list[tuple[type[Process], Process.Args]] = [] - """A list of processors to apply to the input before diffing.""" - - def __init__(self, config: Config, patch: Path): - self.patch = patch - self.config = config - - self.processors.clear() - self.file, *proc_strs = str(patch).split(config.process_delimiter) - for proc_str in proc_strs: - proc_name, *argv = proc_str.split(",") - args = Process.Args(name=proc_name, argv=argv) - proc_cls = config.processors.get(proc_name, None) - if proc_cls is None: - raise Exception(f"unknown processor: `{proc_cls}'") - for arg in argv: - key, value, *_ = (*arg.split("=", 1), None) - args.argd[key] = value - self.processors.insert(0, (proc_cls, args)) - - def write(self, context: Context) -> None: - """ - Apply all processors, compare to the target and write the delta to :any:`Context.output`. 
- """ - - if context.root is not None: - self.file = str(Path(self.file).relative_to(context.root)) - - diff = Diff(self.config, self.file) - - diff.a = File( - content=context.get_content(self.file), - mode=context.get_mode(self.file), - ) - - diff.b = File( - content=None, - mode=self.patch.stat().st_mode, - ) - b_content = self.patch.read_bytes() - try: - diff.b.content = b_content.decode() - except: - diff.b.content = b_content - - for cls, args in self.processors: - processor = cls(context, args) - diff.b = processor.transform(diff.a, diff.b) - - delta = diff.compare() - context.output.write(delta) diff --git a/patchtree/process.py b/patchtree/process.py index bd6b802..decc7d1 100644 --- a/patchtree/process.py +++ b/patchtree/process.py @@ -1,61 +1,76 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Callable +from stat import S_IFREG +from typing import TYPE_CHECKING, Any, Callable, Hashable from tempfile import mkstemp from jinja2 import Environment -from subprocess import Popen, run +from subprocess import PIPE, Popen, run from pathlib import Path -from os import fdopen, chmod, unlink -from dataclasses import dataclass, field - +from shlex import split + +from .spec import ( + DefaultInputSpec, + LiteralInputSpec, + ProcessInputSpec, + TargetFileInputSpec, +) from .diff import File if TYPE_CHECKING: - from .context import Context + from .target import Target class Process: """ - Processor base interface. + Process base interface. """ - context: Context + target: Target """Patch file context.""" - @dataclass - class Args: - """ - Processor filename arguments. - - See :ref:`processors`. 
- """ - - name: str - """The name the processor was called with.""" - argv: list[str] = field(default_factory=list) - """The arguments passed to the processor""" - argd: dict[str, str | None] = field(default_factory=dict) - """The key/value arguments passed to the processor""" - - args: Args - """Arguments passed to this processor.""" - - def __init__(self, context: Context, args: Args): - self.args = args - self.context = context + input_spec: ProcessInputSpec + """Processor ``input`` option (see :ref:`processors`)""" + target_spec: ProcessInputSpec + """Processor ``target`` option (optionally used, see :ref:`processors`)""" + + def __init__(self, target: Target, data: dict[Hashable, Any] = {}): + self.target = target + + if "input" in data: + if isinstance(data["input"], ProcessInputSpec): + self.input_spec = data["input"] + elif isinstance(data["input"], str): + self.input_spec = LiteralInputSpec(content=data["input"]) + else: + raise Exception(f"type error for key input {type(data['input'])}") + del data["input"] + + if "target" in data: + if isinstance(data["target"], ProcessInputSpec): + self.target_spec = data["target"] + elif isinstance(data["target"], str): + self.target_spec = LiteralInputSpec(content=data["target"]) + else: + raise Exception(f"type error for key target {type(data['target'])}") + del data["target"] + + assert target.file is not None + + self.input_spec = getattr(self, "input_spec", DefaultInputSpec()) + self.target_spec = getattr( + self, "target_spec", TargetFileInputSpec(path=Path(target.file)) + ) - def transform(self, a: File, b: File) -> File: + def transform(self) -> File: """ - Transform the input file. + Perform the transformation of this processor. - :param a: Content of file to patch. - :param b: Content of patch input in patch tree or output of previous processor. :returns: Processed file. """ raise NotImplementedError() -class ProcessJinja2(Process): +class Jinja2Process(Process): """ Jinja2 preprocessor. 
""" @@ -65,117 +80,137 @@ class ProcessJinja2(Process): lstrip_blocks=True, ) - def __init__(self, *args, **kwargs): - super(ProcessJinja2, self).__init__(*args, **kwargs) - - if len(self.args.argv) > 0: - raise Exception("too many arguments") - - def transform(self, a, b): + def transform(self): template_vars = self.get_template_vars() - assert b.content is not None - assert not isinstance(b.content, bytes) - b.content = self.environment.from_string(b.content).render(**template_vars) - return b + input = self.target.get_file(self.input_spec) + input.content = self.environment.from_string(input.get_str()).render(**template_vars) + + return input def get_template_vars(self) -> dict[str, Any]: """ Generate template variables. - This method returns an empty dict by default, and is meant to be implemented by subclassing the - ProcessJinja2 class. + This method returns an empty dict by default and is meant to be implemented by the user by creating a subclass and registering it through the :ref:`configuration file `. :returns: A dict of variables defined in the template. """ return {} -class ProcessCoccinelle(Process): +class CoccinelleProcess(Process): """ Coccinelle transformer. 
""" - def __init__(self, *args, **kwargs): - super(ProcessCoccinelle, self).__init__(*args, **kwargs) + def __init__(self, target, data): + if "input" not in data: + self.input_spec = TargetFileInputSpec(path=Path(target.file)) + + if "target" not in data: + self.target_spec = DefaultInputSpec() + + super().__init__(target, data) - if len(self.args.argv) > 0: - raise Exception("too many arguments") + def transform(self): + input = self.target.get_file(self.input_spec) + patch = self.target.get_file(self.target_spec) - def transform(self, a, b): - assert not isinstance(a.content, bytes) - assert not isinstance(b.content, bytes) - content_a = a.content or "" - content_b = b.content or "" + content_input = input.get_str() + content_patch = patch.get_str() - if len(content_b.strip()) == 0: - return a + # empty patch -> return input as-is (coccinelle gives errors in this case) + if len(content_patch.strip()) == 0: + return input - temp_a = Path(mkstemp()[1]) - temp_b = Path(mkstemp()[1]) - temp_sp = Path(mkstemp()[1]) + temp_input = Path(mkstemp()[1]) + temp_output = Path(mkstemp()[1]) + temp_patch = Path(mkstemp()[1]) - temp_a.write_text(content_a) - temp_sp.write_text(content_b) + temp_input.write_text(content_input) + temp_patch.write_text(content_patch) cmd = ( "spatch", "--very-quiet", "--no-show-diff", "--sp-file", - str(temp_sp), - str(temp_a), + str(temp_patch), + str(temp_input), "-o", - str(temp_b), + str(temp_output), ) coccinelle = Popen(cmd) coccinelle.wait() - b.content = temp_b.read_text() + input.content = temp_output.read_text() - temp_a.unlink() - temp_b.unlink() - temp_sp.unlink() + temp_input.unlink() + temp_output.unlink() + temp_patch.unlink() - return b + return input -class ProcessIdentity(Process): +class TouchProcess(Process): """ - Identity transformer. + Touch transformer. 
""" - def transform(self, a, b): - return File(content=a.content, mode=b.mode) + mode: int | None = None + + def transform(self): + input = self.target.get_file(self.input_spec) + input.content = input.content or "" + input.mode = self.mode or input.mode + return input + + def __init__(self, target, data): + super().__init__(target, data) + if "mode" in data: + if not isinstance(data["mode"], int): + raise TypeError("invalid type of key 'mode'") + self.mode = data["mode"] | S_IFREG + del data["mode"] -class ProcessExec(Process): + +class ExecProcess(Process): """ Executable transformer. """ - def __init__(self, *args, **kwargs): - super(ProcessExec, self).__init__(*args, **kwargs) + cmd: list[str] = [] - if len(self.args.argv) > 0: - raise Exception("too many arguments") + def __init__(self, target, data): + super().__init__(target, data) - def transform(self, a, b): - assert b.content is not None - assert not isinstance(b.content, bytes) + if "cmd" not in data: + raise Exception("missing property `cmd'") + if isinstance(data["cmd"], str): + self.cmd = split(data["cmd"]) + elif isinstance(data["cmd"], list): + self.cmd = data["cmd"] + # TODO: check if each list item is actually a string + else: + raise TypeError("invalid type of key `cmd'") + del data["cmd"] - fd, exec = mkstemp() - with fdopen(fd, "wt") as f: - f.write(b.content) - chmod(exec, 0o700) + def transform(self): + assert len(self.cmd) > 0 - proc = run((str(exec),), text=True, input=a.content, capture_output=True, check=True) - b.content = proc.stdout + input = self.target.get_file(self.input_spec) - unlink(exec) + if input.content is None: + input.content = "" + if isinstance(input.content, str): + input.content = input.content.encode() + proc = run(self.cmd, input=input.content, stdout=PIPE, check=True) + input.content = proc.stdout - return b + return input -class ProcessMerge(Process): +class MergeProcess(Process): """ Merge transformer. 
""" @@ -186,31 +221,26 @@ class ProcessMerge(Process): add_lines = set(lines_b) - set(lines_a) - b.content = "\n".join((*lines_a, *add_lines)) - - return b + return File(mode=a.mode, content="\n".join((*lines_a, *add_lines))) - strategies: dict[str, Callable[[ProcessMerge, File, File], File]] = { + strategies: dict[str, Callable[[MergeProcess, File, File], File]] = { "ignore": merge_ignore, } - strategy: Callable[[ProcessMerge, File, File], File] - - def __init__(self, *args, **kwargs): - super(ProcessMerge, self).__init__(*args, **kwargs) - - argv = self.args.argv - if len(argv) < 1: - raise Exception("not enough arguments") - - if len(argv) > 1: - raise Exception("too many arguments") + strategy: Callable[[MergeProcess, File, File], File] | None = None - strategy = argv[0] - if strategy not in self.strategies: - raise Exception(f"unknown merge strategy: `{strategy}'") + def __init__(self, target, data): + super().__init__(target, data) - self.strategy = self.strategies[strategy] + if "strategy" not in data: + raise Exception("missing property `strategy'") + if data["strategy"] not in self.strategies: + raise Exception(f"unknown strategy {repr(data['strategy'])}") + self.strategy = self.strategies[data["strategy"]] + del data["strategy"] - def transform(self, a, b): + def transform(self): + a = self.target.get_file(self.input_spec) + b = self.target.get_file(self.target_spec) + assert self.strategy is not None return self.strategy(self, a, b) diff --git a/patchtree/spec.py b/patchtree/spec.py new file mode 100644 index 0000000..6b6cc28 --- /dev/null +++ b/patchtree/spec.py @@ -0,0 +1,38 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class ProcessInputSpec: + """Processor input specification (abstract base)""" + + +@dataclass +class FileInputSpec(ProcessInputSpec): + """Processor input referencing a filename (abstract)""" + + path: Path + + +@dataclass +class TargetFileInputSpec(FileInputSpec): 
+ """Spec to use a file (referenced by name) in the target directory (concrete)""" + + +@dataclass +class PatchsetFileInputSpec(FileInputSpec): + """Spec to use a file (referenced by name) in the patchset directory (concrete)""" + + +@dataclass +class DefaultInputSpec(ProcessInputSpec): + """Spec to use the output of the previous processor as input (concrete)""" + + +@dataclass +class LiteralInputSpec(ProcessInputSpec): + """Spec to use a literal string as input (concrete)""" + + content: str diff --git a/patchtree/target.py b/patchtree/target.py new file mode 100644 index 0000000..f022a61 --- /dev/null +++ b/patchtree/target.py @@ -0,0 +1,84 @@ +from __future__ import annotations +from typing import TYPE_CHECKING, Any, Hashable + +from pathlib import Path + +from .diff import Diff, File +from .spec import ( + DefaultInputSpec, + ProcessInputSpec, + TargetFileInputSpec, +) +from .process import Process + +if TYPE_CHECKING: + from .context import Context + + +class Target: + """A single patched file, including its processors.""" + + context: Context + + file: str + """The name of the patched file in the target.""" + + patch = File() + + processors: list[Process] = [] + + inputs: list[ProcessInputSpec] = [] + + def __init__(self, context: Context, file: Path, data: dict[Hashable, Any] = {}): + self.context = context + self.file = str(file) + + if "processors" not in data: + data["processors"] = [] + if not isinstance(data["processors"], list): + raise Exception("not a list: 'processors'") + self.processors = [] + for processor_spec in data["processors"]: + if "id" not in processor_spec: + raise Exception("missing key 'id'") + id = str(processor_spec["id"]) + del processor_spec["id"] + + config = context.config + if id not in config.processors: + raise Exception(f"no processor for id {id}") + + process_cls = config.processors[id] + process = process_cls(self, processor_spec) + + self.processors.append(process) + + def get_file(self, spec: ProcessInputSpec) -> File: + 
"""Get the file contents from a :type:`ProcessInputSpec` (called by :type:`Processor`).""" + if isinstance(spec, DefaultInputSpec): + return self.patch + return self.context.get_file(spec) + + def write(self) -> str: + """ + Apply all processors, compare to the target and return the delta. + """ + self.context.log.info(f"writing patch for `{self.file}'") + + for i, processor in enumerate(self.processors): + try: + self.patch = processor.transform() + except Exception as e: + self.context.log.error( + f"while running processor {i+1} ({processor.__class__.__name__}) for `{self.file}'" + ) + raise e + + diff = Diff(self.context.config, self.file) + diff.a = self.context.get_file(TargetFileInputSpec(path=Path(self.file))) + diff.b = self.patch + + return diff.compare() + + def __repr__(self): + return f"{self.__class__.__name__}(file={self.file})" diff --git a/pyproject.toml b/pyproject.toml index 53cd20e..7a3914b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta" [project] name = "patchtree" description = "generate clean patches for external source trees" -version = "0.1.1" +version = "1.0.0" authors = [ { name="Loek Le Blansch", email="loek.le-blansch.pv@renesas.com" }, ] @@ -15,10 +15,11 @@ license = "MIT" license-files = ["license"] dependencies = [ "jinja2", + "PyYAML", ] [project.urls] -Homepage = "https://bitbucket.global.renesas.com/users/loek.le-blansch.pv_renesas.com/repos/patchtree" +Homepage = "https://github.com/renesas/patchtree" [project.scripts] patchtree = "patchtree.cli:main" -- cgit v1.2.3