API Reference

The patcher

Main patcher utility

Patcher

Patch processor, applies patches to the target binary

Parameters:

Name Type Description Default
binary Union[Path, str, bytes]

Either a path to the target binary or the raw bytes of the binary

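required
use_angr bool

Forwarded to BinaryManager; when true, nop patches use the nop instruction of the architecture reported by CLE, otherwise single-byte 0x90 nops are written

True
cle_opts Optional[Dict[str, bool]]

An optional replacement set of options to pass to cle.Loader

None
cfg_opts Optional[Dict[str, bool]]

An optional set of CFG options, forwarded to BinaryManager

None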
Source code in pypatches/patcher.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
class Patcher:
    """Patch processor, applies patches to the target binary

    Patches are dispatched on their class name: a patch whose class is named
    ``FooBarPatch`` is handled by the method ``apply_foo_bar_patch``.

    Args:
        binary: Either a path to the target binary or the raw bytes
            of the binary
        use_angr: Forwarded to BinaryManager. When true, nop patches are
            written with the nop instruction of the architecture reported by
            CLE; otherwise single-byte ``0x90`` nops are written.
        cle_opts: An optional replacement set of options to pass to
            cle.Loader
        cfg_opts: An optional set of CFG options, forwarded to BinaryManager
    """

    def __init__(
        self,
        binary: Union[Path, str, bytes],
        use_angr: bool = True,
        cle_opts: Optional[Dict[str, bool]] = None,
        cfg_opts: Optional[Dict[str, bool]] = None,
    ) -> None:
        """Set up patcher with the target binary"""
        self.binary = BinaryManager(binary, use_angr, cle_opts, cfg_opts)

    def apply(self, patch: PatchType) -> None:
        """Apply a patch to the target binary

        Args:
            patch: The patch to apply, can be any of the patch types

        Raises:
            NotImplementedError: If this class has no ``apply_<snake_case>``
                method for the patch's class, or if that method is itself
                an unimplemented stub.
        """

        def camel_to_snake(name: str) -> str:
            """
            Convert a camel case string to snake case
            """
            name = sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name)
            return sub(r"([a-z0-9])([A-Z])", r"\1_\2", name).lower()

        patch_type_name = patch.__class__.__name__
        # Single lookup: a missing handler comes back as None instead of
        # needing a separate hasattr() probe first.
        patch_dispatch_func = getattr(
            self, f"apply_{camel_to_snake(patch_type_name)}", None
        )
        if patch_dispatch_func is None:
            raise NotImplementedError(
                f"Patch type {patch_type_name} is not supported."
            )

        patch_dispatch_func(patch)

    def save(self, path: Union[str, Path]) -> None:
        """
        Save the patched binary to the given path

        The saved file's mode is set to 0o755 afterwards.

        Args:
            path: Destination of the patched binary
        """
        if isinstance(path, str):
            path = Path(path)

        self.binary.save(path)

        path.chmod(0o755)

    def apply_nop_patch(self, patch: NopPatch) -> None:
        """
        Apply a nop patch to the target binary

        Args:
            patch: The nop patch whose address ranges are overwritten
        """
        if self.binary.use_angr:
            # The nop encoding does not change while patching, so read it once.
            nop_instruction = cast(Arch, self.binary.cle_binary.arch).nop_instruction
            nop_len = len(nop_instruction)

            for address_range in patch.address_ranges:
                # max() guarantees at least one nop is written even when
                # start == end (how NopPatch encodes a single address).
                for address in range(
                    address_range.start,
                    max(address_range.end, address_range.start + nop_len),
                    nop_len,
                ):
                    self.binary.write(address, nop_instruction)
        else:
            # Without angr no architecture is consulted; 0x90 is written.
            for address_range in patch.address_ranges:
                self.binary.write(address_range.start, b"\x90" * address_range.length)

    def apply_invert_branch_patch(self, patch: InvertBranchPatch) -> None:
        """
        Apply a branch patch to the target binary

        Raises:
            NotImplementedError: Always, branch patches are not supported
        """
        raise NotImplementedError("Branch patches are not supported.")

    def apply_always_branch_patch(self, patch: AlwaysBranchPatch) -> None:
        """
        Apply a branch patch to the target binary

        Raises:
            NotImplementedError: Always, branch patches are not supported
        """
        # Previously this returned silently, so callers believed the patch had
        # been applied; fail loudly like the other branch patch handlers.
        raise NotImplementedError("Branch patches are not supported.")

    def apply_never_branch_patch(self, patch: NeverBranchPatch) -> None:
        """
        Apply a branch patch to the target binary

        Raises:
            NotImplementedError: Always, branch patches are not supported
        """
        raise NotImplementedError("Branch patches are not supported.")

    def apply_skip_and_return_patch(self, patch: SkipAndReturnPatch) -> None:
        """
        Apply a skip and return patch to the target binary

        Raises:
            NotImplementedError: Always, this patch type is not supported
        """
        raise NotImplementedError("Branch patches are not supported.")

    def apply_function_replace_patch(self, patch: FunctionReplacePatch) -> None:
        """
        Apply a function replace patch to the target binary

        Raises:
            NotImplementedError: Always, this patch type is not supported
        """
        raise NotImplementedError("Function replace patches are not supported.")

    def apply_caller_replace_patch(self, patch: CallerReplacePatch) -> None:
        """
        Apply a caller replace patch to the target binary

        Raises:
            NotImplementedError: Always, this patch type is not supported
        """
        raise NotImplementedError("Caller replace patches are not supported.")

    def apply_init_patch(self, patch: InitPatch) -> None:
        """
        Apply an init patch to the target binary

        Raises:
            NotImplementedError: Always, this patch type is not supported
        """
        raise NotImplementedError("Init patches are not supported.")

    def apply_fini_patch(self, patch: FiniPatch) -> None:
        """
        Apply a fini patch to the target binary

        Raises:
            NotImplementedError: Always, this patch type is not supported
        """
        raise NotImplementedError("Fini patches are not supported.")

    def apply_data_patch(self, patch: DataPatch) -> None:
        """
        Apply a data patch to the target binary

        Only ``patch.data`` and ``patch.label`` are handed to the binary
        manager here.
        """
        self.binary.add_data(patch.data, patch.label)

    def apply_add_code_patch(self, patch: AddCodePatch) -> None:
        """
        Apply an add code patch to the target binary
        """
        self.binary.add_code(patch.code, patch.label)

    def apply_replace_code_patch(self, patch: ReplaceCodePatch) -> None:
        """
        Apply a replace code patch to the target binary

        ``patch.address`` is passed to the binary manager's ``write`` as is.
        """
        self.binary.write(patch.address, patch.code)

__init__(binary, use_angr=True, cle_opts=None, cfg_opts=None)

Set up patcher with the target binary

Source code in pypatches/patcher.py
39
40
41
42
43
44
45
46
47
def __init__(
    self,
    binary: Union[Path, str, bytes],
    use_angr: bool = True,
    cle_opts: Optional[Dict[str, bool]] = None,
    cfg_opts: Optional[Dict[str, bool]] = None,
) -> None:
    """Set up patcher with the target binary

    All four arguments are forwarded unchanged, in this order, to
    BinaryManager; the resulting manager is stored on ``self.binary``.

    Args:
        binary: Either a path to the target binary or the raw bytes
            of the binary
        use_angr: Forwarded to BinaryManager, defaults to True
        cle_opts: An optional replacement set of options to pass to
            cle.Loader
        cfg_opts: An optional set of CFG options, forwarded to BinaryManager
    """
    self.binary = BinaryManager(binary, use_angr, cle_opts, cfg_opts)

apply(patch)

Apply a patch to the target binary

Parameters:

Name Type Description Default
patch PatchType

The patch to apply, can be any of the patch types

required
Source code in pypatches/patcher.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def apply(self, patch: PatchType) -> None:
    """Dispatch a patch to the handler method named after its class

    A patch of class ``FooBarPatch`` is handed to ``self.apply_foo_bar_patch``.

    Args:
        patch: The patch to apply, can be any of the patch types

    Raises:
        NotImplementedError: If no handler method exists for the patch's class
    """

    def camel_to_snake(name: str) -> str:
        """Turn ``CamelCase`` into ``camel_case``"""
        # First pass splits before capitalised words, second pass splits a
        # lowercase letter or digit from a following capital.
        words_split = sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
        fully_split = sub("([a-z0-9])([A-Z])", r"\1_\2", words_split)
        return fully_split.lower()

    patch_type_name = patch.__class__.__name__
    handler_name = f"apply_{camel_to_snake(patch_type_name)}"

    if hasattr(self, handler_name):
        handler = getattr(self, handler_name)
        handler(patch)
        return

    raise NotImplementedError(
        f"Patch type {patch_type_name} is not supported."
    )

apply_add_code_patch(patch)

Apply an add code patch to the target binary

Source code in pypatches/patcher.py
156
157
158
159
160
def apply_add_code_patch(self, patch: AddCodePatch) -> None:
    """
    Apply an add code patch to the target binary

    Hands the patch's code and label to the binary manager's ``add_code``.

    Args:
        patch: The patch carrying the code to add and its label
    """
    self.binary.add_code(patch.code, patch.label)

apply_always_branch_patch(patch)

Apply a branch patch to the target binary

Source code in pypatches/patcher.py
109
110
111
112
def apply_always_branch_patch(self, patch: AlwaysBranchPatch) -> None:
    """
    Apply a branch patch to the target binary

    Args:
        patch: The always-branch patch that was requested (unused)

    Raises:
        NotImplementedError: Always, branch patches are not supported
    """
    # An empty body would report success without touching the binary; fail
    # loudly instead, as the other branch patch handlers do.
    raise NotImplementedError("Branch patches are not supported.")

apply_caller_replace_patch(patch)

Apply a caller replace patch to the target binary

Source code in pypatches/patcher.py
132
133
134
135
136
def apply_caller_replace_patch(self, patch: CallerReplacePatch) -> None:
    """
    Apply a caller replace patch to the target binary

    Args:
        patch: The caller replace patch that was requested (unused)

    Raises:
        NotImplementedError: Always, this patch type is not supported
    """
    raise NotImplementedError("Caller replace patches are not supported.")

apply_data_patch(patch)

Apply a data patch to the target binary

Source code in pypatches/patcher.py
150
151
152
153
154
def apply_data_patch(self, patch: DataPatch) -> None:
    """
    Apply a data patch to the target binary

    Hands the patch's data and label to the binary manager's ``add_data``.
    NOTE(review): the patch's read/write/exec flags are not passed along
    here - confirm whether that is intended.

    Args:
        patch: The patch carrying the data to add and its label
    """
    self.binary.add_data(patch.data, patch.label)

apply_fini_patch(patch)

Apply a fini patch to the target binary

Source code in pypatches/patcher.py
144
145
146
147
148
def apply_fini_patch(self, patch: FiniPatch) -> None:
    """
    Apply a fini patch to the target binary

    Args:
        patch: The fini patch that was requested (unused)

    Raises:
        NotImplementedError: Always, this patch type is not supported
    """
    raise NotImplementedError("Fini patches are not supported.")

apply_function_replace_patch(patch)

Apply a function replace patch to the target binary

Source code in pypatches/patcher.py
126
127
128
129
130
def apply_function_replace_patch(self, patch: FunctionReplacePatch) -> None:
    """
    Apply a function replace patch to the target binary

    Args:
        patch: The function replace patch that was requested (unused)

    Raises:
        NotImplementedError: Always, this patch type is not supported
    """
    raise NotImplementedError("Function replace patches are not supported.")

apply_init_patch(patch)

Apply an init patch to the target binary

Source code in pypatches/patcher.py
138
139
140
141
142
def apply_init_patch(self, patch: InitPatch) -> None:
    """
    Apply an init patch to the target binary

    Args:
        patch: The init patch that was requested (unused)

    Raises:
        NotImplementedError: Always, this patch type is not supported
    """
    raise NotImplementedError("Init patches are not supported.")

apply_invert_branch_patch(patch)

Apply a branch patch to the target binary

Source code in pypatches/patcher.py
103
104
105
106
107
def apply_invert_branch_patch(self, patch: InvertBranchPatch) -> None:
    """
    Apply an invert branch patch to the target binary

    Args:
        patch: The invert branch patch that was requested (unused)

    Raises:
        NotImplementedError: Always, branch patches are not supported
    """
    raise NotImplementedError("Branch patches are not supported.")

apply_never_branch_patch(patch)

Apply a branch patch to the target binary

Source code in pypatches/patcher.py
114
115
116
117
118
def apply_never_branch_patch(self, patch: NeverBranchPatch) -> None:
    """
    Apply a never branch patch to the target binary

    Args:
        patch: The never branch patch that was requested (unused)

    Raises:
        NotImplementedError: Always, branch patches are not supported
    """
    raise NotImplementedError("Branch patches are not supported.")

apply_nop_patch(patch)

Apply a nop patch to the target binary

Source code in pypatches/patcher.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def apply_nop_patch(self, patch: NopPatch) -> None:
    """
    Overwrite every address range of the patch with no-operations

    With angr enabled, the nop instruction of the architecture reported by
    CLE is written repeatedly across each range (at least once per range).
    Without angr, ``0x90`` bytes are written, one per byte of range length.

    Args:
        patch: The nop patch whose address ranges are overwritten
    """
    if not self.binary.use_angr:
        for span in patch.address_ranges:
            self.binary.write(span.start, b"\x90" * span.length)
        return

    nop_len = len(cast(Arch, self.binary.cle_binary.arch).nop_instruction)

    for span in patch.address_ranges:
        # Never stop before start + nop_len, so a range whose start equals
        # its end still receives one nop.
        stop = max(span.end, span.start + nop_len)
        for address in range(span.start, stop, nop_len):
            self.binary.write(
                address, cast(Arch, self.binary.cle_binary.arch).nop_instruction
            )

apply_replace_code_patch(patch)

Apply a replace code patch to the target binary

Source code in pypatches/patcher.py
162
163
164
165
166
def apply_replace_code_patch(self, patch: ReplaceCodePatch) -> None:
    """
    Apply a replace code patch to the target binary

    Passes the patch's address and code to the binary manager's ``write``
    without any conversion.

    Args:
        patch: The patch carrying the target address and replacement code
    """
    self.binary.write(patch.address, patch.code)

apply_skip_and_return_patch(patch)

Apply a skip-and-return patch to the target binary

Source code in pypatches/patcher.py
120
121
122
123
124
def apply_skip_and_return_patch(self, patch: SkipAndReturnPatch) -> None:
    """
    Apply a skip and return patch to the target binary

    Args:
        patch: The skip and return patch that was requested (unused)

    Raises:
        NotImplementedError: Always, this patch type is not supported
    """
    raise NotImplementedError("Branch patches are not supported.")

save(path)

Save the patched binary to the given path

Source code in pypatches/patcher.py
72
73
74
75
76
77
78
79
80
81
def save(self, path: Union[str, Path]) -> None:
    """
    Write the patched binary to ``path`` and set its mode to 0o755

    Args:
        path: Destination of the patched binary; a ``str`` is converted to
            a ``Path`` first
    """
    destination = Path(path) if isinstance(path, str) else path
    self.binary.save(destination)
    destination.chmod(0o755)

Patch Types

Patch descriptors for various types of patches

Patches require various parameters to specify how to apply them, but some are common.

label, where a patch type provides it, attaches a name to the patch. Any later patch can then use that label to refer to the location of the labeled patch (this is especially useful for data patches).

AddCodePatch dataclass

Bases: CodePatch

Patch that adds some code to the binary at some labeled location

Attributes:

Name Type Description
label Optional[str]

The label to add the code at

Source code in pypatches/patches.py
187
188
189
190
191
192
193
194
195
@dataclass
class AddCodePatch(CodePatch):
    """Patch that adds some code to the binary at some labeled location

    Attributes:
        label: The label to add the code at, defaults to None (no label)
    """

    label: Optional[str] = None

AlwaysBranchPatch dataclass

Bases: BranchPatch

Patch that converts a conditional branch into an unconditional branch that always takes the "true" branch

Source code in pypatches/patches.py
63
64
65
66
67
@dataclass
class AlwaysBranchPatch(BranchPatch):
    """Patch that converts a conditional branch into an unconditional branch
    that always takes the "true" branch

    Adds no fields of its own to BranchPatch.
    """

BranchPatch dataclass

Base patch that modifies a conditional branch at a particular address

Attributes:

Name Type Description
address int

The address of the branch to modify

Source code in pypatches/patches.py
47
48
49
50
51
52
53
54
55
@dataclass
class BranchPatch:
    """Base patch that modifies a conditional branch at a particular address

    Attributes:
        address: The address of the branch to modify, required
    """

    address: int

CallerReplacePatch dataclass

Patch that will redirect all or some callers of a function elsewhere

Attributes:

Name Type Description
new_code Code

The new code to replace the function with

function_address Optional[int]

The address of the function to replace

callers Set[int]

The set of callers to redirect, optional

Source code in pypatches/patches.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@dataclass
class CallerReplacePatch:
    """Patch that will redirect all or some callers of a function elsewhere

    At least one of ``function_address`` and ``callers`` must be given.

    Attributes:
        new_code: The new code to replace the function with
        function_address: The address of the function to replace, optional
        callers: The set of callers to redirect, optional

    Raises:
        ValueError: If neither a function address nor any caller is provided
    """

    new_code: Code
    function_address: Optional[int] = None
    callers: Set[int] = field(default_factory=set)

    def __post_init__(self) -> None:
        """
        Reject a patch that names neither a function address nor any caller
        """
        if self.function_address is not None or self.callers:
            return

        raise ValueError(
            "A function address and/or set of callers must be provided."
        )

__post_init__()

Check that we either got a function address or callers, or both

Source code in pypatches/patches.py
120
121
122
123
124
125
126
127
def __post_init__(self) -> None:
    """
    Reject a patch that names neither a function address nor any caller

    Raises:
        ValueError: If the function address is None and callers is empty
    """
    if self.function_address is not None or self.callers:
        return

    raise ValueError(
        "A function address and/or set of callers must be provided."
    )

CodePatch dataclass

Base patch that holds some code

Attributes:

Name Type Description
code Code

The code to add

Source code in pypatches/patches.py
175
176
177
178
179
180
181
182
183
184
@dataclass
class CodePatch:
    """Base patch that holds some code

    Attributes:
        code: The code carried by the patch, required

    """

    code: Code

DataPatch dataclass

Patch that adds some data with some protections

Attributes:

Name Type Description
data bytes

The data to add

label Optional[str]

The label to give the data, optional

read bool

Whether the data should be readable, defaults to True

write bool

Whether the data should be writable, defaults to False

exec bool

Whether the data should be executable, defaults to False

Source code in pypatches/patches.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
@dataclass
class DataPatch:
    """Patch that adds some data with some protections

    Attributes:
        data: The data to add
        read: Whether the data should be readable, defaults to True
        write: Whether the data should be writable, defaults to False
        exec: Whether the data should be executable, defaults to False
        label: The label to give the data, optional
    """

    data: bytes
    read: bool = True
    write: bool = False
    exec: bool = False
    label: Optional[str] = None

FiniPatch dataclass

Patch that will run some code upon exit from the program

Attributes:

Name Type Description
code Code

The code to run

priority int

The priority of the code to run, optional

Source code in pypatches/patches.py
143
144
145
146
147
148
149
150
151
152
153
@dataclass
class FiniPatch:
    """Patch that will run some code upon exit from the program

    Attributes:
        code: The code to run
        priority: The priority of the code to run, optional, defaults to 0
    """

    code: Code
    priority: int = 0

FunctionReplacePatch dataclass

Patch that will replace a function's contents with new code

Attributes:

Name Type Description
function_address int

The address of the function to replace

new_code Code

The new code to replace the function with

Source code in pypatches/patches.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
@dataclass
class FunctionReplacePatch:
    """Patch that will replace a function's contents with new code

    Attributes:
        function_address: The address of the function to replace
        new_code: The new code to replace the function with
    """

    function_address: int
    new_code: Code

InitPatch dataclass

Patch that will run some code immediately on entry to the program

Attributes:

Name Type Description
code Code

The code to run

priority int

The priority of the code to run, optional

Source code in pypatches/patches.py
130
131
132
133
134
135
136
137
138
139
140
@dataclass
class InitPatch:
    """Patch that will run some code immediately on entry to the program

    Attributes:
        code: The code to run
        priority: The priority of the code to run, optional, defaults to 0
    """

    code: Code
    priority: int = 0

InvertBranchPatch dataclass

Bases: BranchPatch

Patch that inverts the true/false branch targets of a conditional branch

Source code in pypatches/patches.py
58
59
60
@dataclass
class InvertBranchPatch(BranchPatch):
    """Patch that inverts the true/false branch targets of a conditional branch

    Adds no fields of its own to BranchPatch.
    """

NeverBranchPatch dataclass

Bases: BranchPatch

Patch that converts a conditional branch into an unconditional branch that always takes the "false" branch

Source code in pypatches/patches.py
70
71
72
73
74
@dataclass
class NeverBranchPatch(BranchPatch):
    """Patch that converts a conditional branch into an unconditional branch
    that always takes the "false" branch

    Adds no fields of its own to BranchPatch.
    """

NopPatch dataclass

Patch that converts specific addresses or ranges of addresses to no-operations

Attributes:

Name Type Description
address_ranges Set[AddressRange]

The address ranges to convert to no-operations

Parameters:

Name Type Description Default
addresses InitVar[Optional[List[int]]]

Individual addresses to convert to no-operations, optional.

None
address_ranges Set[AddressRange]

The address ranges to convert to no-operations, optional.

field(default_factory=set)
Source code in pypatches/patches.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
@dataclass
class NopPatch:
    """Patch that converts specific addresses or ranges of addresses to no-operations

    Attributes:
        address_ranges: The address ranges to convert to no-operations

    Args:
        addresses: Individual addresses to convert to no-operations, optional.
            Each one is added to ``address_ranges`` as a range whose start
            and end are both that address.
        address_ranges: The address ranges to convert to no-operations, optional.

    """

    addresses: InitVar[Optional[List[int]]] = None
    address_ranges: Set[AddressRange] = field(default_factory=set)

    def __post_init__(self, addresses: Optional[List[int]]) -> None:
        """Fold individually given addresses into the set of address ranges

        Args:
            addresses: Addresses that can be provided in lieu of address ranges
        """
        if addresses is not None:
            self.address_ranges.update(
                AddressRange(start=single, end=single) for single in addresses
            )

__post_init__(addresses)

Convert optionally provided addresses to address ranges

Parameters:

Name Type Description Default
addresses Optional[List[int]]

Addresses that can be provided in lieu of address ranges

required
Source code in pypatches/patches.py
34
35
36
37
38
39
40
41
42
43
44
def __post_init__(self, addresses: Optional[List[int]]) -> None:
    """Fold individually given addresses into the set of address ranges

    Each address is added as a range whose start and end are both that
    address. Nothing happens when ``addresses`` is None.

    Args:
        addresses: Addresses that can be provided in lieu of address ranges
    """
    if addresses is not None:
        self.address_ranges.update(
            AddressRange(start=single, end=single) for single in addresses
        )

ReplaceCodePatch dataclass

Bases: CodePatch

Patch that replaces some code with some other code

Attributes:

Name Type Description
address Union[str, Callable[[TransformInfo], int], int, List[int], Callable[[TransformInfo], List[int]]]

The address to replace the code at. This "address" can either be a label, an actual address, a list of addresses, a function that takes a TransformInfo and returns an address, or a function that takes a TransformInfo and returns a list of addresses.

Source code in pypatches/patches.py
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
@dataclass
class ReplaceCodePatch(CodePatch):
    """Patch that replaces some code with some other code

    Attributes:
        address: The address to replace the code at, required. This "address" can
            either be a label, an actual address, a list of addresses, a function that
            takes a [TransformInfo][TransformInfo] and returns an address, or a function
            that takes a [TransformInfo][TransformInfo] and returns a list of addresses.

    """

    address: Union[
        str,
        Callable[[TransformInfo], int],
        int,
        List[int],
        Callable[[TransformInfo], List[int]],
    ]

RuntimeResolverPatch dataclass

Bases: CodePatch

Patch that adds a runtime resolver to the binary

Source code in pypatches/patches.py
219
220
221
@dataclass
class RuntimeResolverPatch(CodePatch):
    """Patch that adds a runtime resolver to the binary

    Adds no fields of its own to CodePatch.
    """

SkipAndReturnPatch dataclass

Patch that skips a call to a subroutine at a particular address and fakes a return value as if the called function had returned it using the default calling convention

Attributes:

Name Type Description
caller_address int

The address of the call to skip

return_value int

The value to return from the call

Source code in pypatches/patches.py
77
78
79
80
81
82
83
84
85
86
87
88
89
@dataclass
class SkipAndReturnPatch:
    """Patch that skips a call to a subroutine at a particular address and
    fakes a return value as if the called function had returned it using
    the default calling convention

    Attributes:
        caller_address: The address of the call to skip
        return_value: The value to return from the call
    """

    caller_address: int
    return_value: int

Binary Manager

Binary program wrapper providing modification and analysis via LIEF and angr

BinaryManager

Wrapper for a binary program to provide information for patching

Attributes:

Name Type Description
path Optional[Path]

The path to the binary, if it exists on disk

blob BytesIO

The binary blob of the binary

lief_binary LIEFBinary

The LIEF binary object, which provides the majority of the program information for modification

angr_project Project

The angr project, which provides the CFG and other information for analysis and deep inspection

cle_binary Backend

The CLE binary object, which provides angr's loader information

cle_opts Dict[str, bool]

The kwargs passed to cle.Loader, which can be used to override the default loader options

cfg_opts Dict[str, bool]

The kwargs passed to angr.Project.analyses.CFGFast, which can be used to override the default CFG options

writes List[WriteOperation]

A list of WriteOperations that are queued for application to the binary

code_to_add Dict[str, Code]

A dictionary of Code objects to add to the binary, keyed by the label they are associated with

alignment

The alignment to use when aligning addresses

data_to_add Dict[str, bytes]

A dictionary of data to add to the binary, keyed by the label the data is associated with

Parameters:

Name Type Description Default
binary Union[Path, str, bytes]

If binary is a Path or str we will attempt to load it from that path on disk. If it is bytes, we will load the bytes as a binary blob.

required
cle_opts Optional[Dict[str, bool]]

If provided, will override the kwargs passed to cle.Loader, defaults to sane options.

None
cfg_opts Optional[Dict[str, bool]]

If provided, will override the kwargs passed to angr.Project.analyses.CFGFast, defaults to sane options.

None
silence_angr_logs bool

If True, will silence angr's logging output, defaults to False

False
alignment int

The alignment to use when aligning addresses, defaults to 8, but will be overridden by lief's alignment if it discovers a different alignment when loading the binary

8
Source code in pypatches/binary_manager.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
class BinaryManager:
    """Wrapper for a binary program to provide information for patching

    Attributes:
        path: The path to the binary, if it exists on disk
        blob: The binary blob of the binary
        lief_binary: The LIEF binary object, which provides the majority of the program
            information for modification
        angr_project: The angr project, which provides the CFG and other information
            for analysis and deep inspection. `None` when `use_angr` is `False`
        cle_binary: The CLE binary object, which provides angr's loader information.
            `None` when `use_angr` is `False`
        use_angr: Whether angr is loaded and used for analysis
        cle_opts: The kwargs passed to `cle.Loader`, which can be used to override
            the default loader options
        cfg_opts: The kwargs passed to `angr.Project.analyses.CFGFast`, which can be
            used to override the default CFG options
        proj_opts: The kwargs passed to `angr.Project`. Built from the LIEF image
            base and `cle_opts` on the first load when left as `None`
        writes: A list of `WriteOperation`s that are queued for application to the
            binary
        code_to_add: A dictionary of `Code` objects to add to the binary, keyed by
            the label they are associated with
        alignment: The alignment to use when aligning addresses
        data_to_add: A dictionary of data to add to the binary, keyed by the label
            the data is associated with

    Args:
        binary: If `binary` is a `Path` or `str` we will attempt to load it from that
            path on disk. If it is `bytes`, we will load the bytes as a binary blob.
        use_angr: If `False`, the angr project is not loaded; assembly then goes
            through keystone (x86-64) and address translation through LIEF.
        cle_opts: If provided, will override the kwargs passed to `cle.Loader`, defaults
            to sane options.
        cfg_opts: If provided, will override the kwargs passed to
            `angr.Project.analyses.CFGFast`, defaults to sane options.
        silence_angr_logs: If `True`, will silence angr's logging output, defaults to
            `False`
        alignment: The alignment to use when aligning addresses, defaults to 8, but will
            be overridden by lief's alignment if it discovers a different alignment when
            loading the binary

    """

    path: Optional[Path] = None
    blob: BytesIO
    lief_binary: LIEFBinary
    angr_project: Optional[Project]
    cle_binary: Optional[Backend]
    cle_opts: Dict[str, bool] = {
        "auto_load_libs": False,
        "use_system_libs": False,
    }
    cfg_opts: Dict[str, bool] = {
        "normalize": True,
        "data_references": False,
        "cross_references": False,
        "skip_unmapped_addrs": True,
        "force_complete_scan": False,
    }
    # Defined here so `load_angr_project` can test it before it is first built
    proj_opts: Optional[dict] = None
    # The three queues below are created per instance in `__init__`; giving them
    # mutable class-level defaults would share queued operations between instances
    writes: List[WriteOperation]
    code_to_add: Dict[str, Code]
    alignment: int = 0x1000
    data_to_add: Dict[str, bytes]

    def __init__(
        self,
        binary: Union[Path, str, bytes],
        use_angr: bool = True,
        cle_opts: Optional[Dict[str, bool]] = None,
        cfg_opts: Optional[Dict[str, bool]] = None,
        silence_angr_logs: bool = False,
        alignment: int = 8,
    ) -> None:
        """Initialize the binary wrapper via one of several methods.

        Raises:
            TypeError: If `binary` is not a `Path`, `str`, or `bytes`
            FileNotFoundError: If `binary` names a path that is not a file
        """

        self.use_angr = use_angr

        # Fresh containers for every instance
        self.writes = []
        self.code_to_add = {}
        self.data_to_add = {}

        if isinstance(binary, Path):
            self.path = binary
        elif isinstance(binary, str):
            self.path = Path(binary)
        elif isinstance(binary, bytes):
            self.path = None
        else:
            raise TypeError(
                f"Requested binary is of type {type(binary)}"
                ", expected Path, str, or bytes."
            )

        if self.path is not None and not self.path.is_file():
            raise FileNotFoundError(
                f"Requested binary {self.path} was not found or could not be opened."
            )

        if cle_opts is not None:
            self.cle_opts = cle_opts

        if cfg_opts is not None:
            self.cfg_opts = cfg_opts

        if silence_angr_logs:
            for logger_name in ("angr", "pyvex", "cle", "archinfo", "claripy"):
                getLogger(logger_name).setLevel("ERROR")

        self.alignment = alignment

        if self.path is None:
            self.blob = BytesIO(cast(bytes, binary))
        else:
            self.blob = BytesIO(self.path.read_bytes())

        self.load_lief_binary()

        self.load_angr_project()

    def reload_blob_from_lief(self) -> None:
        """Reload the blob from the current LIEF binary

        The LIEF binary is written to a temporary file, read back into `blob`,
        and then both the LIEF binary and the angr project are reloaded from it.
        """
        tempfile = NamedTemporaryFile(delete=False)
        temppath = Path(tempfile.name)

        try:
            # Close our handle first so LIEF can write to the path
            tempfile.close()
            self.lief_binary.write(str(temppath.resolve()))
            self.blob = BytesIO(temppath.read_bytes())
        finally:
            temppath.unlink(missing_ok=True)

        self.load_lief_binary()

        self.load_angr_project()

    def load_lief_binary(self) -> None:
        """Load the binary into LIEF"""
        self.lief_binary = parse(self.blob.getbuffer())

    def load_angr_project(self) -> None:
        """Load the angr project from the binary blob

        Does nothing except clear `angr_project` and `cle_binary` when
        `use_angr` is `False`.

        Raises:
            FileNotFoundError: If angr's loader has no main object after loading
        """

        if not self.use_angr:
            self.angr_project = None
            self.cle_binary = None
            return

        if self.proj_opts is None:
            self.proj_opts = {
                "main_opts": {"base_addr": self.lief_binary.imagebase},
                "load_options": self.cle_opts,
            }
        self.angr_project = Project(self.blob, **self.proj_opts)
        self.angr_project.analyses.CFGFast(  # type: ignore
            **self.cfg_opts,
        )

        if self.angr_project.loader.main_object is None:
            raise FileNotFoundError(
                f"Requested binary {self.path} " "was not found or could not be opened."
            )

        self.cle_binary = self.angr_project.loader.main_object

    def align(
        self,
        address: int,
        alignment: Optional[int] = None,
    ) -> int:
        """Align an address to the specified alignment. If no alignment is
        provided, the default alignment will be used.

        Args:
            address: The address to align
            alignment: The alignment to use, defaults to the default alignment of the
                binary

        Returns:
            `address` rounded up to the next multiple of the alignment. The bit
            mask used here only rounds correctly for power-of-two alignments.
        """
        if alignment is None:
            alignment = self.alignment

        address = (address + (alignment - 1)) & ~(alignment - 1)

        return address

    def write(
        self,
        where: Union[
            str,
            Callable[[TransformInfo], int],
            int,
            List[int],
            Callable[[TransformInfo], List[int]],
        ],
        data: Union[bytes, Callable[[Dict[str, int]], bytes], Code],
    ) -> None:
        """Write data to the binary at the given virtual address

        The write is only queued here; it is performed by `apply`.

        Args:

            where: Either an address or a label that will resolve to an address, or
                a function that takes the transform info after segment modification and
                returns an address
            data: The bytes or a function that takes a dictionary of labels
                and addresses and returns bytes to write to the patch
        """

        operation = WriteOperation(data, where)
        logger.info(f"Queuing write operation: {operation}")
        self.writes.append(operation)

    def read(self, vaddr: int, size: int) -> bytes:
        """Read data from the binary at the given virtual address

        Args:
            vaddr: The virtual address to read from
            size: The number of bytes to read

        Raises:
            NoSectionError: If angr is in use and no section contains `vaddr`
        """
        try:
            if self.use_angr:
                offset = next(
                    filter(lambda s: s.contains_addr(vaddr), self.cle_binary.sections)
                ).addr_to_offset(vaddr)
            else:
                offset = self.lief_binary.virtual_address_to_offset(vaddr)

        except StopIteration as e:
            logger.error(f"Could not find section for address {vaddr}")
            raise NoSectionError(f"Could not find section for address {vaddr}") from e

        self.blob.seek(offset)
        return self.blob.read(size)

    def add_code(self, code: Code, label: Optional[str] = None) -> None:
        """Mark some code as being added on next save

        Args:
            code: The binary code being added
            label: The label to associate with the code for resolution of
                other patches referencing it. Defaults to `code_<n>` where `n`
                is the number of code objects already queued
        """
        if label is None:
            label = f"code_{len(self.code_to_add)}"

        logger.info(f"Queueing code addition at label {label}: {code}")

        code.label = label

        self.code_to_add[label] = code

    def add_data(self, data: bytes, label: Optional[str] = None) -> None:
        """Add some data to the binary

        Args:
            data: The data to add
            label: The label to associate with the data location. Defaults to
                `data_<n>` where `n` is the number of data blobs already queued
        """
        if label is None:
            label = f"data_{len(self.data_to_add)}"

        logger.info(f"Queueing data addition at label {label}: ({len(data)} bytes)")

        self.data_to_add[label] = data

    def apply(self) -> None:
        """Apply patches to the binary

        Lays out the queued data and code in two new LOAD segments, reloads the
        binary so the new segments are visible, then performs every queued
        write through LIEF.

        Raises:
            ValueError: If a write targets a label that is neither a queued
                data label nor a queued code label
        """
        # Offsets are filled in twice:
        # - first time fills in offset in the new section
        # - second time fills in offset from base address
        transform_info = TransformInfo(self.lief_binary, self.angr_project)

        transform_info.code_size = 0
        transform_info.data_size = 0
        transform_info.all_data = b""
        transform_info.data_offsets = {}
        transform_info.code_offsets = {}

        # Figure out how much space we need for code
        # Each code patch is dummy compiled and placed in a subsection that is aligned
        # to the alignment of the binary.
        for label, code in self.code_to_add.items():
            code.reset()
            code.dummy()
            compiled = code.compile(
                "dummy",
            )

            # TODO: There might be a smarter way but we don't really prioritize size
            aligned_size = self.align(len(compiled), self.alignment) * 2

            transform_info.code_offsets[label] = transform_info.code_size
            transform_info.code_size += aligned_size

        for label, data in self.data_to_add.items():
            # The blob starts where the previously packed data ends; its own
            # length is not its offset
            transform_info.data_offsets[label] = transform_info.data_size
            transform_info.data_size += len(data)
            transform_info.all_data += data

        # Round up code and data sizes to the next multiple of self.alignment
        transform_info.code_size = self.align(transform_info.code_size)
        transform_info.data_size = self.align(transform_info.data_size)

        # Create new sections

        if transform_info.data_size > 0:
            new_data_segment = Segment()
            new_data_segment.content = list(transform_info.all_data)
            new_data_segment.type = SEGMENT_TYPES.LOAD
            new_data_segment.alignment = self.alignment
            new_data_segment.flags = SEGMENT_FLAGS(SEGMENT_FLAGS.R | SEGMENT_FLAGS.W)
            new_data_segment = self.lief_binary.add(new_data_segment)

            transform_info.data_base = new_data_segment.virtual_address

            # Fix up offsets so they point to the actual address
            for label, addr in transform_info.data_offsets.items():
                transform_info.data_offsets[label] = (
                    cast(int, transform_info.data_base) + addr
                )

        if transform_info.code_size > 0:
            new_code_segment = Segment()
            new_code_segment.content = list(b"\x00" * transform_info.code_size)
            new_code_segment.type = SEGMENT_TYPES.LOAD
            new_code_segment.alignment = self.alignment
            new_code_segment.flags = SEGMENT_FLAGS(SEGMENT_FLAGS.X | SEGMENT_FLAGS.R)
            new_code_segment = self.lief_binary.add(new_code_segment)

            transform_info.code_base = new_code_segment.virtual_address

            # Fix up offsets so they point to the actual address
            for label, addr in transform_info.code_offsets.items():
                transform_info.code_offsets[label] = (
                    cast(int, transform_info.code_base) + addr
                )

            # Queue writes for code
            for label, code in self.code_to_add.items():
                self.writes.append(WriteOperation(code, label))

        # Reload the angr project to get the new sections
        self.reload_blob_from_lief()
        transform_info.lief_binary = self.lief_binary
        transform_info.angr_project = self.angr_project

        for write in self.writes:
            offsets: List[int] = []

            if isinstance(write.where, str):
                # Data labels take precedence over code labels
                if write.where in transform_info.data_offsets:
                    offsets.append(transform_info.data_offsets[write.where])
                elif write.where in transform_info.code_offsets:
                    offsets.append(transform_info.code_offsets[write.where])
                else:
                    raise ValueError(
                        f"Cannot resolve write target: unknown label {write.where!r}"
                    )

            elif isinstance(write.where, int):
                offsets.append(write.where)

            elif isinstance(write.where, list):
                offsets.extend(write.where)
            else:
                new_offsets = write.where(transform_info)

                # The callable may return one address or a list of them
                if isinstance(new_offsets, list):
                    offsets.extend(new_offsets)
                else:
                    offsets.append(new_offsets)

            for offset in offsets:
                logger.debug(f"Writing {write.data} to {offset:#0x}")
                transform_info.current_offset = offset

                if isinstance(write.data, bytes):
                    data = write.data

                elif isinstance(write.data, Code):

                    write.data.reset()
                    write.data.build(transform_info)
                    data = write.data.compile(
                        cast(str, write.data.label), transform_info
                    )

                    if self.use_angr:
                        disassembly = self.angr_project.arch.disasm(data, offset)
                    else:
                        logger.debug(f"Disassembly for {data} @ {offset}")
                        disassembler = Cs(CS_ARCH_X86, CS_MODE_64)
                        disassembler.detail = True
                        disassembly = "\n".join(
                            map(
                                lambda i: f"{i.address:#x}: {i.mnemonic} {i.op_str}",
                                disassembler.disasm(data, offset),
                            )
                        )

                    logger.debug(f"Disassembly of data for label {write.data.label}:")

                    for disas_line in disassembly.splitlines():
                        logger.debug(f"  {disas_line}")
                else:
                    data = write.data(transform_info.code_offsets)

                logger.info(f"Writing {len(data)} bytes to {offset:#0x}")
                self.lief_binary.patch_address(offset, list(data))

    def save(self, where: Path) -> None:
        """Apply any pending operations and save the binary to a file

        Args:
            where: The path to the destination to save the binary
        """
        logger.info("Applying pending operations")
        self.apply()

        logger.info(f"Saving binary to {where}")
        self.lief_binary.write(str(where.resolve()))

    def asm(self, asm: str, vaddr: int) -> bytes:
        """Assemble the given assembly code at the given virtual address

        Args:
            asm: The assembly code to assemble
            vaddr: The virtual address to assemble at

        Returns:
            The assembled machine code
        """
        if self.use_angr:
            return self.cle_binary.arch.asm(asm, vaddr, as_bytes=True)  # type: ignore

        assembler = Ks(KS_ARCH_X86, KS_MODE_64)
        # keystone hands back an (encoding, statement_count) pair; only the
        # encoding is machine code, and it is None when nothing was assembled
        encoding, _ = assembler.asm(asm, vaddr, as_bytes=True)
        return encoding or b""  # type: ignore

    def disasm(self, vaddr: int) -> Optional[CsInsn]:
        """Disassemble one instruction from the binary at an address

        Args:
            vaddr: The virtual address to disassemble at

        Returns:
            The instruction starting at `vaddr`, or `None` (after logging a
            warning) when angr is not in use

        Raises:
            ValueError: If no instruction starts at `vaddr`
        """

        if not self.use_angr:
            logger.warning("Disassembly is not supported without angr")
            return None

        # Look the block up on this manager's own project
        node = self.angr_project.kb.cfgs["CFGFast"].get_any_node(vaddr, anyaddr=True)

        if node is not None and node.block is not None:
            for instr in node.block.capstone.insns:
                if instr.address == vaddr:
                    return instr

        raise ValueError(f"Could not find instruction at {vaddr:#0x}")

__init__(binary, use_angr=True, cle_opts=None, cfg_opts=None, silence_angr_logs=False, alignment=8)

Initialize the binary wrapper via one of several methods.

Source code in pypatches/binary_manager.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def __init__(
    self,
    binary: Union[Path, str, bytes],
    use_angr: bool = True,
    cle_opts: Optional[Dict[str, bool]] = None,
    cfg_opts: Optional[Dict[str, bool]] = None,
    silence_angr_logs: bool = False,
    alignment: int = 8,
) -> None:
    """Initialize the binary wrapper via one of several methods.

    Args:
        binary: A `Path` or `str` naming a file on disk, or the raw `bytes`
            of the binary
        use_angr: Stored on the instance; read by `load_angr_project`
        cle_opts: Replacement for the default `cle_opts`, if given
        cfg_opts: Replacement for the default `cfg_opts`, if given
        silence_angr_logs: If `True`, raise the level of the angr-related
            loggers to ERROR
        alignment: The default alignment stored on the instance

    Raises:
        TypeError: If `binary` is not a `Path`, `str`, or `bytes`
        FileNotFoundError: If `binary` names a path that is not a file
    """

    self.use_angr = use_angr

    # Give every instance its own queues; relying on class-level containers
    # would share queued operations between instances
    self.writes = []
    self.code_to_add = {}
    self.data_to_add = {}

    if isinstance(binary, Path):
        self.path = binary
    elif isinstance(binary, str):
        self.path = Path(binary)
    elif isinstance(binary, bytes):
        self.path = None
    else:
        raise TypeError(
            f"Requested binary is of type {type(binary)}"
            ", expected Path, str, or bytes."
        )

    if self.path is not None and not self.path.is_file():
        raise FileNotFoundError(
            f"Requested binary {self.path} was not found or could not be opened."
        )

    if cle_opts is not None:
        self.cle_opts = cle_opts

    if cfg_opts is not None:
        self.cfg_opts = cfg_opts

    if silence_angr_logs:
        for logger_name in ("angr", "pyvex", "cle", "archinfo", "claripy"):
            getLogger(logger_name).setLevel("ERROR")

    self.alignment = alignment

    if self.path is None:
        self.blob = BytesIO(cast(bytes, binary))
    else:
        self.blob = BytesIO(self.path.read_bytes())

    self.load_lief_binary()

    self.load_angr_project()

add_code(code, label=None)

Mark some code as being added on next save

Parameters:

Name Type Description Default
code Code

The binary code being added

required
label Optional[str]

The label to associate with the code for resolution of other patches referencing it

None
Source code in pypatches/binary_manager.py
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
def add_code(self, code: Code, label: Optional[str] = None) -> None:
    """Queue a code object so it is added to the binary on the next save

    Args:
        code: The binary code being added
        label: The name other patches use to refer to this code. When
            omitted, `code_<n>` is used, where `n` is the number of code
            objects already queued
    """
    label = f"code_{len(self.code_to_add)}" if label is None else label

    logger.info(f"Queueing code addition at label {label}: {code}")

    # The code object remembers its own label, and is stored under it
    code.label = label
    self.code_to_add[label] = code

add_data(data, label=None)

Add some data to the binary

Parameters:

Name Type Description Default
data bytes

The data to add

required
label Optional[str]

The label to associate with the data location

None
Source code in pypatches/binary_manager.py
275
276
277
278
279
280
281
282
283
284
285
286
287
def add_data(self, data: bytes, label: Optional[str] = None) -> None:
    """Queue a blob of data to be added to the binary

    Args:
        data: The data to add
        label: The name associated with the data location. When omitted,
            `data_<n>` is used, where `n` is the number of blobs already
            queued
    """
    label = f"data_{len(self.data_to_add)}" if label is None else label

    logger.info(f"Queueing data addition at label {label}: ({len(data)} bytes)")

    self.data_to_add[label] = data

align(address, alignment=None)

Align an address to the specified alignment. If no alignment is provided, the default alignment will be used.

Parameters:

Name Type Description Default
address int

The address to align

required
alignment Optional[int]

The alignment to use, defaults to the default alignment of the binary

None
Source code in pypatches/binary_manager.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
def align(
    self,
    address: int,
    alignment: Optional[int] = None,
) -> int:
    """Round an address up to a multiple of an alignment

    Args:
        address: The address to align
        alignment: The alignment to use; when `None`, the instance's
            `alignment` attribute is used

    Returns:
        The aligned address, computed with a bit mask of `alignment - 1`
    """
    boundary = self.alignment if alignment is None else alignment
    mask = boundary - 1

    return (address + mask) & ~mask

apply()

Apply patches to the binary

Source code in pypatches/binary_manager.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
def apply(self) -> None:
    """Apply patches to the binary

    Lays out the queued data and code in two new LOAD segments, reloads the
    binary so the new segments are visible, then performs every queued
    write through LIEF.

    Raises:
        ValueError: If a write targets a label that is neither a queued
            data label nor a queued code label
    """
    # Offsets are filled in twice:
    # - first time fills in offset in the new section
    # - second time fills in offset from base address
    transform_info = TransformInfo(self.lief_binary, self.angr_project)

    transform_info.code_size = 0
    transform_info.data_size = 0
    transform_info.all_data = b""
    transform_info.data_offsets = {}
    transform_info.code_offsets = {}

    # Figure out how much space we need for code
    # Each code patch is dummy compiled and placed in a subsection that is aligned
    # to the alignment of the binary.
    for label, code in self.code_to_add.items():
        code.reset()
        code.dummy()
        compiled = code.compile(
            "dummy",
        )

        # TODO: There might be a smarter way but we don't really prioritize size
        aligned_size = self.align(len(compiled), self.alignment) * 2

        transform_info.code_offsets[label] = transform_info.code_size
        transform_info.code_size += aligned_size

    for label, data in self.data_to_add.items():
        # The blob starts where the previously packed data ends; its own
        # length is not its offset
        transform_info.data_offsets[label] = transform_info.data_size
        transform_info.data_size += len(data)
        transform_info.all_data += data

    # Round up code and data sizes to the next multiple of self.alignment
    transform_info.code_size = self.align(transform_info.code_size)
    transform_info.data_size = self.align(transform_info.data_size)

    # Create new sections

    if transform_info.data_size > 0:
        new_data_segment = Segment()
        new_data_segment.content = list(transform_info.all_data)
        new_data_segment.type = SEGMENT_TYPES.LOAD
        new_data_segment.alignment = self.alignment
        new_data_segment.flags = SEGMENT_FLAGS(SEGMENT_FLAGS.R | SEGMENT_FLAGS.W)
        new_data_segment = self.lief_binary.add(new_data_segment)

        transform_info.data_base = new_data_segment.virtual_address

        # Fix up offsets so they point to the actual address
        for label, addr in transform_info.data_offsets.items():
            transform_info.data_offsets[label] = (
                cast(int, transform_info.data_base) + addr
            )

    if transform_info.code_size > 0:
        new_code_segment = Segment()
        new_code_segment.content = list(b"\x00" * transform_info.code_size)
        new_code_segment.type = SEGMENT_TYPES.LOAD
        new_code_segment.alignment = self.alignment
        new_code_segment.flags = SEGMENT_FLAGS(SEGMENT_FLAGS.X | SEGMENT_FLAGS.R)
        new_code_segment = self.lief_binary.add(new_code_segment)

        transform_info.code_base = new_code_segment.virtual_address

        # Fix up offsets so they point to the actual address
        for label, addr in transform_info.code_offsets.items():
            transform_info.code_offsets[label] = (
                cast(int, transform_info.code_base) + addr
            )

        # Queue writes for code
        for label, code in self.code_to_add.items():
            self.writes.append(WriteOperation(code, label))

    # Reload the angr project to get the new sections
    self.reload_blob_from_lief()
    transform_info.lief_binary = self.lief_binary
    transform_info.angr_project = self.angr_project

    for write in self.writes:
        offsets: List[int] = []

        if isinstance(write.where, str):
            # Data labels take precedence over code labels
            if write.where in transform_info.data_offsets:
                offsets.append(transform_info.data_offsets[write.where])
            elif write.where in transform_info.code_offsets:
                offsets.append(transform_info.code_offsets[write.where])
            else:
                raise ValueError(
                    f"Cannot resolve write target: unknown label {write.where!r}"
                )

        elif isinstance(write.where, int):
            offsets.append(write.where)

        elif isinstance(write.where, list):
            offsets.extend(write.where)
        else:
            new_offsets = write.where(transform_info)

            # The callable may return one address or a list of them
            if isinstance(new_offsets, list):
                offsets.extend(new_offsets)
            else:
                offsets.append(new_offsets)

        for offset in offsets:
            logger.debug(f"Writing {write.data} to {offset:#0x}")
            transform_info.current_offset = offset

            if isinstance(write.data, bytes):
                data = write.data

            elif isinstance(write.data, Code):

                write.data.reset()
                write.data.build(transform_info)
                data = write.data.compile(
                    cast(str, write.data.label), transform_info
                )

                if self.use_angr:
                    disassembly = self.angr_project.arch.disasm(data, offset)
                else:
                    logger.debug(f"Disassembly for {data} @ {offset}")
                    disassembler = Cs(CS_ARCH_X86, CS_MODE_64)
                    disassembler.detail = True
                    disassembly = "\n".join(
                        map(
                            lambda i: f"{i.address:#x}: {i.mnemonic} {i.op_str}",
                            disassembler.disasm(data, offset),
                        )
                    )

                logger.debug(f"Disassembly of data for label {write.data.label}:")

                for disas_line in disassembly.splitlines():
                    logger.debug(f"  {disas_line}")
            else:
                data = write.data(transform_info.code_offsets)

            logger.info(f"Writing {len(data)} bytes to {offset:#0x}")
            self.lief_binary.patch_address(offset, list(data))

asm(asm, vaddr)

Assemble the given assembly code at the given virtual address

Parameters:

Name Type Description Default
asm str

The assembly code to assemble

required
vaddr int

The virtual address to assemble at

required
Source code in pypatches/binary_manager.py
448
449
450
451
452
453
454
455
456
457
458
459
def asm(self, asm: str, vaddr: int) -> bytes:
    """Assemble the given assembly code at the given virtual address

    Uses the architecture of the loaded CLE binary when angr is in use,
    otherwise a keystone x86-64 assembler.

    Args:
        asm: The assembly code to assemble
        vaddr: The virtual address to assemble at

    Returns:
        The assembled machine code
    """
    if self.use_angr:
        return self.cle_binary.arch.asm(asm, vaddr, as_bytes=True)  # type: ignore

    assembler = Ks(KS_ARCH_X86, KS_MODE_64)
    # keystone hands back an (encoding, statement_count) pair; only the
    # encoding is machine code, and it is None when nothing was assembled
    encoding, _ = assembler.asm(asm, vaddr, as_bytes=True)
    return encoding or b""  # type: ignore

disasm(vaddr)

Disassemble one instruction from the binary at an address

Parameters:

Name Type Description Default
vaddr int

The virtual address to disassemble at

required
Source code in pypatches/binary_manager.py
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
def disasm(self, vaddr: int) -> Optional[CsInsn]:
    """Disassemble one instruction from the binary at an address

    Args:
        vaddr: The virtual address to disassemble at

    Returns:
        The instruction starting at `vaddr`, or `None` (after logging a
        warning) when angr is not in use

    Raises:
        ValueError: If no instruction starts at `vaddr`
    """

    if not self.use_angr:
        logger.warning("Disassembly is not supported without angr")
        return None

    # The project lives directly on this object; there is no `self.binary`
    node = self.angr_project.kb.cfgs["CFGFast"].get_any_node(vaddr, anyaddr=True)

    if node is not None and node.block is not None:
        for instr in node.block.capstone.insns:
            if instr.address == vaddr:
                return instr

    raise ValueError(f"Could not find instruction at {vaddr:#0x}")

load_angr_project()

Load the angr project from the binary blob

Source code in pypatches/binary_manager.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
def load_angr_project(self) -> None:
    """Load the angr project from the binary blob

    When `use_angr` is `False`, `angr_project` and `cle_binary` are set to
    `None` and nothing is loaded. Otherwise the project is created from
    `blob`, a `CFGFast` analysis is run with `cfg_opts`, and the loader's
    main object is stored as `cle_binary`.

    Raises:
        FileNotFoundError: If angr's loader has no main object after loading
    """

    if not self.use_angr:
        self.angr_project = None
        self.cle_binary = None
        return

    # Fall back to None when the attribute was never set, so the first load
    # builds the defaults instead of raising AttributeError
    if getattr(self, "proj_opts", None) is None:
        self.proj_opts = {
            "main_opts": {"base_addr": self.lief_binary.imagebase},
            "load_options": self.cle_opts,
        }
    self.angr_project = Project(self.blob, **self.proj_opts)
    self.angr_project.analyses.CFGFast(  # type: ignore
        **self.cfg_opts,
    )

    main_object = self.angr_project.loader.main_object

    if main_object is None:
        raise FileNotFoundError(
            f"Requested binary {self.path} " "was not found or could not be opened."
        )

    self.cle_binary = main_object

load_lief_binary()

Load the binary into LIEF

Source code in pypatches/binary_manager.py
161
162
163
def load_lief_binary(self) -> None:
    """Parse the current blob with LIEF and store the result as `lief_binary`"""
    raw_view = self.blob.getbuffer()
    self.lief_binary = parse(raw_view)

read(vaddr, size)

Read data from the binary at the given virtual address

Parameters:

Name Type Description Default
vaddr int

The virtual address to read from

required
size int

The number of bytes to read

required
Source code in pypatches/binary_manager.py
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
def read(self, vaddr: int, size: int) -> bytes:
    """Return bytes of the blob that back a virtual address

    The address is translated to a file offset through the CLE section that
    contains it when angr is in use, and through LIEF otherwise.

    Args:
        vaddr: The virtual address to read from
        size: The number of bytes to read

    Raises:
        NoSectionError: If the translation raises `StopIteration`, which
            happens when no CLE section contains `vaddr`
    """
    try:
        if self.use_angr:
            section = next(
                s for s in self.cle_binary.sections if s.contains_addr(vaddr)
            )
            offset = section.addr_to_offset(vaddr)
        else:
            offset = self.lief_binary.virtual_address_to_offset(vaddr)
    except StopIteration as e:
        logger.error(f"Could not find section for address {vaddr}")
        raise NoSectionError(f"Could not find section for address {vaddr}") from e

    stream = self.blob
    stream.seek(offset)
    return stream.read(size)

reload_blob_from_lief()

Reload the blob from the current LIEF binary

Source code in pypatches/binary_manager.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
def reload_blob_from_lief(self) -> None:
    """Rebuild `blob` from the current LIEF binary and reload everything

    The LIEF binary is written to a temporary file, that file is read back
    into `blob`, and the file is removed again whether or not that worked.
    Afterwards the LIEF binary and the angr project are reloaded.
    """
    handle = NamedTemporaryFile(delete=False)
    scratch = Path(handle.name)

    try:
        # Release our own handle before LIEF writes to the path
        handle.close()
        self.lief_binary.write(str(scratch.resolve()))
        self.blob = BytesIO(scratch.read_bytes())
    finally:
        scratch.unlink(missing_ok=True)

    self.load_lief_binary()
    self.load_angr_project()

save(where)

Apply any pending operations and save the binary to a file

Parameters:

Name Type Description Default
where Path

The path to the destination to save the binary

required
Source code in pypatches/binary_manager.py
436
437
438
439
440
441
442
443
444
445
446
def save(self, where: Path) -> None:
    """Flush pending operations, then write the binary out to `where`

    `self.apply()` is run first so that every queued operation is applied,
    after which the LIEF binary is written to the resolved form of `where`.

    Args:
        where: The path to the destination to save the binary
    """
    logger.info("Applying pending operations")
    self.apply()

    logger.info(f"Saving binary to {where}")
    # LIEF is handed a plain string, so resolve the path and stringify it.
    destination = str(where.resolve())
    self.lief_binary.write(destination)

write(where, data)

Write data to the binary at the given virtual address

Parameters:

Name Type Description Default
where Union[str, Callable[[TransformInfo], int], int, List[int], Callable[[TransformInfo], List[int]]]

Either an address or a label that will resolve to an address, or a function that takes the transform info after segment modification and returns an address

required
data Union[bytes, Callable[[Dict[str, int]], bytes], Code]

The bytes or a function that takes a dictionary of labels and addresses and returns bytes to write to the patch

required
Source code in pypatches/binary_manager.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def write(
    self,
    where: Union[
        str,
        Callable[[TransformInfo], int],
        int,
        List[int],
        Callable[[TransformInfo], List[int]],
    ],
    data: Union[bytes, Callable[[Dict[str, int]], bytes], Code],
) -> None:
    """Queue a write of `data` to the binary at `where`

    Nothing is written by this call itself: the request is wrapped in a
    `WriteOperation`, logged, and appended to `self.writes`.

    Args:
        where: Either an address or a label that will resolve to an address, or
            a function that takes the transform info after segment modification
            and returns an address
        data: The bytes or a function that takes a dictionary of labels
            and addresses and returns bytes to write to the patch
    """
    queued = WriteOperation(data, where)
    logger.info(f"Queuing write operation: {queued}")
    self.writes.append(queued)

Patch Code Types

Superclass for all code types

Code

Bases: ABC

Superclass for other code types defining required functions

Code objects are used to represent code that can be compiled or assembled into machine code. The code object is used to represent the code in a way that can be modified by the patching process, and then compiled or assembled into machine code for insertion into the binary.

Generally, each code object goes through the following life cycle:

- dummy is called to generate a dummy version of the code that can be used to estimate the size of the code in the binary.
- build is called to generate the final version of the code that will be compiled or assembled.
- compile is called to compile or assemble the code into machine code bytes.
- post_build is called to modify the compiled code after compilation (for example, to add a jump to the end of the code).

Attributes:

Name Type Description
code str

The code to be compiled or assembled

original_code str

The original code, before any modifications

dummy_transformer Callable[[str], str]

A function that takes this container's code and returns a valid compile/assemble-able code (see self.dummy)

build_transformer Callable[[TransformInfo, str], str]

A function that takes a TransformInfo object and this container's code and returns valid and final compile/assemble-able code.

post_transformer Callable[[TransformInfo, bytes], bytes]

A function that takes a TransformInfo object and the result of compiling this container's code to machine code bytes and returns a valid and final machine code byte string.

Parameters:

Name Type Description Default
code str

The code, to initialize with

required
dummy_transformer Callable[[str], str]

A function that takes this container's code and returns a valid compile/assemble-able code (see self.dummy), optional. Defaults to a function that returns the code unchanged.

None
build_transformer Callable[[TransformInfo, str], str]

A function that takes a TransformInfo object and this container's code and returns valid and final compile/assemble-able code, optional. Defaults to a function that returns the code unchanged.

None
post_transformer Callable[[TransformInfo, bytes], bytes]

A function that takes a TransformInfo object and the result of compiling this container's code to machine code bytes and returns a valid and final machine code byte string, optional. Defaults to a function that returns the compiled code unchanged.

None
Source code in pypatches/code/code.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
class Code(ABC):
    """Base class for every code container, defining the shared interface

    A code object holds source (or raw bytes) that the patching process may
    rewrite before it is compiled or assembled into the machine code that gets
    inserted into the binary.

    The usual life cycle of a code object is:

    - [dummy][pypatches.code.code.Code.dummy] produces a stand-in version of the
        code that is only used to estimate how much room the code needs in the
        binary.
    - [build][pypatches.code.code.Code.build] produces the final version of the
        code that will be compiled or assembled.
    - [compile][pypatches.code.code.Code.compile] compiles or assembles the code
        into machine code bytes.
    - [post_build][pypatches.code.code.Code.post_build] adjusts the compiled
        code afterwards (for example, to add a jump to the end of the code).

    Attributes:
        code (Union[str, bytes]): The code to be compiled or assembled
        original_code (Union[str, bytes]): The code as it was passed to the
            constructor, before any modifications
        label (Optional[str]): Class-level default of `None`; nothing in this
            class assigns it
        dummy_transformer (Callable[[Union[str, bytes]], Union[str, bytes]]):
            Takes this container's code and returns valid
            compile/assemble-able code (see `self.dummy`)
        build_transformer (Callable[[TransformInfo, Union[str, bytes]], Union[str, bytes]]):
            Takes a `TransformInfo` object and this container's code and returns
            valid and final compile/assemble-able code.
        post_transformer (Callable[[TransformInfo, bytes], bytes]): Takes a
            `TransformInfo` object and the result of compiling this container's
            code to machine code bytes and returns a valid and final machine
            code byte string.

    Args:
        code (Union[str, bytes]): The code to initialize with
        dummy_transformer (Callable[[Union[str, bytes]], Union[str, bytes]]):
            Optional replacement for the dummy step. Defaults to
            `default_dummy_transformer`, described as returning the code
            unchanged.
        build_transformer (Callable[[TransformInfo, Union[str, bytes]], Union[str, bytes]]):
            Optional replacement for the build step. Defaults to
            `default_build_transformer`, described as returning the code
            unchanged.
        post_transformer (Callable[[TransformInfo, bytes], bytes]): Optional
            replacement for the post-build step. Defaults to
            `default_post_transformer`, described as returning the compiled
            code unchanged.
    """

    code: Union[str, bytes]
    original_code: Union[str, bytes]
    label: Optional[str] = None

    def __init__(
        self,
        code: Union[str, bytes],
        dummy_transformer: Optional[
            Callable[[Union[str, bytes]], Union[str, bytes]]
        ] = None,
        build_transformer: Optional[
            Callable[[TransformInfo, Union[str, bytes]], Union[str, bytes]]
        ] = None,
        post_transformer: Optional[Callable[[TransformInfo, bytes], bytes]] = None,
    ) -> None:
        """Store the code and select the transformers to use"""
        self.code = code
        # Remember the untouched input so reset() can restore it later.
        self.original_code = code

        # Any transformer left as None falls back to the module default.
        if dummy_transformer is None:
            dummy_transformer = default_dummy_transformer
        if build_transformer is None:
            build_transformer = default_build_transformer
        if post_transformer is None:
            post_transformer = default_post_transformer

        self.dummy_transformer = dummy_transformer
        self.build_transformer = build_transformer
        self.post_transformer = post_transformer

    def reset(self) -> None:
        """Discard all modifications and restore the original code"""
        self.code = self.original_code

    def dummy(self) -> None:
        """Replace the code with a valid stand-in used only for size estimation

        The stand-in must be valid code (C code has to compile, assembly has to
        assemble), but it does not have to be the code that ends up in the
        final patch.

        For example, a code snippet:

        ```
        int get_data(void) {
            return {DATA_VALUE};
        }
        ```

        might have a "dummy" result of:

        ```
        int get_data(void) {
            return 0x41414141;
        }
        ```

        The dummy code is never used to produce a binary, only to estimate the
        size required for segment insertion. The result of the dummy
        transformer is stored back into `self.code`.
        """
        placeholder = self.dummy_transformer(self.code)
        self.code = placeholder

    def build(self, info: TransformInfo) -> None:
        """Replace the code with its final, context-aware version

        The build transformer is called with `info` and the current code, and
        its result is stored back into `self.code`.

        Args:
            info: The `TransformInfo` object containing information about the
                current patching context.
        """
        finalized = self.build_transformer(info, self.code)
        self.code = finalized

    def post_build(self, info: TransformInfo) -> bytes:
        """Run the post transformer over the current code and return its result

        Args:
            info: The `TransformInfo` object containing information about the
                current patching context.

        Returns:
            Whatever the post transformer returns for `info` and `self.code`
        """
        current = self.code
        return self.post_transformer(info, current)

    def compile(self, label: str, info: Optional[TransformInfo] = None) -> bytes:
        """Compile or assemble the code (or do not change it if raw)

        Args:
            label: The label to use for this code
            info: The `TransformInfo` object to use for this code

        Raises:
            NotImplementedError: Always; subclasses provide the implementation
        """
        raise NotImplementedError("Subclasses must implement compile()")

__init__(code, dummy_transformer=None, build_transformer=None, post_transformer=None)

Initialize the Code object

Source code in pypatches/code/code.py
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def __init__(
    self,
    code: Union[str, bytes],
    dummy_transformer: Optional[
        Callable[[Union[str, bytes]], Union[str, bytes]]
    ] = None,
    build_transformer: Optional[
        Callable[[TransformInfo, Union[str, bytes]], Union[str, bytes]]
    ] = None,
    post_transformer: Optional[Callable[[TransformInfo, bytes], bytes]] = None,
) -> None:
    """Store the code and select the transformers to use

    Args:
        code: The code to initialize with; also kept as `original_code`
        dummy_transformer: Optional dummy-step transformer; falls back to
            `default_dummy_transformer` when `None`
        build_transformer: Optional build-step transformer; falls back to
            `default_build_transformer` when `None`
        post_transformer: Optional post-build transformer; falls back to
            `default_post_transformer` when `None`
    """
    self.code = code
    # Remember the untouched input so it can be restored later.
    self.original_code = code

    if dummy_transformer is None:
        dummy_transformer = default_dummy_transformer
    if build_transformer is None:
        build_transformer = default_build_transformer
    if post_transformer is None:
        post_transformer = default_post_transformer

    self.dummy_transformer = dummy_transformer
    self.build_transformer = build_transformer
    self.post_transformer = post_transformer

build(info)

Build the code into the final version that can be built with the binary context into the final patch.

Parameters:

Name Type Description Default
info TransformInfo

The TransformInfo object containing information about the current patching context.

required
Source code in pypatches/code/code.py
137
138
139
140
141
142
143
144
145
def build(self, info: TransformInfo) -> None:
    """Replace the code with its final, context-aware version

    The build transformer is called with `info` and the current code, and its
    result is stored back into `self.code`.

    Args:
        info: The `TransformInfo` object containing information about the current
            patching context.
    """
    finalized = self.build_transformer(info, self.code)
    self.code = finalized

compile(label, info=None)

Compile or assemble the code (or do not change it if raw)

Parameters:

Name Type Description Default
label str

The label to use for this code

required
info Optional[TransformInfo]

The TransformInfo object to use for this code

None
Source code in pypatches/code/code.py
157
158
159
160
161
162
163
164
def compile(self, label: str, info: Optional[TransformInfo] = None) -> bytes:
    """Compile or assemble the code (or do not change it if raw)

    This base implementation is only a placeholder for subclasses to override.

    Args:
        label: The label to use for this code
        info: The `TransformInfo` object to use for this code

    Raises:
        NotImplementedError: Always
    """
    raise NotImplementedError("Subclasses must implement compile()")

dummy()

Generate valid dummy code to determine the size required to fit this code in the binary.

This function will be called to create a dummy result from the code -- the dummy result should be valid code (ie if this Code instance is C code it should compile, and if it is assembly it should assemble) but is not necessarily the correct code to go into the final patch.

For example, a code snippet:

int get_data(void) {
    return {DATA_VALUE};
}

might have a "dummy" result of:

int get_data(void) {
    return 0x41414141;
}

The code returned by this function will never be used to produce a binary, only to estimate the required size for segment insertion.

Source code in pypatches/code/code.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def dummy(self) -> None:
    """Replace the code with a valid stand-in used only for size estimation

    The stand-in must be valid code (C code has to compile, assembly has to
    assemble), but it does not have to be the code that ends up in the final
    patch.

    For example, a code snippet:

    ```
    int get_data(void) {
        return {DATA_VALUE};
    }
    ```

    might have a "dummy" result of:

    ```
    int get_data(void) {
        return 0x41414141;
    }
    ```

    The dummy code is never used to produce a binary, only to estimate the
    size required for segment insertion. The result of the dummy transformer
    is stored back into `self.code`.
    """
    placeholder = self.dummy_transformer(self.code)
    self.code = placeholder

post_build(info)

Modify the built code after compilation

Parameters:

Name Type Description Default
info TransformInfo

The TransformInfo object containing information about the current patching context.

required
Source code in pypatches/code/code.py
147
148
149
150
151
152
153
154
155
def post_build(self, info: TransformInfo) -> bytes:
    """Run the post transformer over the current code and return its result

    Args:
        info: The `TransformInfo` object containing information about the current
            patching context.

    Returns:
        Whatever the post transformer returns for `info` and `self.code`
    """
    current = self.code
    return self.post_transformer(info, current)

reset()

Reset the code to its original state without any modifications

Source code in pypatches/code/code.py
102
103
104
def reset(self) -> None:
    """Discard all modifications and restore the original code"""
    pristine = self.original_code
    self.code = pristine

ASM subclass for assembly code used in patches

ASMCode

Bases: Code

Assembly code container for patching

Assembly code should be written in a format that can be assembled by keystone-engine. Generally, that means AT&T syntax for x86 and standard syntax for everything else.

Source code in pypatches/code/asm.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class ASMCode(Code):
    """Container for assembly code used in patches

    The assembly has to be in a form that keystone-engine can assemble.
    Generally, that means AT&T syntax for x86 and standard syntax for
    everything else.

    NOTE(review): `compile` builds the assembler with keystone's defaults and
    sets no syntax option; keystone documents Intel as its default x86 syntax,
    so the AT&T statement above should be confirmed.
    """

    def compile(self, label: str, info: Optional[TransformInfo] = None) -> bytes:
        """Assemble the assembly code to raw bytes of machine code.

        The assembler is always created for x86 in 64-bit mode. The code is
        assembled at the address recorded for `label` in `info.code_offsets`,
        or at 0 when the label has no entry.

        Args:
            label: The label to use for the code
            info: Transform info to use for the compilation; although the
                parameter is optional in the signature, it must be provided

        Returns:
            The raw bytes of the compiled code

        Raises:
            AssertionError: If `info` is `None`
        """

        assert info is not None, "Must provide info for ASM compile"

        base_address = info.code_offsets.get(label, 0)
        engine = Ks(KS_ARCH_X86, KS_MODE_64)

        # Only the first element of the assembler's result is returned.
        assembled = engine.asm(self.code, base_address, as_bytes=True)
        return cast(bytes, assembled[0])

compile(label, info=None)

Assemble the assembly code to raw bytes of machine code.

Parameters:

Name Type Description Default
label str

The label to use for the code

required
info Optional[TransformInfo]

Optional transform info to use for the compilation

None

Returns:

Type Description
bytes

The raw bytes of the compiled code

Source code in pypatches/code/asm.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def compile(self, label: str, info: Optional[TransformInfo] = None) -> bytes:
    """Assemble the assembly code to raw bytes of machine code.

    The assembler is always created for x86 in 64-bit mode. The code is
    assembled at the address recorded for `label` in `info.code_offsets`, or
    at 0 when the label has no entry.

    Args:
        label: The label to use for the code
        info: Transform info to use for the compilation; although the
            parameter is optional in the signature, it must be provided

    Returns:
        The raw bytes of the compiled code

    Raises:
        AssertionError: If `info` is `None`
    """

    assert info is not None, "Must provide info for ASM compile"

    base_address = info.code_offsets.get(label, 0)
    engine = Ks(KS_ARCH_X86, KS_MODE_64)

    # Only the first element of the assembler's result is returned.
    assembled = engine.asm(self.code, base_address, as_bytes=True)
    return cast(bytes, assembled[0])

Code type for C code

CCode

Bases: Code

C code container for patching

C code should be written such that it can be compiled by squishy. Generally, that means that the inline keyword should not be used, code should be written as C99-like as possible, and that most importantly external library calls must not be used. However, you can write code that uses system headers using the #include <...> syntax, and you can use data structures and macros from those headers.

Source code in pypatches/code/c_code.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class CCode(Code):
    """Container for C code used in patches

    The C code has to be compilable by
    [squishy](https://github.com/novafacing/squishy). Generally, that means
    that the `inline` keyword should not be used, the code should be as
    C99-like as possible, and, most importantly, external library calls must
    not be used. System headers may still be pulled in with the
    `#include <...>` syntax, and data structures and macros from those headers
    can be used.
    """

    def compile(self, label: str, info: Optional[TransformInfo] = None) -> bytes:
        """Compile the C code to raw machine code bytes

        A fresh `Squishy` instance compiles `self.code`; neither `label` nor
        `info` is used by this implementation.

        Args:
            label: The label to use for the code
            info: Optional transform info to use for the compilation

        Returns:
            The raw bytes of the compiled code
        """
        compiler = Squishy()
        return cast(bytes, compiler.compile(self.code))

compile(label, info=None)

Compile the C code to raw machine code bytes

Parameters:

Name Type Description Default
label str

The label to use for the code

required
info Optional[TransformInfo]

Optional transform info to use for the compilation

None

Returns:

Type Description
bytes

The raw bytes of the compiled code

Source code in pypatches/code/c_code.py
24
25
26
27
28
29
30
31
32
33
34
def compile(self, label: str, info: Optional[TransformInfo] = None) -> bytes:
    """Compile the C code to raw machine code bytes

    A fresh `Squishy` instance compiles `self.code`; neither `label` nor
    `info` is used by this implementation.

    Args:
        label: The label to use for the code
        info: Optional transform info to use for the compilation

    Returns:
        The raw bytes of the compiled code
    """
    compiler = Squishy()
    return cast(bytes, compiler.compile(self.code))

build_c_code(main_body, helpers=None, includes=None, extra_code=None, dummy_transformer=None, build_transformer=None, post_transformer=None)

Build a C code snippet from a main body, helpers, and include files

Generally, you should use this function instead of creating a CCode object manually.

Parameters:

Name Type Description Default
main_body str

The body of the main function to build, do not include int main(){ or }

required
helpers Optional[List[str]]

A list of filenames or paths to include into the program as helpers from libs like: ['liblink.c', 'libutil.c', 'libgetreg.c']. Some of these helpers are provided, and you can use them without a full path. The helpers included are:

- liblink.c: A library for linking to other functions and calling library functions
- libutil.c: A library for utility functions prefixed with _ like _strcpy and _strlen
- libgetreg.c: A library for getting the value of a register
- libsyscall.c: A library for calling system calls
- libgetbase.c: A library for getting the base address of the binary (note libgetbase.c requires libsyscall.c)

None
includes Optional[List[str]]

A list of include files like ["#include <stdint.h>", ...]

None
extra_code Optional[str]

Extra code to include in the program at global scope, write any functions you want to use in main_body here

None
dummy_transformer Optional[Callable[[Union[str, bytes]], Union[str, bytes]]]

A function that is passed self.code and returns a string of code to use for the dummy compilation. The result must compile but is not the code that will be inserted into the final patched binary, it is only used to obtain a size estimate. If not provided, the default is to return self.code unchanged.

None
build_transformer Optional[Callable[[TransformInfo, Union[str, bytes]], Union[str, bytes]]]

A function that is passed [self.code][pypatches.code.code.Code.code] and returns a string of code to use for the final compilation. The result must compile and will be inserted into the final patched binary. If not provided, the code will be passed through unchanged.

None
post_transformer Optional[Callable[[TransformInfo, bytes], bytes]]

A function that is passed the compiled code and returns the final bytes to use for the patch. This is useful for doing things like padding the code to a certain size. If not provided, the result of compilation will be used unchanged.

None
Source code in pypatches/code/c_code.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def build_c_code(
    main_body: str,
    helpers: Optional[List[str]] = None,
    includes: Optional[List[str]] = None,
    extra_code: Optional[str] = None,
    dummy_transformer: Optional[
        Callable[[Union[str, bytes]], Union[str, bytes]]
    ] = None,
    build_transformer: Optional[
        Callable[[TransformInfo, Union[str, bytes]], Union[str, bytes]]
    ] = None,
    post_transformer: Optional[Callable[[TransformInfo, bytes], bytes]] = None,
) -> CCode:
    """Build a C code snippet from a main body, helpers, and include files

    Generally, you should use this function instead of creating a
    [CCode][pypatches.code.c_code.CCode] object manually.

    The generated program is laid out in this order: the include lines, the
    text of each helper (looked up with `get_lib`), the extra code, and finally
    `main_body` wrapped in `int main() { ... }`.

    Args:
        main_body: The body of the main function to build, do not include `int main(){`
            or `}`
        helpers: A list of filenames or paths to include into the program as helpers from
            `libs` like: `['liblink.c', 'libutil.c', 'libgetreg.c']`. Some of these
            helpers are provided, and you can use them without a full path. The helpers
            included are:

            - `liblink.c`: A library for linking to other functions and calling library
                functions
            - `libutil.c`: A library for utility functions prefixed with `_` like
                `_strcpy` and `_strlen`
            - `libgetreg.c`: A library for getting the value of a register
            - `libsyscall.c`: A library for calling system calls
            - `libgetbase.c`: A library for getting the base address of the binary
                (note `libgetbase.c` requires `libsyscall.c`)
        includes: A list of include files like `["#include <stdint.h>", ...]`
        extra_code: Extra code to include in the program at global scope, write any
            functions you want to use in `main_body` here
        dummy_transformer: A function that is passed
            [self.code][pypatches.code.code.Code.code] and returns a
            string of code to use for the dummy compilation. The result must compile but
            is not the code that will be inserted into the final patched binary, it is
            only used to obtain a size estimate. If not provided, the default is to
            return the code unchanged.
        build_transformer: A function that is passed
            [self.code][pypatches.code.code.Code.code] and returns a
            string of code to use for the final compilation. The result must compile and
            will be inserted into the final patched binary. If not provided, the code
            will be passed through unchanged.
        post_transformer: A function that is passed the compiled code and returns the
            final bytes to use for the patch. This is useful for doing things like
            padding the code to a certain size. If not provided, the result of
            compilation will be used unchanged.

    Returns:
        A `CCode` wrapping the assembled program text and the given transformers
    """
    # Collect the pieces in order and join once at the end.
    parts: List[str] = []

    if includes is not None:
        parts.append("\n".join(includes) + "\n")

    if helpers is not None:
        # Each helper's source is surrounded by newlines to keep it separate
        # from its neighbours.
        parts.extend(f"\n{get_lib(helper)}\n" for helper in helpers)

    if extra_code is not None:
        parts.append(extra_code + "\n")

    parts.append("int main() {\n" + main_body + "\n}")

    return CCode(
        "".join(parts),
        dummy_transformer=dummy_transformer,
        build_transformer=build_transformer,
        post_transformer=post_transformer,
    )

Raw machine code Code subclass

RawCode

Bases: Code

Raw code bytes

Raw code is a container for raw bytes of machine code. It can be used to insert arbitrary machine code into a binary.

Source code in pypatches/code/raw.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class RawCode(Code):
    """Container for raw bytes of machine code

    Raw code holds machine code bytes as they are, which makes it possible to
    insert arbitrary machine code into a binary.
    """

    def compile(self, label: str, info: Optional[TransformInfo] = None) -> bytes:
        """Return the raw bytes of the code

        No compilation happens: `self.code` is handed back as it is, and
        neither `label` nor `info` is used.

        Args:
            label: The label to use for the code
            info: Optional transform info to use for the compilation

        Returns:
            The raw bytes of the code
        """
        machine_code = self.code
        return machine_code

compile(label, info=None)

Return the raw bytes of the code

Parameters:

Name Type Description Default
label str

The label to use for the code

required
info Optional[TransformInfo]

Optional transform info to use for the compilation

None

Returns:

Type Description
bytes

The raw bytes of the code

Source code in pypatches/code/raw.py
15
16
17
18
19
20
21
22
23
24
25
def compile(self, label: str, info: Optional[TransformInfo] = None) -> bytes:
    """Return the raw bytes of the code

    No compilation happens: `self.code` is handed back as it is, and neither
    `label` nor `info` is used.

    Args:
        label: The label to use for the code
        info: Optional transform info to use for the compilation

    Returns:
        The raw bytes of the code
    """
    machine_code = self.code
    return machine_code

Patch context info

Transform info

TransformInfo dataclass

Info for transformations of Code objects

Attributes:

Name Type Description
lief_binary LIEFBinary

The lief binary for the binary

angr_project Optional[Project]

The angr project for the binary

code_base int

The base address of the code in the binary

code_size int

The size of the code in the binary

code_offsets Dict[str, int]

A dictionary mapping labels to offsets in the binary

data_base int

The base address of the data in the binary

data_size int

The size of the data in the binary

data_offsets Dict[str, int]

A dictionary mapping labels to offsets in the binary

current_offset int

The offset of the current operation, usually a write to the binary. For example, a replacecodepatch will have at least one write, so this value will be set to the offset of the write before passing the TransformInfo object to the build transformer

Source code in pypatches/transform/info.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
@dataclass
class TransformInfo:
    """Info for transformations of [Code][pypatches.code.code.Code] objects

    Only `lief_binary` and `angr_project` have to be supplied; every other
    field has a default (zero, an empty dict, or empty bytes).

    Attributes:
        lief_binary: The lief binary for the binary
        angr_project: The angr project for the binary, may be `None`
        code_base: The base address of the code in the binary
        code_size: The size of the code in the binary
        code_offsets: A dictionary mapping labels to offsets in the binary
        data_base: The base address of the data in the binary
        data_size: The size of the data in the binary
        data_offsets: A dictionary mapping labels to offsets in the binary
        all_data: Bytes, empty by default. NOTE(review): presumably the
            combined data of the patches -- confirm against the producer
        current_offset: The offset of the current operation, usually a write
            to the binary. For example, a replacecodepatch will have at least one
            write, so this value will be set to the offset of the write before
            passing the
            [TransformInfo][pypatches.transform.info.TransformInfo] object to
            the build transformer
    """

    lief_binary: LIEFBinary
    angr_project: Optional[Project]

    code_base: int = 0
    code_size: int = 0
    code_offsets: Dict[str, int] = field(default_factory=dict)
    data_base: int = 0
    data_size: int = 0
    data_offsets: Dict[str, int] = field(default_factory=dict)
    all_data: bytes = b""
    current_offset: int = 0

Utilities

Errors specific to the patches package.

BinaryCreateError

Bases: Exception

An error that occurs when a binary code file cannot be produced

Source code in pypatches/error.py
20
21
class BinaryCreateError(Exception):
    """Raised when a binary code file cannot be produced"""

CodegenError

Bases: Exception

An error that occurs when code generation fails

Source code in pypatches/error.py
16
17
class CodegenError(Exception):
    """Raised when code generation fails"""

CompilationError

Bases: Exception

An error that occurs when a snippet of C/C++ code fails to compile

Source code in pypatches/error.py
6
7
class CompilationError(Exception):
    """Raised when a snippet of C/C++ code fails to compile"""

NoSectionError

Bases: Exception

An error that occurs when a section is not found

Source code in pypatches/error.py
24
25
class NoSectionError(Exception):
    """Raised when a section is not found"""

TransformationError

Bases: Exception

An error that occurs when transformation of LLVM bitcode using the wrapper fails

Source code in pypatches/error.py
10
11
12
13
class TransformationError(Exception):
    """Signals a failed LLVM bitcode transformation.

    Used for failures that occur while transforming LLVM bitcode with
    the wrapper.
    """

Dataclass implementing comparable address ranges.

AddressRange dataclass

An address range with a start and end. If start == end, this address range is just one address

Attributes:

Name Type Description
start int

The start address of the range

end int

The end address of the range

Source code in pypatches/address_range.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
@dataclass(frozen=True)
class AddressRange:
    """Immutable pair of addresses delimiting a range.

    A range whose ``start`` equals its ``end`` represents exactly one
    address.

    Attributes:
        start: The start address of the range
        end: The end address of the range
    """

    start: int
    end: int

    @property
    def address(self) -> bool:
        """Tell whether this range collapses to a single address.

        Returns:
            True when ``start`` and ``end`` are equal, False otherwise
        """
        lower, upper = self.start, self.end
        return lower == upper

address() property

Returns whether this range is "an address"

Source code in pypatches/address_range.py
20
21
22
23
@property
def address(self) -> bool:
    """Tell whether this range collapses to a single address.

    Returns:
        True when ``start`` and ``end`` are equal, False otherwise
    """
    lower, upper = self.start, self.end
    return lower == upper

Write operation to be performed on a binary.

WriteOperation dataclass

A write operation to be performed on a section

Attributes:

Name Type Description
data Union[bytes, Callable[[Dict[str, int]], bytes], Code]

Either raw bytes or a function that takes the resolved labels for all patches and returns raw bytes

vaddr Union[bytes, Callable[[Dict[str, int]], bytes], Code]

The virtual address or label location to write to

where Union[str, Callable[[TransformInfo], int], int, List[int], Callable[[TransformInfo], List[int]]]

The address to replace the code at, optional. This "address" can either be a label, an actual address, a list of addresses, a function that takes a TransformInfo and returns an address, or a function that takes a TransformInfo and returns a list of addresses.

Source code in pypatches/write_operation.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
@dataclass
class WriteOperation:
    """A write operation to be performed on a section

    Attributes:
        data: The content to write. Either raw bytes, a function that takes
            the resolved labels for all patches and returns raw bytes, or a
            ``Code`` object.
        where: The address to replace the code at. This "address" can either
            be a label, an actual address, a list of addresses, a function that takes a
            [TransformInfo][TransformInfo] and returns an address, or a function that
            takes a [TransformInfo][TransformInfo] and returns a list of addresses.
    """

    # Both fields are declared without defaults, so both must be supplied
    # when constructing a WriteOperation.
    data: Union[bytes, Callable[[Dict[str, int]], bytes], Code]
    where: Union[
        str,
        Callable[[TransformInfo], int],
        int,
        List[int],
        Callable[[TransformInfo], List[int]],
    ]

Check if two capstone MemOp objects are equal, since they don't have an __eq__ method that works.

cs_memop_eq(first, second)

Check equality of two memory operands

Parameters:

Name Type Description Default
first Structure

The first memory operand

required
second Structure

The second memory operand

required

Returns:

Type Description
bool

True if the memory operands are equal, False otherwise

Source code in pypatches/util/cs_memop_eq.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def cs_memop_eq(first: Structure, second: Structure) -> bool:
    """Compare two memory operands field by field

    The operands are considered equal when their classes share the same
    name and every field listed for that operand class holds equal values
    on both objects. Operand classes that are not listed have no fields
    to compare, so two such operands of the same class compare equal.

    Args:
        first: The first memory operand
        second: The second memory operand

    Returns:
        True if the memory operands are equal, False otherwise

    """
    # Operands of differently named classes can never be equal in a
    # memory operand sense, so bail out before looking at any field
    if first.__class__.__name__ != second.__class__.__name__:
        return False

    # Ordered (operand class, compared fields) pairs; the first class that
    # ``first`` is an instance of decides which fields are compared
    fields_by_class = (
        (ArmOpMem, ("base", "index", "scale", "disp", "lshift")),
        (
            M68KOpMem,
            (
                "base_reg",
                "index_reg",
                "in_base_reg",
                "in_disp",
                "out_disp",
                "disp",
                "scale",
            ),
        ),
        (MipsOpMem, ("base", "disp")),
        (Arm64OpMem, ("base", "index", "disp")),
        (PpcOpMem, ("base", "disp")),
        (X86OpMem, ("segment", "base", "index", "scale", "disp")),
        (XcoreOpMem, ("base", "index", "disp", "direct")),
        (
            TMS320C64xOpMem,
            ("base", "disp", "unit", "scaled", "disptype", "direction", "modify"),
        ),
        (SparcOpMem, ("base", "index", "disp")),
        (SyszOpMem, ("base", "index", "length", "disp")),
    )

    compared: Tuple[str, ...] = next(
        (names for op_class, names in fields_by_class if isinstance(first, op_class)),
        (),
    )

    # Stops at the first differing field, in the listed order
    return not any(
        getattr(first, name) != getattr(second, name) for name in compared
    )

Accessors for various libraries to add to patches

get_lib(name)

Return the text of a library file with a given name

Parameters:

Name Type Description Default
name str

The name of the library file to get: either a path relative to this file, to retrieve a built-in library, or a full path to a library file, to retrieve a custom library

required
Source code in pypatches/util/libs/libs.py
 9
10
11
12
13
14
15
16
17
18
19
20
def get_lib(name: str) -> str:
    """Read a library file and return its contents as text

    The name is first looked up under ``LIBS_DIR``; if no file exists
    there, the name is used as a path on its own. Files are decoded as
    UTF-8.

    Args:
        name: The name of the library file to get, either a path under
            ``LIBS_DIR`` to retrieve a built-in library or a full path to
            a library file to retrieve a custom library

    Returns:
        The text of the library file

    Raises:
        FileNotFoundError: If the file is found in neither location
    """
    builtin_candidate = LIBS_DIR / name
    try:
        text = builtin_candidate.read_text(encoding="utf-8")
    except FileNotFoundError:
        # Not a built-in library: treat the name as a path by itself
        text = Path(name).read_text(encoding="utf-8")
    return text