Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect and reroll unrolled do-while loops #76

Open
wants to merge 59 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 46 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
0de7b12
Add --no-andor for loop.c, and a similar looping test case
matt-kempster Apr 26, 2020
9d10258
Initial infrastructure for detecting for-loops
matt-kempster Apr 26, 2020
d394e9b
Use do-while, not for; screwed up indentation
matt-kempster Apr 26, 2020
ad167f7
Fix screwed up indentation
matt-kempster Apr 26, 2020
8d99b31
Add complicated loop unrolling; break -g indentation
matt-kempster Apr 26, 2020
24f5e2f
Actually run tests
matt-kempster Apr 26, 2020
e35518b
Fix indentation of simple do-whiles
matt-kempster Apr 26, 2020
db72018
Negate loop-gating condition
matt-kempster Apr 26, 2020
9bf11da
Merge with master
matt-kempster Apr 27, 2020
b3b8e22
Divorce if-statement from do-while loop
matt-kempster Apr 27, 2020
09e85cb
Add missing semicolon after do-while
matt-kempster Apr 27, 2020
0a1156b
Delete a bunch of dead code
matt-kempster Apr 27, 2020
03a8ab0
Introduce crazy loop-rerolling with an option to disable
matt-kempster Apr 27, 2020
0b22145
Replace --no-andor with --no-reroll in loop test
matt-kempster Apr 27, 2020
c937919
Got loop.c working, but broke nested
matt-kempster Apr 29, 2020
b4ed8e0
Temp commit to see what used to work
matt-kempster Apr 29, 2020
75ce71f
Fix everything
matt-kempster Apr 29, 2020
1997ed4
CRLF -> LF
matt-kempster Apr 29, 2020
7919a49
Remove the code from if_statements that was moved
matt-kempster Apr 29, 2020
f637685
More CRLF
matt-kempster Apr 30, 2020
2d58683
Another flip-flop, using --no-reroll for a test
matt-kempster Apr 30, 2020
8a5bf38
Refactor all spaghetti AND fix a bug
matt-kempster May 1, 2020
2fe2c7d
Re-enable do-while emission
matt-kempster May 1, 2020
24e6066
Temporary while commit
matt-kempster May 1, 2020
3517860
Temporary while commit
matt-kempster May 1, 2020
57f6697
Throw this commit away
matt-kempster May 3, 2020
f4aa9e6
Add a broken test, to be deleted
matt-kempster May 3, 2020
2768028
Undo last several commits for a fresh start
matt-kempster May 3, 2020
49fbcfa
Improve detection of do-whiles
matt-kempster May 3, 2020
c359b3b
Clean up loop detection a bit
matt-kempster May 3, 2020
e4ce1f1
Merge with master
matt-kempster May 3, 2020
8073b65
Use imm.pdom heuristic for loop detection
matt-kempster May 3, 2020
ce9ba4e
Actually run tests
matt-kempster May 3, 2020
cc67261
Move parent generation alongside dominators
matt-kempster May 5, 2020
0ec795e
Merge branch 'master' into for_loops
simonlindholm Sep 30, 2020
03b6d42
Merge master into for_loops (using imerge)
matt-kempster Jun 24, 2021
90c8f2c
Fix indentation of do-while loops
matt-kempster Jun 24, 2021
4379021
Fix self_loop definition
matt-kempster Jun 24, 2021
a2ed466
Minor fixups
matt-kempster Jun 24, 2021
fb23b5d
Fix bug where emitting a node kills that node forever
matt-kempster Jun 24, 2021
774d467
Fix bug where nodes were not thought to be emitted
matt-kempster Jun 24, 2021
914479e
Fix mypy issue
matt-kempster Jun 24, 2021
ea1593f
Merge master into for_loops (using imerge)
matt-kempster Jun 25, 2021
75a6c19
Remove secretly=...
matt-kempster Jun 25, 2021
5849d59
Remove secretly=... but correctly
matt-kempster Jun 25, 2021
4fdaadc
Remove secretly=... but correctly... again
matt-kempster Jun 25, 2021
25e798f
Remove old wrong comments
matt-kempster Jun 30, 2021
2f9dcd0
Undo the translate.py copy import
matt-kempster Jun 30, 2021
66083c3
Undo all translate.py 'fixes'
matt-kempster Jun 30, 2021
85e9d55
Remove to_basic_node()
matt-kempster Jun 30, 2021
b595ce8
Use zbanks's suggestion in SwitchNode
matt-kempster Jun 30, 2021
ec17645
Move compute_relations() to 'if changed' block
matt-kempster Jun 30, 2021
ef59716
Factor out the matching of nodes logic
matt-kempster Jun 30, 2021
fc7fff9
Implement drop-in replacement for match_nodes
matt-kempster Jun 30, 2021
d63b4af
Delete match_nodes()
matt-kempster Jun 30, 2021
0df65fc
Delete idx_eq()
matt-kempster Jun 30, 2021
c89f8a0
De Morgan's simplification
matt-kempster Jun 30, 2021
f9b25ed
Introduce remove_and_replace_nodes()
matt-kempster Jul 1, 2021
587e770
Leave a note to myself
matt-kempster Jul 1, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion src/flow_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,18 +778,34 @@ class BaseNode(abc.ABC):
# i.e. there is an invariant `(node.loop is None) or (node.loop.head is node)`
loop: Optional["NaturalLoop"] = attr.ib(init=False, default=None)

def to_basic_node(self, successor: "Node") -> "BasicNode":
new_node = BasicNode(self.block, self.emit_goto, successor)
new_node.parents = self.parents
new_node.dominators = self.dominators
new_node.immediate_dominator = self.immediate_dominator
new_node.immediately_dominates = self.immediately_dominates
matt-kempster marked this conversation as resolved.
Show resolved Hide resolved
return new_node

def name(self) -> str:
return str(self.block.index)

@abc.abstractmethod
def children(self) -> List["Node"]:
...

@abc.abstractmethod
def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None:
...


@attr.s(eq=False)
class BasicNode(BaseNode):
successor: "Node" = attr.ib()

def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None:
if self.successor is replace_this:
self.successor = with_this

def children(self) -> List["Node"]:
# TODO: Should we also include the fallthrough node if `emit_goto` is True?
return [self.successor]
Expand All @@ -810,6 +826,12 @@ class ConditionalNode(BaseNode):
conditional_edge: "Node" = attr.ib()
fallthrough_edge: "Node" = attr.ib()

def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None:
if self.conditional_edge is replace_this:
self.conditional_edge = with_this
if self.fallthrough_edge is replace_this:
self.fallthrough_edge = with_this

def children(self) -> List["Node"]:
if self.conditional_edge == self.fallthrough_edge:
return [self.conditional_edge]
Expand Down Expand Up @@ -837,6 +859,9 @@ class ReturnNode(BaseNode):
def children(self) -> List["Node"]:
return [self.terminal]

def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None:
return None

def name(self) -> str:
name = super().name()
return name if self.is_real() else f"{name}.{self.index}"
Expand All @@ -858,6 +883,15 @@ def __str__(self) -> str:
class SwitchNode(BaseNode):
cases: List["Node"] = attr.ib()

def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None:
new_cases: List["Node"] = []
for case in self.cases:
if case is replace_this:
new_cases.append(with_this)
else:
new_cases.append(case)
self.cases = new_cases
matt-kempster marked this conversation as resolved.
Show resolved Hide resolved

def children(self) -> List["Node"]:
# Deduplicate nodes in `self.cases`
seen = set()
Expand Down Expand Up @@ -891,6 +925,9 @@ def terminal() -> "TerminalNode":
def children(self) -> List["Node"]:
return []

def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None:
return None

def __str__(self) -> str:
return "return"

Expand Down Expand Up @@ -1322,7 +1359,10 @@ def is_reducible(self) -> bool:
for s in n.children():
if s.loop is not None and n in s.loop.backedges:
continue
incoming_edges[s].remove(n)
try:
incoming_edges[s].remove(n)
except KeyError:
break
Comment on lines 1347 to +1353
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At first glance, I think the break should be continue (if it's OK to have missing edges) or return False (if it's not OK) -- otherwise, right now the behavior seems dependent on the order of the children in the list?

I think the fix here might actually be to iterate over set(children) instead. If n isn't in incoming_edges[s], then I think that means s.parents was not constructed correctly? (Sorry that this might be a pre-existing bug)

            for s in set(n.children()):
                if s.loop is not None and n in s.loop.backedges:
                    continue
                incoming_edges[s].remove(n)

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does that mean n.children() might have repeats? That makes sense actually, and is probably why my extremely old version of this PR didn't need this fix (I changed children() to a Set.)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh wait, I'm wrong, my bad!

n.children() shouldn't have repeats: the implementations for that fn in SwitchNode & ConditionalNode explicitly deduplicate the children.

I can take a look and figure out what's going on here.

if not incoming_edges[s]:
queue.add(s)

Expand Down
116 changes: 116 additions & 0 deletions src/loop_rerolling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
from typing import List

from .flow_graph import (
BasicNode,
ConditionalNode,
FlowGraph,
Node,
compute_relations,
)
from .parse_instruction import Instruction


def replace_node_references(
flow_graph: FlowGraph, replace_this: Node, with_this: Node
) -> None:
for node_to_modify in flow_graph.nodes:
node_to_modify.replace_any_children(replace_this, with_this)


def remove_node(flow_graph: FlowGraph, to_delete: Node, new_child: Node) -> None:
flow_graph.nodes.remove(to_delete)
replace_node_references(flow_graph, to_delete, new_child)


def replace_node(flow_graph: FlowGraph, replace_this: Node, with_this: Node) -> None:
replacement_index = flow_graph.nodes.index(replace_this)
flow_graph.nodes[replacement_index] = with_this
replace_node_references(flow_graph, replace_this, with_this)


def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment with the structure/asm we're trying to pattern-match here (or refer to a test?).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd be nice to include a dot representation of what's going on:

    digraph before {
        start -> node_1 [label=F, color=blue];
        start -> node_7 [label=T, color=red];
        node_1 -> node_2 [label=F, color=blue];
        node_1 -> node_5 [label=T, color=red];
        node_2 -> node_3;
        node_3 -> node_4 [label=F, color=blue];
        node_3 -> node_3 [label=T, color=red];
        node_4 -> node_5 [label=F, color=blue];
        node_4 -> node_7 [label=T, color=red];
        node_5 -> node_6;
        node_6 -> node_7 [label=F, color=blue];
        node_6 -> node_6 [label=T, color=red];
    }

    digraph after {
        start -> new_node_1 [label=F, color=blue];
        start -> node_7 [label=T, color=red];
        new_node_1 -> node_2 [label=F, color=blue];
        new_node_1 -> node_7 [label=T, color=red];
        node_2 -> node_3;
        node_3 -> node_7 [label=F, color=blue];
        node_3 -> node_3 [label=T, color=red];
    }

And maybe a link to an online render, like https://bit.ly/3A2MEJt

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is actually nearly exactly what the new reroll.py does -- stay tuned

node_1 = start.fallthrough_edge
node_7 = start.conditional_edge

if not isinstance(node_1, ConditionalNode):
return False
node_2 = node_1.fallthrough_edge
node_5 = node_1.conditional_edge

if not isinstance(node_2, BasicNode):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should also check that these nodes have the parents we expect. I feel like this pass has a large potential for misdetection and changing the asm into something non-equivalent, so we should be as careful as we can in the pattern-matching.

return False
node_3 = node_2.successor

if not (
isinstance(node_3, ConditionalNode)
and node_3.loop
and node_3.conditional_edge is node_3
):
return False
node_4 = node_3.fallthrough_edge

if not (
isinstance(node_4, ConditionalNode)
and node_4.fallthrough_edge is node_5
and node_4.conditional_edge is node_7
):
return False

if not isinstance(node_5, BasicNode):
return False
node_6 = node_5.successor

if not (
isinstance(node_6, ConditionalNode)
and node_6.loop
and node_6.conditional_edge is node_6
and node_6.fallthrough_edge is node_7
):
return False

def modify_node_1_instructions(instructions: List[Instruction]) -> bool:
# First, we check that the node has the instructions we
# think it has.
branches = [instr for instr in instructions if instr.is_branch_instruction()]
if len(branches) != 1:
return False
andi_instrs = [instr for instr in instructions if instr.mnemonic == "andi"]
if len(andi_instrs) != 1:
return False
# We are now free to modify the instructions, as we have verified
# that this node fits the criteria.
instructions.remove(branches[0])
andi = andi_instrs[0]
move = Instruction.derived("move", [andi.args[0], andi.args[1]], andi)
instructions[instructions.index(andi)] = move
return True

if not modify_node_1_instructions(node_1.block.instructions):
return False

new_node_1 = node_1.to_basic_node(
successor=node_2 # node_2 doesn't know it's a parent yet
)
replace_node(flow_graph, node_1, new_node_1) # now it does
matt-kempster marked this conversation as resolved.
Show resolved Hide resolved
remove_node(flow_graph, node_4, node_7)
remove_node(flow_graph, node_5, node_7)
remove_node(flow_graph, node_6, node_7) # TODO: assert didn't execute anything?.

return True


def reroll_loops(flow_graph: FlowGraph) -> FlowGraph:
# TODO: What if knocking out nodes reveals another set of nodes
# that look identical? We will incorrectly be merging two
# adjacent for-loops.
matt-kempster marked this conversation as resolved.
Show resolved Hide resolved
changed: bool = True
while changed:
changed = False
for node in flow_graph.nodes:
if not isinstance(node, ConditionalNode):
continue
changed = reroll_loop(flow_graph, node)
if changed:
break
compute_relations(flow_graph.nodes)
matt-kempster marked this conversation as resolved.
Show resolved Hide resolved
return flow_graph
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function modifies flow_graph in-place, so I don't think it needs to return anything.

17 changes: 15 additions & 2 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

from .c_types import TypeMap, build_typemap, dump_typemap
from .error import DecompFailure
from .flow_graph import visualize_flowgraph
from .flow_graph import FlowGraph, build_flowgraph, visualize_flowgraph
from .loop_rerolling import reroll_loops
from .if_statements import get_function_text
from .options import CodingStyle, Options
from .parse_file import MIPSFile, parse_file
Expand Down Expand Up @@ -94,7 +95,12 @@ def run(options: Options) -> int:
function_infos: List[Union[FunctionInfo, Exception]] = []
for function in functions:
try:
info = translate_to_ast(function, options, global_info)
flowgraph: FlowGraph = build_flowgraph(function, mips_file.asm_data)

if options.loop_rerolling:
flowgraph = reroll_loops(flowgraph)

info = translate_to_ast(function, flowgraph, options, global_info)
function_infos.append(info)
except Exception as e:
# Store the exception for later, to preserve the order in the output
Expand Down Expand Up @@ -174,6 +180,12 @@ def parse_flags(flags: List[str]) -> Options:
help="disable detection of &&/||",
action="store_false",
)
parser.add_argument(
"--no-reroll",
dest="loop_rerolling",
help="disable detection and fixing of unrolled loops",
action="store_false",
)
parser.add_argument(
"--no-casts",
dest="skip_casts",
Expand Down Expand Up @@ -304,6 +316,7 @@ def parse_flags(flags: List[str]) -> Options:
void=args.void,
ifs=args.ifs,
andor_detection=args.andor_detection,
loop_rerolling=args.loop_rerolling,
skip_casts=args.skip_casts,
reg_vars=reg_vars,
goto_patterns=args.goto_patterns,
Expand Down
1 change: 1 addition & 0 deletions src/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Options:
void: bool = attr.ib()
ifs: bool = attr.ib()
andor_detection: bool = attr.ib()
loop_rerolling: bool = attr.ib()
skip_casts: bool = attr.ib()
reg_vars: List[str] = attr.ib()
goto_patterns: List[str] = attr.ib()
Expand Down
21 changes: 15 additions & 6 deletions src/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys
import traceback
from contextlib import contextmanager
from copy import copy
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple, Union

import attr
Expand Down Expand Up @@ -2981,7 +2982,7 @@ def regs_clobbered_until_dominator(
if node.immediate_dominator is None:
return set()
seen = {node.immediate_dominator}
stack = node.parents[:]
stack = copy(node.parents)
clobbered = set()
while stack:
n = stack.pop()
Expand All @@ -3003,7 +3004,7 @@ def reg_always_set(
if node.immediate_dominator is None:
return False
seen = {node.immediate_dominator}
stack = node.parents[:]
stack = copy(node.parents)
while stack:
n = stack.pop()
if n == node.immediate_dominator and not dom_set:
Expand Down Expand Up @@ -3032,11 +3033,17 @@ def assign_phis(used_phis: List[PhiExpr], stack_info: StackInfo) -> None:
while i < len(used_phis):
phi = used_phis[i]
assert phi.num_usages > 0
assert len(phi.node.parents) >= 2
# assert len(phi.node.parents) >= 2
if len(phi.node.parents) < 2:
i += 1
continue
matt-kempster marked this conversation as resolved.
Show resolved Hide resolved
exprs = []
for node in phi.node.parents:
block_info = node.block.block_info
assert isinstance(block_info, BlockInfo)
# assert isinstance(block_info, BlockInfo)
if not block_info:
i += 1
continue
matt-kempster marked this conversation as resolved.
Show resolved Hide resolved
exprs.append(block_info.final_register_states[phi.reg])

first_uw = early_unwrap(exprs[0])
Expand Down Expand Up @@ -3094,7 +3101,9 @@ def compute_has_custom_return(nodes: List[Node]) -> None:
continue
for p in n.parents:
block_info2 = p.block.block_info
assert isinstance(block_info2, BlockInfo)
# assert isinstance(block_info2, BlockInfo)
if not block_info2:
continue
if block_info2.has_custom_return:
block_info.has_custom_return = True
changed = True
Expand Down Expand Up @@ -3877,6 +3886,7 @@ class FunctionInfo:

def translate_to_ast(
function: Function,
flow_graph: FlowGraph,
options: Options,
global_info: GlobalInfo,
) -> FunctionInfo:
Expand All @@ -3886,7 +3896,6 @@ def translate_to_ast(
branch condition.
"""
# Initialize info about the function.
flow_graph: FlowGraph = build_flowgraph(function, global_info.asm_data)
stack_info = get_stack_info(function, global_info, flow_graph)
typemap = global_info.typemap

Expand Down
1 change: 1 addition & 0 deletions tests/end_to_end/loop/irix-o2-no-reroll-flags.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--no-reroll
49 changes: 49 additions & 0 deletions tests/end_to_end/loop/irix-o2-no-reroll-out.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
s32 test(s8 *arg0, s32 arg1) {
s32 temp_a3;
s32 temp_v0;
s32 temp_v0_2;
void *temp_v1;
s8 *phi_v1;
s32 phi_v0;
void *phi_v1_2;
s32 phi_v0_2;
s32 phi_return;
s32 phi_v0_3;

phi_return = 0;
if (arg1 > 0) {
temp_a3 = arg1 & 3;
phi_v0_3 = 0;
if (temp_a3 != 0) {
phi_v1 = arg0;
phi_v0 = 0;
do {
temp_v0 = phi_v0 + 1;
*phi_v1 = (u8)0;
phi_v1 += 1;
phi_v0 = temp_v0;
} while (temp_a3 != temp_v0);
phi_return = temp_v0;
phi_v0_3 = temp_v0;
if (temp_v0 != arg1) {
block_5:
phi_v1_2 = arg0 + phi_v0_3;
phi_v0_2 = phi_v0_3;
do {
temp_v0_2 = phi_v0_2 + 4;
phi_v1_2->unk1 = (u8)0;
phi_v1_2->unk2 = (u8)0;
phi_v1_2->unk3 = (u8)0;
temp_v1 = phi_v1_2 + 4;
temp_v1->unk-4 = (u8)0;
phi_v1_2 = temp_v1;
phi_v0_2 = temp_v0_2;
phi_return = temp_v0_2;
} while (temp_v0_2 != arg1);
}
} else {
goto block_5;
}
}
return phi_return;
}
Loading