Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions Include/internal/pycore_optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,15 @@ typedef struct _PyJitTracerInitialState {
_Py_CODEUNIT *jump_backward_instr;
} _PyJitTracerInitialState;

// Capacity of the per-instruction recording buffer: it sizes
// recorded_values[] below and the indices[] array of _PyOpcodeRecordEntry.
#define MAX_RECORDED_VALUES 3
// State the tracer keeps in `tracer->prev_state` between instructions:
// the instruction itself, its oparg/stack level, the owning code object and
// frame, and any values recorded for it.
typedef struct _PyJitTracerPreviousState {
int instr_oparg;
int instr_stacklevel;
_Py_CODEUNIT *instr;
PyCodeObject *instr_code; // Strong
struct _PyInterpreterFrame *instr_frame;
// NOTE(review): this diff view shows removed and added lines together;
// recorded_value looks like the single-slot field that recorded_values[]
// and recorded_count replace -- confirm against the merged header.
PyObject *recorded_value; // Strong, may be NULL
// One slot per recording function run for the instruction; slot i is
// written by the i-th function and later handed to the i-th uop that
// records a value.
PyObject *recorded_values[MAX_RECORDED_VALUES]; // Strong, may be NULL
// Number of leading recorded_values[] slots filled for the instruction.
// The tracer clears exactly this many slots, then resets it to 0, before
// recording for the next instruction.
int recorded_count;
} _PyJitTracerPreviousState;

typedef struct _PyJitTracerTranslatorState {
Expand Down Expand Up @@ -481,7 +483,12 @@ void _PyJit_TracerFree(_PyThreadStateImpl *_tstate);
#ifdef _Py_TIER2
// Signature of a recording function. Callers pass the current frame, stack
// pointer and oparg, plus the address of the slot that receives the value.
typedef void (*_Py_RecordFuncPtr)(_PyInterpreterFrame *frame, _PyStackRef *stackpointer, int oparg, PyObject **recorded_value);
// Table of recording functions, indexed by the values stored in
// _PyOpcodeRecordEntry.indices[].
PyAPI_DATA(const _Py_RecordFuncPtr) _PyOpcode_RecordFunctions[];
// NOTE(review): appears to be the pre-change table (one function index per
// opcode, 0 meaning "nothing to record") that _PyOpcode_RecordEntries
// replaces -- this diff view shows removed and added lines together.
PyAPI_DATA(const uint8_t) _PyOpcode_RecordFunctionIndices[256];

// Recording description for one opcode: `count` recording functions, whose
// positions in _PyOpcode_RecordFunctions are indices[0..count-1], in the
// order they are to be called.
typedef struct {
uint8_t count;
uint8_t indices[MAX_RECORDED_VALUES];
} _PyOpcodeRecordEntry;
// Looked up by opcode value; an entry with count == 0 records nothing.
PyAPI_DATA(const _PyOpcodeRecordEntry) _PyOpcode_RecordEntries[256];
#endif

#ifdef __cplusplus
Expand Down
199 changes: 198 additions & 1 deletion Lib/test/test_generated_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,13 @@ def skip_if_different_mount_drives():

test_tools.skip_if_missing("cases_generator")
with test_tools.imports_under_tool("cases_generator"):
from analyzer import StackItem
from analyzer import StackItem, analyze_files
from cwriter import CWriter
import parser
from stack import Local, Stack
import tier1_generator
import optimizer_generator
import record_function_generator


def handle_stderr():
Expand Down Expand Up @@ -1948,6 +1949,202 @@ def test_recording_after_non_specializing(self):
with self.assertRaisesRegex(SyntaxError, "Recording uop"):
self.run_cases_test(input, "")

def test_multiple_consecutive_recording_uops(self):
"""Multiple consecutive recording uops at the start of a macro are legal."""
# Two tier2-only recording uops are placed ahead of the single regular
# uop in the macro.
input = """
tier2 op(_RECORD_A, (a, b -- a, b)) {
RECORD_VALUE(a);
}
tier2 op(_RECORD_B, (a, b -- a, b)) {
RECORD_VALUE(b);
}
op(_DO_STUFF, (a, b -- res)) {
res = a;
INPUTS_DEAD();
}
macro(OP) = _RECORD_A + _RECORD_B + _DO_STUFF;
"""
# The expected tier-1 case contains code for _DO_STUFF only; neither
# recording uop contributes any statements to it.
output = """
TARGET(OP) {
#if _Py_TAIL_CALL_INTERP
int opcode = OP;
(void)(opcode);
#endif
frame->instr_ptr = next_instr;
next_instr += 1;
INSTRUCTION_STATS(OP);
_PyStackRef a;
_PyStackRef res;
// _DO_STUFF
{
a = stack_pointer[-2];
res = a;
}
stack_pointer[-2] = res;
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
DISPATCH();
}
"""
self.run_cases_test(input, output)

def test_multiple_recording_uops_after_specializing(self):
"""Multiple recording uops after a specializing uop are legal."""
# The macro starts with a specializing uop (one counter cache entry),
# followed by two recording uops, two unused cache entries and the
# regular uop.
input = """
specializing op(_SPECIALIZE_OP, (counter/1, a, b -- a, b)) {
SPAM();
}
tier2 op(_RECORD_A, (a, b -- a, b)) {
RECORD_VALUE(a);
}
tier2 op(_RECORD_B, (a, b -- a, b)) {
RECORD_VALUE(b);
}
op(_DO_STUFF, (a, b -- res)) {
res = a;
INPUTS_DEAD();
}
macro(OP) = _SPECIALIZE_OP + _RECORD_A + _RECORD_B + unused/2 + _DO_STUFF;
"""
# Expected output: code for _SPECIALIZE_OP and _DO_STUFF only, with
# next_instr advanced by 4 (the instruction itself, counter/1, unused/2).
# The recording uops add neither code nor cache entries.
output = """
TARGET(OP) {
#if _Py_TAIL_CALL_INTERP
int opcode = OP;
(void)(opcode);
#endif
_Py_CODEUNIT* const this_instr = next_instr;
(void)this_instr;
frame->instr_ptr = next_instr;
next_instr += 4;
INSTRUCTION_STATS(OP);
_PyStackRef a;
_PyStackRef res;
// _SPECIALIZE_OP
{
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
SPAM();
}
/* Skip 2 cache entries */
// _DO_STUFF
{
a = stack_pointer[-2];
res = a;
}
stack_pointer[-2] = res;
stack_pointer += -1;
ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__);
DISPATCH();
}
"""
self.run_cases_test(input, output)

def test_recording_uop_between_real_uops_rejected(self):
"""A recording uop sandwiched between real uops is rejected."""
# _RECORD_B comes after _FIRST, a regular (non-recording,
# non-specializing) uop; that ordering is what must be rejected.
input = """
tier2 op(_RECORD_A, (a, b -- a, b)) {
RECORD_VALUE(a);
}
op(_FIRST, (a, b -- a, b)) {
first(a);
}
tier2 op(_RECORD_B, (a, b -- a, b)) {
RECORD_VALUE(b);
}
macro(OP) = _RECORD_A + _FIRST + _RECORD_B;
"""
# No output is expected: the generator has to raise before producing any.
with self.assertRaisesRegex(SyntaxError,
"must precede all "
"non-recording, non-specializing uops"):
self.run_cases_test(input, "")


# Tests for record_function_generator.generate_recorder_tables: each test
# feeds a small instruction-definition snippet through the analyzer and
# checks the text of the generated per-opcode recorder table.
class TestRecorderTableGeneration(unittest.TestCase):

def setUp(self) -> None:
super().setUp()
# Show full diffs on assertion failures.
self.maxDiff = None
# NOTE(review): the input file has a fixed name inside the shared system
# temp directory, so concurrent runs may overwrite each other's input;
# a uniquely named temporary file would avoid that.
self.temp_dir = tempfile.gettempdir()
self.temp_input_filename = os.path.join(self.temp_dir, "input.txt")

def tearDown(self) -> None:
# The file may not exist if the test never got as far as writing it.
try:
os.remove(self.temp_input_filename)
except FileNotFoundError:
pass
super().tearDown()

# Write `input` between the parser's BEGIN/END markers, analyze that file,
# run the recorder-table generator into an in-memory buffer, and return the
# generated text. Exceptions raised by analysis or generation propagate to
# the caller.
def generate_tables(self, input: str) -> str:
import io
with open(self.temp_input_filename, "w+") as f:
f.write(parser.BEGIN_MARKER)
f.write(input)
f.write(parser.END_MARKER)
with handle_stderr():
analysis = analyze_files([self.temp_input_filename])
buf = io.StringIO()
out = CWriter(buf, 0, False)
record_function_generator.generate_recorder_tables(analysis, out)
return buf.getvalue()

# One recording uop: the table entry for OP has count 1 and a single index.
def test_single_recording_uop_generates_count(self):
input = """
tier2 op(_RECORD_TOS, (value -- value)) {
RECORD_VALUE(value);
}
op(_DO_STUFF, (value -- res)) {
res = value;
}
macro(OP) = _RECORD_TOS + _DO_STUFF;
"""
output = self.generate_tables(input)
self.assertIn("_RECORD_TOS_INDEX", output)
self.assertIn("[OP] = {1, {_RECORD_TOS_INDEX}}", output)

# Three recording uops: count is 3 and the indices appear in the same order
# as the uops in the macro definition.
def test_three_recording_uops_generate_count_3_in_order(self):
input = """
tier2 op(_RECORD_X, (a, b, c -- a, b, c)) {
RECORD_VALUE(a);
}
tier2 op(_RECORD_Y, (a, b, c -- a, b, c)) {
RECORD_VALUE(b);
}
tier2 op(_RECORD_Z, (a, b, c -- a, b, c)) {
RECORD_VALUE(c);
}
op(_DO_STUFF, (a, b, c -- res)) {
res = a;
}
macro(OP) = _RECORD_X + _RECORD_Y + _RECORD_Z + _DO_STUFF;
"""
output = self.generate_tables(input)
self.assertIn(
"[OP] = {3, {_RECORD_X_INDEX, _RECORD_Y_INDEX, _RECORD_Z_INDEX}}",
output,
)

# Four recording uops in one macro, one more than the three accepted by the
# previous test: table generation must fail with a ValueError that mentions
# MAX_RECORDED_VALUES.
def test_four_recording_uops_rejected(self):
input = """
tier2 op(_RECORD_A, (a, b, c, d -- a, b, c, d)) {
RECORD_VALUE(a);
}
tier2 op(_RECORD_B, (a, b, c, d -- a, b, c, d)) {
RECORD_VALUE(b);
}
tier2 op(_RECORD_C, (a, b, c, d -- a, b, c, d)) {
RECORD_VALUE(c);
}
tier2 op(_RECORD_D, (a, b, c, d -- a, b, c, d)) {
RECORD_VALUE(d);
}
op(_DO_STUFF, (a, b, c, d -- res)) {
res = a;
}
macro(OP) = _RECORD_A + _RECORD_B + _RECORD_C + _RECORD_D + _DO_STUFF;
"""
with self.assertRaisesRegex(ValueError, "exceeds MAX_RECORDED_VALUES"):
self.generate_tables(input)


class TestGeneratedAbstractCases(unittest.TestCase):
def setUp(self) -> None:
Expand Down
18 changes: 11 additions & 7 deletions Modules/_testinternalcapi/test_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 9 additions & 5 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -6349,7 +6349,10 @@ dummy_func(
ERROR_IF(err < 0);
DISPATCH();
}
Py_CLEAR(tracer->prev_state.recorded_value);
for (int i = 0; i < tracer->prev_state.recorded_count; i++) {
Py_CLEAR(tracer->prev_state.recorded_values[i]);
}
tracer->prev_state.recorded_count = 0;
tracer->prev_state.instr = next_instr;
PyObject *prev_code = PyStackRef_AsPyObjectBorrow(frame->f_executable);
if (tracer->prev_state.instr_code != (PyCodeObject *)prev_code) {
Expand All @@ -6363,11 +6366,12 @@ dummy_func(
(&next_instr[1])->counter = trigger_backoff_counter();
}

uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[opcode];
if (record_func_index) {
_Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_func_index];
doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[opcode];
for (int i = 0; i < record_entry->count; i++) {
_Py_RecordFuncPtr doesnt_escape = _PyOpcode_RecordFunctions[record_entry->indices[i]];
doesnt_escape(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
}
tracer->prev_state.recorded_count = record_entry->count;
DISPATCH_GOTO_NON_TRACING();
#else
(void)prev_instr;
Expand Down
18 changes: 11 additions & 7 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 17 additions & 8 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,7 @@ _PyJit_translate_single_bytecode_to_trace(
assert(nuops > 0);
uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM
uint32_t orig_target = target;
int record_idx = 0;
for (int i = 0; i < nuops; i++) {
oparg = orig_oparg;
target = orig_target;
Expand Down Expand Up @@ -946,8 +947,9 @@ _PyJit_translate_single_bytecode_to_trace(
operand = next->op.arg;
}
else if (_PyUop_Flags[uop] & HAS_RECORDS_VALUE_FLAG) {
PyObject *recorded_value = tracer->prev_state.recorded_value;
tracer->prev_state.recorded_value = NULL;
PyObject *recorded_value = tracer->prev_state.recorded_values[record_idx];
tracer->prev_state.recorded_values[record_idx] = NULL;
record_idx++;
operand = (uintptr_t)recorded_value;
}
// All other instructions
Expand Down Expand Up @@ -1060,12 +1062,16 @@ _PyJit_TryInitializeTracing(
tracer->prev_state.instr_frame = frame;
tracer->prev_state.instr_oparg = oparg;
tracer->prev_state.instr_stacklevel = tracer->initial_state.stack_depth;
tracer->prev_state.recorded_value = NULL;
uint8_t record_func_index = _PyOpcode_RecordFunctionIndices[curr_instr->op.code];
if (record_func_index) {
_Py_RecordFuncPtr record_func = _PyOpcode_RecordFunctions[record_func_index];
record_func(frame, stack_pointer, oparg, &tracer->prev_state.recorded_value);
tracer->prev_state.recorded_count = 0;
for (int i = 0; i < MAX_RECORDED_VALUES; i++) {
tracer->prev_state.recorded_values[i] = NULL;
}
const _PyOpcodeRecordEntry *record_entry = &_PyOpcode_RecordEntries[curr_instr->op.code];
for (int i = 0; i < record_entry->count; i++) {
_Py_RecordFuncPtr record_func = _PyOpcode_RecordFunctions[record_entry->indices[i]];
record_func(frame, stack_pointer, oparg, &tracer->prev_state.recorded_values[i]);
}
tracer->prev_state.recorded_count = record_entry->count;
assert(curr_instr->op.code == JUMP_BACKWARD_JIT || curr_instr->op.code == RESUME_CHECK_JIT || (exit != NULL));
tracer->initial_state.jump_backward_instr = curr_instr;

Expand Down Expand Up @@ -1117,7 +1123,10 @@ _PyJit_FinalizeTracing(PyThreadState *tstate, int err)
Py_CLEAR(tracer->initial_state.func);
Py_CLEAR(tracer->initial_state.executor);
Py_CLEAR(tracer->prev_state.instr_code);
Py_CLEAR(tracer->prev_state.recorded_value);
for (int i = 0; i < MAX_RECORDED_VALUES; i++) {
Py_CLEAR(tracer->prev_state.recorded_values[i]);
}
tracer->prev_state.recorded_count = 0;
uop_buffer_init(buffer, &tracer->uop_array[0], UOP_MAX_TRACE_LENGTH);
tracer->is_tracing = false;
}
Expand Down
Loading
Loading