@@ -8,50 +8,10 @@ extern "C" {
 * Specialization and quickening structs and helper functions
 */

-typedef struct {
-    int32_t cache_count;
-    int32_t _; /* Force 8 byte size */
-} _PyEntryZero;
-
-typedef struct {
-    uint8_t original_oparg;
-    uint8_t counter;
-    uint16_t index;
-    uint32_t version;
-} _PyAdaptiveEntry;

-typedef struct {
-    /* Borrowed ref */
-    PyObject *obj;
-} _PyObjectCache;
-
-typedef struct {
-    uint32_t func_version;
-    uint16_t min_args;
-    uint16_t defaults_len;
-} _PyCallCache;
-
-
-/* Add specialized versions of entries to this union.
- *
- * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8
- * Preserving this invariant is necessary because:
- * - If any one form uses more space, then all must, and on 64 bit machines
- *   this is likely to double the memory consumption of caches
- * - The function for calculating the offset of caches assumes a 4:1
- *   cache:instruction size ratio. Changing that would need careful
- *   analysis to choose a new function.
- */
-typedef union {
-    _PyEntryZero zero;
-    _PyAdaptiveEntry adaptive;
-    _PyObjectCache obj;
-    _PyCallCache call;
-} SpecializedCacheEntry;
-
-#define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT))
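Nothing in the removed design enforced the 8-byte invariant mechanically; a compile-time guard in this spirit (hypothetical, assuming a C11 toolchain) would have turned a violation into a build error:

/* Hypothetical guard for the removed design: any union member that grew
 * past 8 bytes would fail the build instead of silently doubling cache
 * memory and breaking the 4:1 offset math. */
_Static_assert(sizeof(SpecializedCacheEntry) == 8,
               "SpecializedCacheEntry must stay 8 bytes");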
-
-/* Inline caches */
+// Inline caches. If you change the number of cache entries for an instruction,
+// you must *also* update the number of cache entries in Lib/opcode.py and bump
+// the magic number in Lib/importlib/_bootstrap_external.py!

#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT))
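CACHE_ENTRIES measures a cache struct in 16-bit code units, not bytes, since caches now occupy slots in the instruction stream. A hypothetical struct to make the arithmetic concrete:

/* Illustration only (not a real cache in this header): one 16-bit counter
 * plus a 32-bit value split across two code units occupies 3 code units,
 * so CACHE_ENTRIES(_PyExampleCache) == 3. */
typedef struct {
    _Py_CODEUNIT counter;     /* 1 code unit  */
    _Py_CODEUNIT version[2];  /* 2 code units */
} _PyExampleCache;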
@@ -112,73 +72,22 @@ typedef struct {
#define INLINE_CACHE_ENTRIES_LOAD_METHOD CACHE_ENTRIES(_PyLoadMethodCache)

-/* Maximum size of code to quicken, in code units. */
-#define MAX_SIZE_TO_QUICKEN 5000
-
-typedef union _cache_or_instruction {
-    _Py_CODEUNIT code[1];
-    SpecializedCacheEntry entry;
-} SpecializedCacheOrInstruction;
+typedef struct {
+    _Py_CODEUNIT counter;
+    _Py_CODEUNIT func_version[2];
+    _Py_CODEUNIT min_args;
+} _PyCallCache;

-/* Get pointer to the nth cache entry, from the first instruction and n.
- * Cache entries are indexed backwards, with [count-1] first in memory, and [0] last.
- * The zeroth entry immediately precedes the instructions.
- */
-static inline SpecializedCacheEntry *
-_GetSpecializedCacheEntry(const _Py_CODEUNIT *first_instr, Py_ssize_t n)
-{
-    SpecializedCacheOrInstruction *last_cache_plus_one = (SpecializedCacheOrInstruction *)first_instr;
-    assert(&last_cache_plus_one->code[0] == first_instr);
-    return &last_cache_plus_one[-1 - n].entry;
-}
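The backwards indexing is easier to follow as a memory layout. A sketch of this (now removed) arrangement for a code object with three cache entries:

/* Removed layout, count == 3, addresses increasing downward:
 *
 *     entry[2]   <- lowest address
 *     entry[1]
 *     entry[0]   <- the 8 bytes immediately before the bytecode
 *     code[0]    <- first_instr points here
 *
 * so _GetSpecializedCacheEntry(first_instr, 0) returned
 * &last_cache_plus_one[-1].entry, just before the instructions. */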
+#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
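Since each field is a 16-bit _Py_CODEUNIT, CACHE_ENTRIES(_PyCallCache) evaluates to 4. The 32-bit function version must be split across two code units; a minimal sketch of packing and unpacking it (helper names are hypothetical, not part of this header):

/* Hypothetical helpers showing the layout of func_version[2]:
 * low 16 bits in the first code unit, high 16 bits in the second. */
static inline void
example_write_u32(_Py_CODEUNIT *p, uint32_t value)
{
    p[0] = (_Py_CODEUNIT)(value & 0xFFFF);
    p[1] = (_Py_CODEUNIT)(value >> 16);
}

static inline uint32_t
example_read_u32(const _Py_CODEUNIT *p)
{
    return (uint32_t)p[0] | ((uint32_t)p[1] << 16);
}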

-/* The following two functions form a pair.
- *
- * oparg_from_offset_and_index() is used to compute the oparg
- * when quickening, so that offset_from_oparg_and_nexti()
- * can be used at runtime to compute the offset.
- *
- * The relationship between the three values is currently
- *     offset == (index>>1) + oparg
- * This relation is chosen based on the following observations:
- * 1. typically 1 in 4 instructions need a cache
- * 2. instructions that need a cache typically use 2 entries
- * These observations imply: offset ≈ index/2
- * We use the oparg to fine tune the relation to avoid wasting space
- * and to allow consecutive instructions to use caches.
- *
- * If the number of cache entries < number of instructions/2, we will waste
- * a small amount of space.
- * If the number of cache entries > (number of instructions/2) + 255, then
- * some instructions will not be able to use a cache.
- * In practice, we expect a small amount of wasted space in shorter functions,
- * and only functions of 1000 lines or more to run short of cache space.
- */
-static inline int
-oparg_from_offset_and_nexti(int offset, int nexti)
-{
-    return offset - (nexti>>1);
-}
+typedef struct {
+    _Py_CODEUNIT counter;
+} _PyPrecallCache;

-static inline int
-offset_from_oparg_and_nexti(int oparg, int nexti)
-{
-    return (nexti>>1) + oparg;
-}
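A worked example of the removed relation: for the instruction at index 10 (nexti == 10) with its cache at offset 7, quickening stores oparg = 7 - (10>>1) = 2, and the interpreter recomputes offset = (10>>1) + 2 = 7. The round trip always holds because the (nexti>>1) term cancels:

/* Round-trip check for the removed scheme (illustrative only). */
static inline void
example_offset_round_trip(int offset, int nexti)
{
    int oparg = oparg_from_offset_and_nexti(offset, nexti);
    assert(offset_from_oparg_and_nexti(oparg, nexti) == offset);
}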
+#define INLINE_CACHE_ENTRIES_PRECALL CACHE_ENTRIES(_PyPrecallCache)

-/* Get pointer to the cache entry associated with an instruction.
- * nexti is the index of the instruction plus one.
- * nexti is used as it corresponds to the instruction pointer in the interpreter.
- * This doesn't check that an entry has been allocated for that instruction. */
-static inline SpecializedCacheEntry *
-_GetSpecializedCacheEntryForInstruction(const _Py_CODEUNIT *first_instr, int nexti, int oparg)
-{
-    return _GetSpecializedCacheEntry(
-        first_instr,
-        offset_from_oparg_and_nexti(oparg, nexti)
-    );
-}
+/* Maximum size of code to quicken, in code units. */
+#define MAX_SIZE_TO_QUICKEN 10000

#define QUICKENING_WARMUP_DELAY 8
@@ -205,6 +114,13 @@ _Py_IncrementCountAndMaybeQuicken(PyCodeObject *code)
extern Py_ssize_t _Py_QuickenedCount;

+// Borrowed references to common callables:
+struct callable_cache {
+    PyObject *isinstance;
+    PyObject *len;
+    PyObject *list_append;
+};
+
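These borrowed references let the specializer recognize common builtins by pointer identity instead of a dict lookup. A minimal sketch of that pattern, assuming the cache has already been populated with the interpreter's builtins (the helper name is hypothetical):

/* Sketch: with a populated cache, detecting a call to the builtin len()
 * is a single pointer comparison. Borrowed references are safe here
 * because the builtins outlive the state that caches them. */
static inline int
example_is_builtin_len(struct callable_cache *cache, PyObject *callable)
{
    return callable == cache->len;
}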
/* "Locals plus" for a code object is the set of locals + cell vars +
 * free vars. This relates to variable names as well as offsets into
 * the "fast locals" storage array of execution frames. The compiler
@@ -332,11 +248,6 @@ extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range);
#define ADAPTIVE_CACHE_BACKOFF 64

-static inline void
-cache_backoff(_PyAdaptiveEntry *entry) {
-    entry->counter = ADAPTIVE_CACHE_BACKOFF;
-}
-
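The removed cache_backoff() helper wrote to the old _PyAdaptiveEntry; with inline caches, the counter is simply the first code unit of the instruction's cache, so the equivalent reset is a direct store. A minimal sketch, assuming the caller passes a pointer to that counter (helper name hypothetical):

/* Hypothetical replacement: reset the inline counter so the instruction
 * waits ADAPTIVE_CACHE_BACKOFF executions before the next attempt. */
static inline void
example_cache_backoff(_Py_CODEUNIT *counter)
{
    *counter = ADAPTIVE_CACHE_BACKOFF;
}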
/* Specialization functions */

extern int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr,
@@ -348,10 +259,10 @@ extern int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr,
                                   PyObject *name);
extern int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr);
extern int _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, _Py_CODEUNIT *instr);
-extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
-                               PyObject *kwnames, SpecializedCacheEntry *cache);
-extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr, int nargs,
-                                  PyObject *kwnames, SpecializedCacheEntry *cache, PyObject *builtins);
+extern int _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr,
+                               int nargs, PyObject *kwnames);
+extern int _Py_Specialize_Precall(PyObject *callable, _Py_CODEUNIT *instr,
+                                  int nargs, PyObject *kwnames, int oparg);
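The new signatures drop the SpecializedCacheEntry argument because inline caches live in the instruction stream itself, immediately after the instruction they belong to, so the specializer can recover the cache from instr alone. A sketch of that addressing under this layout assumption (helper name hypothetical):

/* Sketch: for a CALL instruction at instr, its inline cache occupies the
 * next INLINE_CACHE_ENTRIES_CALL code units, starting at instr + 1. */
static inline _PyCallCache *
example_call_cache(_Py_CODEUNIT *instr)
{
    return (_PyCallCache *)(instr + 1);
}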
extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr,
                                    int oparg);
extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs,