-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcache.h
233 lines (200 loc) · 7.85 KB
/
cache.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#ifndef CACHE_H
#define CACHE_H
#include "memory_class.h"
// PAGE
// Latencies declared here and defined in another translation unit.
extern uint32_t PAGE_TABLE_LATENCY, SWAP_LATENCY;

// CACHE TYPE
// Numeric tags for the different cache/TLB kinds (stored in CACHE::cache_type).
#define IS_ITLB 0
#define IS_DTLB 1
#define IS_STLB 2
#define IS_L1I  3
#define IS_L1D  4
#define IS_L2C  5
#define IS_LLC  6

// Each section below gives, for one structure: number of sets, ways per set,
// read/write/prefetch queue capacities, MSHR capacity and access latency.

// INSTRUCTION TLB
#define ITLB_SET 16
#define ITLB_WAY 4
#define ITLB_RQ_SIZE 16
#define ITLB_WQ_SIZE 16
#define ITLB_PQ_SIZE 0
#define ITLB_MSHR_SIZE 8
#define ITLB_LATENCY 1

// DATA TLB
#define DTLB_SET 16
#define DTLB_WAY 4
#define DTLB_RQ_SIZE 16
#define DTLB_WQ_SIZE 16
#define DTLB_PQ_SIZE 0
#define DTLB_MSHR_SIZE 8
#define DTLB_LATENCY 1

// SECOND LEVEL TLB
#define STLB_SET 128
#define STLB_WAY 12
#define STLB_RQ_SIZE 32
#define STLB_WQ_SIZE 32
#define STLB_PQ_SIZE 0
#define STLB_MSHR_SIZE 16
#define STLB_LATENCY 8

// L1 INSTRUCTION CACHE
#define L1I_SET 64
#define L1I_WAY 8
#define L1I_RQ_SIZE 64
#define L1I_WQ_SIZE 64
#define L1I_PQ_SIZE 32
#define L1I_MSHR_SIZE 8
#define L1I_LATENCY 4

// L1 DATA CACHE
#define L1D_SET 64
#define L1D_WAY 12
#define L1D_RQ_SIZE 64
#define L1D_WQ_SIZE 64
#define L1D_PQ_SIZE 8
#define L1D_MSHR_SIZE 16
#define L1D_LATENCY 5

// L2 CACHE
#define L2C_SET 1024
#define L2C_WAY 8
#define L2C_RQ_SIZE 32
#define L2C_WQ_SIZE 32
#define L2C_PQ_SIZE 16
#define L2C_MSHR_SIZE 32
#define L2C_LATENCY 10 // 4/5 (L1I or L1D) + 10 = 14/15 cycles

// LAST LEVEL CACHE
// Sizes scale with NUM_CPUS (defined outside this section). Every replacement
// list is parenthesized so the macro acts as a single value inside a larger
// expression: without the parentheses `addr % LLC_SET` would expand to
// `addr % NUM_CPUS*2048`, i.e. `(addr % NUM_CPUS) * 2048`.
#define LLC_SET (NUM_CPUS*2048)
#define LLC_WAY 16
#define LLC_RQ_SIZE (NUM_CPUS*L2C_MSHR_SIZE) //48
#define LLC_WQ_SIZE (NUM_CPUS*L2C_MSHR_SIZE) //48
#define LLC_PQ_SIZE (NUM_CPUS*32)
#define LLC_MSHR_SIZE (NUM_CPUS*64)
#define LLC_LATENCY 20 // 4/5 (L1I or L1D) + 10 + 20 = 34/35 cycles
// Cache / TLB model built on the MEMORY interface.
//
// A CACHE owns a NUM_SET x NUM_WAY array of BLOCKs, a set of packet queues
// (write, read, prefetch, MSHR, processed) and per-CPU / per-access-type
// hit and miss counters. Only the constructor and destructor are defined in
// this header; every other member function is declared here and defined
// elsewhere.
class CACHE : public MEMORY {
  public:
    uint32_t cpu;
    const string NAME;
    const uint32_t NUM_SET, NUM_WAY, NUM_LINE, WQ_SIZE, RQ_SIZE, PQ_SIZE, MSHR_SIZE;
    uint32_t LATENCY;
    BLOCK **block; // block[set][way]; allocated in the constructor, freed in the destructor
    int fill_level;
    uint32_t MAX_READ, MAX_FILL;
    uint32_t reads_available_this_cycle;
    uint8_t cache_type;

    // prefetch stats
    uint64_t pf_requested,
             pf_issued,
             pf_useful,
             pf_useless,
             pf_fill;

    // queues (named after this cache; they rely on NAME and the *_SIZE
    // members being declared, and therefore initialized, before them)
    PACKET_QUEUE WQ{NAME + "_WQ", WQ_SIZE},               // write queue
                 RQ{NAME + "_RQ", RQ_SIZE},               // read queue
                 PQ{NAME + "_PQ", PQ_SIZE},               // prefetch queue
                 MSHR{NAME + "_MSHR", MSHR_SIZE},         // MSHR
                 PROCESSED{NAME + "_PROCESSED", ROB_SIZE}; // processed queue

    // access / hit / miss counters, indexed [cpu][access type]
    uint64_t sim_access[NUM_CPUS][NUM_TYPES],
             sim_hit[NUM_CPUS][NUM_TYPES],
             sim_miss[NUM_CPUS][NUM_TYPES],
             roi_access[NUM_CPUS][NUM_TYPES],
             roi_hit[NUM_CPUS][NUM_TYPES],
             roi_miss[NUM_CPUS][NUM_TYPES];

    uint64_t total_miss_latency;

    // constructor
    //   v1 = NAME, v2 = NUM_SET, v3 = NUM_WAY, v4 = NUM_LINE,
    //   v5 = WQ_SIZE, v6 = RQ_SIZE, v7 = PQ_SIZE, v8 = MSHR_SIZE
    // NUM_LINE is stored exactly as passed; it is not derived from
    // NUM_SET * NUM_WAY here.
    //
    // Every scalar member gets a defined starting value. cpu,
    // reads_available_this_cycle, cache_type and the L1I prefetcher hooks
    // were previously left uninitialized; they now start at 0 / NULL
    // (cache_type 0 is numerically IS_ITLB) until the owner assigns them.
    CACHE(string v1, uint32_t v2, int v3, uint32_t v4, uint32_t v5, uint32_t v6, uint32_t v7, uint32_t v8)
        : cpu(0),
          NAME(v1), NUM_SET(v2), NUM_WAY(v3), NUM_LINE(v4), WQ_SIZE(v5), RQ_SIZE(v6), PQ_SIZE(v7), MSHR_SIZE(v8),
          LATENCY(0),
          block(NULL),
          fill_level(-1),
          MAX_READ(1), MAX_FILL(1),
          reads_available_this_cycle(0),
          cache_type(0),
          pf_requested(0), pf_issued(0), pf_useful(0), pf_useless(0), pf_fill(0),
          total_miss_latency(0),
          l1i_prefetcher_cache_operate(NULL),
          l1i_prefetcher_cache_fill(NULL)
    {
        // cache block: one BLOCK array per set; each way's lru field starts
        // at its own way index
        block = new BLOCK*[NUM_SET];
        for (uint32_t set = 0; set < NUM_SET; set++) {
            block[set] = new BLOCK[NUM_WAY];
            for (uint32_t way = 0; way < NUM_WAY; way++) {
                block[set][way].lru = way;
            }
        }

        // no upper-level caches attached yet; all counters start at zero
        for (uint32_t i = 0; i < NUM_CPUS; i++) {
            upper_level_icache[i] = NULL;
            upper_level_dcache[i] = NULL;

            for (uint32_t j = 0; j < NUM_TYPES; j++) {
                sim_access[i][j] = 0;
                sim_hit[i][j] = 0;
                sim_miss[i][j] = 0;
                roi_access[i][j] = 0;
                roi_hit[i][j] = 0;
                roi_miss[i][j] = 0;
            }
        }

        lower_level = NULL;
        extra_interface = NULL;
    }

    // The block array is owned through a raw pointer and released in the
    // destructor, so a member-wise copy would free it twice. Copying is
    // therefore disabled.
    CACHE(const CACHE &) = delete;
    CACHE &operator=(const CACHE &) = delete;

    // destructor: releases every per-set BLOCK array, then the set table
    ~CACHE() {
        for (uint32_t i = 0; i < NUM_SET; i++)
            delete[] block[i];
        delete[] block;
    }

    // functions
    int  add_rq(PACKET *packet),
         add_wq(PACKET *packet),
         add_pq(PACKET *packet);

    void return_data(PACKET *packet),
         operate(),
         increment_WQ_FULL(uint64_t address);

    uint32_t get_occupancy(uint8_t queue_type, uint64_t address),
             get_size(uint8_t queue_type, uint64_t address);

    int  check_hit(PACKET *packet),
         invalidate_entry(uint64_t inval_addr),
         check_mshr(PACKET *packet),
         prefetch_line(uint64_t ip, uint64_t base_addr, uint64_t pf_addr, int prefetch_fill_level, uint32_t prefetch_metadata),
         kpc_prefetch_line(uint64_t base_addr, uint64_t pf_addr, int prefetch_fill_level, int delta, int depth, int signature, int confidence, uint32_t prefetch_metadata);

    void handle_fill(),
         handle_writeback(),
         handle_read(),
         handle_prefetch();

    void add_mshr(PACKET *packet),
         update_fill_cycle(),
         llc_initialize_replacement(),
         update_replacement_state(uint32_t cpu, uint32_t set, uint32_t way, uint64_t full_addr, uint64_t ip, uint64_t victim_addr, uint32_t type, uint8_t hit),
         llc_update_replacement_state(uint32_t cpu, uint32_t set, uint32_t way, uint64_t full_addr, uint64_t ip, uint64_t victim_addr, uint32_t type, uint8_t hit),
         lru_update(uint32_t set, uint32_t way),
         fill_cache(uint32_t set, uint32_t way, PACKET *packet),
         replacement_final_stats(),
         llc_replacement_final_stats(),
         //prefetcher_initialize(),
         l1d_prefetcher_initialize(),
         l2c_prefetcher_initialize(),
         llc_prefetcher_initialize(),
         prefetcher_operate(uint64_t addr, uint64_t ip, uint8_t cache_hit, uint8_t type),
         l1d_prefetcher_operate(uint64_t addr, uint64_t ip, uint8_t cache_hit, uint8_t type),
         prefetcher_cache_fill(uint64_t addr, uint32_t set, uint32_t way, uint8_t prefetch, uint64_t evicted_addr),
         l1d_prefetcher_cache_fill(uint64_t addr, uint32_t set, uint32_t way, uint8_t prefetch, uint64_t evicted_addr, uint32_t metadata_in),
         //prefetcher_final_stats(),
         l1d_prefetcher_final_stats(),
         l2c_prefetcher_final_stats(),
         llc_prefetcher_final_stats();

    // L1I prefetcher hooks: plain function pointers, NULL until assigned
    // from outside this class.
    void (*l1i_prefetcher_cache_operate)(uint32_t, uint64_t, uint8_t, uint8_t);
    void (*l1i_prefetcher_cache_fill)(uint32_t, uint64_t, uint32_t, uint32_t, uint8_t, uint64_t);

    uint32_t l2c_prefetcher_operate(uint64_t addr, uint64_t ip, uint8_t cache_hit, uint8_t type, uint32_t metadata_in),
             llc_prefetcher_operate(uint64_t addr, uint64_t ip, uint8_t cache_hit, uint8_t type, uint32_t metadata_in),
             l2c_prefetcher_cache_fill(uint64_t addr, uint32_t set, uint32_t way, uint8_t prefetch, uint64_t evicted_addr, uint32_t metadata_in),
             llc_prefetcher_cache_fill(uint64_t addr, uint32_t set, uint32_t way, uint8_t prefetch, uint64_t evicted_addr, uint32_t metadata_in);

    uint32_t get_set(uint64_t address),
             get_way(uint64_t address, uint32_t set),
             find_victim(uint32_t cpu, uint64_t instr_id, uint32_t set, const BLOCK *current_set, uint64_t ip, uint64_t full_addr, uint32_t type),
             llc_find_victim(uint32_t cpu, uint64_t instr_id, uint32_t set, const BLOCK *current_set, uint64_t ip, uint64_t full_addr, uint32_t type),
             lru_victim(uint32_t cpu, uint64_t instr_id, uint32_t set, const BLOCK *current_set, uint64_t ip, uint64_t full_addr, uint32_t type);

    // Function declarations for the UMON-related helpers
    void UMON(uint32_t cpu,uint64_t full_addr,uint32_t set),
         UMON_update(uint32_t cpu, uint32_t set_index, uint32_t way),
         make_partition(),
         ucp_update(uint32_t set,uint32_t way,uint32_t cpu);

    uint32_t UMON_find_victim(uint32_t cpu, uint32_t set_index);
    uint64_t get_utility(uint32_t cpu, uint32_t a);
};
#endif