第一次没过,破防了,决定直接一步到胃:必做一和选做一放在一起写。
1. 概述
- 根据指导书可知,我们需要实现一级高速缓存(L1)和二级高速缓存(L2),其中 L2 会多一个脏位(dirty bit)的处理
- 公式:C = B × E × S(B:每个缓存块的字节数,E:每组的缓存行数(路数),S:组数,C:总容量)
- l1: E=8 B=64 C=64*1024 S=128
- l2: E=16 B=64 C=4*1024 *1024 S=4096
- Address:
- tag: t bits
- set index: s bits
- block offset: b bits
- l1: b=6 s=7
- l2: b=6 s=12
2. 实现
- 定义高速缓存的结构(nemu/include/memory/cache.h)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
| #include <stdint.h>
/*
 * Cache geometry shared by both levels.
 *
 * Lower-case suffixes are log2 exponents, upper-case suffixes are the derived
 * counts: b/B = bytes per block, e/E = lines per set (ways), s/S = sets.
 * The capacity of a level is B * E * S.
 *
 * Each directive must sit on its own line: a #define consumes the rest of its
 * physical line as replacement text.
 */
#define CACHE_b 6                       /* 2^6  = 64-byte blocks  */
#define CACHE_L1_e 3                    /* 2^3  = 8 ways in L1    */
#define CACHE_L1_s 7                    /* 2^7  = 128 sets in L1  */
#define CACHE_L2_e 4                    /* 2^4  = 16 ways in L2   */
#define CACHE_L2_s 12                   /* 2^12 = 4096 sets in L2 */
#define CACHE_L1_CAP (64 * 1024)        /* 64 KiB = 64 * 8 * 128    */
#define CACHE_L2_CAP (4 * 1024 * 1024)  /* 4 MiB  = 64 * 16 * 4096  */

/* Counts derived from the exponents above. */
#define CACHE_B (1 << CACHE_b)
#define CACHE_L1_E (1 << CACHE_L1_e)
#define CACHE_L1_S (1 << CACHE_L1_s)
#define CACHE_L2_E (1 << CACHE_L2_e)
#define CACHE_L2_S (1 << CACHE_L2_s)
/* One L1 cache line: a block of data plus the fields used to match it. */
typedef struct {
    uint8_t data[CACHE_B];  /* cached copy of one memory block */
    uint32_t tag;           /* address bits above the set index and block offset */
    bool validVal;          /* true once the line has been filled */
} L1;

/*
 * Every L1 line, laid out set by set: set i occupies indices
 * [i * CACHE_L1_E, (i + 1) * CACHE_L1_E).
 * NOTE(review): this defines the array inside a header; if more than one
 * translation unit includes it, linking relies on tentative definitions --
 * confirm, or move the definition to cache.c and declare it extern here.
 */
L1 cache_L1[CACHE_L1_S * CACHE_L1_E];
/* One L2 cache line: like an L1 line, plus a dirty flag for write-back. */
typedef struct {
    uint8_t data[CACHE_B];  /* cached copy of one memory block */
    uint32_t tag;           /* address bits above the set index and block offset */
    bool validVal;          /* true once the line has been filled */
    bool dirtyVal;          /* true when the line must be written back before reuse */
} L2;

/*
 * Every L2 line, laid out set by set: set i occupies indices
 * [i * CACHE_L2_E, (i + 1) * CACHE_L2_E).
 * NOTE(review): this defines the array inside a header; if more than one
 * translation unit includes it, linking relies on tentative definitions --
 * confirm, or move the definition to cache.c and declare it extern here.
 */
L2 cache_L2[CACHE_L2_S * CACHE_L2_E];
|
- 实现相关函数(nemu/src/memory/cache.c)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
| void init_cache() { int i; for (i = 0; i < CACHE_L1_S * CACHE_L1_E; i++) { cache_L1[i].validVal = false; } for (i = 0; i < CACHE_L2_S * CACHE_L2_E; i++) { cache_L2[i].dirtyVal = false; cache_L2[i].validVal = false; } return; }
/*
 * DRAM read entry point used by the cache fill path; callers in this file
 * advance `addr` and `data` by BURST_LEN between calls.
 * NOTE(review): presumably reads one BURST_LEN-sized burst per call -- confirm
 * against ddr3_read() in dram.c.
 */
void ddr3_read_me(hwaddr_t addr, void* data);
/*
 * Look up the block containing `addr` in the L1 cache, filling it from L2 on
 * a miss.
 *
 * addr: physical address being accessed.
 * Returns the index into cache_L1[] of the line that holds the block.
 *
 * On a miss the block is obtained through read_cache_L2() and copied into the
 * first invalid line of the set; when the set is full a randomly chosen line
 * is overwritten. L1 lines have no dirty flag, so nothing is written back
 * from here.
 */
int read_cache_L1(hwaddr_t addr) {
    /*
     * rand() is seeded once, not on every miss: time() only changes once per
     * second, so reseeding per miss made rand() return the same value -- and
     * therefore pick the same victim way -- for every miss in that second.
     */
    static int rng_seeded = 0;

    uint32_t set = (addr >> CACHE_b) & (CACHE_L1_S - 1);
    uint32_t tag = addr >> (CACHE_b + CACHE_L1_s);
    int set_begin = set * CACHE_L1_E;
    int set_end = set_begin + CACHE_L1_E;
    int block_i;
    int block_i_L2;

    /* Hit: a valid line in this set already carries the tag. */
    for (block_i = set_begin; block_i < set_end; block_i++) {
        if (cache_L1[block_i].validVal && cache_L1[block_i].tag == tag) {
            return block_i;
        }
    }

    /* Miss: make sure L2 holds the block, then pick the L1 line to fill. */
    block_i_L2 = read_cache_L2(addr);

    for (block_i = set_begin; block_i < set_end; block_i++) {
        if (!cache_L1[block_i].validVal) {
            break;
        }
    }
    if (block_i == set_end) {
        /* No free line in the set: evict a random one. */
        if (!rng_seeded) {
            srand(time(0));
            rng_seeded = 1;
        }
        block_i = set_begin + rand() % CACHE_L1_E;
    }

    memcpy(cache_L1[block_i].data, cache_L2[block_i_L2].data, CACHE_B);
    cache_L1[block_i].validVal = true;
    cache_L1[block_i].tag = tag;
    return block_i;
}
/*
 * DRAM write entry point used when a dirty L2 line is written back; callers
 * in this file advance `addr` and `data` by BURST_LEN between calls.
 * NOTE(review): `mask` presumably selects which bytes of the burst are
 * written -- confirm against ddr3_write() in dram.c.
 */
void ddr3_write_me(hwaddr_t addr, void* data, uint8_t* mask);
/*
 * Look up the block containing `addr` in the L2 cache, loading it from DRAM
 * on a miss.
 *
 * addr: physical address being accessed.
 * Returns the index into cache_L2[] of the line that holds the block.
 *
 * On a miss the first invalid line of the set is used; when the set is full a
 * randomly chosen line is evicted. A valid, dirty victim is written back to
 * DRAM before the new block is read in. The filled line is marked valid and
 * clean.
 */
int read_cache_L2(hwaddr_t addr) {
    /*
     * rand() is seeded once, not on every miss: time() only changes once per
     * second, so reseeding per miss made rand() return the same value -- and
     * therefore pick the same victim way -- for every miss in that second.
     */
    static int rng_seeded = 0;

    uint32_t set = (addr >> CACHE_b) & (CACHE_L2_S - 1);
    uint32_t tag = addr >> (CACHE_b + CACHE_L2_s);
    uint32_t block_start = (addr >> CACHE_b) << CACHE_b;  /* addr rounded down to its block */
    int set_begin = set * CACHE_L2_E;
    int set_end = set_begin + CACHE_L2_E;
    int block_i;
    int i;

    /* Hit: a valid line in this set already carries the tag. */
    for (block_i = set_begin; block_i < set_end; block_i++) {
        if (cache_L2[block_i].validVal && cache_L2[block_i].tag == tag) {
            return block_i;
        }
    }

    /* Miss: prefer a free line, otherwise evict a random one. */
    for (block_i = set_begin; block_i < set_end; block_i++) {
        if (!cache_L2[block_i].validVal) {
            break;
        }
    }
    if (block_i == set_end) {
        if (!rng_seeded) {
            srand(time(0));
            rng_seeded = 1;
        }
        block_i = set_begin + rand() % CACHE_L2_E;
    }

    /* Write the victim back first if it holds modified data. */
    if (cache_L2[block_i].validVal && cache_L2[block_i].dirtyVal) {
        /* All-ones mask; presumably this makes ddr3_write store every byte. */
        uint8_t mask[BURST_LEN << 1];
        /* Rebuild the victim's block address from its tag and the set index. */
        uint32_t victim_start = (cache_L2[block_i].tag << (CACHE_L2_s + CACHE_b)) | (set << CACHE_b);

        memset(mask, 1, sizeof(mask));
        for (i = 0; i < CACHE_B / BURST_LEN; i++) {
            ddr3_write_me(victim_start + BURST_LEN * i, cache_L2[block_i].data + BURST_LEN * i, mask);
        }
    }

    /* Fetch the requested block, one BURST_LEN step at a time. */
    for (i = 0; i < CACHE_B / BURST_LEN; i++) {
        ddr3_read_me(block_start + BURST_LEN * i, cache_L2[block_i].data + BURST_LEN * i);
    }

    cache_L2[block_i].validVal = true;
    cache_L2[block_i].dirtyVal = false;
    cache_L2[block_i].tag = tag;
    return block_i;
}
|
- 更改hwaddr_read()和hwaddr_write() (nemu/src/memory/memory.c)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
/*
 * Read `len` bytes at physical address `addr` through the L1 cache.
 *
 * addr: physical address of the first byte.
 * len:  number of bytes to read; the mask below assumes 1..4 (a `len` of 0
 *       would shift by 32 bits, which is undefined).
 * Returns the 4-byte value loaded from the staging buffer, masked down to its
 * low len * 8 bits.
 *
 * An access that runs past the end of a cache block is assembled from two
 * lines: the tail of the block containing `addr` and the head of the next.
 */
uint32_t hwaddr_read(hwaddr_t addr, size_t len) {
    int cache_L1_way_1_index = read_cache_L1(addr);
    uint32_t block_bias = addr & (CACHE_B - 1);  /* offset of addr inside its block */
    /* Zero-filled so the 4-byte load below never sees indeterminate bytes. */
    uint8_t ret[BURST_LEN << 1] = {0};

    if (block_bias + len > CACHE_B) {
        /*
         * Bytes available in the first block. Computed up front so the
         * destination pointer is ret + head, not (ret + CACHE_B) - bias,
         * which would step far outside `ret` before coming back.
         */
        size_t head = CACHE_B - block_bias;
        int cache_L1_way_2_index = read_cache_L1(addr + head);

        memcpy(ret, cache_L1[cache_L1_way_1_index].data + block_bias, head);
        memcpy(ret + head, cache_L1[cache_L1_way_2_index].data, len - head);
    } else {
        memcpy(ret, cache_L1[cache_L1_way_1_index].data + block_bias, len);
    }

    return unalign_rw(ret, 4) & (~0u >> ((4 - len) << 3));
}

/* All writes go through the cache: forwards the arguments unchanged to write_cache_L1(). */
void hwaddr_write(hwaddr_t addr, size_t len, uint32_t data) {
    write_cache_L1(addr, len, data);
}
|
- 由于被多次调用的读写函数 ddr3_read() 和 ddr3_write() 位于 dram.c 文件中,且这两个函数都用 static 修饰(文件外不可见),因此需要在 dram.c 文件中再定义两个非 static 的包装函数,作为接口供外部调用这两个函数
1 2 3 4 5 6
/*
 * Externally visible wrapper around ddr3_read(), so that code outside this
 * file can call it. Arguments are passed through unchanged.
 */
void ddr3_read_me(hwaddr_t addr, void* data)
{
    ddr3_read(addr, data);
}

/*
 * Externally visible wrapper around ddr3_write(), so that code outside this
 * file can call it. Arguments are passed through unchanged.
 */
void ddr3_write_me(hwaddr_t addr, void* data, uint8_t* mask)
{
    ddr3_write(addr, data, mask);
}
|
3. 调试
通过即可