
Commit 211d471

Fix s3fifo plugin cache
1 parent a51375c commit 211d471

7 files changed, +149 -141 lines changed


README.md

Lines changed: 1 addition & 5 deletions
@@ -76,11 +76,7 @@ Implement custom cache replacement algorithms using pure Python functions - **no
 
 ### Python Hook Cache Overview
 
-The `PluginCache` allows you to define custom caching behavior through Python callback functions without without any C/C++ compilation.
-
-### Hook Functions
-
-You need to implement these callback functions:
+The `PluginCache` allows you to define custom caching behavior through Python callback functions. You need to implement these callback functions:
 
 | Function | Signature | Description |
 |----------|-----------|-------------|
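
For context, a complete hook set that satisfies this interface looks roughly like the sketch below. It is not part of the commit: the hook names and signatures mirror examples/plugin_cache/lru.py and examples/plugin_cache/s3fifo.py in this repository, and the `cache_eviction_hook` / `cache_remove_hook` keyword argument names of `PluginCache` are assumed to match the hook function names.

```python
# Minimal FIFO plugin wired through PluginCache (sketch, assumptions noted above).
from collections import OrderedDict
from libcachesim import PluginCache, CommonCacheParams, Request

class TinyFIFO:
    def __init__(self, cache_size):
        self.cache_size = cache_size
        self.store = OrderedDict()   # insertion order == FIFO order

def cache_init_hook(common_cache_params: CommonCacheParams):
    return TinyFIFO(common_cache_params.cache_size)

def cache_hit_hook(cache, request: Request):
    pass                              # FIFO ignores hits

def cache_miss_hook(cache, request: Request):
    cache.store[request.obj_id] = True

def cache_eviction_hook(cache, request: Request):
    # Called by the host cache when space is needed; returns the id to evict.
    obj_id, _ = cache.store.popitem(last=False)  # oldest inserted object
    return obj_id

def cache_remove_hook(cache, obj_id):
    cache.store.pop(obj_id, None)

def cache_free_hook(cache):
    cache.store.clear()

plugin = PluginCache(
    cache_size=1024,
    cache_init_hook=cache_init_hook,
    cache_hit_hook=cache_hit_hook,
    cache_miss_hook=cache_miss_hook,
    cache_eviction_hook=cache_eviction_hook,   # assumed keyword name
    cache_remove_hook=cache_remove_hook,       # assumed keyword name
    cache_free_hook=cache_free_hook,
    cache_name="TinyFIFO")
```

The eviction hook returning the evicted object's id is the same contract the s3fifo example relies on below.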

docs/src/en/index.md

Lines changed: 1 addition & 1 deletion
@@ -65,4 +65,4 @@ We welcome contributions! Please see our [GitHub repository](https://github.com/
 
 ## License
 
-This project is licensed under the Apache License 2.0.
+This project is licensed under the GPL-3.0 License.

docs/src/zh/index.md

Lines changed: 1 addition & 1 deletion
@@ -65,4 +65,4 @@ pip install -e .
 
 ## 许可证
 
-本项目采用 Apache License 2.0 许可证。
+本项目采用 GPL-3.0 许可证。

examples/plugin_cache/lru.py

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ def cache_free_hook(cache):
 
 reader = SyntheticReader(
     num_of_req=100000,
-    num_objects=100,
+    num_objects=10000,
     obj_size=100,
     seed=42,
     alpha=0.8,
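
The bump from 100 to 10,000 objects makes the synthetic Zipf workload much larger than the cache, so the example actually exercises evictions. As a rough illustration of how such a reader is consumed (a sketch only: `FIFO` and `process_trace` are taken from the s3fifo example in this commit, and `dist="zipf"` from the reader it removes):

```python
import libcachesim as lcs

# Synthetic Zipf workload; parameters mirror the diff above.
reader = lcs.SyntheticReader(
    num_of_req=100000,
    num_objects=10000,   # far more objects than the cache holds, so misses occur
    obj_size=100,
    seed=42,
    alpha=0.8,
    dist="zipf",
)

# A built-in FIFO cache, used here only to show a reader consumed end to end.
baseline = lcs.FIFO(1024)
req_miss_ratio, byte_miss_ratio = baseline.process_trace(reader)
print(f"req miss ratio: {req_miss_ratio}, byte miss ratio: {byte_miss_ratio}")
```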

examples/plugin_cache/s3fifo.py

Lines changed: 143 additions & 131 deletions
@@ -1,6 +1,12 @@
 # An example of plugin for s3fifo
+
+# NOTE(haocheng): the one shows that with plugin system, we can make cache as lego blocks
+# Happy caching!
+
+import libcachesim as lcs
 from collections import OrderedDict
-from libcachesim import PluginCache, CommonCacheParams, Request, S3FIFO, SyntheticReader
+from collections import deque
+from libcachesim import PluginCache, CommonCacheParams, Request, S3FIFO, FIFO, SyntheticReader
 
 # NOTE(haocheng): we only support ignore object size for now
 class StandaloneS3FIFO:
@@ -9,160 +15,157 @@ def __init__(self,
                  ghost_size_ratio: float = 0.9,
                  move_to_main_threshold: int = 2,
                  cache_size: int = 1024):
-        # S3-FIFO uses three queues with OrderedDict for O(1) operations
-        self.small_fifo = OrderedDict()
-        self.main_fifo = OrderedDict()
-        self.ghost_fifo = OrderedDict()
-
-        # Size limits
-        self.small_max_size = int(small_size_ratio * cache_size)
-        self.main_max_size = int(cache_size - small_size_ratio * cache_size)
-        self.ghost_max_size = int(ghost_size_ratio * cache_size)
+        self.cache_size = cache_size
+        small_fifo_size = int(small_size_ratio * cache_size)
+        main_fifo_size = cache_size - small_fifo_size
+        ghost_fifo_size = int(ghost_size_ratio * cache_size)
+
+        self.small_set = set()
+        self.main_set = set()
+        self.ghost_set = deque(maxlen=ghost_fifo_size)
+
+        self.small_fifo = FIFO(small_fifo_size)
+        self.main_fifo = FIFO(main_fifo_size)
+        self.ghost_fifo = FIFO(ghost_fifo_size)
 
         # Frequency tracking
-        self.small_freq = {}
-        self.main_freq = {}
-        self.ghost_freq = {}
+        self.freq = {}
 
         # Other parameters
         self.max_freq = 3
         self.move_to_main_threshold = move_to_main_threshold
 
-    def cache_hit(self, obj_id):
-        """
-        Cache hit can happen in two cases:
-        1. Small FIFO cache hit (small_fifo)
-        2. Main FIFO cache hit (main_fifo)
-        """
-        if obj_id in self.main_fifo:
-            self.main_freq[obj_id] += 1
-        elif obj_id in self.small_fifo:
-            self.small_freq[obj_id] += 1
-        else:
-            print(f"Cache hit for obj_id {obj_id} but not found in any queue")
-            print(f"small_fifo: {list(self.small_fifo.keys())}")
-            print(f"main_fifo: {list(self.main_fifo.keys())}")
-            print(f"ghost_fifo: {list(self.ghost_fifo.keys())}")
-            assert False, "Cache hit should happen in small_fifo or main_fifo"
+        self.has_evicted = False  # Mark if we start to evict, only after full we will start eviction
+        self.hit_on_ghost = False
+
+    def cache_hit(self, req: Request):
+        hit_small = False
+        hit_main = False
+        if self.small_fifo.find(req, update_cache=False):
+            self.freq[req.obj_id] += 1
+
+        if self.main_fifo.find(req, update_cache=False):
+            self.freq[req.obj_id] += 1
 
-    def cache_miss(self, obj_id, obj_size=1):
-        """
-        Cache miss can happen in three cases:
-        1. Miss in small and main but hit in ghost
-        2. Miss all three queues
-        """
-        if obj_id in self.ghost_fifo:
-            del self.ghost_fifo[obj_id]
-            del self.ghost_freq[obj_id]
-            self.insert_to_main(obj_id)
+    def cache_miss(self, req: Request):
+        if not self.hit_on_ghost:
+            obj = self.ghost_fifo.find(req, update_cache=False)
+            if obj is not None:
+                self.hit_on_ghost = True
+                # remove from ghost set
+                self.ghost_fifo.remove(req.obj_id)
+                self.ghost_set.remove(req.obj_id)
+
+
+        # NOTE(haocheng): first we need to know this miss object has record in ghost or not
+        if not self.hit_on_ghost:
+            if req.obj_size >= self.small_fifo.cache_size:
+                # If object is too large, we do not process it
+                return
+
+            # If is initialization state, we need to insert to small fifo,
+            # then we can insert to main fifo
+            if not self.has_evicted and self.small_fifo.get_occupied_byte() >= self.small_fifo.cache_size:
+                obj = self.main_fifo.insert(req)
+                self.main_set.add(obj.obj_id)
+            else:
+                obj = self.small_fifo.insert(req)
+                self.small_set.add(obj.obj_id)
         else:
-            # Miss all three queues
-            cond = (obj_id not in self.small_fifo) and (obj_id not in self.main_fifo)
-            assert cond, "Should not be in small_fifo or main_fifo"
-
-            # Then we need to insert to small fifo queue
-            self.insert_to_small(obj_id)
-
-    def insert_to_small(self, obj_id):
-        if len(self.small_fifo) >= self.small_max_size:
-            self.cache_evict_small()
-        self.small_fifo[obj_id] = None  # OrderedDict value doesn't matter
-        self.small_freq[obj_id] = 0
-
-    def insert_to_main(self, obj_id):
-        if len(self.main_fifo) >= self.main_max_size:
-            self.cache_evict_main()
-        self.main_fifo[obj_id] = None
-        self.main_freq[obj_id] = 0
-
-    def insert_to_ghost(self, obj_id, original_freq=0):
-        if len(self.ghost_fifo) >= self.ghost_max_size:
-            # Remove oldest item
-            oldest_id = next(iter(self.ghost_fifo))
-            del self.ghost_fifo[oldest_id]
-            del self.ghost_freq[oldest_id]
-        self.ghost_fifo[obj_id] = None
-        self.ghost_freq[obj_id] = original_freq
+            obj = self.main_fifo.insert(req)
+            self.main_set.add(req.obj_id)
+            self.hit_on_ghost = False
+        self.freq[obj.obj_id] = 0
 
-    def cache_evict_small(self):
+    def cache_evict_small(self, req: Request):
         has_evicted = False
         evicted_id = None
-        while not has_evicted and len(self.small_fifo) > 0:
-            obj_to_evict = next(iter(self.small_fifo))  # Get first item
-            if self.small_freq[obj_to_evict] >= self.move_to_main_threshold:
-                # Move to main fifo cache (not real evict, just move)
-                del self.small_fifo[obj_to_evict]
-                del self.small_freq[obj_to_evict]
-                self.insert_to_main(obj_to_evict)
+        real_evicted_id = None
+        while not has_evicted and self.small_fifo.get_occupied_byte() > 0:
+            obj_to_evict = self.small_fifo.to_evict(req)
+            evicted_id = obj_to_evict.obj_id  # Store the ID before any operations
+            if self.freq[obj_to_evict.obj_id] >= self.move_to_main_threshold:
+                new_req = Request(obj_id=evicted_id, obj_size=1)
+                self.main_fifo.insert(new_req)
+                self.main_set.add(evicted_id)
+                # Reset frequency
+                self.freq[evicted_id] = 0
             else:
-                evicted_id = obj_to_evict
-                # Insert to ghost fifo cache (real evict)
-                del self.small_fifo[obj_to_evict]
-                del self.small_freq[obj_to_evict]
-                self.insert_to_ghost(obj_to_evict)
+                new_req = Request(obj_id=evicted_id, obj_size=1)
+                self.ghost_fifo.get(new_req)
+                self.ghost_set.append(evicted_id)
                 has_evicted = True
-        return evicted_id
+                real_evicted_id = evicted_id
+            flag = self.small_fifo.remove(evicted_id)
+            self.small_set.remove(evicted_id)
+            assert flag, "Should be able to remove"
+        return real_evicted_id
 
-    def cache_evict_main(self):
+    def cache_evict_main(self, req: Request):
         has_evicted = False
         evicted_id = None
-        while not has_evicted and len(self.main_fifo) > 0:
-            obj_to_evict = next(iter(self.main_fifo))  # Get first item
-            freq = self.main_freq[obj_to_evict]
+        while not has_evicted and self.main_fifo.get_occupied_byte() > 0:
+            obj_to_evict = self.main_fifo.to_evict(req)
+            assert obj_to_evict is not None
+            evicted_id = obj_to_evict.obj_id  # Store the ID before any operations
+            freq = self.freq[evicted_id]
             if freq >= 1:
                 # Reinsert with decremented frequency
-                del self.main_fifo[obj_to_evict]
-                del self.main_freq[obj_to_evict]
-                self.insert_to_main(obj_to_evict)
-                self.main_freq[obj_to_evict] = min(freq, self.max_freq) - 1
+                self.main_fifo.remove(evicted_id)
+                self.main_set.remove(evicted_id)
+                new_req = Request(obj_id=evicted_id, obj_size=1)
+                self.main_fifo.insert(new_req)
+                self.main_set.add(evicted_id)
+                self.freq[evicted_id] = min(freq, self.max_freq) - 1
             else:
-                evicted_id = obj_to_evict
-                # Real eviction
-                del self.main_fifo[obj_to_evict]
-                del self.main_freq[obj_to_evict]
+                flag = self.main_fifo.remove(evicted_id)
+                self.main_set.remove(evicted_id)
                 has_evicted = True
+                # print(f"Evicted {evicted_id}")
         return evicted_id
 
-    def cache_evict(self):
-        evicted_id = None
-        # if main is full or small is empty, evict main
-        if len(self.main_fifo) >= self.main_max_size or len(self.small_fifo) == 0:
-            evicted_id = self.cache_evict_main()
-        # if small is not empty, evict small
+    def cache_evict(self, req: Request):
+        if not self.hit_on_ghost:
+            obj = self.ghost_fifo.find(req, update_cache=False)
+            if obj is not None:
+                self.hit_on_ghost = True
+                # remove from ghost set
+                self.ghost_fifo.remove(req.obj_id)
+                self.ghost_set.remove(req.obj_id)
+
+        self.has_evicted = True
+        cond = (self.main_fifo.get_occupied_byte() > self.main_fifo.cache_size)
+        if (cond or (self.small_fifo.get_occupied_byte() == 0)):
+            obj_id = self.cache_evict_main(req)
         else:
-            evicted_id = self.cache_evict_small()
-        if evicted_id is None:
-            assert False, "Should not be None"
-        return evicted_id
+            obj_id = self.cache_evict_small(req)
+
+        if obj_id is not None:
+            del self.freq[obj_id]
+
+        return obj_id
 
     def cache_remove(self, obj_id):
         removed = False
-        if obj_id in self.small_fifo:
-            del self.small_fifo[obj_id]
-            del self.small_freq[obj_id]
-            removed = True
-        elif obj_id in self.ghost_fifo:
-            del self.ghost_fifo[obj_id]
-            del self.ghost_freq[obj_id]
-            removed = True
-        elif obj_id in self.main_fifo:
-            del self.main_fifo[obj_id]
-            del self.main_freq[obj_id]
-            removed = True
+        removed |= self.small_fifo.remove(obj_id)
+        removed |= self.ghost_fifo.remove(obj_id)
+        removed |= self.main_fifo.remove(obj_id)
         return removed
 
 def cache_init_hook(common_cache_params: CommonCacheParams):
     return StandaloneS3FIFO(cache_size=common_cache_params.cache_size)
 
 def cache_hit_hook(cache, request: Request):
-    cache.cache_hit(request.obj_id)
+    cache.cache_hit(request)
 
 def cache_miss_hook(cache, request: Request):
-    cache.cache_miss(request.obj_id, request.obj_size)
+    cache.cache_miss(request)
 
 def cache_eviction_hook(cache, request: Request):
-    # NOTE(haocheng): never called
-    pass
+    evicted_id = None
+    while evicted_id is None:
+        evicted_id = cache.cache_evict(request)
+    return evicted_id
 
 def cache_remove_hook(cache, obj_id):
     cache.cache_remove(obj_id)
@@ -176,7 +179,7 @@ def cache_free_hook(cache):
     cache.main_freq.clear()
 
 cache = PluginCache(
-    cache_size=1024*1024,
+    cache_size=1024,
     cache_init_hook=cache_init_hook,
     cache_hit_hook=cache_hit_hook,
     cache_miss_hook=cache_miss_hook,
@@ -185,20 +188,29 @@ def cache_free_hook(cache):
     cache_free_hook=cache_free_hook,
     cache_name="S3FIFO")
 
-ref_s3fifo = S3FIFO(cache_size=1024)
+URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
+dl = lcs.DataLoader()
+dl.load(URI)
 
-reader = SyntheticReader(
-    num_of_req=1000000,
-    num_objects=100,
-    obj_size=1,
-    seed=42,
-    alpha=0.8,
-    dist="zipf",
+# Step 2: Open trace and process efficiently
+reader = lcs.TraceReader(
    trace = dl.get_cache_path(URI),
    trace_type = lcs.TraceType.ORACLE_GENERAL_TRACE,
    reader_init_params = lcs.ReaderInitParam(ignore_obj_size=True)
 )
 
-for req in reader:
-    plugin_hit = cache.get(req)
-    ref_hit = ref_s3fifo.get(req)
-    assert plugin_hit == ref_hit, f"Cache hit mismatch: {plugin_hit} != {ref_hit}"
+ref_s3fifo = S3FIFO(cache_size=1024, small_size_ratio=0.1, ghost_size_ratio=0.9, move_to_main_threshold=2)
+
+# for req in reader:
+#     hit = cache.get(req)
+#     ref_hit = ref_s3fifo.get(req)
+#     assert hit == ref_hit, f"Cache hit mismatch: {hit} != {ref_hit}"
+
+req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
+ref_req_miss_ratio, ref_byte_miss_ratio = ref_s3fifo.process_trace(reader)
+print(f"Plugin req miss ratio: {req_miss_ratio}, ref req miss ratio: {ref_req_miss_ratio}")
+print(f"Plugin byte miss ratio: {byte_miss_ratio}, ref byte miss ratio: {ref_byte_miss_ratio}")
 
+assert req_miss_ratio == ref_req_miss_ratio
+assert byte_miss_ratio == ref_byte_miss_ratio
 print("All requests processed successfully. Plugin cache matches reference S3FIFO cache.")

libcachesim/cache.py

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ def __init__(self, _cache: Cache):
     def get(self, req: Request) -> bool:
         return self._cache.get(req)
 
-    def find(self, req: Request, update_cache: bool = True) -> CacheObject:
+    def find(self, req: Request, update_cache: bool = True) -> Optional[CacheObject]:
         return self._cache.find(req, update_cache)
 
     def can_insert(self, req: Request) -> bool:
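
With the return type now Optional[CacheObject], a miss is an explicit None rather than an object that must not be touched; this is exactly what the rewritten s3fifo example relies on when probing the ghost queue. A minimal sketch of the pattern, assuming a FIFO cache and a Request constructed as in that example:

```python
from libcachesim import FIFO, Request

cache = FIFO(1024)
req = Request(obj_id=42, obj_size=1)

obj = cache.find(req, update_cache=False)   # probe without updating cache state
if obj is None:
    cache.insert(req)                        # miss: admit the object
else:
    print("hit:", obj.obj_id)                # hit: a CacheObject is returned
```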

src/export_cache.cpp

Lines changed: 1 addition & 1 deletion
@@ -281,7 +281,7 @@ void export_cache(py::module& m) {
           "find",
           [](cache_t& self, const request_t& req, const bool update_cache) {
             cache_obj_t* obj = self.find(&self, &req, update_cache);
-            return py::cast(obj, py::return_value_policy::reference);
+            return obj ? py::cast(obj, py::return_value_policy::reference) : py::none();
           },
           "req"_a, "update_cache"_a = true)
       .def(
