# An example plugin that implements S3-FIFO

# NOTE(haocheng): this example shows that, with the plugin system, caches can be assembled like Lego blocks
# Happy caching!
5+ 
6+ import  libcachesim  as  lcs 
27from  collections  import  OrderedDict 
3- from  libcachesim  import  PluginCache , CommonCacheParams , Request , S3FIFO , SyntheticReader 
8+ from  collections  import  deque 
9+ from  libcachesim  import  PluginCache , CommonCacheParams , Request , S3FIFO , FIFO , SyntheticReader 
410
# NOTE(haocheng): we only support ignoring object size for now
class StandaloneS3FIFO:
    """S3-FIFO eviction state assembled from three libcachesim ``FIFO`` caches.

    Three queues are kept: a small probationary queue, a main queue, and a
    ghost queue holding the ids of objects recently evicted from the small
    queue.  A single ``freq`` dict tracks the hit counter of every object
    resident in the small or main queue.

    Object sizes are ignored: every object re-inserted by this class is
    given ``obj_size=1``.
    """

    def __init__(self,
                 small_size_ratio: float = 0.1,
                 ghost_size_ratio: float = 0.9,
                 move_to_main_threshold: int = 2,
                 cache_size: int = 1024):
        """Create the three queues and the bookkeeping structures.

        Args:
            small_size_ratio: Fraction of ``cache_size`` given to the small queue.
            ghost_size_ratio: Ghost queue capacity as a fraction of ``cache_size``.
            move_to_main_threshold: Minimum hit counter for an object leaving
                the small queue to be promoted to main instead of evicted.
            cache_size: Total capacity shared by the small and main queues.
        """
        self.cache_size = cache_size
        small_fifo_size = int(small_size_ratio * cache_size)
        # Main gets whatever the small queue does not use, so both always sum
        # to cache_size even when the ratio does not divide evenly.
        main_fifo_size = cache_size - small_fifo_size
        ghost_fifo_size = int(ghost_size_ratio * cache_size)

        # Python-side mirrors of the ids held by each queue.
        self.small_set = set()
        self.main_set = set()
        self.ghost_set = deque(maxlen=ghost_fifo_size)

        self.small_fifo = FIFO(small_fifo_size)
        self.main_fifo = FIFO(main_fifo_size)
        self.ghost_fifo = FIFO(ghost_fifo_size)

        # Frequency tracking (one counter per object in small or main)
        self.freq = {}

        # Other parameters
        self.max_freq = 3
        self.move_to_main_threshold = move_to_main_threshold

        # Becomes True on the first cache_evict() call; until then, misses
        # that find the small queue full are placed directly in main.
        self.has_evicted = False
        # Set when the current request was found in the ghost queue; consumed
        # (reset to False) by cache_miss().
        self.hit_on_ghost = False

    def _consume_ghost_record(self, req: Request):
        """Flag ``hit_on_ghost`` and drop the ghost entry if ``req`` is in ghost.

        Does nothing when the flag is already set, so the ghost lookup for a
        request happens at most once across cache_evict() and cache_miss().
        """
        if self.hit_on_ghost:
            return
        if self.ghost_fifo.find(req, update_cache=False) is None:
            return
        self.hit_on_ghost = True
        # remove from ghost set
        self.ghost_fifo.remove(req.obj_id)
        self.ghost_set.remove(req.obj_id)

    def cache_hit(self, req: Request):
        """Increment the hit counter of ``req`` for each queue that holds it.

        The counter is not capped here; ``max_freq`` is only applied when the
        object is reconsidered in cache_evict_main().
        """
        if self.small_fifo.find(req, update_cache=False):
            self.freq[req.obj_id] += 1

        if self.main_fifo.find(req, update_cache=False):
            self.freq[req.obj_id] += 1

    def cache_miss(self, req: Request):
        """Insert a missed object into the small or the main queue.

        An object that has a ghost record goes straight to main.  Otherwise
        it goes to the small queue, except before the first eviction when
        the small queue is already full, in which case it goes to main.
        Objects whose size is at least the small queue capacity are ignored.
        """
        # NOTE(haocheng): first we need to know this miss object has record in ghost or not
        self._consume_ghost_record(req)

        if self.hit_on_ghost:
            obj = self.main_fifo.insert(req)
            self.main_set.add(req.obj_id)
            self.hit_on_ghost = False
        else:
            if req.obj_size >= self.small_fifo.cache_size:
                # If object is too large, we do not process it
                return

            # During the initial fill (no eviction yet) overflow from a full
            # small queue is placed in main.
            small_is_full = (self.small_fifo.get_occupied_byte()
                             >= self.small_fifo.cache_size)
            if not self.has_evicted and small_is_full:
                obj = self.main_fifo.insert(req)
                self.main_set.add(obj.obj_id)
            else:
                obj = self.small_fifo.insert(req)
                self.small_set.add(obj.obj_id)
        self.freq[obj.obj_id] = 0

    def cache_evict_small(self, req: Request):
        """Drain the small queue until one object is really evicted.

        Candidates whose counter reached ``move_to_main_threshold`` are moved
        to main with their counter reset (main's capacity is not checked
        here); the first candidate below the threshold is recorded in the
        ghost queue and evicted.

        Returns:
            The id of the evicted object, or ``None`` if the small queue ran
            empty because every candidate was promoted.
        """
        real_evicted_id = None
        while real_evicted_id is None and self.small_fifo.get_occupied_byte() > 0:
            candidate = self.small_fifo.to_evict(req)
            candidate_id = candidate.obj_id  # Store the ID before any operations
            new_req = Request(obj_id=candidate_id, obj_size=1)
            if self.freq[candidate_id] >= self.move_to_main_threshold:
                # Promotion, not a real eviction.
                self.main_fifo.insert(new_req)
                self.main_set.add(candidate_id)
                # Reset frequency
                self.freq[candidate_id] = 0
            else:
                # Real eviction: remember the id in the ghost queue.
                self.ghost_fifo.get(new_req)
                self.ghost_set.append(candidate_id)
                real_evicted_id = candidate_id
            flag = self.small_fifo.remove(candidate_id)
            self.small_set.remove(candidate_id)
            assert flag, "Should be able to remove"
        return real_evicted_id

    def cache_evict_main(self, req: Request):
        """Evict one object from the main queue, giving hit objects another lap.

        A candidate with a counter of at least 1 is removed and re-inserted
        with its counter set to ``min(freq, max_freq) - 1``; the first
        candidate with a counter of 0 is evicted.

        Returns:
            The id of the evicted object, or ``None`` if main was empty.
        """
        has_evicted = False
        evicted_id = None
        while not has_evicted and self.main_fifo.get_occupied_byte() > 0:
            obj_to_evict = self.main_fifo.to_evict(req)
            assert obj_to_evict is not None
            evicted_id = obj_to_evict.obj_id  # Store the ID before any operations
            freq = self.freq[evicted_id]
            self.main_fifo.remove(evicted_id)
            self.main_set.remove(evicted_id)
            if freq >= 1:
                # Reinsert with decremented frequency
                self.main_fifo.insert(Request(obj_id=evicted_id, obj_size=1))
                self.main_set.add(evicted_id)
                self.freq[evicted_id] = min(freq, self.max_freq) - 1
            else:
                has_evicted = True
        return evicted_id

    def cache_evict(self, req: Request):
        """Evict one object to make room for ``req``.

        Main is chosen when it is over capacity or when the small queue is
        empty; otherwise the small queue is chosen.  The counter of the
        evicted object is dropped from ``freq``.

        Returns:
            The id of the evicted object, or ``None`` when the chosen queue
            produced no eviction (the caller is expected to retry).
        """
        # The ghost lookup has to happen before eviction mutates the ghost
        # queue; cache_miss() later consumes the flag.
        self._consume_ghost_record(req)

        self.has_evicted = True
        main_over_capacity = (self.main_fifo.get_occupied_byte()
                              > self.main_fifo.cache_size)
        if main_over_capacity or self.small_fifo.get_occupied_byte() == 0:
            obj_id = self.cache_evict_main(req)
        else:
            obj_id = self.cache_evict_small(req)

        if obj_id is not None:
            del self.freq[obj_id]

        return obj_id

    def cache_remove(self, obj_id):
        """Remove ``obj_id`` from every queue and from the bookkeeping.

        Returns:
            True if any of the three queues reported a removal.
        """
        removed_small = self.small_fifo.remove(obj_id)
        removed_ghost = self.ghost_fifo.remove(obj_id)
        removed_main = self.main_fifo.remove(obj_id)

        # Keep the id mirrors and the counter table in sync with the queues;
        # otherwise a later eviction would trip over stale entries.
        if removed_small:
            self.small_set.discard(obj_id)
        if removed_main:
            self.main_set.discard(obj_id)
        if removed_ghost:
            try:
                self.ghost_set.remove(obj_id)
            except ValueError:
                # The bounded deque may already have dropped this id.
                pass
        if removed_small or removed_main:
            self.freq.pop(obj_id, None)

        return bool(removed_small or removed_ghost or removed_main)
def cache_init_hook(common_cache_params: CommonCacheParams):
    """Build the plugin state: a StandaloneS3FIFO sized from the cache params.

    Only ``cache_size`` is read from ``common_cache_params``; the S3-FIFO
    ratios and threshold keep their defaults.
    """
    capacity = common_cache_params.cache_size
    return StandaloneS3FIFO(cache_size=capacity)
156157
def cache_hit_hook(cache, request: Request):
    """Forward a hit on ``request`` to the plugin state's ``cache_hit``."""
    cache.cache_hit(request)
159160
def cache_miss_hook(cache, request: Request):
    """Forward a miss on ``request`` to the plugin state's ``cache_miss``."""
    cache.cache_miss(request)
162163
def cache_eviction_hook(cache, request: Request):
    """Return the id of the object evicted to make room for ``request``.

    ``cache.cache_evict`` may return ``None`` (for example when a pass over
    the small queue only promoted objects), so it is called repeatedly until
    it yields an id.
    """
    while True:
        evicted_id = cache.cache_evict(request)
        if evicted_id is not None:
            return evicted_id
166169
def cache_remove_hook(cache, obj_id):
    """Forward an explicit removal of ``obj_id`` to the plugin state.

    The boolean returned by ``cache.cache_remove`` is discarded.
    """
    cache.cache_remove(obj_id)
@@ -176,7 +179,7 @@ def cache_free_hook(cache):
176179    cache .main_freq .clear ()
177180
178181cache  =  PluginCache (
179-     cache_size = 1024 * 1024 ,
182+     cache_size = 1024 ,
180183    cache_init_hook = cache_init_hook ,
181184    cache_hit_hook = cache_hit_hook ,
182185    cache_miss_hook = cache_miss_hook ,
@@ -185,20 +188,29 @@ def cache_free_hook(cache):
185188    cache_free_hook = cache_free_hook ,
186189    cache_name = "S3FIFO" )
187190
# Step 1: Fetch the trace used to compare the plugin against the built-in S3FIFO
URI = "cache_dataset_oracleGeneral/2007_msr/msr_hm_0.oracleGeneral.zst"
dl = lcs.DataLoader()
dl.load(URI)

# Step 2: Open trace and process efficiently
reader = lcs.TraceReader(
    trace=dl.get_cache_path(URI),
    trace_type=lcs.TraceType.ORACLE_GENERAL_TRACE,
    reader_init_params=lcs.ReaderInitParam(ignore_obj_size=True)
)

# Reference implementation configured with the same parameters as the
# plugin's StandaloneS3FIFO.
ref_s3fifo = S3FIFO(cache_size=1024, small_size_ratio=0.1, ghost_size_ratio=0.9, move_to_main_threshold=2)

# Per-request variant of the comparison, kept for debugging:
# for req in reader:
#     hit = cache.get(req)
#     ref_hit = ref_s3fifo.get(req)
#     assert hit == ref_hit, f"Cache hit mismatch: {hit} != {ref_hit}"

# NOTE(review): the same reader object is handed to both process_trace calls;
# this presumably relies on process_trace rewinding the reader - confirm.
req_miss_ratio, byte_miss_ratio = cache.process_trace(reader)
ref_req_miss_ratio, ref_byte_miss_ratio = ref_s3fifo.process_trace(reader)
print(f"Plugin req miss ratio: {req_miss_ratio}, reference req miss ratio: {ref_req_miss_ratio}")
print(f"Plugin byte miss ratio: {byte_miss_ratio}, reference byte miss ratio: {ref_byte_miss_ratio}")

# The plugin must reproduce the reference results exactly.
assert req_miss_ratio == ref_req_miss_ratio
assert byte_miss_ratio == ref_byte_miss_ratio
print("All requests processed successfully. Plugin cache matches reference S3FIFO cache.")
0 commit comments