3
3
#include < assert.h>
4
4
5
5
#include < algorithm>
6
+ #include < atomic> // for std::atomic_thread_fence
6
7
7
8
#include < windows.h>
8
9
10
+ namespace
11
+ {
12
+ // This is a dirty hack for speedup inter-thread communication on hyperthreading CPUs
13
+ // We could choose any other paid of logical CPUs based on the same physical CPU and sharing the same cache
14
+ // I'm sure it will give no effect in the current task because we are switching too rarely between threads.
15
+ const int PreferedCpuForMainThread = 0 ;
16
+ const int PreferedCpuForWorkerThread = PreferedCpuForMainThread + 1 ;
17
+ static_assert (PreferedCpuForMainThread % 2 == 0 );
18
+ const DWORD ThreadPriority = THREAD_PRIORITY_TIME_CRITICAL;
19
+ # define ENABLE_THREAD_PREFERRED_AFFINITY 0
20
+ # define ENABLE_THREAD_FIXED_AFFINITY 0
21
+ # define ENABLE_THREAD_PRIORITY 0
22
+ }
23
+
9
24
10
25
CScanFile::~CScanFile ()
11
26
{
@@ -18,7 +33,7 @@ bool CScanFile::Open(const wchar_t* const filename, const bool asyncMode)
18
33
{
19
34
return false ;
20
35
}
21
- assert (this ->_hEvent == nullptr );
36
+ assert (this ->_hAsyncEvent == nullptr );
22
37
23
38
const DWORD dwDesiredAccess = FILE_READ_DATA | FILE_READ_ATTRIBUTES; // minimal required rights
24
39
// FILE_READ_ATTRIBUTES is needed to get file size for mapping file to memory
@@ -38,18 +53,18 @@ bool CScanFile::Open(const wchar_t* const filename, const bool asyncMode)
38
53
}
39
54
40
55
// Init data for async IO:
41
- this ->_hEvent = CreateEventW (nullptr , TRUE , FALSE , nullptr );
56
+ this ->_hAsyncEvent = CreateEventW (nullptr , TRUE , FALSE , nullptr );
42
57
43
- if (this ->_hEvent == nullptr )
58
+ if (this ->_hAsyncEvent == nullptr )
44
59
{
45
60
CloseHandle (this ->_hFile );
46
61
this ->_hFile = nullptr ;
47
62
return false ;
48
63
}
49
64
50
- this ->_overlapped .hEvent = this ->_hEvent ;
51
- this ->_fileOffset .QuadPart = 0 ;
52
- this ->_operationInProgress = false ;
65
+ this ->_asyncOverlapped .hEvent = this ->_hAsyncEvent ;
66
+ this ->_asyncFileOffset .QuadPart = 0 ;
67
+ this ->_asyncOperationInProgress = false ;
53
68
54
69
return true ;
55
70
}
@@ -76,15 +91,19 @@ void CScanFile::Close()
76
91
this ->_hFile = nullptr ;
77
92
}
78
93
79
- if (this ->_hEvent != nullptr )
94
+ if (this ->_hAsyncEvent != nullptr )
80
95
{
81
- CloseHandle (this ->_hEvent );
82
- this ->_hEvent = nullptr ;
96
+ CloseHandle (this ->_hAsyncEvent );
97
+ this ->_hAsyncEvent = nullptr ;
83
98
}
84
99
85
- this ->_operationInProgress = false ;
100
+ this ->_asyncOperationInProgress = false ;
86
101
}
87
102
103
+ // ////////////////////////////////////////////////////////////////////////
104
+ // / Implementation of mapping file to memory
105
+ // ////////////////////////////////////////////////////////////////////////
106
+
88
107
std::optional<std::string_view> CScanFile::MapToMemory ()
89
108
{
90
109
if (this ->_hFile == nullptr || this ->_hFileMapping != nullptr )
@@ -131,6 +150,8 @@ std::optional<std::string_view> CScanFile::MapToMemory()
131
150
return std::string_view (static_cast <const char *>(this ->_pViewOfFile ), fileSizeAsSizeT);
132
151
}
133
152
153
+ // ////////////////////////////////////////////////////////////////////////
154
+ // / Implementation of synchronous file API
134
155
// ////////////////////////////////////////////////////////////////////////
135
156
136
157
__declspec (noinline) // noinline is added to help CPU profiling in release version
@@ -156,49 +177,51 @@ bool CScanFile::Read(char* const buffer, const size_t bufferLength, size_t& read
156
177
return !!succeeded;
157
178
}
158
179
180
+ // ////////////////////////////////////////////////////////////////////////
181
+ // / Implementation of Asynchronous file API
159
182
// ////////////////////////////////////////////////////////////////////////
160
183
161
184
__declspec (noinline) // noinline is added to help CPU profiling in release version
162
185
bool CScanFile::AsyncReadStart(char * const buffer, const size_t bufferLength)
163
186
{
164
- if (this ->_hFile == nullptr || buffer == nullptr || this ->_operationInProgress )
187
+ if (this ->_hFile == nullptr || buffer == nullptr || this ->_asyncOperationInProgress )
165
188
{
166
189
return false ;
167
190
}
168
- assert (this ->_hEvent != nullptr );
191
+ assert (this ->_hAsyncEvent != nullptr );
169
192
170
193
const DWORD usedBufferLength = static_cast <DWORD>(min (bufferLength, MAXDWORD));
171
194
172
- this ->_overlapped .Offset = this ->_fileOffset .LowPart ;
173
- this ->_overlapped .OffsetHigh = this ->_fileOffset .HighPart ;
195
+ this ->_asyncOverlapped .Offset = this ->_asyncFileOffset .LowPart ;
196
+ this ->_asyncOverlapped .OffsetHigh = this ->_asyncFileOffset .HighPart ;
174
197
175
- const bool readOk = !!ReadFile (this ->_hFile , buffer, usedBufferLength, nullptr , &this ->_overlapped );
198
+ const bool readOk = !!ReadFile (this ->_hFile , buffer, usedBufferLength, nullptr , &this ->_asyncOverlapped );
176
199
if (!readOk && GetLastError () != ERROR_IO_PENDING)
177
200
{
178
201
return false ;
179
202
}
180
203
181
- this ->_operationInProgress = true ;
204
+ this ->_asyncOperationInProgress = true ;
182
205
183
206
return true ;
184
207
}
185
208
186
209
__declspec (noinline) // noinline is added to help CPU profiling in release version
187
210
bool CScanFile::AsyncReadWait(size_t & readBytes)
188
211
{
189
- if (!this ->_operationInProgress )
212
+ if (!this ->_asyncOperationInProgress )
190
213
{
191
214
return false ;
192
215
}
193
216
assert (this ->_hFile != nullptr );
194
- assert (this ->_hEvent != nullptr );
217
+ assert (this ->_hAsyncEvent != nullptr );
195
218
196
219
DWORD numberOfBytesRead = 0 ;
197
220
198
- const bool overlappedOk = !!GetOverlappedResult (this ->_hFile , &this ->_overlapped , &numberOfBytesRead, TRUE );
221
+ const bool overlappedOk = !!GetOverlappedResult (this ->_hFile , &this ->_asyncOverlapped , &numberOfBytesRead, TRUE );
199
222
if (!overlappedOk)
200
223
{
201
- this ->_operationInProgress = false ; // I'm not sure this is correct
224
+ this ->_asyncOperationInProgress = false ; // I'm not sure this is correct
202
225
if (GetLastError () == ERROR_HANDLE_EOF)
203
226
{
204
227
assert (numberOfBytesRead == 0 );
@@ -209,138 +232,164 @@ bool CScanFile::AsyncReadWait(size_t& readBytes)
209
232
return false ;
210
233
}
211
234
212
- this ->_fileOffset .QuadPart += numberOfBytesRead;
213
- this ->_operationInProgress = false ;
235
+ this ->_asyncFileOffset .QuadPart += numberOfBytesRead;
236
+ this ->_asyncOperationInProgress = false ;
214
237
215
238
readBytes = numberOfBytesRead;
216
239
217
240
return true ;
218
241
}
219
242
243
+ // ////////////////////////////////////////////////////////////////////////
244
+ // / Implementation of file API executed in a separate thread with the help of spinlocks
220
245
// ////////////////////////////////////////////////////////////////////////
221
246
222
247
bool CScanFile::LockFreecInit ()
223
248
{
224
- if (this ->_pThread != nullptr )
249
+ if (this ->_hThread != nullptr )
225
250
{
226
251
return false ;
227
252
}
228
253
229
- this ->_threadFinishSignal = false ;
230
- this ->_threadOperationReadStartSignal = false ;
231
- this ->_threadOperationReadCompletedSignal = false ;
254
+ this ->_threadFinishSpinlock = false ;
255
+ this ->_threadOperationReadStartSpinlock = false ;
256
+ this ->_threadOperationReadCompletedSpinlock = false ;
257
+ this ->_pThreadReadBuffer = nullptr ;
258
+ this ->_threadReadBufferSize = 0 ;
259
+ this ->_threadActuallyReadBytes = 0 ;
260
+ this ->_threadReadSucceeded = false ;
261
+ std::atomic_thread_fence (std::memory_order_release);
262
+
263
+ // This is a dirty hack for speedup inter-thread communication on hyperthreading CPUs
264
+ #if ENABLE_THREAD_PREFERRED_AFFINITY
265
+ SetThreadIdealProcessor (GetCurrentThread (), PreferedCpuForMainThread);
266
+ #endif
267
+ #if ENABLE_THREAD_FIXED_AFFINITY
268
+ SetThreadAffinityMask (GetCurrentThread (), 1 << PreferedCpuForMainThread);
269
+ #endif
270
+ #if ENABLE_THREAD_PRIORITY
271
+ SetThreadPriority (GetCurrentThread (), ThreadPriority);
272
+ #endif
273
+
274
+ using ThreadProcType = unsigned __stdcall (void *);
275
+ ThreadProcType* const threadProc = [](void * p) -> unsigned
276
+ {
277
+ // This is a dirty hack for speedup inter-thread communication on hyperthreading CPUs
278
+ #if ENABLE_THREAD_PREFERRED_AFFINITY
279
+ SetThreadIdealProcessor (GetCurrentThread (), PreferedCpuForWorkerThread);
280
+ #endif
281
+ #if ENABLE_THREAD_FIXED_AFFINITY
282
+ SetThreadAffinityMask (GetCurrentThread (), 1 << PreferedCpuForWorkerThread);
283
+ #endif
284
+ #if ENABLE_THREAD_PRIORITY
285
+ SetThreadPriority (GetCurrentThread (), ThreadPriority);
286
+ #endif
287
+
288
+ CScanFile* const that = static_cast <CScanFile*>(p);
289
+ that->LockFreeThreadProc ();
290
+ _endthreadex (0 );
291
+ return 0 ;
292
+ };
293
+
294
+ unsigned threadID = 0 ;
295
+ this ->_hThread = reinterpret_cast <HANDLE>(_beginthreadex (NULL , 0 , threadProc, this , 0 , &threadID));
296
+ if (this ->_hThread == nullptr )
232
297
{
233
- std::lock_guard<std::mutex> lock (this ->_protectThreadData );
234
- this ->_pThreadReadBuffer = nullptr ;
235
- this ->_threadReadBufferSize = 0 ;
236
- this ->_threadActuallyReadBytes = 0 ;
237
- this ->_threadReadSucceeded = false ;
298
+ return false ;
238
299
}
239
300
240
- this ->_pThread = std::make_unique<std::thread>(&CScanFile::LockFreeThread, this );
241
-
242
301
return true ;
243
302
}
244
303
245
304
void CScanFile::LockFreecClean ()
246
305
{
247
- if (this ->_pThread != nullptr )
306
+ if (this ->_hThread != nullptr )
248
307
{
249
- this ->_threadFinishSignal = true ;
250
- this ->_pThread -> join ();
251
- this ->_pThread . reset () ;
308
+ this ->_threadFinishSpinlock = true ;
309
+ WaitForSingleObject ( this ->_hThread , INFINITE); // ignore return value in this case
310
+ this ->_hThread = nullptr ;
252
311
}
253
312
}
254
313
255
314
__declspec (noinline) // noinline is added to help CPU profiling in release version
256
315
bool CScanFile::LockFreeReadStart(char * const buffer, const size_t bufferLength)
257
316
{
258
- if (this ->_pThread == nullptr || this ->_threadOperationInProgress )
317
+ if (this ->_hThread == nullptr || this ->_threadOperationInProgress )
259
318
{
260
319
return false ;
261
320
}
262
321
263
322
this ->_threadOperationInProgress = true ;
323
+ this ->_threadOperationReadCompletedSpinlock = false ;
324
+ this ->_pThreadReadBuffer = buffer;
325
+ this ->_threadReadBufferSize = bufferLength;
326
+ this ->_threadActuallyReadBytes = 0 ;
327
+ this ->_threadReadSucceeded = false ;
328
+ std::atomic_thread_fence (std::memory_order_release);
264
329
265
- this ->_threadOperationReadCompletedSignal = false ;
266
-
267
- {
268
- std::lock_guard<std::mutex> lock (this ->_protectThreadData );
269
- this ->_pThreadReadBuffer = buffer;
270
- this ->_threadReadBufferSize = bufferLength;
271
- this ->_threadActuallyReadBytes = 0 ;
272
- this ->_threadReadSucceeded = false ;
273
- }
274
-
275
- this ->_threadOperationReadStartSignal = true ;
330
+ this ->_threadOperationReadStartSpinlock = true ;
331
+ std::atomic_thread_fence (std::memory_order_release);
276
332
277
333
return true ;
278
334
}
279
335
280
336
__declspec (noinline) // noinline is added to help CPU profiling in release version
281
337
bool CScanFile::LockFreeReadWait(size_t & readBytes)
282
338
{
283
- if (this ->_pThread == nullptr || !this ->_threadOperationInProgress )
339
+ if (this ->_hThread == nullptr || !this ->_threadOperationInProgress )
284
340
{
285
341
return false ;
286
342
}
287
343
288
344
this ->_threadOperationInProgress = false ;
345
+ std::atomic_thread_fence (std::memory_order_release);
289
346
290
- while (!this ->_threadOperationReadCompletedSignal )
347
+ std::atomic_thread_fence (std::memory_order_acquire);
348
+ while (!this ->_threadOperationReadCompletedSpinlock )
291
349
{
292
350
// nothing
351
+ std::atomic_thread_fence (std::memory_order_acquire);
293
352
}
294
353
354
+ std::atomic_thread_fence (std::memory_order_acquire);
355
+ readBytes = this ->_threadActuallyReadBytes ;
356
+ if (!this ->_threadReadSucceeded )
295
357
{
296
- std::lock_guard<std::mutex> lock (this ->_protectThreadData );
297
- readBytes = this ->_threadActuallyReadBytes ;
298
- if (!this ->_threadReadSucceeded )
299
- {
300
- return false ;
301
- }
358
+ return false ;
302
359
}
303
360
304
361
return true ;
305
362
}
306
363
307
- void CScanFile::LockFreeThread ()
364
+ void CScanFile::LockFreeThreadProc ()
308
365
{
309
366
while (true )
310
367
{
311
- if (this ->_threadFinishSignal )
368
+ std::atomic_thread_fence (std::memory_order_acquire);
369
+ if (this ->_threadFinishSpinlock )
312
370
{
313
371
// thread exit signal is caught
314
372
break ;
315
373
}
316
374
317
- if (!this ->_threadOperationReadStartSignal )
375
+ if (!this ->_threadOperationReadStartSpinlock )
318
376
{
319
377
// nothing to do
320
378
continue ;
321
379
}
322
380
323
- this ->_threadOperationReadStartSignal = false ;
324
-
325
- char * buffer = nullptr ;
326
- size_t bufferSize = 0 ;
327
-
328
- {
329
- std::lock_guard<std::mutex> lock (this ->_protectThreadData );
330
- buffer = this ->_pThreadReadBuffer ;
331
- bufferSize = this ->_threadReadBufferSize ;
332
- }
381
+ std::atomic_thread_fence (std::memory_order_acquire);
333
382
334
383
size_t readBytes = 0 ;
335
- const bool readOk = this ->Read (buffer, bufferSize , readBytes);
384
+ const bool readOk = this ->Read (this -> _pThreadReadBuffer , this -> _threadReadBufferSize , readBytes);
336
385
337
- {
338
- std::lock_guard<std::mutex> lock (this ->_protectThreadData );
339
- this ->_threadActuallyReadBytes = readBytes;
340
- this ->_threadReadSucceeded = readOk;
341
- }
386
+ this ->_threadOperationReadStartSpinlock = false ;
387
+ this ->_threadActuallyReadBytes = readBytes;
388
+ this ->_threadReadSucceeded = readOk;
389
+ std::atomic_thread_fence (std::memory_order_release);
342
390
343
- this ->_threadOperationReadCompletedSignal = true ;
391
+ this ->_threadOperationReadCompletedSpinlock = true ;
392
+ std::atomic_thread_fence (std::memory_order_release);
344
393
}
345
394
}
346
395
0 commit comments