|
1 | 1 | <?php |
| 2 | + |
2 | 3 | /* |
3 | 4 | * Copyright (c) 2022 The Recognize contributors. |
4 | 5 | * This file is licensed under the Affero General Public License version 3 or later. See the COPYING file. |
|
7 | 8 |
|
8 | 9 | namespace OCA\ContextChat\BackgroundJobs; |
9 | 10 |
|
| 11 | +use OCA\ContextChat\AppInfo\Application; |
10 | 12 | use OCA\ContextChat\Db\QueueFile; |
11 | 13 | use OCA\ContextChat\Service\DiagnosticService; |
12 | 14 | use OCA\ContextChat\Service\LangRopeService; |
@@ -46,15 +48,15 @@ class IndexerJob extends TimedJob { |
46 | 48 | public const DEFAULT_MAX_JOBS_COUNT = 3; |
47 | 49 |
|
48 | 50 | public function __construct( |
49 | | - ITimeFactory $time, |
| 51 | + ITimeFactory $time, |
50 | 52 | private LoggerInterface $logger, |
51 | | - private QueueService $queue, |
| 53 | + private QueueService $queue, |
52 | 54 | private IUserMountCache $userMountCache, |
53 | | - private IJobList $jobList, |
| 55 | + private IJobList $jobList, |
54 | 56 | private LangRopeService $langRopeService, |
55 | | - private StorageService $storageService, |
56 | | - private IRootFolder $rootFolder, |
57 | | - private IAppConfig $appConfig, |
| 57 | + private StorageService $storageService, |
| 58 | + private IRootFolder $rootFolder, |
| 59 | + private IAppConfig $appConfig, |
58 | 60 | private DiagnosticService $diagnosticService, |
59 | 61 | private IDBConnection $db, |
60 | 62 | private ITimeFactory $timeFactory, |
@@ -178,49 +180,82 @@ protected function hasEnoughRunningJobs(): bool { |
/**
 * Index the given queued files, sending them to the backend in batches.
 *
 * Files are collected into a batch of Source objects. A batch is flushed to
 * LangRopeService::indexSources() before adding a file that would push the
 * accumulated size over Application::CC_MAX_SIZE, or once the batch already
 * holds Application::CC_MAX_FILES sources. Whatever remains is flushed after the loop.
 *
 * The loop stops once the maximum indexing time has elapsed. Only queue
 * entries that were actually reached are removed from the queue; entries
 * that were not reached stay queued for a later run. Files that could not be
 * opened because they were locked are re-inserted into the queue after removal.
 *
 * @param QueueFile[] $files queue entries to process
 */
protected function index(array $files): void {
	$maxTime = $this->getMaxIndexingTime();
	$startTime = time();
	$sources = [];
	$allSourceIds = [];
	$loadedSources = [];
	$retryQFiles = [];
	// Queue entries the loop actually reached; only these may be dequeued.
	$processedQFiles = [];
	// Accumulated byte size of the files in the current batch.
	$size = 0;

	foreach ($files as $queueFile) {
		$this->diagnosticService->sendHeartbeat(static::class, $this->getId());
		if ($startTime + $maxTime < time()) {
			// Out of time: everything from here on stays in the queue.
			break;
		}
		$processedQFiles[] = $queueFile;

		$file = current($this->rootFolder->getById($queueFile->getFileId()));
		if (!$file instanceof File) {
			continue;
		}

		$fileSize = $file->getSize();
		$batchIsFull = $size + $fileSize > Application::CC_MAX_SIZE
			|| count($sources) >= Application::CC_MAX_FILES;
		// Never flush an empty batch (e.g. a single file larger than the size limit).
		if ($batchIsFull && count($sources) > 0) {
			$loadedSources = array_merge($loadedSources, $this->langRopeService->indexSources($sources));
			$sources = [];
			$size = 0;
		}

		$userIds = $this->storageService->getUsersForFileId($queueFile->getFileId());
		$this->diagnosticService->sendHeartbeat(static::class, $this->getId());

		try {
			try {
				$fileHandle = $file->fopen('r');
			} catch (NotPermittedException $e) {
				$this->logger->error('Could not open file ' . $file->getPath() . ' for reading', ['exception' => $e]);
				continue;
			} catch (LockedException $e) {
				// Locked files are retried: they get re-queued after the batch is dequeued.
				$retryQFiles[] = $queueFile;
				$this->logger->info('File ' . $file->getPath() . ' is locked, could not read for indexing. Adding it to the next batch.');
				continue;
			}
			if (!is_resource($fileHandle)) {
				$this->logger->warning('File handle for ' . $file->getPath() . ' is not readable');
				continue;
			}

			$sourceId = ProviderConfigService::getSourceId($file->getId());
			$sources[] = new Source(
				$userIds,
				$sourceId,
				substr($file->getInternalPath(), 6), // remove 'files/' prefix
				$fileHandle,
				$file->getMtime(),
				$file->getMimeType(),
				ProviderConfigService::getDefaultProviderKey(),
			);
			$allSourceIds[] = $sourceId;
			// Count this file towards the current batch's size limit.
			$size += $fileSize;
		} catch (InvalidPathException|NotFoundException $e) {
			$this->logger->error('Could not find file ' . $file->getPath(), ['exception' => $e]);
			continue;
		}
	}

	// Flush the last, partially filled batch.
	if (count($sources) > 0) {
		$loadedSources = array_merge($loadedSources, $this->langRopeService->indexSources($sources));
	}

	// Source ids that were submitted but not reported back by indexSources().
	$emptyInvalidSources = array_diff($allSourceIds, $loadedSources);
	if (count($emptyInvalidSources) > 0) {
		$this->logger->info('Invalid or empty sources that were not indexed', ['sourceIds' => $emptyInvalidSources]);
	}

	if (count($processedQFiles) === 0) {
		// Nothing was reached, so there is nothing to dequeue or retry.
		return;
	}

	try {
		$this->queue->removeFromQueue($processedQFiles);
		// add files that were locked to the end of the queue
		foreach ($retryQFiles as $queueFile) {
			$this->queue->insertIntoQueue($queueFile);
		}
	} catch (Exception $e) {
		$this->logger->error('Could not remove indexed files from queue', ['exception' => $e]);
	}
}
226 | 261 | } |
0 commit comments