-
Notifications
You must be signed in to change notification settings - Fork 0
HPC-10152: fix PostgreSQL query params limit #352
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,8 @@ import { type Database } from '@unocha/hpc-api-core/src/db'; | |
import { type FlowId } from '@unocha/hpc-api-core/src/db/models/flow'; | ||
import { Op } from '@unocha/hpc-api-core/src/db/util/conditions'; | ||
import { type InstanceOfModel } from '@unocha/hpc-api-core/src/db/util/types'; | ||
import { splitIntoChunks } from '@unocha/hpc-api-core/src/util'; | ||
import { PG_MAX_QUERY_PARAMS } from '@unocha/hpc-api-core/src/util/consts'; | ||
import { | ||
createBrandedValue, | ||
getTableColumns, | ||
|
@@ -24,7 +26,6 @@ import type { | |
UniqueFlowEntity, | ||
} from './model'; | ||
import { buildSearchFlowsConditions } from './strategy/impl/utils'; | ||
|
||
@Service() | ||
export class FlowService { | ||
constructor(private readonly flowObjectService: FlowObjectService) {} | ||
|
@@ -94,7 +95,6 @@ export class FlowService { | |
|
||
const refDirection = orderBy.direction ?? 'source'; | ||
|
||
let flowObjects = []; | ||
let entityIDsSorted: number[] = []; | ||
|
||
switch (entity) { | ||
|
@@ -322,29 +322,44 @@ export class FlowService { | |
const entityCondKey = orderBy.entity as unknown; | ||
const entityCondKeyFlowObjectType = entityCondKey as FlowObjectType; | ||
|
||
flowObjects = await database.flowObject.find({ | ||
where: { | ||
objectType: entityCondKeyFlowObjectType, | ||
refDirection, | ||
objectID: { | ||
[Op.IN]: entityIDsSorted, | ||
}, | ||
}, | ||
distinct: ['flowID', 'versionID'], | ||
}); | ||
// Order map | ||
const orderMap = new Map<number, number>(); | ||
for (const [index, entityID] of entityIDsSorted.entries()) { | ||
orderMap.set(entityID, index); | ||
} | ||
|
||
// Instead of doing a single query that may end up on a 'Memory Error' | ||
// we will do a progressive search | ||
// by chunks of PG_MAX_QUERY_PARAMS - 2 => ( (2 ** 16 - 1) - 2 = 65533 ) | ||
const flowObjects = ( | ||
await Promise.all( | ||
splitIntoChunks(entityIDsSorted, PG_MAX_QUERY_PARAMS - 2).map( | ||
(entityIds) => | ||
database.flowObject.find({ | ||
where: { | ||
objectType: entityCondKeyFlowObjectType, | ||
refDirection, | ||
objectID: { | ||
[Op.IN]: entityIds, | ||
}, | ||
}, | ||
distinct: ['flowID', 'versionID'], | ||
}) | ||
) | ||
) | ||
).flat(); | ||
|
||
// Then, we need to filter the results from the flowObject table | ||
// using the planVersions list as sorted reference | ||
// this is because we cannot apply the order of a given list | ||
// to the query directly | ||
flowObjects = flowObjects | ||
const sortedFlowObjects = flowObjects | ||
.map((flowObject) => ({ | ||
...flowObject, | ||
sortingKey: entityIDsSorted.indexOf(flowObject.objectID.valueOf()), | ||
sortingKey: orderMap.get(flowObject.objectID), | ||
})) | ||
.sort((a, b) => a.sortingKey - b.sortingKey); | ||
|
||
return this.mapFlowsToUniqueFlowEntities(flowObjects); | ||
.toSorted((a, b) => (a.sortingKey ?? 0) - (b.sortingKey ?? 0)); | ||
return this.mapFlowsToUniqueFlowEntities(sortedFlowObjects); | ||
} | ||
|
||
private mapFlowsToUniqueFlowEntities( | ||
|
@@ -359,7 +374,7 @@ export class FlowService { | |
); | ||
} | ||
|
||
async getParketParents( | ||
async getParkedParents( | ||
flow: FlowInstance, | ||
flowLinkArray: Array<InstanceOfModel<Database['flowLink']>>, | ||
models: Database | ||
|
@@ -374,7 +389,6 @@ export class FlowService { | |
if (flowLinksParentsIDs.length === 0) { | ||
return null; | ||
} | ||
|
||
const parkedCategory = await models.category.findOne({ | ||
where: { | ||
group: 'flowType', | ||
|
@@ -454,17 +468,18 @@ export class FlowService { | |
models: Database, | ||
flowObjectFilters: FlowObjectFilterGrouped | ||
): Promise<UniqueFlowEntity[]> { | ||
// 1. Retrieve the parked category | ||
const parkedCategory = await models.category.findOne({ | ||
where: { | ||
name: 'Parked', | ||
group: 'flowType', | ||
}, | ||
}); | ||
|
||
if (!parkedCategory) { | ||
throw new Error('Parked category not found'); | ||
} | ||
|
||
// 2. Get all category references for parked flows | ||
const categoryRefs = await models.categoryRef.find({ | ||
where: { | ||
categoryID: parkedCategory.id, | ||
|
@@ -473,91 +488,87 @@ export class FlowService { | |
distinct: ['objectID', 'versionID'], | ||
}); | ||
|
||
// Build list of parent IDs from categoryRefs | ||
const parentIDs: FlowId[] = categoryRefs.map((ref) => | ||
createBrandedValue(ref.objectID) | ||
); | ||
|
||
// 3. Retrieve flow links where the parent is among those references and depth > 0 | ||
const flowLinks = await models.flowLink.find({ | ||
where: { | ||
depth: { | ||
[Op.GT]: 0, | ||
}, | ||
parentID: { | ||
[Op.IN]: categoryRefs.map((categoryRef) => | ||
createBrandedValue(categoryRef.objectID) | ||
), | ||
}, | ||
depth: { [Op.GT]: 0 }, | ||
parentID: { [Op.IN]: parentIDs }, | ||
}, | ||
distinct: ['parentID', 'childID'], | ||
}); | ||
|
||
// Create a reference list of parent flows from the flow links | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fact that you create this lookup array and pass it to |
||
const parentFlowsRef: UniqueFlowEntity[] = flowLinks.map((flowLink) => ({ | ||
id: createBrandedValue(flowLink.parentID), | ||
id: flowLink.parentID, | ||
versionID: null, | ||
})); | ||
|
||
// Since this list can be really large in size: ~42k flow links | ||
// This can cause a performance issue when querying the database | ||
// and even end up with a error like: | ||
// could not resize shared memory segment \"/PostgreSQL.2154039724\" | ||
// to 53727360 bytes: No space left on device | ||
|
||
// We need to do this query by chunks | ||
// 4. Query parent flows progressively in chunks | ||
const parentFlows = await this.progresiveSearch( | ||
models, | ||
parentFlowsRef, | ||
1000, | ||
PG_MAX_QUERY_PARAMS - 2, // Use a batch size of PG_MAX_QUERY_PARAMS - 2 to avoid hitting the limit | ||
0, | ||
false, // Stop on batch size | ||
false, // Do not stop on batch size | ||
[], | ||
{ activeStatus: true } | ||
); | ||
|
||
// 5. Retrieve flow objects using the flow object filters | ||
const flowObjectsWhere = | ||
buildWhereConditionsForFlowObjectFilters(flowObjectFilters); | ||
|
||
const flowObjects = await this.flowObjectService.getFlowFromFlowObjects( | ||
models, | ||
flowObjectsWhere | ||
); | ||
|
||
// Once we get the flowObjects - we need to keep only those that are present in both lists | ||
const filteredParentFlows = parentFlows.filter((parentFlow) => | ||
flowObjects.some( | ||
(flowObject) => | ||
flowObject.id === parentFlow.id && | ||
flowObject.versionID === parentFlow.versionID | ||
// 6. Build a Set for flowObjects for fast lookup (using a composite key of id and versionID) | ||
const flowObjectsSet = new Set( | ||
flowObjects.map( | ||
(flowObject) => `${flowObject.id}|${flowObject.versionID}` | ||
) | ||
); | ||
|
||
// Once we have the ParentFlows whose status are 'parked' | ||
// We keep look for the flowLinks of those flows to obtain the child flows | ||
// that are linked to them | ||
const childFlowsIDs: FlowId[] = []; | ||
// 7. Filter parent flows that are present in the flowObjects list | ||
const filteredParentFlows = parentFlows.filter((parentFlow) => { | ||
const key = `${parentFlow.id}|${parentFlow.versionID}`; | ||
return flowObjectsSet.has(key); | ||
}); | ||
|
||
// 8. Build a Set of filtered parent flow IDs for quick membership checking | ||
const filteredParentFlowIds = new Set( | ||
filteredParentFlows.map((flow) => flow.id) | ||
); | ||
|
||
// 9. Extract child flow IDs from flowLinks where the parent is in the filtered set | ||
const childFlowsIDsSet = new Set<FlowId>(); | ||
for (const flowLink of flowLinks) { | ||
if ( | ||
filteredParentFlows.some( | ||
(parentFlow) => parentFlow.id === flowLink.parentID | ||
) | ||
) { | ||
childFlowsIDs.push(flowLink.childID); | ||
if (filteredParentFlowIds.has(flowLink.parentID)) { | ||
childFlowsIDsSet.add(flowLink.childID); | ||
} | ||
} | ||
|
||
// 10. Retrieve child flows | ||
const childFlows = await models.flow.find({ | ||
where: { | ||
deletedAt: null, | ||
activeStatus: true, | ||
id: { | ||
[Op.IN]: childFlowsIDs, | ||
}, | ||
id: { [Op.IN]: childFlowsIDsSet }, | ||
}, | ||
distinct: ['id', 'versionID'], | ||
}); | ||
|
||
// Once we have the child flows, we need to filter them | ||
// using the flowObjectFilters | ||
// This search needs to be also done by chunks | ||
return childFlows.map((ref) => ({ | ||
id: createBrandedValue(ref.id), | ||
// 11. Map child flows to UniqueFlowEntity and return the result | ||
const result = childFlows.map((ref) => ({ | ||
id: ref.id, | ||
versionID: ref.versionID, | ||
})); | ||
|
||
return result; | ||
} | ||
|
||
/** | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.