Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Backend.Tests/Controllers/MergeControllerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public void TestGraylistAdd()
public void TestFindPotentialDuplicatesNoPermission()
{
_mergeController.ControllerContext.HttpContext = PermissionServiceMock.UnauthorizedHttpContext();
var result = _mergeController.FindPotentialDuplicates("projId", 2, 1).Result;
var result = _mergeController.FindPotentialDuplicates("projId", 2, 1, false).Result;
Assert.That(result, Is.InstanceOf<ForbidResult>());
}

Expand Down
1 change: 1 addition & 0 deletions Backend.Tests/Models/ProjectTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public void TestClone()
DefinitionsEnabled = true,
GrammaticalInfoEnabled = true,
AutocompleteSetting = OffOnSetting.On,
ProtectedDataMergeAvoidEnabled = OffOnSetting.Off,
ProtectedDataOverrideEnabled = OffOnSetting.Off,
SemDomWritingSystem = new("fr", "Français"),
VernacularWritingSystem = new("en", "English", "Calibri"),
Expand Down
14 changes: 9 additions & 5 deletions Backend/Controllers/MergeController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,12 @@ public async Task<IActionResult> GraylistAdd(string projectId, [FromBody, BindRe
/// <param name="projectId"> Id of project in which to search the frontier for potential duplicates. </param>
/// <param name="maxInList"> Max number of words allowed within a list of potential duplicates. </param>
/// <param name="maxLists"> Max number of lists of potential duplicates. </param>
[HttpGet("finddups/{maxInList:int}/{maxLists:int}", Name = "FindPotentialDuplicates")]
/// <param name="ignoreProtected"> Whether to require each set to have at least one unprotected word. </param>
[HttpGet("finddups/{maxInList:int}/{maxLists:int}/{ignoreProtected:bool}", Name = "FindPotentialDuplicates")]
[ProducesResponseType(StatusCodes.Status200OK)]
[ProducesResponseType(StatusCodes.Status403Forbidden)]
public async Task<IActionResult> FindPotentialDuplicates(string projectId, int maxInList, int maxLists)
public async Task<IActionResult> FindPotentialDuplicates(
string projectId, int maxInList, int maxLists, bool ignoreProtected)
{
if (!await _permissionService.HasProjectPermission(
HttpContext, Permission.MergeAndReviewEntries, projectId))
Expand All @@ -132,14 +134,16 @@ public async Task<IActionResult> FindPotentialDuplicates(string projectId, int m
// Run the task without waiting for completion.
// This Task will be scheduled within the existing Async executor thread pool efficiently.
// See: https://stackoverflow.com/a/64614779/1398841
_ = Task.Run(() => GetDuplicatesThenSignal(projectId, maxInList, maxLists, userId));
_ = Task.Run(() => GetDuplicatesThenSignal(projectId, maxInList, maxLists, userId, ignoreProtected));

return Ok();
}

internal async Task<bool> GetDuplicatesThenSignal(string projectId, int maxInList, int maxLists, string userId)
internal async Task<bool> GetDuplicatesThenSignal(
string projectId, int maxInList, int maxLists, string userId, bool ignoreProtected = false)
{
var success = await _mergeService.GetAndStorePotentialDuplicates(projectId, maxInList, maxLists, userId);
var success = await _mergeService.GetAndStorePotentialDuplicates(
projectId, maxInList, maxLists, userId, ignoreProtected);
if (success)
{
await _notifyService.Clients.All.SendAsync(CombineHub.MethodSuccess, userId);
Expand Down
15 changes: 12 additions & 3 deletions Backend/Helper/DuplicateFinder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public DuplicateFinder(int maxInList, int maxLists, double maxScore)
/// each with multiple <see cref="Word"/>s having a common Vernacular.
/// </summary>
public async Task<List<List<Word>>> GetIdenticalVernWords(
List<Word> collection, Func<List<string>, Task<bool>> isUnavailableSet)
List<Word> collection, Func<List<string>, Task<bool>> isUnavailableSet, bool ignoreProtected = false)
{
var wordLists = new List<List<Word>> { Capacity = _maxLists };
while (collection.Count > 0 && wordLists.Count < _maxLists)
Expand All @@ -48,6 +48,13 @@ public async Task<List<List<Word>>> GetIdenticalVernWords(
continue;
}

if (ignoreProtected && word.Accessibility == Status.Protected
&& similarWords.All(w => w.Accessibility == Status.Protected))
{
// If all the words are protected, skip this set.
continue;
}

// Remove from collection and add to main list.
var idsToRemove = similarWords.Select(w => w.Id);
collection.RemoveAll(w => idsToRemove.Contains(w.Id));
Expand Down Expand Up @@ -81,11 +88,13 @@ await isUnavailableSet(ids[..Math.Min(ids.Count, _maxInList)]))
/// the outer list is ordered by similarity of the first two items in each inner List.
/// </returns>
public async Task<List<List<Word>>> GetSimilarWords(
List<Word> collection, Func<List<string>, Task<bool>> isUnavailableSet)
List<Word> collection, Func<List<string>, Task<bool>> isUnavailableSet, bool ignoreProtected = false)
{
var similarWordsLists = collection.AsParallel()
.Select(w => GetSimilarToWord(w, collection))
.Where(wl => wl.Count > 1).ToList();
.Where(
wl => wl.Count > 1 && (!ignoreProtected || wl.Any(w => w.Item2.Accessibility != Status.Protected)))
.ToList();

var best = new List<List<Word>>();
var bestIds = new List<string>();
Expand Down
3 changes: 2 additions & 1 deletion Backend/Interfaces/IMergeService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ public interface IMergeService
Task<bool> IsInMergeGraylist(string projectId, List<string> wordIds, string? userId = null);
Task<int> UpdateMergeBlacklist(string projectId);
Task<int> UpdateMergeGraylist(string projectId);
Task<bool> GetAndStorePotentialDuplicates(string projectId, int maxInList, int maxLists, string userId);
Task<bool> GetAndStorePotentialDuplicates(
string projectId, int maxInList, int maxLists, string userId, bool ignoreProtected = false);
List<List<Word>>? RetrieveDups(string userId);
Task<bool> HasGraylistEntries(string projectId, string? userId = null);
Task<List<List<Word>>> GetGraylistEntries(string projectId, int maxLists, string? userId = null);
Expand Down
7 changes: 7 additions & 0 deletions Backend/Models/Project.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ public class Project
[BsonRepresentation(BsonType.String)]
public OffOnSetting AutocompleteSetting { get; set; }

[Required]
[BsonElement("protectedDataMergeAvoidEnabled")]
[BsonRepresentation(BsonType.String)]
public OffOnSetting ProtectedDataMergeAvoidEnabled { get; set; }

[Required]
[BsonElement("protectedDataOverrideEnabled")]
[BsonRepresentation(BsonType.String)]
Expand Down Expand Up @@ -94,6 +99,7 @@ public Project()
DefinitionsEnabled = false;
GrammaticalInfoEnabled = false;
AutocompleteSetting = OffOnSetting.On;
ProtectedDataMergeAvoidEnabled = OffOnSetting.Off;
ProtectedDataOverrideEnabled = OffOnSetting.Off;
SemDomWritingSystem = new();
VernacularWritingSystem = new();
Expand All @@ -119,6 +125,7 @@ public Project Clone()
DefinitionsEnabled = DefinitionsEnabled,
GrammaticalInfoEnabled = GrammaticalInfoEnabled,
AutocompleteSetting = AutocompleteSetting,
ProtectedDataMergeAvoidEnabled = ProtectedDataMergeAvoidEnabled,
ProtectedDataOverrideEnabled = ProtectedDataOverrideEnabled,
SemDomWritingSystem = SemDomWritingSystem.Clone(),
VernacularWritingSystem = VernacularWritingSystem.Clone(),
Expand Down
1 change: 1 addition & 0 deletions Backend/Repositories/ProjectRepository.cs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ public async Task<ResultOfUpdate> Update(string projectId, Project project)
.Set(x => x.DefinitionsEnabled, project.DefinitionsEnabled)
.Set(x => x.GrammaticalInfoEnabled, project.GrammaticalInfoEnabled)
.Set(x => x.AutocompleteSetting, project.AutocompleteSetting)
.Set(x => x.ProtectedDataMergeAvoidEnabled, project.ProtectedDataMergeAvoidEnabled)
.Set(x => x.ProtectedDataOverrideEnabled, project.ProtectedDataOverrideEnabled)
.Set(x => x.SemDomWritingSystem, project.SemDomWritingSystem)
.Set(x => x.VernacularWritingSystem, project.VernacularWritingSystem)
Expand Down
10 changes: 5 additions & 5 deletions Backend/Services/MergeService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -379,14 +379,14 @@ public async Task<List<List<Word>>> GetGraylistEntries(
/// </summary>
/// <returns> bool: true if successful or false if a newer request has begun. </returns>
public async Task<bool> GetAndStorePotentialDuplicates(
string projectId, int maxInList, int maxLists, string userId)
string projectId, int maxInList, int maxLists, string userId, bool ignoreProtected = false)
{
var counter = Interlocked.Increment(ref _mergeCounter);
if (StoreDups(userId, counter, null) != counter)
{
return false;
}
var dups = await GetPotentialDuplicates(projectId, maxInList, maxLists, userId);
var dups = await GetPotentialDuplicates(projectId, maxInList, maxLists, userId, ignoreProtected);
// Store the potential duplicates for user to retrieve later.
return StoreDups(userId, counter, dups) == counter;
}
Expand All @@ -395,7 +395,7 @@ public async Task<bool> GetAndStorePotentialDuplicates(
/// Get Lists of potential duplicate <see cref="Word"/>s in specified <see cref="Project"/>'s frontier.
/// </summary>
private async Task<List<List<Word>>> GetPotentialDuplicates(
string projectId, int maxInList, int maxLists, string? userId = null)
string projectId, int maxInList, int maxLists, string? userId = null, bool ignoreProtected = false)
{
var dupFinder = new DuplicateFinder(maxInList, maxLists, 2);

Expand All @@ -405,13 +405,13 @@ async Task<bool> isUnavailableSet(List<string> wordIds) =>
(await IsInMergeGraylist(projectId, wordIds, userId));

// First pass, only look for words with identical vernacular.
var wordLists = await dupFinder.GetIdenticalVernWords(collection, isUnavailableSet);
var wordLists = await dupFinder.GetIdenticalVernWords(collection, isUnavailableSet, ignoreProtected);

// If no such sets found, look for similar words.
if (wordLists.Count == 0)
{
collection = await _wordRepo.GetFrontier(projectId);
wordLists = await dupFinder.GetSimilarWords(collection, isUnavailableSet);
wordLists = await dupFinder.GetSimilarWords(collection, isUnavailableSet, ignoreProtected);
}

return wordLists;
Expand Down
15 changes: 11 additions & 4 deletions docs/user_guide/docs/project.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,18 @@ entry, rather than creating a (mostly) duplicate to something previously entered
(This does not affect spelling suggestions for the gloss, since those suggestions are based on a dictionary independent
of existing project data.)

#### Protected Data Override
#### Protected Data Management

The default setting is Off. Change this to On to allow project users in Merge Duplicates to override the
[protection](goals.md#protected-entries-and-senses) of words and senses that were imported with data not handled by The
Combine.
This section has two Off/On setting toggles related to the [protection](goals.md#protected-entries-and-senses) of words
and senses that were imported with data not handled by The Combine. Both settings are off by default.

Turn on "Avoid protected sets in Merge Duplicates" to make the Merge Duplicates tool only show sets of potential
duplicates with at least one word that isn't protected. This will avoid sets of mature entries imported from FieldWorks
and promote merging entries collected in The Combine.

Turn on "Allow data protection override in Merge Duplicates" to allow project users in Merge Duplicates to manually
override protection of words and senses. If anybody tries to merge or delete a protected entry or sense, The Combine
warns them of the fields that will be lost.

#### Archive Project

Expand Down
15 changes: 11 additions & 4 deletions docs/user_guide/docs/project.zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,18 @@ Combine 顶部应用框的中间位置看到一个齿轮图标或该项目名。

(这不影响对注释的拼写建议,因为这些建议是基于独立于现有项目数据的字典的)。

#### Protected Data Override
#### Protected Data Management

The default setting is Off. Change this to On to allow project users in Merge Duplicates to override the
[protection](goals.md#protected-entries-and-senses) of words and senses that were imported with data not handled by The
Combine.
This section has two Off/On setting toggles related to the [protection](goals.md#protected-entries-and-senses) of words
and senses that were imported with data not handled by The Combine. Both settings are off by default.

Turn on "Avoid protected sets in Merge Duplicates" to make the Merge Duplicates tool only show sets of potential
duplicates with at least one word that isn't protected. This will avoid sets of mature entries imported from FieldWorks
and promote merging entries collected in The Combine.

Turn on "Allow data protection override in Merge Duplicates" to allow project users in Merge Duplicates to manually
override protection of words and senses. If anybody tries to merge or delete a protected entry or sense, The Combine
warns them of the fields that will be lost.

#### 存档项目

Expand Down
13 changes: 10 additions & 3 deletions public/locales/en/translation.json
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,16 @@
"on": "On",
"hint": "In Data Entry, suggest existing Vernaculars similar to the Vernacular being typed."
},
"protectedDataOverride": {
"hint": "In Merge Duplicates, allow overriding protection of protected words and senses.",
"label": "Protected Data Override"
"protectedData": {
"label": "Protected Data Management",
"avoidInMerge": {
"hint": "In Merge Duplicates, skip sets that only have protected words. This will avoid sets of mature entries imported from FieldWorks and promote merging entries collected in The Combine.",
"label": "Avoid protected sets in Merge Duplicates"
},
"override": {
"hint": "In Merge Duplicates, allow overriding protection of protected words and senses.",
"label": "Allow data protection override in Merge Duplicates"
}
},
"invite": {
"inviteByEmailLabel": "Invite by Email",
Expand Down
37 changes: 34 additions & 3 deletions src/api/api/merge-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,15 @@ export const MergeApiAxiosParamCreator = function (
* @param {string} projectId
* @param {number} maxInList
* @param {number} maxLists
* @param {boolean} ignoreProtected
* @param {*} [options] Override http request option.
* @throws {RequiredError}
*/
findPotentialDuplicates: async (
projectId: string,
maxInList: number,
maxLists: number,
ignoreProtected: boolean,
options: any = {}
): Promise<RequestArgs> => {
// verify required parameter 'projectId' is not null or undefined
Expand All @@ -127,11 +129,21 @@ export const MergeApiAxiosParamCreator = function (
assertParamExists("findPotentialDuplicates", "maxInList", maxInList);
// verify required parameter 'maxLists' is not null or undefined
assertParamExists("findPotentialDuplicates", "maxLists", maxLists);
// verify required parameter 'ignoreProtected' is not null or undefined
assertParamExists(
"findPotentialDuplicates",
"ignoreProtected",
ignoreProtected
);
const localVarPath =
`/v1/projects/{projectId}/merge/finddups/{maxInList}/{maxLists}`
`/v1/projects/{projectId}/merge/finddups/{maxInList}/{maxLists}/{ignoreProtected}`
.replace(`{${"projectId"}}`, encodeURIComponent(String(projectId)))
.replace(`{${"maxInList"}}`, encodeURIComponent(String(maxInList)))
.replace(`{${"maxLists"}}`, encodeURIComponent(String(maxLists)));
.replace(`{${"maxLists"}}`, encodeURIComponent(String(maxLists)))
.replace(
`{${"ignoreProtected"}}`,
encodeURIComponent(String(ignoreProtected))
);
// use dummy base URL string because the URL constructor only accepts absolute URLs.
const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL);
let baseOptions;
Expand Down Expand Up @@ -519,13 +531,15 @@ export const MergeApiFp = function (configuration?: Configuration) {
* @param {string} projectId
* @param {number} maxInList
* @param {number} maxLists
* @param {boolean} ignoreProtected
* @param {*} [options] Override http request option.
* @throws {RequiredError}
*/
async findPotentialDuplicates(
projectId: string,
maxInList: number,
maxLists: number,
ignoreProtected: boolean,
options?: any
): Promise<
(axios?: AxiosInstance, basePath?: string) => AxiosPromise<void>
Expand All @@ -535,6 +549,7 @@ export const MergeApiFp = function (configuration?: Configuration) {
projectId,
maxInList,
maxLists,
ignoreProtected,
options
);
return createRequestFunction(
Expand Down Expand Up @@ -744,17 +759,25 @@ export const MergeApiFactory = function (
* @param {string} projectId
* @param {number} maxInList
* @param {number} maxLists
* @param {boolean} ignoreProtected
* @param {*} [options] Override http request option.
* @throws {RequiredError}
*/
findPotentialDuplicates(
projectId: string,
maxInList: number,
maxLists: number,
ignoreProtected: boolean,
options?: any
): AxiosPromise<void> {
return localVarFp
.findPotentialDuplicates(projectId, maxInList, maxLists, options)
.findPotentialDuplicates(
projectId,
maxInList,
maxLists,
ignoreProtected,
options
)
.then((request) => request(axios, basePath));
},
/**
Expand Down Expand Up @@ -903,6 +926,13 @@ export interface MergeApiFindPotentialDuplicatesRequest {
* @memberof MergeApiFindPotentialDuplicates
*/
readonly maxLists: number;

/**
*
* @type {boolean}
* @memberof MergeApiFindPotentialDuplicates
*/
readonly ignoreProtected: boolean;
}

/**
Expand Down Expand Up @@ -1074,6 +1104,7 @@ export class MergeApi extends BaseAPI {
requestParameters.projectId,
requestParameters.maxInList,
requestParameters.maxLists,
requestParameters.ignoreProtected,
options
)
.then((request) => request(this.axios, this.basePath));
Expand Down
6 changes: 6 additions & 0 deletions src/api/models/project.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ export interface Project {
* @memberof Project
*/
autocompleteSetting: OffOnSetting;
/**
*
* @type {OffOnSetting}
* @memberof Project
*/
protectedDataMergeAvoidEnabled: OffOnSetting;
/**
*
* @type {OffOnSetting}
Expand Down
6 changes: 4 additions & 2 deletions src/backend/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -372,10 +372,12 @@ export async function graylistAdd(wordIds: string[]): Promise<void> {
/** Start finding list of potential duplicates for merging. */
export async function findDuplicates(
maxInList: number,
maxLists: number
maxLists: number,
ignoreProtected = false
): Promise<void> {
const projectId = LocalStorage.getProjectId();
await mergeApi.findPotentialDuplicates(
{ projectId: LocalStorage.getProjectId(), maxInList, maxLists },
{ ignoreProtected, maxInList, maxLists, projectId },
defaultOptions()
);
}
Expand Down
Loading
Loading