Skip to content

[Linter] Linting R Code for Reproducibility Issues #1487

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
30 changes: 30 additions & 0 deletions src/linter/linter-format.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import type { FlowrSearchLike } from '../search/flowr-search-builder';
import type { FlowrSearchElement, FlowrSearchElements } from '../search/flowr-search';
import type { MergeableRecord } from '../util/objects';
import type { GeneratorNames } from '../search/search-executor/search-generators';
import type { TransformerNames } from '../search/search-executor/search-transformer';
import type { ParentInformation } from '../r-bridge/lang-4.x/ast/model/processing/decorate';
import type { LintingRuleConfig, LintingRuleNames } from './linter-rules';

export interface LintingRule<Result extends LintingResult, Config extends MergeableRecord = never, Info = ParentInformation, Elements extends FlowrSearchElement<Info>[] = FlowrSearchElement<Info>[]> {
readonly createSearch: (config: Config) => FlowrSearchLike<Info, GeneratorNames, TransformerNames[], FlowrSearchElements<Info, Elements>>
// between these two, there's a chance for the search for multiple rules to be combined or optimized maybe
readonly processSearchResult: (elements: FlowrSearchElements<Info, Elements>, config: Config) => Result[]
readonly prettyPrint: (result: Result) => string
readonly defaultConfig: NoInfer<Config>
}

export interface LintingResult {
readonly certainty: LintingCertainty
}

export interface ConfiguredLintingRule<Name extends LintingRuleNames = LintingRuleNames> {
readonly name: Name
readonly config: LintingRuleConfig<Name>
}


export enum LintingCertainty {
Maybe = 'maybe',
Definitely = 'definitely'
}
11 changes: 11 additions & 0 deletions src/linter/linter-rules.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { R1_DEPRECATED_FUNCTIONS } from './rules/1-deprecated-functions';
import type { LintingRule } from './linter-format';

export const LintingRules = {
'deprecated-functions': R1_DEPRECATED_FUNCTIONS
} as const;

export type LintingRuleNames = keyof typeof LintingRules

export type LintingRuleResult<Name extends LintingRuleNames> = typeof LintingRules[Name] extends LintingRule<infer Result, infer _Config, infer _Info, infer _Elements> ? Result : never
export type LintingRuleConfig<Name extends LintingRuleNames> = typeof LintingRules[Name] extends LintingRule<infer _Result, infer Config, infer _Info, infer _Elements> ? Config : never
46 changes: 46 additions & 0 deletions src/linter/rules/1-deprecated-functions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import type { LintingResult, LintingRule } from '../linter-format';
import { LintingCertainty } from '../linter-format';
import { Q } from '../../search/flowr-search-builder';
import type { MergeableRecord } from '../../util/objects';
import { formatRange } from '../../util/mermaid/dfg';
import { Enrichment, enrichmentContent } from '../../search/search-executor/search-enrichers';
import type { SourceRange } from '../../util/range';
import type { Identifier } from '../../dataflow/environments/identifier';

export interface DeprecatedFunctionsResult extends LintingResult {
function: string
range: SourceRange
}

export interface DeprecatedFunctionsConfig extends MergeableRecord {
deprecatedFunctions: string[]
}

export const R1_DEPRECATED_FUNCTIONS = {
createSearch: (_config) => Q.all().with(Enrichment.CallTargets),
processSearchResult: (elements, config) => elements.getElements()
.flatMap(element => {
const targets = enrichmentContent(element, Enrichment.CallTargets).targets;
// if there is a call target that is not built-in (ie a custom function), we don't want to mark it as deprecated
// eventually we'd want to solve this with an argument to the CallTargets enrichment like satisfiesCallTargets does!
if(targets.some(t => typeof t !== 'string')) {
return [];
}
return targets.map(target => {
return ({
range: element.node.info.fullRange as SourceRange,
target: target as Identifier
});
});
})
.filter(element => config.deprecatedFunctions.includes(element.target))
.map(element => ({
certainty: LintingCertainty.Definitely,
function: element.target,
range: element.range
})),
prettyPrint: result => `Function ${result.function} at ${formatRange(result.range)}`,
defaultConfig: {
deprecatedFunctions: ['all_equal', 'arrange_all', 'distinct_all', 'filter_all', 'group_by_all', 'summarise_all', 'mutate_all', 'select_all', 'vars', 'all_vars', 'id', 'failwith', 'select_vars', 'rename_vars', 'select_var', 'current_vars', 'bench_tbls', 'compare_tbls', 'compare_tbls2', 'eval_tbls', 'eval_tbls2', 'location', 'changes', 'combine', 'do', 'funs', 'add_count_', 'add_tally_', 'arrange_', 'count_', 'distinct_', 'do_', 'filter_', 'funs_', 'group_by_', 'group_indices_', 'mutate_', 'tally_', 'transmute_', 'rename_', 'rename_vars_', 'select_', 'select_vars_', 'slice_', 'summarise_', 'summarize_', 'summarise_each', 'src_local', 'tbl_df', 'add_rownames', 'group_nest', 'group_split', 'with_groups', 'nest_by', 'progress_estimated', 'recode', 'sample_n', 'top_n', 'transmute', 'fct_explicit_na', 'aes_', 'aes_auto', 'annotation_logticks', 'is.Coord', 'coord_flip', 'coord_map', 'is.facet', 'fortify', 'is.ggproto', 'guide_train', 'is.ggplot', 'qplot', 'is.theme', 'gg_dep', 'liply', 'isplit2', 'list_along', 'cross', 'invoke', 'at_depth', 'prepend', 'rerun', 'splice', '`%@%`', 'rbernoulli', 'rdunif', 'when', 'update_list', 'map_raw', 'accumulate', 'reduce_right', 'flatten', 'map_dfr', 'as_vector', 'transpose', 'melt_delim', 'melt_fwf', 'melt_table', 'read_table2', 'str_interp', 'as_tibble', 'data_frame', 'tibble_', 'data_frame_', 'lst_', 'as_data_frame', 'as.tibble', 'frame_data', 'trunc_mat', 'is.tibble', 'tidy_names', 'set_tidy_names', 'repair_names', 'extract_numeric', 'complete_', 'drop_na_', 'expand_', 'crossing_', 'nesting_', 'extract_', 'fill_', 'gather_', 'nest_', 'separate_rows_', 'separate_', 'spread_', 'unite_', 'unnest_', 'extract', 'gather', 'nest_legacy', 'separate_rows', 'separate', 'spread',]
}
} as const satisfies LintingRule<DeprecatedFunctionsResult, DeprecatedFunctionsConfig>;
Original file line number Diff line number Diff line change
Expand Up @@ -140,5 +140,9 @@ export const CallContextQueryDefinition = {
includeUndefinedFiles: Joi.boolean().optional().description('If `fileFilter` is set, but a nodes `file` attribute is `undefined`, should we include it in the results? Defaults to `true`.')
}).optional().description('Filter that, when set, a node\'s file attribute must match to be considered'),
linkTo: Joi.alternatives(CallContextQueryLinkTo, Joi.array().items(CallContextQueryLinkTo)).optional().description('Links the current call to the last call of the given kind. This way, you can link a call like `points` to the latest graphics plot etc.')
}).description('Call context query used to find calls in the dataflow graph')
}).description('Call context query used to find calls in the dataflow graph'),
flattenInvolvedNodes: (queryResults: BaseQueryResult) => {
const out = queryResults as CallContextQueryResult;
return Object.values(out.kinds).flatMap(({ subkinds }) => Object.values(subkinds).flatMap(subkinds => subkinds.map(subkind => subkind.id)));
}
} as const;
7 changes: 6 additions & 1 deletion src/queries/catalog/cluster-query/cluster-query-format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import type { DataflowGraphClusters } from '../../../dataflow/cluster';
import { executeDataflowClusterQuery } from './cluster-query-executor';
import { graphToMermaidUrl } from '../../../util/mermaid/dfg';
import { summarizeIdsIfTooLong } from '../../query-print';
import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';

/**
* Calculates and returns all clusters encountered in the dataflow graph.
Expand Down Expand Up @@ -39,5 +40,9 @@ export const ClusterQueryDefinition = {
},
schema: Joi.object({
type: Joi.string().valid('dataflow-cluster').required().description('The type of the query.'),
}).description('The cluster query calculates and returns all clusters in the dataflow graph.')
}).description('The cluster query calculates and returns all clusters in the dataflow graph.'),
flattenInvolvedNodes: (queryResults: BaseQueryResult): NodeId[] => {
const out = queryResults as QueryResults<'dataflow-cluster'>['dataflow-cluster'];
return out.clusters.flatMap(({ members }) => members);
}
} as const satisfies SupportedQuery<'dataflow-cluster'>;
3 changes: 2 additions & 1 deletion src/queries/catalog/config-query/config-query-format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,6 @@ export const ConfigQueryDefinition = {
},
schema: Joi.object({
type: Joi.string().valid('config').required().description('The type of the query.'),
}).description('The config query retrieves the current configuration of the flowR instance.')
}).description('The config query retrieves the current configuration of the flowR instance.'),
flattenInvolvedNodes: () => []
} as const;
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ export const DataflowLensQueryDefinition = {
},
schema: Joi.object({
type: Joi.string().valid('dataflow-lens').required().description('The type of the query.'),
}).description('The dataflow-lens query returns a simplified view on the dataflow graph')
}).description('The dataflow-lens query returns a simplified view on the dataflow graph'),
flattenInvolvedNodes: () => []
} as const satisfies SupportedQuery<'dataflow-lens'>;
11 changes: 10 additions & 1 deletion src/queries/catalog/dataflow-query/dataflow-query-format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { printAsMs } from '../../../util/time';
import { graphToMermaidUrl } from '../../../util/mermaid/dfg';
import Joi from 'joi';
import type { QueryResults, SupportedQuery } from '../../query';
import type { NodeId } from '../../../r-bridge/lang-4.x/ast/model/processing/node-id';

/**
* Simple re-returns the dataflow graph of the analysis.
Expand All @@ -29,5 +30,13 @@ export const DataflowQueryDefinition = {
},
schema: Joi.object({
type: Joi.string().valid('dataflow').required().description('The type of the query.'),
}).description('The dataflow query simply returns the dataflow graph, there is no need to pass it multiple times!')
}).description('The dataflow query simply returns the dataflow graph, there is no need to pass it multiple times!'),
flattenInvolvedNodes: queryResults => {
const flattened: NodeId[] = [];
const out = queryResults as QueryResults<'dataflow'>['dataflow'];
for(const id of out.graph.idMap?.keys() ?? []) {
flattened.push(id);
}
return flattened;
}
} as const satisfies SupportedQuery<'dataflow'>;
Original file line number Diff line number Diff line change
Expand Up @@ -243,5 +243,14 @@ export const DependenciesQueryDefinition = {
sourceFunctions: functionInfoSchema.description('The set of source functions to search for.'),
readFunctions: functionInfoSchema.description('The set of data reading functions to search for.'),
writeFunctions: functionInfoSchema.description('The set of data writing functions to search for.'),
}).description('The dependencies query retrieves and returns the set of all dependencies in the dataflow graph, which includes libraries, sourced files, read data, and written data.')
}).description('The dependencies query retrieves and returns the set of all dependencies in the dataflow graph, which includes libraries, sourced files, read data, and written data.'),
flattenInvolvedNodes: (queryResults: BaseQueryResult): NodeId[] => {
const out = queryResults as QueryResults<'dependencies'>['dependencies'];
return [
...out.libraries.map(library => library.nodeId),
...out.sourcedFiles.map(sourced => sourced.nodeId),
...out.readData.map(read => read.nodeId),
...out.writtenData.map(write => write.nodeId)
];
}
} as const satisfies SupportedQuery<'dependencies'>;
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,6 @@ export const HappensBeforeQueryDefinition = {
type: Joi.string().valid('happens-before').required().description('The type of the query.'),
a: Joi.string().required().description('The first slicing criterion.'),
b: Joi.string().required().description('The second slicing criterion.')
}).description('Happens-Before tracks whether a always happens before b.')
}).description('Happens-Before tracks whether a always happens before b.'),
flattenInvolvedNodes: () => []
} as const satisfies SupportedQuery<'happens-before'>;
3 changes: 2 additions & 1 deletion src/queries/catalog/id-map-query/id-map-query-format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ export const IdMapQueryDefinition = {
},
schema: Joi.object({
type: Joi.string().valid('id-map').required().description('The type of the query.'),
}).description('The id map query retrieves the id map from the normalized AST.')
}).description('The id map query retrieves the id map from the normalized AST.'),
flattenInvolvedNodes: () => []
} as const satisfies SupportedQuery<'id-map'>;
6 changes: 5 additions & 1 deletion src/queries/catalog/lineage-query/lineage-query-format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,9 @@ export const LineageQueryDefinition = {
schema: Joi.object({
type: Joi.string().valid('lineage').required().description('The type of the query.'),
criterion: Joi.string().required().description('The slicing criterion of the node to get the lineage of.')
}).description('Lineage query used to find the lineage of a node in the dataflow graph')
}).description('Lineage query used to find the lineage of a node in the dataflow graph'),
flattenInvolvedNodes: (queryResults: BaseQueryResult): NodeId[] => {
const out = queryResults as QueryResults<'lineage'>['lineage'];
return Object.values(out.lineages).flatMap(lineage => [...lineage]);
}
} as const satisfies SupportedQuery<'lineage'>;
38 changes: 38 additions & 0 deletions src/queries/catalog/linter-query/linter-query-executor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import type { BasicQueryData } from '../../base-query-format';
import type { LinterQuery, LinterQueryResult } from './linter-query-format';
import { runSearch } from '../../../search/flowr-search-executor';
import { FlowrSearchElements } from '../../../search/flowr-search';
import type { LintingRuleNames, LintingRuleResult } from '../../../linter/linter-rules';
import { LintingRules } from '../../../linter/linter-rules';
import { log } from '../../../util/log';
import type { ConfiguredLintingRule } from '../../../linter/linter-format';


export function executeLinterQuery({ ast, dataflow }: BasicQueryData, queries: readonly LinterQuery[]): LinterQueryResult {
const flattened = queries.flatMap(q => q.rules ?? (Object.keys(LintingRules) as LintingRuleNames[]));
const distinct = new Set(flattened);
if(distinct.size !== flattened.length) {
const pretty = [...distinct].filter(r => flattened.indexOf(r) !== flattened.lastIndexOf(r)).map(r => typeof r === 'string' ? r : r.name).join(', ');
log.warn(`Linter query collection contains duplicate rules ${pretty}, only linting for each rule once`);
}

const results: { [L in LintingRuleNames]?: LintingRuleResult<L>[] } = {};

const start = Date.now();

for(const entry of distinct) {
const ruleName = typeof entry === 'string' ? entry : entry.name;
const rule = LintingRules[ruleName];
const config = (entry as ConfiguredLintingRule)?.config ?? rule.defaultConfig;
const ruleSearch = rule.createSearch(config);
const searchResult = runSearch(ruleSearch, { normalize: ast, dataflow });
results[ruleName] = rule.processSearchResult(new FlowrSearchElements(searchResult), config);
}

return {
'.meta': {
timing: Date.now() - start
},
results
};
}
55 changes: 55 additions & 0 deletions src/queries/catalog/linter-query/linter-query-format.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import type { BaseQueryFormat, BaseQueryResult } from '../../base-query-format';
import type { QueryResults, SupportedQuery } from '../../query';
import { bold } from '../../../util/ansi';
import { printAsMs } from '../../../util/time';
import Joi from 'joi';
import { executeLinterQuery } from './linter-query-executor';
import type { LintingRuleNames, LintingRuleResult } from '../../../linter/linter-rules';
import { LintingRules } from '../../../linter/linter-rules';
import type { ConfiguredLintingRule } from '../../../linter/linter-format';
import { LintingCertainty } from '../../../linter/linter-format';

export interface LinterQuery extends BaseQueryFormat {
readonly type: 'linter';
/**
* The rules to lint for. If unset, all rules will be included.
*/
readonly rules?: (LintingRuleNames | ConfiguredLintingRule)[];
}

export interface LinterQueryResult extends BaseQueryResult {
readonly results: { [L in LintingRuleNames]?: LintingRuleResult<L>[] }
}

export const LinterQueryDefinition = {
executor: executeLinterQuery,
asciiSummarizer: (formatter, _processed, queryResults, result) => {
const out = queryResults as QueryResults<'linter'>['linter'];
result.push(`Query: ${bold('linter', formatter)} (${printAsMs(out['.meta'].timing, 0)})`);
for(const [ruleName, results] of Object.entries(out.results)) {
const rule = LintingRules[ruleName as LintingRuleNames];
result.push(` ╰ ${ruleName}:`);
for(const certainty of [LintingCertainty.Definitely, LintingCertainty.Maybe]) {
const certaintyResults = results.filter(r => r.certainty === certainty);
if(certaintyResults.length) {
result.push(` ╰ ${certainty}:`);
for(const res of certaintyResults) {
result.push(` ╰ ${rule.prettyPrint(res)}`);
}
}
}
}
return true;
},
schema: Joi.object({
type: Joi.string().valid('linter').required().description('The type of the query.'),
rules: Joi.array().items(
Joi.string().valid(...Object.keys(LintingRules)),
Joi.object({
name: Joi.string().valid(...Object.keys(LintingRules)).required(),
config: Joi.object()
})
).description('The rules to lint for. If unset, all rules will be included.')
}).description('The linter query lints for the given set of rules and returns the result.'),
flattenInvolvedNodes: () => []
} as const satisfies SupportedQuery<'linter'>;
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,6 @@ export const LocationMapQueryDefinition = {
},
schema: Joi.object({
type: Joi.string().valid('location-map').required().description('The type of the query.'),
}).description('The location map query retrieves the location of every id in the ast.')
}).description('The location map query retrieves the location of every id in the ast.'),
flattenInvolvedNodes: () => []
} as const;
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@ export const NormalizedAstQueryDefinition = {
},
schema: Joi.object({
type: Joi.string().valid('normalized-ast').required().description('The type of the query.'),
}).description('The normalized AST query simply returns the normalized AST, there is no need to pass it multiple times!')
}).description('The normalized AST query simply returns the normalized AST, there is no need to pass it multiple times!'),
flattenInvolvedNodes: () => []
} as const satisfies SupportedQuery<'normalized-ast'>;
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import type { BasicQueryData } from '../../base-query-format';

export function executeProjectQuery({ dataflow }: BasicQueryData, queries: readonly ProjectQuery[]): ProjectQueryResult {
if(queries.length !== 1) {
log.warn('Id-Map query expects only up to one query, but got', queries.length);
log.warn('Project query expects only up to one query, but got', queries.length);
}
return {
'.meta': {
Expand Down
3 changes: 2 additions & 1 deletion src/queries/catalog/project-query/project-query-format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@ export const ProjectQueryDefinition = {
},
schema: Joi.object({
type: Joi.string().valid('project').required().description('The type of the query.'),
}).description('The project query provides information on the analyzed project.')
}).description('The project query provides information on the analyzed project.'),
flattenInvolvedNodes: () => []
} as const satisfies SupportedQuery<'project'>;
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,6 @@ export const ResolveValueQueryDefinition = {
schema: Joi.object({
type: Joi.string().valid('resolve-value').required().description('The type of the query.'),
criteria: Joi.array().items(Joi.string()).min(1).required().description('The slicing criteria to use.'),
}).description('The resolve value query used to get definitions of an identifier')
}).description('The resolve value query used to get definitions of an identifier'),
flattenInvolvedNodes: () => []
} as const satisfies SupportedQuery<'resolve-value'>;
6 changes: 5 additions & 1 deletion src/queries/catalog/search-query/search-query-format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,9 @@ export const SearchQueryDefinition = {
schema: Joi.object({
type: Joi.string().valid('search').required().description('The type of the query.'),
search: Joi.object().required().description('The search query to execute.')
}).description('The search query searches the normalized AST and dataflow graph for nodes that match the given search query.')
}).description('The search query searches the normalized AST and dataflow graph for nodes that match the given search query.'),
flattenInvolvedNodes: (queryResults: BaseQueryResult): NodeId[] => {
const out = queryResults as QueryResults<'search'>['search'];
return out.results.flatMap(({ ids }) => ids);
}
} as const satisfies SupportedQuery<'search'>;
Loading
Loading