Skip to content

Commit 9b3d9d0

Browse files
authored
feat(lax-source): provide replacement options (#1625)
* feat: provide replacement options * refactor: rename apply replacements for clarity
1 parent 9bf0dd2 commit 9b3d9d0

File tree

3 files changed

+81
-32
lines changed

3 files changed

+81
-32
lines changed

src/config.ts

+52-27
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,54 @@ export enum DropPathsOption {
4242
Once = 'once',
4343
/** try to drop every folder of the path */
4444
All = 'all'
45-
};
45+
}
46+
47+
export interface FlowrLaxSourcingOptions extends MergeableRecord {
48+
/**
49+
* search for filenames matching in the lowercase
50+
*/
51+
readonly ignoreCapitalization: boolean
52+
/**
53+
* try to infer the working directory from the main or any script to analyze.
54+
*/
55+
readonly inferWorkingDirectory: InferWorkingDirectory
56+
/**
57+
* Additionally search in these paths
58+
*/
59+
readonly searchPath: readonly string[]
60+
/**
61+
* Allow to drop the first or all parts of the sourced path,
62+
* if it is relative.
63+
*/
64+
readonly dropPaths: DropPathsOption
65+
/**
66+
* How often the same file can be sourced within a single run?
67+
* Please be aware: in case of cyclic sources this may not reach a fixpoint so give this a sensible limit.
68+
*/
69+
readonly repeatedSourceLimit?: number
70+
/**
71+
* sometimes files may have a different name in the source call (e.g., due to later replacements),
72+
* with this setting you can provide a list of replacements to apply for each sourced file.
73+
* Every replacement consists of a record that maps a regex to a replacement string.
74+
*
75+
* @example
76+
* ```ts
77+
* [
78+
* { }, // no replacement -> still try the original name/path
79+
* { '.*\\.R$': 'main.R' }, // replace all .R files with main.R
80+
* { '\\s' : '_' }, // replace all whitespace characters with underscores
81+
* { '\\s' : '-', 'oo': 'aa' }, // replace all whitespace characters with dashes and oo with aa
82+
* ]
83+
* ```
84+
*
85+
* Given a `source("foo bar.R")` this configuration will search for (in this order):
86+
* - `foo bar.R` (original name)
87+
* - `main.R` (replaced with main.R)
88+
* - `foo_bar.R` (replaced spaces)
89+
* - `faa-bar.R` (replaced spaces with dashes and oo with aa)
90+
*/
91+
readonly applyReplacements?: Record<string, string>[]
92+
}
4693

4794
export interface FlowrConfigOptions extends MergeableRecord {
4895
/**
@@ -97,30 +144,7 @@ export interface FlowrConfigOptions extends MergeableRecord {
97144
* based on the configurations you give it.
98145
* This option is only in effect if {@link ignoreSourceCalls} is set to false.
99146
*/
100-
readonly resolveSource?: {
101-
/**
102-
* search for filenames matching in the lowercase
103-
*/
104-
readonly ignoreCapitalization: boolean
105-
/**
106-
* try to infer the working directory from the main or any script to analyze.
107-
*/
108-
readonly inferWorkingDirectory: InferWorkingDirectory
109-
/**
110-
* Additionally search in these paths
111-
*/
112-
readonly searchPath: readonly string[]
113-
/**
114-
* Allow to drop the first or all parts of the sourced path,
115-
* if it is relative.
116-
*/
117-
readonly dropPaths: DropPathsOption
118-
/**
119-
* How often the same file can be sourced within a single run?
120-
* Please be aware: in case of cyclic sources this may not reach a fixpoint so give this a sensible limit.
121-
*/
122-
readonly repeatedSourceLimit?: number
123-
},
147+
readonly resolveSource?: FlowrLaxSourcingOptions,
124148
/**
125149
* The configuration for flowR's slicer
126150
*/
@@ -146,7 +170,7 @@ export interface TreeSitterEngineConfig extends MergeableRecord {
146170
/**
147171
* Whether to use the lax parser for parsing R code (allowing for syntax errors). If this is undefined, the strict parser will be used.
148172
*/
149-
readonly lax?: boolean
173+
readonly lax?: boolean
150174
}
151175

152176
export interface RShellEngineConfig extends MergeableRecord {
@@ -231,7 +255,8 @@ export const flowrConfigFileSchema = Joi.object({
231255
ignoreCapitalization: Joi.boolean().description('Search for filenames matching in the lowercase.'),
232256
inferWorkingDirectory: Joi.string().valid(...Object.values(InferWorkingDirectory)).description('Try to infer the working directory from the main or any script to analyze.'),
233257
searchPath: Joi.array().items(Joi.string()).description('Additionally search in these paths.'),
234-
repeatedSourceLimit: Joi.number().optional().description('How often the same file can be sourced within a single run? Please be aware: in case of cyclic sources this may not reach a fixpoint so give this a sensible limit.')
258+
repeatedSourceLimit: Joi.number().optional().description('How often the same file can be sourced within a single run? Please be aware: in case of cyclic sources this may not reach a fixpoint so give this a sensible limit.'),
259+
applyReplacements: Joi.array().items(Joi.object()).description('Provide name replacements for loaded files')
235260
}).optional().description('If lax source calls are active, flowR searches for sourced files much more freely, based on the configurations you give it. This option is only in effect if `ignoreSourceCalls` is set to false.'),
236261
slicer: Joi.object({
237262
threshold: Joi.number().optional().description('The maximum number of iterations to perform on a single function call during slicing.')

src/dataflow/internal/process/functions/call/built-in/built-in-source.ts

+21-3
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import { dataflowLogger } from '../../../../../logger';
2222
import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type';
2323
import { overwriteEnvironment } from '../../../../../environments/overwrite';
2424
import type { NoInfo } from '../../../../../../r-bridge/lang-4.x/ast/model/model';
25-
import { expensiveTrace, log } from '../../../../../../util/log';
25+
import { expensiveTrace, log, LogLevel } from '../../../../../../util/log';
2626
import fs from 'fs';
2727
import { normalize, normalizeTreeSitter } from '../../../../../../r-bridge/lang-4.x/ast/parser/json/parser';
2828
import { RShellExecutor } from '../../../../../../r-bridge/shell-executor';
@@ -66,6 +66,16 @@ function returnPlatformPath(p: string): string {
6666
return p.replaceAll(AnyPathSeparator, path.sep);
6767
}
6868

69+
/**
 * Produces the candidate paths obtained by applying each replacement record to `path`.
 *
 * Every record maps the source of a regular expression to its replacement string.
 * All entries of one record are applied one after another (in `Object.entries` order),
 * each with the global flag, so a single record yields exactly one candidate.
 * An empty record leaves the path untouched.
 *
 * Candidates that are identical to an earlier one are dropped (the first occurrence wins),
 * so that the order of the records is preserved but the same path is never returned twice.
 *
 * @param path         - the path to rewrite
 * @param replacements - the replacement records, processed in the given order
 * @returns the distinct rewritten paths, in the order in which they were first produced
 */
function applyReplacements(path: string, replacements: readonly Record<string, string>[]): string[] {
	const candidates = replacements.map(replacement => {
		let candidate = path;
		for(const [pattern, substitute] of Object.entries(replacement)) {
			// the key is the regex source; 'g' replaces every occurrence, not just the first one
			candidate = candidate.replace(new RegExp(pattern, 'g'), substitute);
		}
		return candidate;
	});
	// a Set keeps insertion order, hence this removes duplicates without reordering
	return [...new Set(candidates)];
}
78+
6979
/**
7080
* Tries to find the file requested by a `source` call and returns the paths at which it was found
7181
* @param seed - the path originally requested in the `source` call
@@ -80,7 +90,7 @@ export function findSource(seed: string, data: { referenceChain: readonly RParse
8090
...(inferWdFromScript(config?.inferWorkingDirectory ?? InferWorkingDirectory.No, data.referenceChain))
8191
];
8292

83-
const tryPaths = [seed];
93+
let tryPaths = [seed];
8494
switch(config?.dropPaths ?? DropPathsOption.No) {
8595
case DropPathsOption.Once: {
8696
const first = platformBasename(seed);
@@ -103,6 +113,12 @@ export function findSource(seed: string, data: { referenceChain: readonly RParse
103113
break;
104114
}
105115

116+
if(config?.applyReplacements) {
117+
const r = config.applyReplacements;
118+
tryPaths = tryPaths.flatMap(t => applyReplacements(t, r));
119+
}
120+
121+
106122
const found: string[] = [];
107123
for(const explore of [undefined, ...explorePaths]) {
108124
for(const tryPath of tryPaths) {
@@ -115,7 +131,9 @@ export function findSource(seed: string, data: { referenceChain: readonly RParse
115131
}
116132
}
117133
}
118-
log.info(`Found sourced file ${JSON.stringify(seed)} at ${JSON.stringify(found)}`);
134+
if(log.settings.minLevel >= LogLevel.Info) {
135+
log.info(`Found sourced file ${JSON.stringify(seed)} at ${JSON.stringify(found)}`);
136+
}
119137
return found;
120138
}
121139

test/functionality/slicing/configuration/source-finding.test.ts

+8-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ describe('source finding', () => {
2020
[`a${path.sep}b.txt`]: 'f <- function() { function() 3 }',
2121
'c.txt': 'f <- function() { x <<- 3 }',
2222
[`x${path.sep}y${path.sep}z${path.sep}b.txt`]: 'x <- 3',
23-
[`x${path.sep}y${path.sep}b.txt`]: 'x <- 3'
23+
[`x${path.sep}y${path.sep}b.txt`]: 'x <- 3',
24+
'with-spaces.txt': 'x <- 3',
2425
};
2526
beforeAll(() => {
2627
setSourceProvider(requestProviderFromText(sources));
@@ -31,7 +32,11 @@ describe('source finding', () => {
3132
dropPaths: DropPathsOption.All,
3233
ignoreCapitalization: true,
3334
inferWorkingDirectory: InferWorkingDirectory.ActiveScript,
34-
searchPath: []
35+
searchPath: [],
36+
applyReplacements: [
37+
{ },
38+
{ ' ': '-' }
39+
]
3540
}
3641
}
3742
});
@@ -53,4 +58,5 @@ describe('source finding', () => {
5358
assertSourceFound('b.txt', [`a${path.sep}b.txt`], [{ request: 'file', content: `a${path.sep}x.txt` }]);
5459
assertSourceFound('b.txt', [`x${path.sep}y${path.sep}z${path.sep}b.txt`], [{ request: 'file', content: `x${path.sep}y${path.sep}z${path.sep}g.txt` }]);
5560
assertSourceFound(`..${path.sep}b.txt`, [`x${path.sep}y${path.sep}b.txt`], [{ request: 'file', content: `x${path.sep}y${path.sep}z${path.sep}g.txt` }]);
61+
assertSourceFound('with spaces.txt', ['with-spaces.txt']); // space replacements
5662
});

0 commit comments

Comments
 (0)