
Commit 96e0d4b

mihailoale-db authored and cloud-fan committed
[SPARK-54352][SQL] Introduce SQLConf.canonicalize to centralize normalization of strings
### What changes were proposed in this pull request?

In this PR I propose that we introduce `SQLConf.canonicalize` to centralize normalization of strings. This will help us avoid code duplication and ease the development of the single-pass analyzer.

### Why are the changes needed?

To ease the development of the single-pass analyzer (avoid code duplication).

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #53063 from mihailoale-db/canonicalizer.

Authored-by: mihailoale-db <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
1 parent 0a42f55 commit 96e0d4b
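
For context, here is a minimal, standalone Scala sketch of the behavior the new helper centralizes. `SketchConf` and its flag are illustrative stand-ins for `SQLConf` and `spark.sql.caseSensitive`, not Spark code; the real helper reads `caseSensitiveAnalysis` from the active `SQLConf`.

```scala
// Illustrative stand-in for SQLConf: when analysis is case-insensitive,
// identifiers are lower-cased before comparison; otherwise they are kept as-is.
final case class SketchConf(caseSensitiveAnalysis: Boolean) {
  def canonicalize(s: String): String =
    if (!caseSensitiveAnalysis) s.toLowerCase else s
}

object CanonicalizeSketch {
  def main(args: Array[String]): Unit = {
    val caseInsensitive = SketchConf(caseSensitiveAnalysis = false)
    val caseSensitive   = SketchConf(caseSensitiveAnalysis = true)
    println(caseInsensitive.canonicalize("myCol")) // prints "mycol"
    println(caseSensitive.canonicalize("myCol"))   // prints "myCol"
  }
}
```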

File tree

2 files changed: +18 -14 lines


sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala

Lines changed: 4 additions & 14 deletions

```diff
@@ -41,16 +41,6 @@ object ResolveLambdaVariables extends Rule[LogicalPlan] {
 
   type LambdaVariableMap = Map[String, NamedExpression]
 
-  private def canonicalizer = {
-    if (!conf.caseSensitiveAnalysis) {
-      // scalastyle:off caselocale
-      s: String => s.toLowerCase
-      // scalastyle:on caselocale
-    } else {
-      s: String => s
-    }
-  }
-
   override def apply(plan: LogicalPlan): LogicalPlan = {
     plan.resolveOperatorsWithPruning(
       _.containsAnyPattern(HIGH_ORDER_FUNCTION, LAMBDA_FUNCTION, LAMBDA_VARIABLE), ruleId) {
@@ -80,11 +70,11 @@ object ResolveLambdaVariables extends Rule[LogicalPlan] {
             "actualNumArgs" -> argInfo.size.toString))
       }
 
-      if (names.map(a => canonicalizer(a.name)).distinct.size < names.size) {
+      if (names.map(a => conf.canonicalize(a.name)).distinct.size < names.size) {
         e.failAnalysis(
           errorClass = "INVALID_LAMBDA_FUNCTION_CALL.DUPLICATE_ARG_NAMES",
           messageParameters = Map(
-            "args" -> names.map(a => canonicalizer(a.name)).map(toSQLId(_)).mkString(", "),
+            "args" -> names.map(a => conf.canonicalize(a.name)).map(toSQLId(_)).mkString(", "),
             "caseSensitiveConfig" -> toSQLConf(SQLConf.CASE_SENSITIVE.key)))
       }
 
@@ -124,11 +114,11 @@ object ResolveLambdaVariables extends Rule[LogicalPlan] {
       l
 
     case l: LambdaFunction if !l.hidden =>
-      val lambdaMap = l.arguments.map(v => canonicalizer(v.name) -> v).toMap
+      val lambdaMap = l.arguments.map(v => conf.canonicalize(v.name) -> v).toMap
      l.mapChildren(resolve(_, parentLambdaMap ++ lambdaMap))
 
    case u @ UnresolvedNamedLambdaVariable(name +: nestedFields) =>
-      parentLambdaMap.get(canonicalizer(name)) match {
+      parentLambdaMap.get(conf.canonicalize(name)) match {
        case Some(lambda) =>
          nestedFields.foldLeft(lambda: Expression) { (expr, fieldName) =>
            ExtractValue(expr, Literal(fieldName), conf.resolver)
```
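
As a hedged usage sketch of the behavior this rule enforces (unchanged by the patch, only refactored to use the shared helper): under the default case-insensitive analysis, `x` and `X` canonicalize to the same lambda argument name, so a query like the one below is expected to fail analysis with `INVALID_LAMBDA_FUNCTION_CALL.DUPLICATE_ARG_NAMES`. The session setup and object name are illustrative.

```scala
import org.apache.spark.sql.SparkSession

// Expected-behavior sketch: with spark.sql.caseSensitive=false (the default),
// the two lambda arguments `x` and `X` collide after canonicalization, and the
// analyzer should reject the query with DUPLICATE_ARG_NAMES.
object LambdaDuplicateArgsDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("demo").getOrCreate()
    try {
      // transform accepts an (element, index) lambda, so two argument names are
      // syntactically legal; the duplicate-name check happens during analysis.
      spark.sql("SELECT transform(array(1, 2), (x, X) -> x + X)").show()
    } catch {
      case e: org.apache.spark.sql.AnalysisException => println(e.getMessage)
    } finally {
      spark.stop()
    }
  }
}
```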

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 14 additions & 0 deletions

```diff
@@ -7220,6 +7220,20 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
     }
   }
 
+  /**
+   * Returns the lower case representation of a string if `caseSensitiveAnalysis` is disabled.
+   * Otherwise, returns the original string.
+   */
+  def canonicalize(s: String): String = {
+    if (!caseSensitiveAnalysis) {
+      // scalastyle:off caselocale
+      s.toLowerCase
+      // scalastyle:on caselocale
+    } else {
+      s
+    }
+  }
+
   /**
    * Returns the error handler for handling hint errors.
    */
```
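
A hedged sketch of how analyzer-side code can pick up the new helper: anything that mixes in `SQLConfHelper` (as catalyst rules do) already exposes `conf`, so identifier comparisons can go through the shared method instead of a rule-local `canonicalizer`. The `IdentifierMatcher` object below is hypothetical, for illustration only.

```scala
import org.apache.spark.sql.catalyst.SQLConfHelper

// Hypothetical example object (not part of this patch): compares two
// identifiers under the current analysis mode by canonicalizing both sides
// through the shared SQLConf.canonicalize helper.
object IdentifierMatcher extends SQLConfHelper {
  def matches(a: String, b: String): Boolean =
    conf.canonicalize(a) == conf.canonicalize(b)
}
```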
