[SPARK-43168][SQL] Remove the physicalDataType method from the DataType class

### What changes were proposed in this pull request?

`DataType` is a public API, while `PhysicalDataType` can stay an internal API/implementation detail. This PR removes the `physicalDataType` method from `DataType`, so the public `DataType` class no longer has a class dependency on `PhysicalDataType`; call sites resolve the physical type via `PhysicalDataType.apply(dataType)` instead.
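A minimal sketch of the call-site migration, assuming a build containing this change; `PhysicalDataType` and its companion live in the internal `org.apache.spark.sql.catalyst.types` package, and the wrapper object name here is illustrative:

```scala
import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalStringType}
import org.apache.spark.sql.types.{DataType, StringType}

object MigrationSketch {
  val dt: DataType = StringType

  // Before this change, call sites used the method on the public class:
  //   val pdt = dt.physicalDataType
  // After it, the mapping is resolved through the internal companion object:
  val pdt: PhysicalDataType = PhysicalDataType(dt) // i.e. PhysicalDataType.apply(dt)
  assert(pdt == PhysicalStringType)
}
```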

### Why are the changes needed?

Simplifies `DataType`: the public logical type hierarchy no longer depends on the internal physical type hierarchy.
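As an illustrative stand-alone analogue of the design (hypothetical `LogicalType`/`PhysicalType` names, not Spark's actual classes): the public logical types carry no reference to their physical counterparts, and the whole mapping is centralized in the physical hierarchy's companion `apply`.

```scala
// Hypothetical, self-contained analogue of the refactoring.
sealed trait LogicalType // public side: no reference to the physical hierarchy
case object LogicalBoolean extends LogicalType
case object LogicalString extends LogicalType
final case class LogicalDecimal(precision: Int, scale: Int) extends LogicalType

sealed trait PhysicalType // internal side
case object PhysicalBoolean extends PhysicalType
case object PhysicalString extends PhysicalType
final case class PhysicalDecimal(precision: Int, scale: Int) extends PhysicalType

object PhysicalType {
  // The logical-to-physical mapping lives in exactly one place,
  // instead of one override per logical class.
  def apply(dt: LogicalType): PhysicalType = dt match {
    case LogicalBoolean       => PhysicalBoolean
    case LogicalString        => PhysicalString
    case LogicalDecimal(p, s) => PhysicalDecimal(p, s)
  }
}
```

Deleting the per-class overrides, as the diff below does, removes the cross-hierarchy dependency while keeping the match exhaustiveness-checked in one place.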

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing unit tests.

Closes apache#40826 from amaliujia/catalyst_datatype_refactor_8.

Authored-by: Rui Wang <[email protected]>
Signed-off-by: Herman van Hovell <[email protected]>
amaliujia authored and hvanhovell committed Apr 18, 2023
1 parent 61e8c5b commit db2625c
Showing 34 changed files with 18 additions and 84 deletions.

@@ -29,7 +29,7 @@ public static Object read(
DataType dataType,
boolean handleNull,
boolean handleUserDefinedType) {
- PhysicalDataType physicalDataType = dataType.physicalDataType();
+ PhysicalDataType physicalDataType = PhysicalDataType.apply(dataType);
if (handleNull && (obj.isNullAt(ordinal) || physicalDataType instanceof PhysicalNullType)) {
return null;
}

@@ -73,7 +73,7 @@ public static boolean isFixedLength(DataType dt) {
if (dt instanceof UserDefinedType) {
return isFixedLength(((UserDefinedType<?>) dt).sqlType());
}
- PhysicalDataType pdt = dt.physicalDataType();
+ PhysicalDataType pdt = PhysicalDataType.apply(dt);
if (pdt instanceof PhysicalDecimalType) {
return ((DecimalType) dt).precision() <= Decimal.MAX_LONG_DIGITS();
} else {
@@ -88,7 +88,7 @@ public static boolean isMutable(DataType dt) {
if (dt instanceof UserDefinedType) {
return isMutable(((UserDefinedType<?>) dt).sqlType());
}
- PhysicalDataType pdt = dt.physicalDataType();
+ PhysicalDataType pdt = PhysicalDataType.apply(dt);
return pdt instanceof PhysicalPrimitiveType || pdt instanceof PhysicalDecimalType ||
pdt instanceof PhysicalCalendarIntervalType;
}

@@ -49,7 +49,7 @@ public InternalRow copy() {
row.setNullAt(i);
} else {
DataType dt = columns[i].dataType();
- PhysicalDataType pdt = dt.physicalDataType();
+ PhysicalDataType pdt = PhysicalDataType.apply(dt);
if (pdt instanceof PhysicalBooleanType) {
row.setBoolean(i, getBoolean(i));
} else if (pdt instanceof PhysicalByteType) {

@@ -56,7 +56,7 @@ public InternalRow copy() {
row.setNullAt(i);
} else {
DataType dt = data.getChild(i).dataType();
- PhysicalDataType pdt = dt.physicalDataType();
+ PhysicalDataType pdt = PhysicalDataType.apply(dt);
if (pdt instanceof PhysicalBooleanType) {
row.setBoolean(i, getBoolean(i));
} else if (pdt instanceof PhysicalByteType) {

@@ -131,7 +131,7 @@ object InternalRow {
def getAccessor(dt: DataType, nullable: Boolean = true): (SpecializedGetters, Int) => Any = {
val getValueNullSafe: (SpecializedGetters, Int) => Any = dt match {
case u: UserDefinedType[_] => getAccessor(u.sqlType, nullable)
- case _ => dt.physicalDataType match {
+ case _ => PhysicalDataType(dt) match {
case PhysicalBooleanType => (input, ordinal) => input.getBoolean(ordinal)
case PhysicalByteType => (input, ordinal) => input.getByte(ordinal)
case PhysicalShortType => (input, ordinal) => input.getShort(ordinal)

@@ -136,7 +136,7 @@ object InterpretedUnsafeProjection {
// Create the basic writer.
val unsafeWriter: (SpecializedGetters, Int) => Unit = dt match {
case udt: UserDefinedType[_] => generateFieldWriter(writer, udt.sqlType, nullable)
- case _ => dt.physicalDataType match {
+ case _ => PhysicalDataType(dt) match {
case PhysicalBooleanType => (v, i) => writer.write(i, v.getBoolean(i))

case PhysicalByteType => (v, i) => writer.write(i, v.getByte(i))

@@ -1628,7 +1628,7 @@
dataType match {
case udt: UserDefinedType[_] => getValue(input, udt.sqlType, ordinal)
case _ if isPrimitiveType(jt) => s"$input.get${primitiveTypeName(jt)}($ordinal)"
- case _ => dataType.physicalDataType match {
+ case _ => PhysicalDataType(dataType) match {
case _: PhysicalArrayType => s"$input.getArray($ordinal)"
case PhysicalBinaryType => s"$input.getBinary($ordinal)"
case PhysicalCalendarIntervalType => s"$input.getInterval($ordinal)"
@@ -1909,7 +1909,7 @@
case udt: UserDefinedType[_] => javaType(udt.sqlType)
case ObjectType(cls) if cls.isArray => s"${javaType(ObjectType(cls.getComponentType))}[]"
case ObjectType(cls) => cls.getName
- case _ => dt.physicalDataType match {
+ case _ => PhysicalDataType(dt) match {
case _: PhysicalArrayType => "ArrayData"
case PhysicalBinaryType => "byte[]"
case PhysicalBooleanType => JAVA_BOOLEAN

@@ -208,7 +208,7 @@ object Literal {
case _ if v == null => true
case ObjectType(cls) => cls.isInstance(v)
case udt: UserDefinedType[_] => doValidate(v, udt.sqlType)
- case dt => dataType.physicalDataType match {
+ case dt => PhysicalDataType(dataType) match {
case PhysicalArrayType(et, _) =>
v.isInstanceOf[ArrayData] && {
val ar = v.asInstanceOf[ArrayData]

@@ -563,7 +563,7 @@ object FileSourceMetadataAttribute {
* supported (including most primitive types). Notably unsupported types include [[ObjectType]],
* [[UserDefinedType]], and the complex types ([[StructType]], [[MapType]], [[ArrayType]]).
*/
- def isSupportedType(dataType: DataType): Boolean = dataType.physicalDataType match {
+ def isSupportedType(dataType: DataType): Boolean = PhysicalDataType(dataType) match {
// PhysicalPrimitiveType covers: Boolean, Byte, Double, Float, Integer, Long, Null, Short
case _: PhysicalPrimitiveType | _: PhysicalDecimalType => true
case PhysicalBinaryType | PhysicalStringType | PhysicalCalendarIntervalType => true

@@ -23,7 +23,7 @@ import scala.reflect.runtime.universe.typeTag
import org.apache.spark.sql.catalyst.expressions.{Ascending, BoundReference, InterpretedOrdering, SortOrder}
import org.apache.spark.sql.catalyst.util.{ArrayData, SQLOrderingUtil}
import org.apache.spark.sql.errors.QueryExecutionErrors
- import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteExactNumeric, ByteType, DataType, DateType, DayTimeIntervalType, Decimal, DecimalExactNumeric, DecimalType, DoubleExactNumeric, DoubleType, FloatExactNumeric, FloatType, FractionalType, IntegerExactNumeric, IntegerType, IntegralType, LongExactNumeric, LongType, MapType, NullType, NumericType, ShortExactNumeric, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, YearMonthIntervalType}
+ import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteExactNumeric, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, Decimal, DecimalExactNumeric, DecimalType, DoubleExactNumeric, DoubleType, FloatExactNumeric, FloatType, FractionalType, IntegerExactNumeric, IntegerType, IntegralType, LongExactNumeric, LongType, MapType, NullType, NumericType, ShortExactNumeric, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, VarcharType, YearMonthIntervalType}
import org.apache.spark.unsafe.types.{ByteArray, UTF8String}

sealed abstract class PhysicalDataType {
@@ -39,6 +39,8 @@ object PhysicalDataType {
case ShortType => PhysicalShortType
case IntegerType => PhysicalIntegerType
case LongType => PhysicalLongType
+ case VarcharType(_) => PhysicalStringType
+ case CharType(_) => PhysicalStringType
case StringType => PhysicalStringType
case FloatType => PhysicalFloatType
case DoubleType => PhysicalDoubleType
@@ -47,6 +49,7 @@
case BinaryType => PhysicalBinaryType
case TimestampType => PhysicalLongType
case TimestampNTZType => PhysicalLongType
+ case CalendarIntervalType => PhysicalCalendarIntervalType
case DayTimeIntervalType(_, _) => PhysicalLongType
case YearMonthIntervalType(_, _) => PhysicalIntegerType
case DateType => PhysicalIntegerType
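Note that the hunk above also centralizes mappings that previously existed only as overrides on individual classes: `CharType`/`VarcharType` share `PhysicalStringType` with `StringType`, and `CalendarIntervalType` maps to `PhysicalCalendarIntervalType`. A small sketch of the resulting behavior (the wrapper object name is illustrative):

```scala
import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalStringType}
import org.apache.spark.sql.types.{CharType, StringType, VarcharType}

object CharVarcharSketch {
  // Char and varchar are physically strings, so all three resolve identically,
  // mirroring the physicalDataType overrides removed from those classes below.
  assert(PhysicalDataType(CharType(10)) == PhysicalStringType)
  assert(PhysicalDataType(VarcharType(10)) == PhysicalStringType)
  assert(PhysicalDataType(StringType) == PhysicalStringType)
}
```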

@@ -20,7 +20,6 @@ package org.apache.spark.sql.types
import org.json4s.JsonDSL._

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalArrayType, PhysicalDataType}
import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat

/**
@@ -88,9 +87,6 @@ case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataT
*/
override def defaultSize: Int = 1 * elementType.defaultSize

- private[sql] override def physicalDataType: PhysicalDataType =
-   PhysicalArrayType(elementType, containsNull)

override def simpleString: String = s"array<${elementType.simpleString}>"

override def catalogString: String = s"array<${elementType.catalogString}>"

@@ -18,7 +18,6 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalBinaryType, PhysicalDataType}

/**
* The data type representing `Array[Byte]` values.
@@ -31,8 +30,6 @@ class BinaryType private() extends AtomicType {
*/
override def defaultSize: Int = 100

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalBinaryType

private[spark] override def asNullable: BinaryType = this
}


@@ -18,7 +18,6 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalBooleanType, PhysicalDataType}

/**
* The data type representing `Boolean` values. Please use the singleton `DataTypes.BooleanType`.
@@ -32,8 +31,6 @@ class BooleanType private() extends AtomicType {
*/
override def defaultSize: Int = 1

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalBooleanType

private[spark] override def asNullable: BooleanType = this
}


@@ -18,7 +18,6 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalByteType, PhysicalDataType}

/**
* The data type representing `Byte` values. Please use the singleton `DataTypes.ByteType`.
@@ -32,8 +31,6 @@ class ByteType private() extends IntegralType {
*/
override def defaultSize: Int = 1

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalByteType

override def simpleString: String = "tinyint"

private[spark] override def asNullable: ByteType = this

@@ -18,7 +18,6 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalCalendarIntervalType, PhysicalDataType}

/**
* The data type representing calendar intervals. The calendar interval is stored internally in
@@ -38,8 +37,6 @@ class CalendarIntervalType private() extends DataType {

override def defaultSize: Int = 16

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalCalendarIntervalType

override def typeName: String = "interval"

private[spark] override def asNullable: CalendarIntervalType = this

@@ -18,14 +18,12 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Experimental
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalStringType}

@Experimental
case class CharType(length: Int) extends AtomicType {
require(length >= 0, "The length of char type cannot be negative.")

override def defaultSize: Int = length
- private[sql] override def physicalDataType: PhysicalDataType = PhysicalStringType
override def typeName: String = s"char($length)"
override def toString: String = s"CharType($length)"
private[spark] override def asNullable: CharType = this

@@ -32,7 +32,7 @@ import org.apache.spark.annotation.Stable
import org.apache.spark.sql.catalyst.analysis.Resolver
import org.apache.spark.sql.catalyst.expressions.{Cast, Expression}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
- import org.apache.spark.sql.catalyst.types._
+ import org.apache.spark.sql.catalyst.types.DataTypeUtils
import org.apache.spark.sql.catalyst.util.DataTypeJsonUtils.{DataTypeJsonDeserializer, DataTypeJsonSerializer}
import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat
import org.apache.spark.sql.errors.QueryCompilationErrors
@@ -107,8 +107,6 @@ abstract class DataType extends AbstractDataType {

override private[sql] def acceptsType(other: DataType): Boolean =
DataTypeUtils.sameType(this, other)

- private[sql] def physicalDataType: PhysicalDataType = UninitializedPhysicalType
}



@@ -18,7 +18,6 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalIntegerType}

/**
* The date type represents a valid date in the proleptic Gregorian calendar.
@@ -34,8 +33,6 @@ class DateType private() extends DatetimeType {
*/
override def defaultSize: Int = 4

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalIntegerType

private[spark] override def asNullable: DateType = this
}


@@ -18,7 +18,6 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Unstable
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalLongType}
import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.types.DayTimeIntervalType.fieldToString

@@ -47,8 +46,6 @@ case class DayTimeIntervalType(startField: Byte, endField: Byte) extends AnsiInt
*/
override def defaultSize: Int = 8

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalLongType

private[spark] override def asNullable: DayTimeIntervalType = this

override val typeName: String = {

@@ -23,7 +23,6 @@ import scala.annotation.tailrec

import org.apache.spark.annotation.Stable
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalDecimalType}
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
import org.apache.spark.sql.internal.SQLConf

@@ -101,9 +100,6 @@ case class DecimalType(precision: Int, scale: Int) extends FractionalType {
*/
override def defaultSize: Int = if (precision <= Decimal.MAX_LONG_DIGITS) 8 else 16

- private[sql] override def physicalDataType: PhysicalDataType =
-   PhysicalDecimalType(precision, scale)

override def simpleString: String = s"decimal($precision,$scale)"

private[spark] override def asNullable: DecimalType = this

@@ -20,7 +20,6 @@ package org.apache.spark.sql.types
import scala.util.Try

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalDoubleType}

/**
* The data type representing `Double` values. Please use the singleton `DataTypes.DoubleType`.
@@ -34,8 +33,6 @@ class DoubleType private() extends FractionalType {
*/
override def defaultSize: Int = 8

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalDoubleType

private[spark] override def asNullable: DoubleType = this
}


@@ -20,7 +20,6 @@ package org.apache.spark.sql.types
import scala.util.Try

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalFloatType}

/**
* The data type representing `Float` values. Please use the singleton `DataTypes.FloatType`.
@@ -34,8 +33,6 @@ class FloatType private() extends FractionalType {
*/
override def defaultSize: Int = 4

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalFloatType

private[spark] override def asNullable: FloatType = this
}


@@ -18,7 +18,6 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalIntegerType}

/**
* The data type representing `Int` values. Please use the singleton `DataTypes.IntegerType`.
@@ -32,8 +31,6 @@ class IntegerType private() extends IntegralType {
*/
override def defaultSize: Int = 4

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalIntegerType

override def simpleString: String = "int"

private[spark] override def asNullable: IntegerType = this

@@ -18,7 +18,6 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalLongType}

/**
* The data type representing `Long` values. Please use the singleton `DataTypes.LongType`.
@@ -32,8 +31,6 @@ class LongType private() extends IntegralType {
*/
override def defaultSize: Int = 8

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalLongType

override def simpleString: String = "bigint"

private[spark] override def asNullable: LongType = this

@@ -21,7 +21,6 @@ import org.json4s.JsonAST.JValue
import org.json4s.JsonDSL._

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalMapType}
import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat

/**
@@ -68,9 +67,6 @@ case class MapType(
*/
override def defaultSize: Int = 1 * (keyType.defaultSize + valueType.defaultSize)

- private[sql] override def physicalDataType: PhysicalDataType =
-   PhysicalMapType(keyType, valueType, valueContainsNull)

override def simpleString: String = s"map<${keyType.simpleString},${valueType.simpleString}>"

override def catalogString: String = s"map<${keyType.catalogString},${valueType.catalogString}>"

@@ -18,7 +18,6 @@
package org.apache.spark.sql.types

import org.apache.spark.annotation.Stable
- import org.apache.spark.sql.catalyst.types.{PhysicalDataType, PhysicalNullType}

/**
* The data type representing `NULL` values. Please use the singleton `DataTypes.NullType`.
@@ -32,8 +31,6 @@ class NullType private() extends DataType {
// Defined with a private constructor so the companion object is the only possible instantiation.
override def defaultSize: Int = 1

- private[sql] override def physicalDataType: PhysicalDataType = PhysicalNullType

private[spark] override def asNullable: NullType = this

override def typeName: String = "void"