diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 8ad01e54ae9b6..eaee334a01cbd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -144,10 +144,10 @@ case class CatalogStorageFormat( locationUri: Option[URI], inputFormat: Option[String], outputFormat: Option[String], - serdeName: Option[String], serde: Option[String], compressed: Boolean, - properties: Map[String, String]) extends MetadataMapSupport { + properties: Map[String, String], + serdeName: Option[String] = None) extends MetadataMapSupport { override def toString: String = { toLinkedHashMap.map { case (key, value) => @@ -181,7 +181,7 @@ case class CatalogStorageFormat( object CatalogStorageFormat { /** Empty storage format for default values and copies. */ val empty = CatalogStorageFormat(locationUri = None, inputFormat = None, outputFormat = None, - serdeName = None, serde = None, compressed = false, properties = Map.empty) + serde = None, compressed = false, properties = Map.empty) } /** @@ -616,11 +616,11 @@ case class CatalogTable( inputFormat: Option[String] = storage.inputFormat, outputFormat: Option[String] = storage.outputFormat, compressed: Boolean = false, - serdeName: Option[String] = storage.serdeName, serde: Option[String] = storage.serde, - properties: Map[String, String] = storage.properties): CatalogTable = { + properties: Map[String, String] = storage.properties, + serdeName: Option[String] = storage.serdeName): CatalogTable = { copy(storage = CatalogStorageFormat( - locationUri, inputFormat, outputFormat, serdeName, serde, compressed, properties)) + locationUri, inputFormat, outputFormat, serde, compressed, properties, serdeName)) } def toJsonLinkedHashMap: mutable.LinkedHashMap[String, JValue] = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogEventSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogEventSuite.scala index 12e9cd790ca6f..f332393e503f9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogEventSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogEventSuite.scala @@ -247,7 +247,6 @@ class ExternalCatalogEventSuite extends SparkFunSuite { locationUri = Some(tableUri), inputFormat = Some("tableInputFormat"), outputFormat = Some("tableOutputFormat"), - serdeName = None, serde = None, compressed = false, properties = Map.empty) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala index 0a9e370e6b219..f06128f236792 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala @@ -911,7 +911,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite { tableType = CatalogTableType.EXTERNAL, storage = CatalogStorageFormat( Some(Utils.createTempDir().toURI), - None, None, None, None, false, Map.empty), + None, None, None, false, Map.empty), schema = new StructType().add("a", "int").add("b", "string"), provider = Some(defaultProvider) ) @@ -959,7 +959,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite { Map("partCol1" -> "7", "partCol2" -> "8"), CatalogStorageFormat( Some(tempPath.toURI), - None, None, None, None, false, Map.empty)) + None, None, None, false, Map.empty)) catalog.createPartitions("db1", "tbl", Seq(partWithExistingDir), ignoreIfExists = false) tempPath.delete() @@ -968,7 +968,7 @@ abstract class ExternalCatalogSuite extends SparkFunSuite { Map("partCol1" -> "9", "partCol2" -> "10"), CatalogStorageFormat( Some(tempPath.toURI), - None, None, None, None, false, Map.empty)) + None, None, None, false, Map.empty)) catalog.createPartitions("db1", "tbl", Seq(partWithNonExistingDir), ignoreIfExists = false) assert(tempPath.exists()) } @@ -1030,7 +1030,6 @@ abstract class CatalogTestUtils { locationUri = None, inputFormat = Some(tableInputFormat), outputFormat = Some(tableOutputFormat), - serdeName = None, serde = None, compressed = false, properties = Map.empty) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/AlwaysPersistedConfigsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/AlwaysPersistedConfigsSuite.scala index ac438320f967b..bfadf1eb1885a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/AlwaysPersistedConfigsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/AlwaysPersistedConfigsSuite.scala @@ -158,7 +158,7 @@ class AlwaysPersistedConfigsSuite extends QueryTest with SharedSparkSession { val catalogTable = new CatalogTable( identifier = TableIdentifier(testViewName), tableType = CatalogTableType.VIEW, - storage = CatalogStorageFormat(None, None, None, None, None, false, Map.empty), + storage = CatalogStorageFormat(None, None, None, None, false, Map.empty), schema = new StructType(), properties = Map.empty[String, String] ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 8d7eeff6edead..4f7a06206e6f0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -317,7 +317,7 @@ trait DDLSuiteBase extends SQLTestUtils { spec: TablePartitionSpec, tableName: TableIdentifier): Unit = { val part = CatalogTablePartition( - spec, CatalogStorageFormat(None, None, None, None, None, false, Map())) + spec, CatalogStorageFormat(None, None, None, None, false, Map())) catalog.createPartitions(tableName, Seq(part), ignoreIfExists = false) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index ea5651238f458..d9aa50d1fb3a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -944,7 +944,6 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { locationUri = None, inputFormat = None, outputFormat = None, - serdeName = None, serde = None, compressed = false, properties = Map.empty), @@ -973,7 +972,6 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { locationUri = None, inputFormat = None, outputFormat = None, - serdeName = None, serde = None, compressed = false, properties = Map.empty), diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index bfff91b6fc6bf..766624a8c48b2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -177,23 +177,23 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log val options = storage.properties + (ParquetOptions.MERGE_SCHEMA -> SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING).toString) storage.copy( - serdeName = None, serde = None, - properties = options + properties = options, + serdeName = None ) } else { val options = storage.properties if (SQLConf.get.getConf(SQLConf.ORC_IMPLEMENTATION) == "native") { storage.copy( - serdeName = None, serde = None, - properties = options + properties = options, + serdeName = None ) } else { storage.copy( - serdeName = None, serde = None, - properties = options + properties = options, + serdeName = None ) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 482d12207b024..898469221796b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -557,11 +557,11 @@ private[hive] class HiveClientImpl( outputFormat = Option(h.getTTable.getSd.getOutputFormat).orElse { Option(h.getStorageHandler).map(_.getOutputFormatClass.getName) }, - serdeName = Option(h.getTTable.getSd.getSerdeInfo.getName), serde = Option(h.getSerializationLib), compressed = h.getTTable.getSd.isCompressed, properties = Option(h.getTTable.getSd.getSerdeInfo.getParameters) - .map(_.asScala.toMap).orNull + .map(_.asScala.toMap).orNull, + serdeName = Option(h.getTTable.getSd.getSerdeInfo.getName).filter(_.nonEmpty) ), // For EXTERNAL_TABLE, the table properties has a particular field "EXTERNAL". This is added // in the function toHiveTable. @@ -1202,7 +1202,7 @@ private[hive] object HiveClientImpl extends Logging { hiveTable.getTTable.getSd.setLocation(loc)} table.storage.inputFormat.map(toInputFormat).foreach(hiveTable.setInputFormatClass) table.storage.outputFormat.map(toOutputFormat).foreach(hiveTable.setOutputFormatClass) - table.storage.serdeName.foreach(hiveTable.getSd.getSerdeInfo.setName) + table.storage.serdeName.foreach(hiveTable.getTTable.getSd.getSerdeInfo.setName) hiveTable.setSerializationLib( table.storage.serde.getOrElse("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) table.storage.properties.foreach { case (k, v) => hiveTable.setSerdeParam(k, v) } @@ -1283,11 +1283,11 @@ private[hive] object HiveClientImpl extends Logging { locationUri = Option(CatalogUtils.stringToURI(apiPartition.getSd.getLocation)), inputFormat = Option(apiPartition.getSd.getInputFormat), outputFormat = Option(apiPartition.getSd.getOutputFormat), - serdeName = Option(apiPartition.getSd.getSerdeInfo.getName), serde = Option(apiPartition.getSd.getSerdeInfo.getSerializationLib), compressed = apiPartition.getSd.isCompressed, properties = Option(apiPartition.getSd.getSerdeInfo.getParameters) - .map(_.asScala.toMap).orNull), + .map(_.asScala.toMap).orNull, + serdeName = Option(apiPartition.getSd.getSerdeInfo.getName).filter(_.nonEmpty)), createTime = apiPartition.getCreateTime.toLong * 1000, lastAccessTime = apiPartition.getLastAccessTime.toLong * 1000, parameters = properties, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 671a705b53905..ce4ea4c52dca8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -513,4 +513,16 @@ class DataSourceWithHiveMetastoreCatalogSuite assert(tableWithSerdeName.storage.serdeName === Some("testSerdeName")) } } + + test("SPARK-55645: serdeName should be None for tables without an explicit serde name") { + withTable("t") { + sql("CREATE TABLE t (d1 DECIMAL(10,3), d2 STRING) STORED AS TEXTFILE") + + val hiveTable = + sessionState.catalog.getTableMetadata(TableIdentifier("t", Some("default"))) + // Hive Metastore returns "" for tables without an explicit serde name. + // This should be mapped to None, not Some(""). + assert(hiveTable.storage.serdeName === None) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala index 089b131d3c272..5e521565383ce 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala @@ -107,7 +107,6 @@ class HiveSchemaInferenceSuite locationUri = Option(dir.toURI), inputFormat = serde.inputFormat, outputFormat = serde.outputFormat, - serdeName = None, serde = serde.serde, compressed = false, properties = Map("serialization.format" -> "1")), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index b0afd8c2376d3..772db8dff615f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -756,7 +756,6 @@ class MetastoreDataSourcesSuite extends QueryTest locationUri = None, inputFormat = None, outputFormat = None, - serdeName = None, serde = None, compressed = false, properties = Map( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala index 623410c4e69df..7db9632c87b9d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala @@ -82,7 +82,6 @@ class HiveClientSuite(version: String) extends HiveVersionSuite(version) { locationUri = None, inputFormat = Some(classOf[TextInputFormat].getName), outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), - serdeName = None, serde = Some(classOf[LazySimpleSerDe].getName), compressed = false, properties = Map.empty @@ -370,7 +369,6 @@ class HiveClientSuite(version: String) extends HiveVersionSuite(version) { locationUri = None, inputFormat = None, outputFormat = None, - serdeName = None, serde = None, compressed = false, properties = Map.empty) @@ -385,7 +383,6 @@ class HiveClientSuite(version: String) extends HiveVersionSuite(version) { locationUri = None, inputFormat = Some(classOf[TextInputFormat].getName), outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), - serdeName = None, serde = Some(classOf[LazySimpleSerDe].getName), compressed = false, properties = Map.empty diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index ddcd2aea06dff..fae01d6cbc451 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -63,7 +63,6 @@ class HivePartitionFilteringSuite(version: String) locationUri = None, inputFormat = Some(classOf[TextInputFormat].getName), outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), - serdeName = None, serde = Some(classOf[LazySimpleSerDe].getName()), compressed = false, properties = Map.empty diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index a73736fbde7ae..43f15a12cad19 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -84,7 +84,6 @@ class HiveDDLSuite locationUri = Some(catalog.defaultTablePath(name)), inputFormat = serde.get.inputFormat, outputFormat = serde.get.outputFormat, - serdeName = None, serde = serde.get.serde, compressed = false, properties = Map.empty) @@ -93,7 +92,6 @@ class HiveDDLSuite locationUri = Some(catalog.defaultTablePath(name)), inputFormat = Some("org.apache.hadoop.mapred.SequenceFileInputFormat"), outputFormat = Some("org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"), - serdeName = None, serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"), compressed = false, properties = Map("serialization.format" -> "1"))