root
|-- id: string (nullable = true)
|-- name: string (nullable = true)

image
用下面这行代码便可以打印注释信息:
df.schema.foreach(s=>println(s.name,s.metadata))
(id,{"comment":"ID","HIVE_TYPE_STRING":"string"})
(name,{"comment":"名字","HIVE_TYPE_STRING":"string"})

image
3、读取Oracle表并打印DataFrmae的元数据信息
3.1 新建Oracle测试表(带注释)
CREATE TABLE ORA_TEST (ID VARCHAR2(100),
NAME VARCHAR2(100)
);COMMENT ON COLUMN ORA_TEST.ID IS 'ID';COMMENT ON COLUMN ORA_TEST.NAME IS '名字';COMMENT ON TABLE ORA_TEST IS '测试';

image
3.2 读取Oracle表,并打印元数据
代码:
package com.dkl.leanring.spark.sql.Oracleimport org.apache.spark.sql.SparkSessionobject OracleSchemaDemo { def main(args: Array[String]): Unit = { val spark = SparkSession.builder().appName("OracleSchemaDemo").master("local").getOrCreate() val df = spark.read
.format("jdbc")
.option("url", "jdbc:oracle:thin:@192.168.44.128:1521:orcl")
.option("dbtable", "ORA_TEST")
.option("user", "bigdata")
.option("password", "bigdata")
.option("driver", "oracle.jdbc.driver.OracleDriver")
.load()
df.schema.foreach(s => println(s.name, s.metadata))
spark.stop
}
}
(ID,{"name":"ID","scale":0})
(NAME,{"name":"NAME","scale":0})

image
注:Spark2.3.0和Spark2.2.1的元数据不太一样,上面的结果是Spark2.2.1(也是我写博客测试用的),项目中用的Spark2.3.0,2.3.0的元数据是空的,如下
(ID,{})
(NAME,{})
可见并没有注释信息
3.3 给DataFrame添加注释
import org.apache.spark.sql.types._val commentMap = Map("ID" -> "ID", "NAME" -> "名字")val schema = df.schema.map(s => {
s.withComment(commentMap(s.name))
})//根据添加了注释的schema,新建DataFrameval new_df = spark.createDataFrame(df.rdd, StructType(schema)).repartition(160)
new_df.schema.foreach(s => println(s.name, s.metadata))
(ID,{"comment":"ID","name":"ID","scale":0})
(NAME,{"comment":"名字","name":"NAME","scale":0})

image
4、 测试写到Hive表有没有注释
需将前面代码中的spark改为支持hive,即加上enableHiveSupport()
spark.sql("use test")
new_df.write.mode("overwrite").saveAsTable("ORA_TEST")
然后在hive里看一下,是否有注释

image
可以看到,成功的把注释也保存到里hive里
5、附录
附上在Eclipse运行的完整代码
package com.dkl.leanring.spark.sql.Oracleimport org.apache.spark.sql.SparkSessionimport org.apache.spark.sql.types._object OracleSchemaDemo { def main(args: Array[String]): Unit = { val spark = SparkSession.builder().appName("OracleSchemaDemo").master("local").enableHiveSupport().getOrCreate() val df = spark.read
.format("jdbc")
.option("url", "jdbc:oracle:thin:@192.168.44.128:1521:orcl")
.option("dbtable", "ORA_TEST")
.option("user", "bigdata")
.option("password", "bigdata")
.option("driver", "oracle.jdbc.driver.OracleDriver")
.load()
df.schema.foreach(s => println(s.name, s.metadata)) val commentMap = Map("ID" -> "ID", "NAME" -> "名字") val schema = df.schema.map(s => {
s.withComment(commentMap(s.name))
}) //根据添加了注释的schema,新建DataFrame
val new_df = spark.createDataFrame(df.rdd, StructType(schema)).repartition(160)
new_df.schema.foreach(s => println(s.name, s.metadata))
spark.sql("use test") //保存到hive
new_df.write.mode("overwrite").saveAsTable("ORA_TEST")
spark.stop
}
}
作者:董可伦
链接:https://www.jianshu.com/p/e4c90dc08935