iceberg初实践


1. 将 Iceberg 的 Spark runtime jar 包（iceberg-spark-runtime）放入 Spark 安装目录下的 jars 文件夹中

2. 进入 Spark 的 bin 目录，运行 ./spark-shell 启动交互式环境，然后执行下面的代码

import org.apache.hadoop.conf.Configuration
import org.apache.iceberg.hadoop.HadoopCatalog
import org.apache.iceberg.catalog.TableIdentifier
import org.apache.iceberg.spark.SparkSchemaUtil

// The Iceberg HadoopCatalog stores table metadata directly under this HDFS warehouse path.
val warehousePath = "hdfs://dc1:8020/..."
// A default Hadoop configuration is sufficient inside spark-shell.
val conf = new Configuration()
val catalog = new HadoopCatalog(conf, warehousePath)

// Identify the target table as default.test inside the catalog.
val name = TableIdentifier.of("default", "test")

// Sample rows; spark-shell auto-imports spark.implicits._, which provides toDF.
val data = Seq(1 -> "a", 2 -> "b", 3 -> "c").toDF("id", "data")

// Convert the Spark schema to an Iceberg schema and create the table.
val schema = SparkSchemaUtil.convert(data.schema)
val table = catalog.createTable(name, schema)

// Read the Iceberg table back by its warehouse location.
// NOTE(review): this path must be <warehousePath>/default/test — keep it in sync
// with the warehousePath defined above.
val df = spark.read.format("iceberg").load("hdfs://dc1:8020/.../default/test")
df.printSchema
df.show()

// Use createOrReplaceTempView rather than createTempView: the latter throws
// AnalysisException if the view already exists, which breaks re-running this
// snippet in the same spark-shell session.
df.createOrReplaceTempView("test")
spark.sql("select * from test").show()

// Append the sample rows to the Iceberg table at its warehouse location
// (same path as the read above); mode("append") adds data without overwriting.
val appendWriter = data.write.format("iceberg").mode("append")
appendWriter.save("hdfs://dc1:8020/.../default/test")