// Prepare the CSV input file (loaded below from /tmp/cars.csv)
year,make,model,comment,blank
"2012","Tesla","S","No comment",
"1997","Ford","E350","Go get one now they are going fast",
"2015","Chevy","Volt"
// Process the CSV and write the selected columns as ORC for Hive, without declaring a schema up front
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.orc._

// Hive-aware SQL context built on `sc`, the SparkContext that spark-shell provides.
val hiveContext = new HiveContext(sc)

// Load /tmp/cars.csv through the spark-csv data source. The first line is treated
// as the header (column names) and the column types are inferred from the data.
// Trailing dots (rather than leading ones) keep the chain paste-safe in the REPL.
val df = hiveContext.read.format("com.databricks.spark.csv").
  option("header", "true").
  option("inferSchema", "true").
  load("/tmp/cars.csv")

// Keep only the two columns that are exposed through the Hive table.
val selectedData = df.select("year", "model")

// Write the projection as ORC files under /tmp/newcars. "header" is a CSV option
// and means nothing to the ORC writer, so it is not passed here.
// NOTE(review): with the default save mode this presumably fails if /tmp/newcars
// already exists - confirm before re-running.
selectedData.write.format("orc").save("/tmp/newcars")
// Permission issue: Hive runs as user "hive", which has no write access to the output directory:
// org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:java.security.AccessControlException: Permission denied: user=hive, access=WRITE, inode="/tmp/newcars":hdfs:hdfs:drwxr-xr-x
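// Resolved by updating the permissions on the /tmp/newcars_orc_cust17 directory.
// NOTE(review): the code in this file writes to /tmp/newcars - confirm which directory actually needs the change.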
// Register the ORC files under /tmp/newcars as an external Hive table.
// The location must be wrapped in plain ASCII single quotes; typographic quotes
// are rejected by the HiveQL parser.
// NOTE(review): the CSV is loaded with inferSchema=true, so `year` is probably
// written to ORC as an integer while this DDL declares it as string - confirm
// that Hive reads the column as intended.
hiveContext.sql(
  "create external table newcars_orc_ext_cust17(year string,model string) " +
    "stored as orc location '/tmp/newcars'")

// List the tables to confirm that the new one was registered.
hiveContext.sql("show tables").collect().foreach(println)

// Read the rows back through the table that was just created.
hiveContext.sql("select * from newcars_orc_ext_cust17").collect().foreach(println)
// Time: 2024-10-05 11:54:00