package com.dataguise.test;

import java.io.IOException;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
public class Dataframetest {

    public static void main(String[] args) throws IOException, InterruptedException {
        SparkSession sess = SparkSession.builder()
                .appName("dataframetest")
                .master("local[*]")
                .getOrCreate();

        // Placeholders: substitute the real Spark and Hadoop configuration keys/values here.
        sess.conf().set("<spark-conf-key>", "<spark-conf-value>");
        sess.sparkContext().hadoopConfiguration().set("<hadoop-conf-key>", "<hadoop-conf-value>");

        Gson gson = new GsonBuilder().setPrettyPrinting().create();

        // Comma-separated list of input paths, each read as ORC.
        String inputPaths = "abfss://folder1/testing.orc";
        String[] inputFiles = inputPaths.split(",");

        Dataset<Row> orcRead = sess.read().format("orc").load(inputFiles)
                .withColumn("dg_filename", functions.input_file_name())
                .withColumn("dg_metadata", functions.lit(null).cast(DataTypes.StringType));
        orcRead.show(1000, false);
    }
}
With this program we are able to submit the job on the cluster and it completes successfully. However, I am not able to get the job status or the job group ID from inside the code, and I need the job status programmatically for internal use.
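Since the DataFrame work runs in the driver's own JVM, my understanding is that the status tracker should be able to report it once the jobs are tagged with a group ID. Below is a minimal sketch of what I am hoping for, not working code; the group name "dg-scan", the description, and the single count() action are placeholders I made up:

package com.dataguise.test;

import org.apache.spark.SparkJobInfo;
import org.apache.spark.SparkStageInfo;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.JavaSparkStatusTracker;
import org.apache.spark.sql.SparkSession;

public class StatusTrackerSketch {

    public static void main(String[] args) {
        SparkSession sess = SparkSession.builder()
                .appName("statustracker-sketch")
                .master("local[*]")
                .getOrCreate();
        JavaSparkContext jsc = new JavaSparkContext(sess.sparkContext());

        // Tag every job triggered below with a group ID so it can be looked up afterwards.
        jsc.setJobGroup("dg-scan", "dataframe scan for status tracking", false);
        sess.read().format("orc").load("abfss://folder1/testing.orc").count(); // placeholder action

        // In a real application this lookup would run on a separate thread while the action is in flight.
        JavaSparkStatusTracker tracker = jsc.statusTracker();
        for (int jobId : tracker.getJobIdsForGroup("dg-scan")) {
            SparkJobInfo job = tracker.getJobInfo(jobId);
            if (job == null) continue; // job info may already have been cleaned up
            System.out.println("job " + jobId + ": " + job.status());
            for (int stageId : job.stageIds()) {
                SparkStageInfo stage = tracker.getStageInfo(stageId);
                if (stage != null) {
                    System.out.println("  stage " + stageId + ": "
                            + stage.numCompletedTasks() + "/" + stage.numTasks() + " tasks complete");
                }
            }
        }
        sess.stop();
    }
}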
In our real setup the application is submitted through SparkLauncher rather than run with local[*], so getting the state from the launcher side would also work for us.
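A rough sketch of that launcher-side idea, again with made-up placeholders (the jar path, main class, master, and one-hour timeout are all assumptions, not values from our submission code):

package com.dataguise.test;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public class LauncherStatusSketch {

    public static void main(String[] args) throws Exception {
        CountDownLatch finished = new CountDownLatch(1);

        SparkAppHandle handle = new SparkLauncher()
                .setAppResource("/path/to/dataframetest.jar") // placeholder
                .setMainClass("com.dataguise.test.Dataframetest")
                .setMaster("yarn")                            // placeholder
                .startApplication(new SparkAppHandle.Listener() {
                    @Override
                    public void stateChanged(SparkAppHandle h) {
                        // Fires on every state transition: CONNECTED, SUBMITTED, RUNNING, FINISHED, ...
                        System.out.println("state: " + h.getState() + ", appId: " + h.getAppId());
                        if (h.getState().isFinal()) {
                            finished.countDown();
                        }
                    }

                    @Override
                    public void infoChanged(SparkAppHandle h) {
                        // Fires when application info (e.g. the application ID) becomes available.
                    }
                });

        // Wait for the application to reach a terminal state (bounded so we never hang forever).
        finished.await(1, TimeUnit.HOURS);
        System.out.println("final state: " + handle.getState());
    }
}

Anyone, please help me with this.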