使用Java代码将Hive SQL提交到YARN上运行
时间: 2024-02-21 15:56:57 浏览: 104
你可以使用Java代码提交Hive SQL到YARN上运行,以下是一个示例:
```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.client.api.YarnClientApplication;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.util.Apps;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
public class SubmitHiveSqlToYarn {

    // Path of the Hive script to localize into the AM container.
    // NOTE(review): a local filesystem path is only visible to the NodeManagers
    // if it lives on a shared filesystem; in practice upload the script to HDFS
    // first and point this constant at the HDFS path.
    private static final String SCRIPT_PATH = "path/to/hive_script.hql";

    // Name the script will have inside the container's working directory.
    private static final String SCRIPT_NAME = "hive_script.hql";

    /**
     * Submits a YARN application whose AM container runs
     * {@code hive -f hive_script.hql}.
     *
     * <p>Fixes over the original example:
     * <ul>
     *   <li>{@code command} was declared twice in {@code main} (compile error);
     *       a single declaration remains.</li>
     *   <li>Two applications were created but only the second was submitted;
     *       now exactly one application is created and submitted.</li>
     *   <li>{@code setApplicationName} was called on {@code YarnClientApplication}
     *       (no such method); it is now set on the
     *       {@link ApplicationSubmissionContext}.</li>
     *   <li>The {@code FileInputStream} opened for the script was never read and
     *       never closed; the helper now works from a Hadoop {@link Path}.</li>
     *   <li>Dead code removed: the unused distributed-shell {@code mainClass}/
     *       {@code mainArgs} and the env vars built from {@code toString()} of
     *       maps/arrays, which carried no usable information.</li>
     *   <li>{@code yarnClient.stop()} is called in a {@code finally} block.</li>
     * </ul>
     *
     * @param args unused
     * @throws Exception on any client, filesystem, or submission failure
     */
    public static void main(String[] args) throws Exception {
        // 1. Initialize the YARN configuration (picks up *-site.xml on the classpath).
        Configuration conf = new YarnConfiguration();

        // 2. Create and start the YARN client.
        YarnClient yarnClient = YarnClient.createYarnClient();
        yarnClient.init(conf);
        yarnClient.start();
        try {
            // 3. Create exactly one application and fill in its submission context.
            YarnClientApplication app = yarnClient.createApplication();
            ApplicationSubmissionContext appContext = app.getApplicationSubmissionContext();
            appContext.setApplicationName("Hive SQL on YARN");

            // 4. Localize the Hive script so the container sees it as SCRIPT_NAME.
            Map<String, LocalResource> localResources = new HashMap<>();
            Path scriptPath = new Path(SCRIPT_PATH);
            localResources.put(SCRIPT_NAME, createLocalResource(scriptPath, conf));

            // 5. Environment for the container.
            Map<String, String> env = new HashMap<>();
            Apps.addToEnvironment(env, "CLASSPATH", "./*");
            Apps.addToEnvironment(env, "HADOOP_USER_NAME",
                    UserGroupInformation.getCurrentUser().getShortUserName());
            Apps.addToEnvironment(env, "HADOOP_HOME", System.getenv("HADOOP_HOME"));
            Apps.addToEnvironment(env, "HADOOP_CONF_DIR", System.getenv("HADOOP_CONF_DIR"));

            // 6. Launch command: run the localized script with `hive -f`.
            String command = "hive -f " + SCRIPT_NAME;

            // 7. AM container spec: local resources, environment, and the command.
            ContainerLaunchContext amContainer = ContainerLaunchContext.newInstance(
                    localResources, env, Collections.singletonList(command),
                    null, null, null);
            appContext.setAMContainerSpec(amContainer);

            // 8. Resources for the AM container: 1024 MB memory, 1 vcore.
            appContext.setResource(Resource.newInstance(1024, 1));

            // 9. Submit and report the application id.
            ApplicationId appId = appContext.getApplicationId();
            yarnClient.submitApplication(appContext);
            System.out.println("Submitted application " + appId);
        } finally {
            yarnClient.stop();
        }
    }

    /**
     * Builds a FILE-type, APPLICATION-visibility {@link LocalResource} for the
     * given path, reading its size and modification time from the path's own
     * {@code FileSystem} (works for both local and HDFS paths).
     *
     * @param path location of the file to localize
     * @param conf Hadoop configuration used to resolve the filesystem
     * @return a fully-populated LocalResource record
     * @throws IOException if the file's status cannot be read
     */
    private static LocalResource createLocalResource(Path path, Configuration conf)
            throws IOException {
        FileStatus status = path.getFileSystem(conf).getFileStatus(path);
        LocalResource resource = Records.newRecord(LocalResource.class);
        resource.setType(LocalResourceType.FILE);
        resource.setVisibility(LocalResourceVisibility.APPLICATION);
        resource.setSize(status.getLen());
        resource.setTimestamp(status.getModificationTime());
        resource.setResource(ConverterUtils.getYarnUrlFromURI(path.toUri()));
        return resource;
    }
}
```
上述代码中,主要步骤如下:
1. 初始化YARN配置
2. 创建YARN客户端
3. 创建YARN应用
4. 设置应用名称
5. 创建容器启动命令
6. 设置本地资源
7. 设置容器环境变量
8. 设置容器资源
9. 设置容器启动命令
10. 设置应用主类
11. 设置应用主类参数
12. 提交应用
这个示例中,我们使用了`hive -f`命令来执行Hive SQL脚本,你需要将`my_table`替换为你的表名,将`path/to/hive_script.hql`替换为你的Hive SQL脚本所在的路径,将`path/to/your/jar`替换为你的Java程序的jar包路径。
阅读全文