diff --git a/common/src/main/java/net/qihoo/hbox/api/HboxConstants.java b/common/src/main/java/net/qihoo/hbox/api/HboxConstants.java index fb0fe2e..98d18ce 100644 --- a/common/src/main/java/net/qihoo/hbox/api/HboxConstants.java +++ b/common/src/main/java/net/qihoo/hbox/api/HboxConstants.java @@ -106,6 +106,8 @@ enum Environment { HBOX_FILES_LOCATION("HBOX_FILES_LOCATION"), + HBOX_ARCHIVE_FILES_LOCATION("HBOX_ARCHIVE_FILES_LOCATION"), + HBOX_LIBJARS_LOCATION("HBOX_LIBJARS_LOCATION"), APP_JAR_LOCATION("APP_JAR_LOCATION"), diff --git a/core/src/main/java/net/qihoo/hbox/AM/ApplicationMaster.java b/core/src/main/java/net/qihoo/hbox/AM/ApplicationMaster.java index 240f818..79a8230 100644 --- a/core/src/main/java/net/qihoo/hbox/AM/ApplicationMaster.java +++ b/core/src/main/java/net/qihoo/hbox/AM/ApplicationMaster.java @@ -89,6 +89,8 @@ public class ApplicationMaster extends CompositeService { private Path appConfRemoteLocation; // location of files on HDFS private String appFilesRemoteLocation; + // location of archive files on HDFS + private String appArchiveFilesRemoteLocation; // location of lib jars on HDFS private String appLibJarsRemoteLocation; // location of cacheFiles on HDFS @@ -322,6 +324,11 @@ public void run() { LOG.info("Application files location: " + appFilesRemoteLocation); } + if (envs.containsKey(HboxConstants.Environment.HBOX_ARCHIVE_FILES_LOCATION.toString())) { + appArchiveFilesRemoteLocation = envs.get(HboxConstants.Environment.HBOX_ARCHIVE_FILES_LOCATION.toString()); + LOG.info("Application archive files location: " + appArchiveFilesRemoteLocation); + } + if (envs.containsKey(HboxConstants.Environment.HBOX_LIBJARS_LOCATION.toString())) { appLibJarsRemoteLocation = envs.get(HboxConstants.Environment.HBOX_LIBJARS_LOCATION.toString()); LOG.info("Application lib Jars location: " + appLibJarsRemoteLocation); @@ -1365,6 +1372,40 @@ private void buildContainerLocalResource() { } } + if (appArchiveFilesRemoteLocation != null) { + String[] archiveFiles = StringUtils.split(appArchiveFilesRemoteLocation, ","); + for (String archiveFile : archiveFiles) { + Path pathRemote; + String aliasName; + if (archiveFile.contains("#")) { + String[] paths = StringUtils.split(archiveFile, "#"); + if (paths.length != 2) { + throw new RuntimeException("Error cacheArchive path format " + appArchiveFilesRemoteLocation); + } + pathRemote = new Path(paths[0]); + aliasName = paths[1]; + } else { + pathRemote = new Path(archiveFile); + aliasName = pathRemote.getName(); + } + URI pathRemoteUri = pathRemote.toUri(); + if (Boolean.parseBoolean(conf.get(HboxConfiguration.HBOX_APPEND_DEFAULTFS_ENABLE, String.valueOf(HboxConfiguration.DEFAULT_HBOX_APPEND_DEFAULTFS_ENABLE)))) { + if (pathRemoteUri.getScheme() == null || pathRemoteUri.getHost() == null) { + pathRemote = new Path(defaultUri.toString(), pathRemote.toString()); + } + } + LOG.info("archive file remote path is " + pathRemote + " and alias name is " + aliasName); + containerLocalResource.put(aliasName, + Utilities.createApplicationResource(pathRemote.getFileSystem(conf), + pathRemote, + LocalResourceType.ARCHIVE, + LocalResourceVisibility.APPLICATION)); + if (hboxAppType.equals("MPI") || hboxAppType.equals("TENSORNET") || hboxAppType.equals("HOROVOD")) { + reLinkFiles.append(aliasName).append(","); + } + } + } + if (appLibJarsRemoteLocation != null) { String[] tfFiles = StringUtils.split(appLibJarsRemoteLocation, ","); for (String file : tfFiles) { @@ -1491,6 +1532,10 @@ private Map buildContainerEnv(String role) { } else { containerEnv.put("CLASSPATH", System.getenv("CLASSPATH") + ":" + libJarsClassPath); } + + // for libhdfs.so + containerEnv.computeIfPresent("CLASSPATH", (k, v) -> v + ":$(hadoop classpath --glob)"); + containerEnv.put(HboxConstants.Environment.APP_ATTEMPTID.toString(), applicationAttemptID.toString()); containerEnv.put(HboxConstants.Environment.APP_ID.toString(), applicationAttemptID.getApplicationId().toString()); @@ -1624,8 +1669,8 @@ private void launchMpiExec() throws IOException { // MPI related options // bind-to none option envLists.add("OMPI_MCA_hwloc_base_binding_policy=none"); - // -mca plm_rsh_agent /bin/echo - envLists.add("OMPI_MCA_plm_rsh_agent=/bin/echo"); + // -mca plm_rsh_agent /bin/true + envLists.add("OMPI_MCA_plm_rsh_agent=/bin/true"); // -mca plm_base_verbose 1 envLists.add("OMPI_MCA_plm_base_verbose=1"); // --oversubscribe @@ -1699,7 +1744,7 @@ public void run() { * @return */ private void processMpiExecOutput(String mpiExecOutput) { - if (mpiExecOutput.startsWith("command") || mpiExecOutput.contains("