// Copyright (C) 2011-2012 EDF R&D // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // See http://www.salome-platform.org/ or email : webmaster.salome@opencascade.com // // Authors : Guillaume Boulant (EDF) - 01/03/2011 #ifdef WIN32 #include #include #else #include #endif #include "MeshJobManager_i.hxx" #include #include CORBA_SERVER_HEADER(SALOME_Exception) #include "Basics_Utils.hxx" // For standard logging #undef LOG #include "SALOME_KernelServices.hxx" // For CORBA logging #undef LOG #define LOG STDLOG // // ==================================================================== // General purpose helper functions (to put elsewhere at least) // ==================================================================== // /*! * This function must be used to associate a datetime tag to a job */ #ifndef WIN32 static long timetag() { timeval tv; gettimeofday(&tv,0); long tag = tv.tv_usec + tv.tv_sec*1000000; return tag; } #endif /*! * This function returns true if the string text starts with the string * token. 
*/ static bool myStartsWith(const std::string& text,const std::string& token){ if(text.length() < token.length()) return false; return (text.compare(0, token.length(), token) == 0); } // // ==================================================================== // Constructor/Destructor // ==================================================================== // MeshJobManager_i::MeshJobManager_i(CORBA::ORB_ptr orb, PortableServer::POA_ptr poa, PortableServer::ObjectId * contId, const char *instanceName, const char *interfaceName) : Engines_Component_i(orb, poa, contId, instanceName, interfaceName) { LOG("Activating MESHJOB::MeshJobManager object"); _thisObj = this ; _id = _poa->activate_object(_thisObj); _salomeLauncher = KERNEL::getSalomeLauncher(); if(CORBA::is_nil(_salomeLauncher)){ LOG("The SALOME launcher can't be reached ==> STOP"); throw KERNEL::createSalomeException("SALOME launcher can't be reached"); } _resourcesManager = KERNEL::getResourcesManager(); if(CORBA::is_nil(_resourcesManager)){ LOG("The SALOME resource manager can't be reached ==> STOP"); throw KERNEL::createSalomeException("The SALOME resource manager can't be reached"); } } MeshJobManager_i::~MeshJobManager_i() { LOG("MeshJobManager_i::~MeshJobManager_i()"); } // // ==================================================================== // Helper functions to deals with the local and remote file systems // ==================================================================== // #include // to get the file streams #ifdef WNT #include // to get _splitpath #include // to get _mkdir #else #include // to get basename #include // to get mkdir #include // to get mkdir options #endif #include // to get system and getenv static std::string OUTPUTFILE("output.med"); static std::string DATAFILE("data.txt"); static std::string SCRIPTFILE("padder.sh"); static std::string SEPARATOR(" "); static std::string USER(getenv("USER")); static std::string LOCAL_INPUTDIR("/tmp/spadder.local.inputdir."+USER); static 
std::string LOCAL_RESULTDIR("/tmp/spadder.local.resultdir."+USER);
static std::string REMOTE_WORKDIR("/tmp/spadder.remote.workdir."+USER);

/*!
 * This function creates the padder text input file containing the
 * input data (list of filenames and groupnames) and returns the path
 * of the created file. This function is the one that knows the format
 * of the padder input file. If the input file format changes, then
 * this function (and only this one) should be updated.
 *
 * The file is written as LOCAL_INPUTDIR/DATAFILE. Its first line holds
 * the basename and the group name of listConcreteMesh[0]; the second
 * line holds the keyword "nbSteelbarMesh" followed by the number of
 * steel bar meshes.
 *
 * NOTE(review): text has been lost in this copy of the file: the
 * element type of the two std::vector parameters, and everything
 * between the start of the steel bar loop and the final return. The
 * damaged tokens are kept as found -- TODO restore from the reference
 * sources.
 * NOTE(review): listConcreteMesh[0] is read without checking that the
 * list is not empty -- presumably guaranteed by the caller, TODO confirm.
 */
const char * MeshJobManager_i::_writeDataFile(std::vector listConcreteMesh,
                                              std::vector listSteelBarMesh) {
  // Create the local input directory. The return value is not checked,
  // so an already existing directory is not reported.
#ifdef WIN32
  _mkdir(LOCAL_INPUTDIR.c_str());
#else
  mkdir(LOCAL_INPUTDIR.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
#endif

  // Make it static so that it's allocated once (constant name)
  static std::string * dataFilename = new std::string(LOCAL_INPUTDIR+"/"+DATAFILE);
  std::ofstream dataFile(dataFilename->c_str());

  // We first specify the concrete mesh data (filename and groupname)
  std::string line;
#ifdef WIN32
  char fname[ _MAX_FNAME ];
  _splitpath( listConcreteMesh[0].file_name, NULL, NULL, fname, NULL );
  char* bname = &fname[0];
#else
  char* bname = basename(listConcreteMesh[0].file_name);
#endif
  line = std::string(bname) + " " + std::string(listConcreteMesh[0].group_name);
  dataFile << line.c_str() << std::endl;
  // Note that we use here the basename because the files are supposed
  // to be copied in the REMOTE_WORKDIR for execution.

  // Then, we can specify the steelbar mesh data, starting by the
  // number of meshes
  int nbSteelBarMesh=listSteelBarMesh.size();
  line = std::string("nbSteelbarMesh") + SEPARATOR + ToString(nbSteelBarMesh);
  dataFile << line.c_str() << std::endl;
  // NOTE(review): the loop condition, the loop body and the start of the
  // return statement are missing on the next line.
  for (int i=0; ic_str();
}

/*!
 * This function creates a shell script that runs padder with the
 * specified data file, and returns the path of the created script
 * file. The config id is used to retrieve the path to the binary file
 * and other required files.
*/ const char* MeshJobManager_i::_writeScriptFile(const char * dataFileName, const char * configId) { #ifdef WIN32 _mkdir(LOCAL_INPUTDIR.c_str()); #else mkdir(LOCAL_INPUTDIR.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); #endif // Make it static so that it's allocated once (constant name) static std::string * scriptFilename = new std::string(LOCAL_INPUTDIR+"/"+SCRIPTFILE); char * binpath = _configMap[configId].binpath; char * envpath = _configMap[configId].envpath; #ifdef WIN32 char fname[ _MAX_FNAME ]; _splitpath( dataFileName, NULL, NULL, fname, NULL ); const char* bname = &fname[0]; #else const char* bname = basename(dataFileName); #endif std::ofstream script(scriptFilename->c_str()); script << "#!/bin/sh" << std::endl; script << "here=$(dirname $0)" << std::endl; script << ". " << envpath << std::endl; script << binpath << " $here/" << bname << std::endl; // Note that we use the basename of the datafile because all data // files are supposed to have been copied in the REMOTE_WORKDIR. script.close(); return scriptFilename->c_str(); } // // ==================================================================== // Functions to initialize and supervise the mesh computation job // ==================================================================== // bool MeshJobManager_i::configure(const char *configId, const MESHJOB::ConfigParameter & configParameter) { beginService("MeshJobManager_i::configure"); _configMap[configId] = configParameter; LOG("Adding configuration for " << configId); LOG("- binpath = " << _configMap[configId].binpath); LOG("- envpath = " << _configMap[configId].envpath); endService("MeshJobManager_i::configure"); return true; } long MeshJobManager_i::JOBID_UNDEFINED = -1; /*! 
Initialize a smesh computation job and return the job identifier */ CORBA::Long MeshJobManager_i::initialize(const MESHJOB::MeshJobParameterList & meshJobParameterList, const char * configId) { beginService("MeshJobManager_i::initialize"); std::cerr << "##################################### initialize" << std::endl; std::cerr << "#####################################" << std::endl; // // We first analyse the CORBA sequence to store data in C++ vectors // std::vector listConcreteMesh; std::vector listSteelBarMesh; for(CORBA::ULong i=0; iremote_workdir = (REMOTE_WORKDIR + "." + ToString(jobDatetimeTag)).c_str(); // // Then, we have to create the padder input data file. This input // data is a text file containing the list of file names and group // names. // const char * dataFilename = this->_writeDataFile(listConcreteMesh, listSteelBarMesh); LOG("dataFilename = " << dataFilename); const char * scriptFilename = this->_writeScriptFile(dataFilename, configId); LOG("scriptFilename = " << scriptFilename); // // Then, the following instructions consists in preparing the job // parameters to request the SALOME launcher for creating a new // job. // Engines::JobParameters_var jobParameters = new Engines::JobParameters; jobParameters->job_type = CORBA::string_dup("command"); // CAUTION: the job_file must be a single filename specifying a // self-consistent script to be executed without any argument on the // remote host. jobParameters->job_file = CORBA::string_dup(scriptFilename); // // Specification of the working spaces: // // - local_directory: can be used to specify where to find the input // files on the local resource. It's optionnal if you specify the // absolute path name of input files. // // - result_directory: must be used to specify where to download the // output files on the local resources // // - work_directory: must be used to specify the remote directory // where to put all the stuff to run the job. 
Note that the job // will be executed from within this directory, i.e. a change // directory toward this working directory is done by the batch // system before running the specified job script. // jobParameters->local_directory = CORBA::string_dup(""); jobParameters->result_directory = CORBA::string_dup(jobPaths->local_resultdir); jobParameters->work_directory = CORBA::string_dup(jobPaths->remote_workdir); // We specify the input files that are required to execute the // job_file. If basenames are specified, then the files are supposed // to be located in local_directory. int nbFiles = listSteelBarMesh.size()+2; // The number of input file is: // (nb. of steelbar meshfile) // + (1 concrete meshfile) // + (1 padder input file) // = nb steelbar meshfile + 2 jobParameters->in_files.length(nbFiles); jobParameters->in_files[0] = CORBA::string_dup(listConcreteMesh[0].file_name); for (int i=0; iin_files[1+i] = CORBA::string_dup(listSteelBarMesh[i].file_name); } jobParameters->in_files[1+listSteelBarMesh.size()] = CORBA::string_dup(dataFilename); // Note that all these input files will be copied in the // REMOTE_WORKDIR on the remote host // Then, we have to specify the existance of an output // filenames. The path is supposed to be a path on the remote // resource, i.e. where the job is executed. 
jobParameters->out_files.length(1); std::string outputfile_name = std::string(jobPaths->remote_workdir)+"/"+OUTPUTFILE; jobParameters->out_files[0] = CORBA::string_dup(outputfile_name.c_str()); // CAUTION: the maximum duration has to be set with a format like "hh:mm" jobParameters->maximum_duration = CORBA::string_dup("01:00"); jobParameters->queue = CORBA::string_dup(""); // Setting resource and additionnal properties (if needed) // The resource parameters can be initiated from scratch, for // example by specifying the values in hard coding: // >>> //jobParameters->resource_required.name = CORBA::string_dup("localhost"); //jobParameters->resource_required.hostname = CORBA::string_dup("localhost"); //jobParameters->resource_required.mem_mb = 1024 * 10; //jobParameters->resource_required.nb_proc = 1; // <<< // But it's better to initiate these parameters from a resource // definition known by the resource manager. This ensures that the // resource will be available: //const char * resourceName = "localhost"; //const char * resourceName = "boulant@claui2p1"; //const char * resourceName = "nepal@nepal"; const char * resourceName = _configMap[configId].resname; Engines::ResourceDefinition * resourceDefinition = _resourcesManager->GetResourceDefinition(resourceName); // CAUTION: This resource should have been defined in the // CatalogResource.xml associated to the SALOME application. // // Then, the values can be used to initiate the resource parameters // of the job: jobParameters->resource_required.name = CORBA::string_dup(resourceDefinition->name.in()); // CAUTION: the additionnal two following parameters MUST be // specified explicitly, because they are not provided by the // resource definition: jobParameters->resource_required.mem_mb = resourceDefinition->mem_mb; jobParameters->resource_required.nb_proc = resourceDefinition->nb_proc_per_node; // CAUTION: the parameter mem_mb specifies the maximum memory value // that could be allocated for executing the job. 
This takes into // account not only the data that could be loaded by the batch // process but also the linked dynamic library. // // A possible problem, for exemple in the case where you use the ssh // emulation of a batch system, is to get an error message as below // when libBatch try to run the ssh command: // // ## /usr/bin/ssh: error while loading shared libraries: libcrypto.so.0.9.8: failed // ## to map segment from shared object: Cannot allocate memory // // In this exemple, the mem_mb was set to 1MB, value that is not // sufficient to load the dynamic libraries linked to the ssh // executable (libcrypto.so in the error message). // // So, even in the case of a simple test shell script, you should // set this value at least to a standard threshold as 500MB int jobId = JOBID_UNDEFINED; try { std::cerr << "#####################################" << std::endl; std::cerr << "#####################################" << std::endl; std::cerr << "jobUndef = " << JOBID_UNDEFINED << std::endl; jobId = _salomeLauncher->createJob(jobParameters); std::cerr << "#####################################" << std::endl; std::cerr << "#####################################" << std::endl; std::cerr << "#####################################" << std::endl; std::cerr << "jobId = " << jobId << std::endl; // We register the datetime tag of this job _jobDateTimeMap[jobId]=jobDatetimeTag; _jobPathsMap[jobId] = jobPaths; } catch (const SALOME::SALOME_Exception & ex) { LOG("SALOME Exception in createJob !" <launchJob(jobId); } catch (const SALOME::SALOME_Exception & ex) { LOG("SALOME Exception in launchjob !" 
<getJobState(jobId);
  // NOTE(review): the text before this point (end of the previous
  // handler and beginning of the state query function) is missing in
  // this copy of the file; the tokens are kept as found.
  }
  catch (const SALOME::SALOME_Exception & ex) {
    LOG("SALOME Exception in getJobState !");
    // The text of the SALOME exception is reported as the state.
    state = ex.details.text;
  }
  catch (const CORBA::SystemException& ex) {
    LOG("Receive SALOME System Exception: " << ex);
    state="SALOME System Exception - see logs";
  }
  // NOTE(review): text lost on the next line (end of the trace, end of
  // the state query function and beginning of getResults).
  LOG("jobId="<getPaths(jobId);
  // NOTE(review): jobPaths is dereferenced without a NULL check here,
  // whereas clean() tests the result of getPaths() -- TODO confirm.
  std::string local_resultdir(jobPaths->local_resultdir);
  result->results_dirname = local_resultdir.c_str();
  try {
    // Ask the launcher to download the job results in the local
    // result directory.
    _salomeLauncher->getJobResults(jobId, local_resultdir.c_str());

    // __BUG__: to prevent from a bug of the MED driver (SALOME
    // 5.1.5), we change the basename of the output file to force the
    // complete reloading of data by the med driver.
    long jobDatetimeTag = _jobDateTimeMap[jobId];
    std::string outputFileName = "output"+ToString(jobDatetimeTag)+".med";
    // The return value of rename() is not checked.
    rename((local_resultdir+"/"+OUTPUTFILE).c_str(), (local_resultdir+"/"+outputFileName).c_str());

    result->outputmesh_filename = outputFileName.c_str();
    result->status = "OK";
  }
  catch (const SALOME::SALOME_Exception & ex) {
    // Failures are reported through the status field, not rethrown.
    LOG("SALOME Exception in getResults !");
    result->status = "SALOME Exception in getResults !";
  }
  catch (const CORBA::SystemException& ex) {
    LOG("Receive CORBA System Exception: " << ex);
    result->status = "Receive CORBA System Exception: see log";
  }
  endService("MeshJobManager_i::getResults");
  return result;
}

/*!
  Clean all data associated to this job and remove the job from the
  launch manager

  Returns false when no paths are registered for jobId.
*/
bool MeshJobManager_i::clean(CORBA::Long jobId) {
  beginService("MeshJobManager_i::clean");

  // __GBO__ WORK IN PROGRESS: we just clean the temporary local
  // directories. The remote working directories are tagged with the
  // execution datetime and then we prevent the task from conflict
  // with files of another task.
  MESHJOB::MeshJobPaths * jobPaths = this->getPaths(jobId);
  // NOTE(review): this early return leaves the function without the
  // endService() call matching the beginService() above.
  if ( jobPaths == NULL ) return false;

  // WARN: !!!!!
  // For safety reason (and prevent from bug that could erase the
  // filesystem), we cancel the operation in the case where the
  // directories to delete are not in the /tmp folder.
  std::string shell_command("rm -rf ");
  std::string inputdir(jobPaths->local_inputdir);
  std::string resultdir(jobPaths->local_resultdir);
  if ( !myStartsWith(inputdir,"/tmp/") ) {
    // NOTE(review): text lost on the next line: the rest of clean() and
    // the return type of _getResourceNames() are missing.
    LOG("WRN: The directory "< * MeshJobManager_i::_getResourceNames() {

  //
  // This part is just to control the available resources
  //
  Engines::ResourceParameters params;
  KERNEL::getLifeCycleCORBA()->preSet(params);

  // NOTE(review): resourceList and resourceDefinition are raw pointers
  // and no release is visible in this function -- presumably they should
  // be held in _var types, TODO confirm.
  Engines::ResourceList * resourceList = _resourcesManager->GetFittingResources(params);
  Engines::ResourceDefinition * resourceDefinition = NULL;
  LOG("### resource list:");
  // NOTE(review): element type of the vector lost on the next line. The
  // vector is allocated with new and returned; the caller presumably
  // owns it -- TODO confirm.
  std::vector* resourceNames = new std::vector();
  if (resourceList) {
    for (int i = 0; i < resourceList->length(); i++) {
      const char* aResourceName = (*resourceList)[i];
      resourceNames->push_back(std::string(aResourceName));
      // NOTE(review): text lost inside the two traces below.
      LOG("resource["<GetResourceDefinition(aResourceName);
      LOG("protocol["<protocol);
    }
  }

  // Note: a ResourceDefinition is used to create a batch configuration
  // in the Launcher. This operation is done at Launcher startup from
  // the configuration file CatalogResources.xml provided by the
  // SALOME application.
  // In the code instructions, you just have to choose a resource
  // configuration by its name and then define the ResourceParameters
  // that specify additional properties for a specific job submission
  // (use the attribute resource_required of the JobParameters).

  return resourceNames;
}

//
// ==========================================================================
// Factory services
// ==========================================================================
//
extern "C"
{
  /*!
   * Factory entry point with C linkage: creates a MeshJobManager_i with
   * the given arguments and returns the identifier given by its
   * getId() method. The created object is not deleted here.
   */
  PortableServer::ObjectId * MeshJobManagerEngine_factory( CORBA::ORB_ptr orb,
                                                           PortableServer::POA_ptr poa,
                                                           PortableServer::ObjectId * contId,
                                                           const char *instanceName,
                                                           const char *interfaceName)
  {
    LOG("PortableServer::ObjectId * MeshJobManagerEngine_factory()");
    MeshJobManager_i * myEngine = new MeshJobManager_i(orb, poa, contId, instanceName, interfaceName);
    return myEngine->getId() ;
  }
}