Logo coherent WaveBurst  
Library Reference Guide
Logo
cwb_condor_rescue.C
Go to the documentation of this file.
1 /*
2 # Copyright (C) 2019 Gabriele Vedovato
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17 
18 
19 // used by the cwb_condor_rescue
20 
21 {
22  #include <vector>
23 
25 
26  TB.checkFile(gSystem->Getenv("CWB_ROOTLOGON_FILE"));
27  TB.checkFile(gSystem->Getenv("CWB_PARAMETERS_FILE"));
28  TB.checkFile(gSystem->Getenv("CWB_UPARAMETERS_FILE"));
29 
30  char full_condor_dir[1024];
31  sprintf(full_condor_dir,"%s/%s",work_dir,condor_dir);
32 
33  vector<int> jobList=TB.getCondorJobList(full_condor_dir, data_label);
34 
35  int max_jobs = jobList.size();
36 
37  int jobStart[max_jobs+1];
38  int jobStop[max_jobs+1];
39 
40  bool jobIdStatus[max_jobs+1];
41  for (int i=0;i<max_jobs+1;i++) jobIdStatus[i]=false;
42 
43  char tag[256];sprintf(tag,"%s.dag.rescue.",data_label);
44  vector<TString> fileList = TB.getFileListFromDir(condor_dir, tag, "", "_wave");
45  int iversion=0;
46  for(int i=0;i<fileList.size();i++) {
47  //cout << i << " " << fileList[i].Data() << endl;
48  TObjArray* token = TString(fileList[i]).Tokenize(TString("."));
49  TObjString* srescueID = (TObjString*)token->At(token->GetEntries()-1);
50  if(srescueID->GetString().IsDigit()) {
51  cout << i << " " << fileList[i].Data() << endl;
52  int rescueID = srescueID->GetString().Atoi();
53  if(iversion<rescueID) iversion=rescueID;
54  }
55  }
56  iversion++;
57 
58  char ofile[1024];
59  sprintf(ofile,"%s/%s.dag.rescue.%d",condor_dir,data_label,iversion);
60 
61  // Check if file exist
62  Long_t id,size,flags,mt;
63  int estat = gSystem->GetPathInfo(ofile,&id,&size,&flags,&mt);
64  if (estat==0) {
65  char answer[256];
66  strcpy(answer,"");
67  do {
68  cout << "File \"" << ofile << "\" already exist" << endl;
69  cout << "Do you want to overwrite the file ? (y/n) ";
70  cin >> answer;
71  cout << endl << endl;
72  } while ((strcmp(answer,"y")!=0)&&(strcmp(answer,"n")!=0));
73  if (strcmp(answer,"n")==0) {
74  exit(0);
75  }
76  }
77 
78  int nrescue=0;
79  char ifile_name[1024];
80  for (int i=1;i<=max_jobs;i++) {
81  sprintf(ifile_name,"%s/%s/%d_%s.err",work_dir,log_dir,jobList[i],data_label);
82  //cout << ifile_name << endl;
83  Long_t id,size,flags,mt;
84  int estat = gSystem->GetPathInfo(ifile_name,&id,&size,&flags,&mt);
85  if (estat==0) {
86  if (size>0) cout << ifile_name << endl;
87  //cout << size << endl;
88  if (size>0) {jobIdStatus[i]=true;nrescue++;}
89  }
90  }
91  if(nrescue==0) {
92  cout << "Unfinished Error Jobs : " << 0 << "/" << max_jobs << endl;
93  exit(0);
94  }
95  cout << endl;
96  cout << "New Rescue File " << endl;
97  cout << ofile << endl;
98 
99  char full_condor_dir[1024];
100  sprintf(full_condor_dir,"%s/%s",work_dir,condor_dir);
101 
102  ofstream out;
103  out.open(ofile,ios::out);
104  int cnt = 0;
105  for (int i=0;i<=max_jobs;i++) {
106  if (jobIdStatus[i]) {
107  cnt++;
108  char ostring[256];
109  int jobID=jobList[i];
110  sprintf(ostring,"JOB A%i %s/%s.sub",jobID,full_condor_dir,data_label);
111  out << ostring << endl;
112  sprintf(ostring,"VARS A%i PID=\"%i\"",jobID,jobID);
113  out << ostring << endl;
114  sprintf(ostring,"RETRY A%i 3000",jobID);
115  out << ostring << endl;
116  }
117  }
118  out.close();
119 
120  cout << "Unfinished Error Jobs : " << cnt << "/" << max_jobs << endl;
121  cout << endl;
122  cout << "To submit condor rescued jobs type :" << endl;
123  cout << "cd " << condor_dir << endl;
124  sprintf(ofile,"%s.dag.rescue.%d",data_label,iversion);
125  cout << "condor_submit_dag " << ofile << endl;
126  cout << endl;
127 
128  exit(0);
129 }
char ofile[1024]
static vector< TString > getFileListFromDir(TString dir_name, TString endString="", TString beginString="", TString containString="", bool fast=false)
Definition: Toolbox.cc:5108
TString("c")
vector< int > jobList
ofstream out
Definition: cwb_merge.C:214
Long_t size
CWB::Toolbox TB
int estat
i drho i
static bool checkFile(TString fName, bool question=false, TString message="")
Definition: Toolbox.cc:4670
int jobID
Definition: cwb_net.C:195
Long_t id
char data_label[512]
Definition: test_config1.C:160
int max_jobs
int jobStop[max_jobs+1]
TObjArray * token
char ifile_name[1024]
char log_dir[512]
Definition: test_config1.C:151
sprintf(full_condor_dir,"%s/%s", work_dir, condor_dir)
char tag[256]
Definition: cwb_merge.C:92
Long_t flags
int jobStart[max_jobs+1]
char answer[256]
static vector< int > getCondorJobList(TString condor_dir, TString label)
Definition: Toolbox.cc:1398
cout<< "Starting reading output directory ..."<< endl;vector< TString > fileList
iversion
strcpy(RunLabel, RUN_LABEL)
int cnt
int nrescue
char condor_dir[512]
Definition: test_config1.C:148
Long_t mt
char work_dir[512]
Definition: test_config1.C:143
char full_condor_dir[1024]
bool jobIdStatus[max_jobs+1]
exit(0)