Logo coherent WaveBurst  
Library Reference Guide
Logo
cwb_condor_check.C
Go to the documentation of this file.
1 /*
2 # Copyright (C) 2019 Gabriele Vedovato
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17 
18 
19 // Check which jobs have finished (by inspecting the history saved in each output ROOT file); used by the cwb_condor command
20 
21 {
22  #include <vector>
23 
25 
26  TB.checkFile(gSystem->Getenv("CWB_ROOTLOGON_FILE"));
27  TB.checkFile(gSystem->Getenv("CWB_PARAMETERS_FILE"));
28  TB.checkFile(gSystem->Getenv("CWB_UPARAMETERS_FILE"));
29 
30 /*
31  vector<slag> slagList;
32  vector<slag> rslagList;
33 
34  vector<TString> ifos(nIFO);
35  for(int n=0;n<nIFO;n++) ifos[n]=ifo[n];
36 
37  char* slagFile = new char[256];
38  sprintf(slagFile,"%s/%s.slag",dump_dir,data_label);
39 
40  vector<waveSegment> cat1List=TB.readSegList(nDQF, DQF, CWB_CAT1);
41  int slagSegs=TB.getSlagJobList(cat1List, segLen).size();
42 
43  slagList=TB.getSlagList(nIFO, slagSize, slagSegs, slagOff, slagMin, slagMax, slagSite);
44  rslagList=TB.getSlagList( slagList, ifos, segLen, segMLS, segEdge, nDQF, DQF, CWB_CAT1);
45  rslagList=TB.getSlagList(rslagList, ifos, segLen, segTHR, segEdge, nDQF, DQF, CWB_CAT2);
46 
47  bool* bslag = new bool[slagList.size()];
48  for(int i=0;i<slagList.size();i++) bslag[i]=false;
49  int* jobStatus = new int[slagList.size()];
50  for(int i=0;i<slagList.size();i++) jobStatus[i]=0;
51  for(int i=0;i<rslagList.size();i++) {
52 // printf("%14d %14d", rslagList[i].jobId, rslagList[i].slagId[0]);
53 // for (int n=0; n<nIFO; n++) printf("%14d",rslagList[i].segId[n]);
54 // printf("\n");
55  bslag[rslagList[i].jobId]=true;
56  jobStatus[rslagList[i].jobId]=1;
57  }
58 */
59 
60  if(nfactor<=0) nfactor=1; // fix nfactor when nfactor is not defined
61  // get the number of job submit by condor
62  char full_condor_dir[1024];
63  sprintf(full_condor_dir,"%s/%s",work_dir,condor_dir);
64  char condor_dag_file[1024];
65  sprintf(condor_dag_file,"%s/%s%s.dag",full_condor_dir,data_label,"");
66  Long_t id,size=0,flags,mt;
67  int estat = gSystem->GetPathInfo(condor_dag_file,&id,&size,&flags,&mt);
68  vector<int> jobList;
69  int* jobStatus = NULL;
70  int ncondor_jobs = 0;
71  if (estat==0) {
72  jobList=TB.getCondorJobList(full_condor_dir, data_label);
73  ncondor_jobs = jobList.size();
74  jobStatus = new int[ncondor_jobs+1];
75  for(int i=0;i<=ncondor_jobs;i++) jobStatus[i]=1;
76  } else {
77  cout << "cwb_condor_check: condor dag file not exist, exit" << endl;
78  exit(1);
79  }
80 
81  cout << "Starting reading output directory ..." << endl;
82  vector<TString> fileList = TB.getFileListFromDir(output_dir,".root","","wave_");
83  for(int n=0;n<fileList.size();n++) {
84  //cout << n << " " << fileList[n].Data()<< endl;
85 
86  if (n%1000==0) cout << "cwb_condor check - " << n << "/" << fileList.size() << " files" << endl;
87 
88  TFile ifile(fileList[n]);
89  if(!ifile.IsOpen()) {cout << "Failed to open " << fileList[n].Data() << endl;exit(-1);}
90 
91  CWB::History* ihistory = (CWB::History*)ifile.Get("history");
92  if(ihistory==NULL) ihistory = (CWB::History*)ifile.Get("CWB::History"); // for back compatibility
93  if(ihistory==NULL) { cout << "Error : history is not present!!!" << endl;exit(1); }
94 
95  int log_size = ihistory->GetLogSize((char*)"FULL");
96  TString log = ihistory->GetLog((char*)"FULL",log_size-1);
97 
98  int jobId = TB.getJobId(fileList[n]); // Get JOB ID
99  //cout << jobId << " " << fileList[n].Data() << endl;
100  if(jobId>ncondor_jobs) continue;
101  if(ifile.IsZombie()) {
102  jobStatus[jobId]=2;
103  } else {
104  // Check if "STOP JOB" is in the log history
105  if(log=="STOP JOB") jobStatus[jobId]=3;
106  }
107  }
108 
109  int njobCondor=0;
110  int njobZombie=0;
112  for(int i=1;i<=ncondor_jobs;i++) {
113  if(jobStatus[i]==1) njobCondor++;
114  if(jobStatus[i]==2) njobZombie++;
115  if(jobStatus[i]==3) njobProcessed++;
116  }
117 // cout << "njobZombie : " << njoZombie << endl;
118  cout << endl;
119  cout << "Number of Jobs : " << nfactor*ncondor_jobs << endl;
120  cout << "Number of Processed Jobs : " << njobProcessed << endl;
121  cout << "Number of Jobs to be Processed : " << njobCondor << endl;
122  cout << endl;
123 
124  delete [] jobStatus;
125 
126  exit(0);
127 }
int njobProcessed
exit(1)
static vector< TString > getFileListFromDir(TString dir_name, TString endString="", TString beginString="", TString containString="", bool fast=false)
Definition: Toolbox.cc:5108
char condor_dag_file[1024]
int n
Definition: cwb_net.C:28
TString("c")
char full_condor_dir[1024]
CWB::Toolbox TB
Long_t flags
Long_t size
i drho i
static bool checkFile(TString fName, bool question=false, TString message="")
Definition: Toolbox.cc:4670
char data_label[512]
Definition: test_config1.C:160
int ncondor_jobs
vector< int > jobList
char * GetLog(char *Stage, int index)
Definition: History.cc:304
int GetLogSize(char *Stage)
Definition: History.cc:298
bool log
Definition: WaveMDC.C:41
TFile * ifile
static int getJobId(TString file, TString fext="root")
Definition: Toolbox.cc:6697
int njobZombie
static vector< int > getCondorJobList(TString condor_dir, TString label)
Definition: Toolbox.cc:1398
cout<< "Starting reading output directory ..."<< endl;vector< TString > fileList
int estat
int nfactor
Definition: test_config1.C:83
Long_t mt
int jobId
Long_t id
char condor_dir[512]
Definition: test_config1.C:148
char work_dir[512]
Definition: test_config1.C:143
sprintf(full_condor_dir,"%s/%s", work_dir, condor_dir)
char output_dir[512]
Definition: test_config1.C:146
CWB::History * ihistory
int * jobStatus
int njobCondor