cwb_xgboost
This page provides a description of the cwb_xgboost command.
cwb_xgboost (no arguments): prints help.
cwb_xgboost xgb_type 'xgb_options': runs the required XGBoost functionality.
xgb_type = datasplit/tuning/training/mlabel(prediction)/report: the different XGBoost functionality types available with cwb_xgboost.
For each xgb_type, the following xgb_options can be passed while running the cwb_xgboost command.
- datasplit - xgb_options = '--mlabel merge_label --ifrac (training[ifrac]/testing[1-ifrac]) (--ulabel user_label --verbose false/true) (--cfile chunk list --wavecuts training selection cuts (wave)) (--mdccuts training selection cuts (mdc))'
- tuning - xgb_options = '--nfile wave_bkg.root/.py --sfile wave_sim.root/.py --ofile output.txt --search bbh/imbhb/blf/bhf/bld --nfrac bkg_fraction_selection --sfrac sim_fraction_selection (--config xgb_config.py --verbose false/true) --learning-rate 0.03 --max-depth 8 --min-child-weight 5.0 --gamma 1 --colsample-bytree 0.9 --subsample 0.6'
- training - xgb_options = '--nfile wave_bkg.root/.py --sfile wave_sim.root/.py --model xgb_model.dat --search bbh/imbhb/blf/bhf/bld --nfrac bkg_fraction_selection --sfrac sim_fraction_selection (--config xgb_config.py --verbose false/true --dump false/true)'
- mlabel (prediction) - xgb_options = '--model xgb_model.dat --ulabel user_label --rthr output_rhor_threshold --search bbh/imbhb/blf/bhf/bld (--fgetrhor user_getrhor --verbose false/true --config xgb_config.py)'
- report - xgb_options = '--type tuning/prediction (training not yet implemented; all training report plots are stored when --dump true is set during training) (--ulabel user_label --verbose false/true --config config.py)'
  note: the --config option is used only for prediction.
To get more help type: 'cwb_xgboost report help'
Datasplit
cwb_xgboost datasplit '--mlabel M1.V_hvetoLH.C_xgb_cut --ifrac -100 --verbose true --cfile $CWB_CONFIG/O3a/CHUNKS/BBH//Chunk_List.lst'
Tuning
cwb_xgboost tuning '--nfile nfile.py --sfile sfile.py --ofile data/output.out --nfrac 0.2 --sfrac 0.2 --search bbh --config config/user_xgboost_config_r1.py --learning-rate 0.03 --max-depth 13 --min-child-weight 10.0 --gamma 2 --colsample-bytree 1 --subsample 0.6 --balance-slope q=1 --balance-balance A=10 --caps-rho0 11'
Training
cwb_xgboost training '--nfile nfile.py --sfile sfile.py --model xgb/v1/xgb_model_name_v1.dat --nfrac 1.0 --sfrac 1.0 --search bbh --verbose true --dump true --config config/user_xgboost_config_v1.py'
Prediction/Testing
cwb_xgboost M1.V_hvetoLH.C_xgb_cut.XK_Test_Y100_TXXXX '--model /out_path/XGB/model_dir_XGB/xgb/v1/xgb_model_name_v1.dat --ulabel v1 --rthr 0 --search bbh'
Report
cwb_xgboost report '--type prediction --subtype hrho/lrho/hchirp/qaqp/roc/efreq --config report_config.py'
cwb_xgboost report '--type tuning --ulabel r2 --verbose true'
cwb_xgboost report help
# -----------------------------------------------------------------------------------------------------------
#
# Report Prediction Example:
#
# cwb_xgboost report '--type prediction --subtype type --config report_config.py'
#
# where: type = hrho, lrho, hchirp, qaqp, roc, efreq
#
# to select more than one subtype plot (e.g. roc and efreq) use:
#
# cwb_xgboost report '--type prediction --subtype roc/efreq --config report_config.py'
#
# -----------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------
# report configuration file: report_config.py
# --------------------------------------------------------------------------------
# ----------------------------------
# input root files
# ----------------------------------

# wave root file with xgb statistic (all)
wave_rhor_fname = 'merge/wave_wlabel.M1.V_hvetoLH.XGB_rthr3_v0.S_ifar.root'

# wave root file with pp-cuts statistic (roc/efreq)
wave_rho0_fname = 'merge/wave_wlabel.M1.V_hvetoLH.S_bin1a_cut.S_bin1b_cut.S_bin2_cut.root'

# mdc root file with xgb statistic (roc);
# derived from the wave file name by replacing every "wave_" with "mdc_"
mdc_rhor_fname = wave_rhor_fname.replace('wave_', 'mdc_')

# mdc root file with pp-cuts statistic (roc);
# derived from the wave file name by replacing every "wave_" with "mdc_"
mdc_rho0_fname = wave_rho0_fname.replace('wave_', 'mdc_')

# simulation entries are read from the same file as wave_rhor_fname
sim_rhor_fname = wave_rhor_fname

# background root file (hrho/lrho/hchirp/qaqp)
bkg_rhor_fname = 'path/wlabel/merge/wave_wlabel.M1.V_hvetoLH.C_xgb_cut.X_Test_F50.XGB_rthr3_v0.root'
# -----------------------------------------------------------
# roc: efficiency vs far
#
# PLOT['roc'] options:
#   plot_fname: output plot file name
#   far_inf:    far inferior limit
#   far_sup:    far superior limit
#
# NOTE(review): trials_factor (set on the rho0 curve only) is not
# described here - confirm its meaning against the tool.
# -----------------------------------------------------------
PLOT['roc'] = (
    'plot_fname=xgb/v0/roc_new_format.png'
    ' far_inf=0.001'
    ' far_sup=10'
)

# each ROC entry starts with the wave file, line color and wave cuts
ROC['GA:rhor'] = (
    'wave_fname=' + wave_rhor_fname
    + ' line_color=red'
    + ' wave_cuts=type[1]>=1&&type[1]<=4&&factor!=0'
)
ROC['GA:rho0'] = (
    'wave_fname=' + wave_rho0_fname
    + ' line_color=blue'
    + ' wave_cuts=type[1]>=1&&type[1]<=4&&factor!=0'
    + ' trials_factor=3'
)

# the matching mdc file and mdc cuts are then appended to each entry
ROC['GA:rhor'] += (
    ' mdc_fname=' + mdc_rhor_fname
    + ' mdc_cuts=type>=1&&type<=4&&factor!=0'
)
ROC['GA:rho0'] += (
    ' mdc_fname=' + mdc_rho0_fname
    + ' mdc_cuts=type>=1&&type<=4&&factor!=0'
)
# -----------------------------------------------------------
# qaqp: Qa vs Qp
#
# PLOT['qaqp'] options:
#   plot_fname: output plot file name
#   rho_name:   rho name (default = 'rho0') used to select the
#               background entries (rho_name>rho_thr)
#   rho_thr:    rho threshold (default = 8) used to select the
#               background entries (rho_name>rho_thr)
#   rho_label:  rho label (default = 'rho0') used in the legend
#   qfactor:    qfactor value (default = 0.15) used to draw the
#               dashed line Qa = qfactor/(Qp-qoffset)
#   qoffset:    qoffset value (default = 0.8) used to draw the
#               dashed line Qa = qfactor/(Qp-qoffset)
#
# NOTE(review): qa_sup, qp_sup, marker_color and marker_size are not
# described here; qa_sup/qp_sup are presumably the upper limits of the
# Qa and Qp axes - confirm against the tool.
# -----------------------------------------------------------
PLOT['qaqp'] = (
    'plot_fname=xgb/v0/qaqp_new_format.png'
    ' rho_name=rho0'
    ' rho_thr=9'
    ' rho_label=rho0'
    ' qfactor=0.15'
    ' qoffset=0.6'
    ' qa_sup=6.0'
    ' qp_sup=10.0'
)

# simulation entries: wave file plus wave selection cuts
QAQP['sim'] = (
    'wave_fname=' + wave_rhor_fname
    + ' wave_cuts=type[1]>=1&&type[1]<=4&&factor!=0'
)

# background entries: background file plus marker settings
QAQP['bkg'] = (
    'wave_fname=' + bkg_rhor_fname
    + ' marker_color=red'
    + ' marker_size=150'
)
# -----------------------------------------------------------
# efreq: efficiency vs frequency[0]
#
# PLOT['efreq'] options:
#   plot_fname: output plot file name
#   fmin:       xaxis minimum frequency
#   fmax:       xaxis maximum frequency
#   fbin:       xaxis number of bins
#   ifar:       ifar threshold
# -----------------------------------------------------------
PLOT['efreq'] = (
    'plot_fname=xgb/v0/efreq_new_format.png'
    ' fmin=20'
    ' fmax=1000'
    ' fbin=44'
    ' ifar=100'
)

# one entry per curve: input file name and curve color
EFREQ['rho_r'] = 'ifname=' + wave_rhor_fname + ' color=red'
EFREQ['rho_0'] = 'ifname=' + wave_rho0_fname + ' color=blue'
# -----------------------------------------------------------
# hrho: sim/bkg rho histogram
#
# PLOT['hrho'] options:
#   plot_fname: output plot file name
#   rho_label:  rho label (default = 'rho0') used in the legend
#
# NOTE(review): rhoid is not described here - confirm its meaning
# against the tool.
# -----------------------------------------------------------
PLOT['hrho'] = (
    'plot_fname=xgb/v0/hrho_new_format.png'
    ' rho_label=$rho_r$'
)

# one entry per histogram: input file name, rhoid and color
HRHO['sim'] = 'ifname=' + sim_rhor_fname + ' rhoid=1 color=green'
HRHO['bkg'] = 'ifname=' + bkg_rhor_fname + ' rhoid=1 color=blue'
# -----------------------------------------------------------
# lrho: likelihood vs rho
#
# PLOT['lrho'] options:
#   plot_fname: output plot file name
#   rho_label:  rho label (default = 'rho0') used in the legend
#
# NOTE(review): rhoid and rho0_capvalue are not described here -
# confirm their meaning against the tool.
# -----------------------------------------------------------
PLOT['lrho'] = (
    'plot_fname=xgb/v0/lrho_new_format.png'
    ' rho_label=$rho_r$'
)

# simulation entries
LRHO['sim'] = 'ifname=' + sim_rhor_fname + ' rhoid=1 color=green'

# background entries (the only ones that set rho0_capvalue)
LRHO['bkg'] = (
    'ifname=' + bkg_rhor_fname
    + ' rhoid=1'
    + ' color=blue'
    + ' rho0_capvalue=8'
)
# -----------------------------------------------------------
# hchirp: sim/bkg chirp histogram
#
# PLOT['hchirp'] options:
#   plot_fname: output plot file name
#   rho_label:  rho label (default = 'rho0') used in the legend
# -----------------------------------------------------------
PLOT['hchirp'] = (
    'plot_fname=xgb/v0/hchirp_new_format.png'
    ' rho_label=$rho_r$'
)

# one entry per histogram: input file name and color
HCHIRP['sim'] = 'ifname=' + sim_rhor_fname + ' color=green'
HCHIRP['bkg'] = 'ifname=' + bkg_rhor_fname + ' color=blue'