-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathML_train_luigi.cfg
72 lines (63 loc) · 2.09 KB
/
ML_train_luigi.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
[core]
no_lock = True
log_level = DEBUG
[worker]
keep_alive: True
ping_interval: 20
wait_interval: 20
max_reschedules: 1
[scheduler]
retry_count: 0
[DEFAULT]
name = ML_train
ENV_NAME = Base
; grid storage protocol and path usable from submitting machine and worker nodes of cluster
; job in- and output will be stored in $wlcg_path under subdirectory of analysis $name
wlcg_path = root://cmsdcache-kit-disk.gridka.de//store/user/${USER}/LAW_storage
; default htcondor job submission configuration (modifiable for each task)
htcondor_accounting_group = cms.higgs
htcondor_remote_job = True
htcondor_request_cpus = 1
; for all cores in total
htcondor_universe = docker
;image without GPU libraries
# htcondor_docker_image = mschnepf/slc7-condocker:latest
; create log files in htcondor jobs
transfer_logs = True
; set local scheduler
local_scheduler = True
; set tolerance for workflow success with failed branches
tolerance = 0.00
acceptance = 1.00
; submit only missing htcondor workflow branches (should always be true)
only_missing = True
; bootstrap file to be sourced at beginning of htcondor jobs (relative PATH to framework.py)
bootstrap_file = setup_law_remote.sh
[CreateTrainingDataShard]
htcondor_request_cpus = 1
htcondor_walltime = 36000
htcondor_request_memory = 4000
htcondor_request_disk = 2000000
htcondor_requirements = (TARGET.ProvidesEKPResources==True) && (TARGET.ProvidesIO==True)
additional_files = ["ml_configs", "sm-htt-analysis"]
[RunTraining]
ENV_NAME = ML_LAW
htcondor_request_cpus = 2
htcondor_request_gpus = 1
htcondor_walltime = 3600
htcondor_request_memory = 4000
htcondor_requirements = ( TARGET.CloudSite =?= "topas" )
#&& (Machine =?= "f03-001-179-e.gridka.de")
htcondor_request_disk = 1000000
additional_files = ["ml_configs", "sm-htt-analysis"]
[RunTesting]
ENV_NAME = ML_LAW
htcondor_request_cpus = 1
htcondor_request_gpus = 1
htcondor_walltime = 3600
htcondor_request_memory = 10000
htcondor_request_disk = 10000000
htcondor_requirements = ( TARGET.CloudSite =?= "topas" )
#&& (Machine =?= "f03-001-179-e.gridka.de")
additional_files = ["ml_configs", "sm-htt-analysis"]
[RunAllAnalysisTrainings]