diff --git a/model_training/benchmark/stressng.yaml b/model_training/benchmark/stressng.yaml index 7252e262..4b0f43d8 100644 --- a/model_training/benchmark/stressng.yaml +++ b/model_training/benchmark/stressng.yaml @@ -11,31 +11,73 @@ spec: template: spec: containers: - - name: stress - image: alexeiled/stress-ng + - name: stress-{{ index .stress 1 }} + image: quay.io/sustainability/stress-ng:0.16.02 imagePullPolicy: IfNotPresent env: - name: TIMEOUT - value: 120s + value: "30" + securityContext: + privileged: true command: - - /stress-ng - {{ if ne (index .stress 0) "0" -}} - - --cpu - - "{{ index .stress 0 }}" - {{ end -}} - {{ if ne (index .stress 1) "0" -}} - - --io - - "{{ index .stress 1 }}" - {{ end -}} - {{ if ne (index .stress 2) "0" -}} - - --vm - - "{{ index .stress 2 }}" - - --vm-bytes - - "{{ index .stress 3 }}" - {{ end -}} - - --timeout - - $(TIMEOUT) - - --metrics-brief + - /bin/sh + - -c + - | + {{ if eq (index .stress 1) "sleep" -}} + sleep $(TIMEOUT) + exit 0 + {{ end -}} + + # Different processes can be executed, so that to do CPU pinning properly we store the cpu index to be use in a file + # If the files does not exist, we populate the file with the CPU index + lscpu | grep "NUMA node"| tail -n +2 | while read -r line; do + cpus=$(echo $line | awk '{print $4}' | sed 's/,.*//g' | sed 's/-/ /g') + for i in $(seq $cpus); do + echo $i >> cpu-idx + done + done + lscpu | grep "NUMA node"| tail -n +2 | while read -r line; do + cpus=$(echo $line | awk '{print $4}' | sed 's/.*,//g' | sed 's/-/ /g') # the difference here is regex of the "," + for i in $(seq $cpus); do + echo $i >> cpu-idx + done + done + + # Store the CPU max freq to revert it back if changed + {{ if ne (index .stress 0) "none" -}} + mount -o remount,rw /sys/devices/system/cpu/cpufreq + cat "/sys/devices/system/cpu/cpufreq/policy0/scaling_max_freq" > max_freq + {{ end -}} + + for i in $(seq 1 {{ index .stress 2 }}); do + CPU=$(cat cpu-idx | tail -n 1) + cat cpu-idx | head -n -1 > 
tmp-cpu && rm -f cpu-idx && mv tmp-cpu cpu-idx + {{ if ne (index .stress 0) "none" -}} + echo {{ index .stress 0 }} | tee /sys/devices/system/cpu/cpufreq/policy${CPU}/scaling_max_freq + {{ end -}} + echo /usr/bin/stress-ng --{{ index .stress 1 }} 1 --taskset ${CPU} {{ if ne (index .stress 3) "none" -}} --{{ index .stress 3 }} "{{ index .stress 4 }}" {{ end -}} --timeout $(TIMEOUT) --aggressive --metrics-brief + /usr/bin/stress-ng --{{ index .stress 1}} 1 --taskset ${CPU} {{ if ne (index .stress 3) "none" -}} --{{ index .stress 3 }} "{{ index .stress 4 }}" {{ end -}} --timeout $(TIMEOUT) --aggressive --metrics-brief & + done + + wait + + # Revert the CPU freq back to max + {{ if ne (index .stress 0) "none" -}} + FREQ=$(cat max_freq) + echo $FREQ | tee /sys/devices/system/cpu/cpufreq/policy${CPU}/scaling_max_freq + {{ end -}} + echo "finished" + volumeMounts: + - mountPath: /sys/devices/system/cpu/cpufreq + name: system-cpu + propagation: bidirectional + readOnly: false + volumes: + - name: system-cpu + hostPath: + path: /sys/devices/system/cpu/cpufreq + # type: File + type: Directory restartPolicy: Never parserKey: stress repetition: 1 @@ -44,29 +86,212 @@ spec: iterations: - name: stress values: - - "0;0;1;500M" - - "0;0;4;500M" - - "0;0;8;500M" - - "0;0;16;500M" - - "0;0;32;500M" - - "0;0;1;1G" - - "0;0;4;1G" - - "0;0;8;1G" - - "0;0;16;1G" - - "0;0;32;1G" - - "0;0;1;2G" - - "0;0;4;2G" - - "0;0;8;2G" - - "0;0;16;2G" - - "0;0;32;2G" - - "1;0;0;0" - - "4;0;0;0" - - "8;0;0;0" - - "16;0;0;0" - - "32;0;0;0" - - "0;1;0;0" - - "0;4;0;0" - - "0;8;0;0" - - "0;16;0;0" - - "0;32;0;0" + # The baseline scenarios are used to calculate the OS/Backgroud/Idle and activation power. + # These two powers allows us to separate the dynamic power consumption from the user workloads by calculating the delta of scenarios. 
+ # For each scenario, each workload power = (scenarioPower - OS/Background/IdlePower - activationPower) / numWorkloads + # This will be the best ground truth of the dynamic power to validate the estimated dynamic power consumption later. + # + # The max CPU frequency in baselineMachine is 3600000 and the min is 1200000. We define 3 slopes between the min and max freq as (max-min)/3 = 800000. + # Then we will test with frequencies of 2000000, 2800000 and 3600000. + # + # We first execute the baseline scenarios. + # cpuFrequency;mainWorkload;numInstances;extraParam;extraParamValue + - "none;sleep;none;none;none" # capture the OS/background power consumption + - "3600000;cpu;1;none;none" # the incremental power from the previous scenario is activation + workload power + - "3600000;cpu;2;none;none" # the incremental power from the previous scenario is only the workload power + # + # Then we execute all the other workloads. + # cpu: is used to stress the CPU + # The baselineMachine has 32 CPUs with 2 hyperthreads + - "2000000;cpu;4;none;none" + - "2000000;cpu;8;none;none" + - "2000000;cpu;15;none;none" # max CPU cores in baselineMachine (15) + - "2000000;cpu;24;none;none" + - "2000000;cpu;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;cpu;4;none;none" + - "2800000;cpu;8;none;none" + - "2800000;cpu;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;cpu;24;none;none" + - "2800000;cpu;32;none;none" # max HT cores in baselineMachine (32) + - "3600000;cpu;4;none;none" + - "3600000;cpu;8;none;none" + - "3600000;cpu;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;cpu;24;none;none" + - "3600000;cpu;32;none;none" # max HT cores in baselineMachine (32) + # + # branch: is used to stress branch by branch to 1024 randomly selected locations and hence exercise + # the CPU branch prediction logic + - "2000000;branch;4;none;none" + - "2000000;branch;8;none;none" + - "2000000;branch;15;none;none" # max CPU cores in 
baselineMachine (15) + - "2000000;branch;24;none;none" + - "2000000;branch;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;branch;4;none;none" + - "2800000;branch;8;none;none" + - "2800000;branch;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;branch;24;none;none" + - "2800000;branch;32;none;none" # max HT cores in baselineMachine (32) + - "3600000;branch;4;none;none" + - "3600000;branch;8;none;none" + - "3600000;branch;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;branch;24;none;none" + - "3600000;branch;32;none;none" # max HT cores in baselineMachine (32) + # + # cyclic: is used to stress linux schedulers with cyclic nanosecond sleeps + - "2000000;cyclic;4;none;none" + - "2000000;cyclic;8;none;none" + - "2000000;cyclic;15;none;none" # max CPU cores in baselineMachine (15) + - "2000000;cyclic;24;none;none" + - "2000000;cyclic;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;cyclic;4;none;none" + - "2800000;cyclic;8;none;none" + - "2800000;cyclic;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;cyclic;24;none;none" + - "2800000;cyclic;32;none;none" # max HT cores in baselineMachine (32) + - "3600000;cyclic;4;none;none" + - "3600000;cyclic;8;none;none" + - "3600000;cyclic;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;cyclic;24;none;none" + - "3600000;cyclic;32;none;none" # max HT cores in baselineMachine (32) + # + # regs: start N workers exercising CPU generic registers + - "2000000;regs;4;none;none" + - "2000000;regs;8;none;none" + - "2000000;regs;15;none;none" # max CPU cores in baselineMachine (15) + - "2000000;regs;24;none;none" + - "2000000;regs;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;regs;4;none;none" + - "2800000;regs;8;none;none" + - "2800000;regs;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;regs;24;none;none" + - "2800000;regs;32;none;none" # max HT cores in baselineMachine (32) + - 
"3600000;regs;4;none;none" + - "3600000;regs;8;none;none" + - "3600000;regs;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;regs;24;none;none" + - "3600000;regs;32;none;none" # max HT cores in baselineMachine (32) + # + # l1cache: is used to stress CPU level 1 cache with reads and writes + - "2000000;l1cache;4;none;none" + - "2000000;l1cache;8;none;none" + - "2000000;l1cache;15;none;none" # max CPU cores in baselineMachine (15) + - "2000000;l1cache;24;none;none" + - "2000000;l1cache;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;l1cache;4;none;none" + - "2800000;l1cache;8;none;none" + - "2800000;l1cache;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;l1cache;24;none;none" + - "2800000;l1cache;32;none;none" # max HT cores in baselineMachine (32) + - "3600000;l1cache;4;none;none" + - "3600000;l1cache;8;none;none" + - "3600000;l1cache;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;l1cache;24;none;none" + - "3600000;l1cache;32;none;none" # max HT cores in baselineMachine (32) + # + # cache: is used to stress the CPU cache with random wide spread memory read and writes to thrash the CPU cache + - "2000000;cache;4;none;none" + - "2000000;cache;8;none;none" + - "2000000;cache;15;none;none" # max CPU cores in baselineMachine (15) + - "2000000;cache;24;none;none" + - "2000000;cache;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;cache;4;none;none" + - "2800000;cache;8;none;none" + - "2800000;cache;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;cache;24;none;none" + - "2800000;cache;32;none;none" # max HT cores in baselineMachine (32) + - "3600000;cache;4;none;none" + - "3600000;cache;8;none;none" + - "3600000;cache;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;cache;24;none;none" + - "3600000;cache;32;none;none" # max HT cores in baselineMachine (32) + # + # stream: "Sustainable Memory Bandwidth in High Performance Computers" benchmarking 
tool by John D. McCalpin + - "2000000;stream;4;none;none" + - "2000000;stream;8;none;none" + - "2000000;stream;15;none;none" # max CPU cores in baselineMachine (15) + - "2000000;stream;24;none;none" + - "2000000;stream;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;stream;4;none;none" + - "2800000;stream;8;none;none" + - "2800000;stream;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;stream;24;none;none" + - "2800000;stream;32;none;none" # max HT cores in baselineMachine (32) + - "3600000;stream;4;none;none" + - "3600000;stream;8;none;none" + - "3600000;stream;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;stream;24;none;none" + - "3600000;stream;32;none;none" # max HT cores in baselineMachine (32) + # + # A common recommendation is to use around 80-90% of the available memory for stress testing. + # The baselineMachine has 20Gi free, so the tests below use 80% of it (16G). + # --vm-rw: is used to stress the virtual memory subsystem by allocating memory pages and continuously + # writing and reading data to and from them. This simulates a scenario where memory is frequently used + # and modified. This test stresses both memory allocation and data access. 
+ - "2000000;vm-rw;4;vm-rw-bytes;16G" + - "2000000;vm-rw;8;vm-rw-bytes;16G" + - "2000000;vm-rw;15;vm-rw-bytes;16G" # max CPU cores in baselineMachine (15) + - "2000000;vm-rw;24;vm-rw-bytes;16G" + - "2000000;vm-rw;32;vm-rw-bytes;16G" # max HT cores in baselineMachine (32) + - "2800000;vm-rw;4;vm-rw-bytes;16G" + - "2800000;vm-rw;8;vm-rw-bytes;16G" + - "2800000;vm-rw;15;vm-rw-bytes;16G" # max CPU cores in baselineMachine (15) + - "2800000;vm-rw;24;vm-rw-bytes;16G" + - "2800000;vm-rw;32;vm-rw-bytes;16G" # max HT cores in baselineMachine (32) + - "3600000;vm-rw;4;vm-rw-bytes;16G" + - "3600000;vm-rw;8;vm-rw-bytes;16G" + - "3600000;vm-rw;15;vm-rw-bytes;16G" # max CPU cores in baselineMachine (15) + - "3600000;vm-rw;24;vm-rw-bytes;16G" + - "3600000;vm-rw;32;vm-rw-bytes;16G" # max HT cores in baselineMachine (32) + # + # --iomix: is used to stress a mix of sequential, random and memory mapped read/write operations as + # well as random copy file read/writes, forced sync'ing and (if run as root) cache dropping. 
+ - "2000000;iomix;4;none;none" + - "2000000;iomix;8;none;none" + - "2000000;iomix;15;none;none" # max CPU cores in baselineMachine (15) + - "2000000;iomix;24;none;none" + - "2000000;iomix;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;iomix;4;none;none" + - "2800000;iomix;8;none;none" + - "2800000;iomix;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;iomix;24;none;none" + - "2800000;iomix;32;none;none" # max HT cores in baselineMachine (32) + - "3600000;iomix;4;none;none" + - "3600000;iomix;8;none;none" + - "3600000;iomix;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;iomix;24;none;none" + - "3600000;iomix;32;none;none" # max HT cores in baselineMachine (32) + # + # pipe: is used to stress pipe write operations + - "2000000;pipe;4;none;none" + - "2000000;pipe;8;none;none" + - "2000000;pipe;15;none;none" # max CPU cores in baselineMachine (15) + - "2000000;pipe;24;none;none" + - "2000000;pipe;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;pipe;4;none;none" + - "2800000;pipe;8;none;none" + - "2800000;pipe;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;pipe;24;none;none" + - "2800000;pipe;32;none;none" # max HT cores in baselineMachine (32) + - "3600000;pipe;4;none;none" + - "3600000;pipe;8;none;none" + - "3600000;pipe;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;pipe;24;none;none" + - "3600000;pipe;32;none;none" # max HT cores in baselineMachine (32) + # + # sctp: is used to stress the network performing SCTP send/receives + - "2000000;sctp;4;none;none" + - "2000000;sctp;8;none;none" + - "2000000;sctp;15;none;none" # max CPU cores in baselineMachine (15) + - "2000000;sctp;24;none;none" + - "2000000;sctp;32;none;none" # max HT cores in baselineMachine (32) + - "2800000;sctp;4;none;none" + - "2800000;sctp;8;none;none" + - "2800000;sctp;15;none;none" # max CPU cores in baselineMachine (15) + - "2800000;sctp;24;none;none" + - "2800000;sctp;32;none;none" 
# max HT cores in baselineMachine (32) + - "3600000;sctp;4;none;none" + - "3600000;sctp;8;none;none" + - "3600000;sctp;15;none;none" # max CPU cores in baselineMachine (15) + - "3600000;sctp;24;none;none" + - "3600000;sctp;32;none;none" # max HT cores in baselineMachine (32) sequential: true \ No newline at end of file diff --git a/src/server/model_server.py b/src/server/model_server.py index 7fef288e..121ad331 100644 --- a/src/server/model_server.py +++ b/src/server/model_server.py @@ -13,8 +13,8 @@ sys.path.append(util_path) from util.train_types import get_valid_feature_groups, ModelOutputType, FeatureGroups, FeatureGroup -from util.config import getConfig, model_toppath, ERROR_KEY, MODEL_SERVER_MODEL_REQ_PATH, MODEL_SERVER_MODEL_LIST_PATH, initial_pipeline_url, download_path -from util.loader import parse_filters, is_valid_model, load_json, load_weight, get_model_group_path, get_archived_file, METADATA_FILENAME, CHECKPOINT_FOLDERNAME, get_pipeline_path +from util.config import getConfig, model_toppath, ERROR_KEY, MODEL_SERVER_MODEL_REQ_PATH, MODEL_SERVER_MODEL_LIST_PATH, initial_pipeline_url +from util.loader import parse_filters, is_valid_model, load_json, load_weight, get_model_group_path, get_archived_file, METADATA_FILENAME, CHECKPOINT_FOLDERNAME, get_pipeline_path, any_node_type, is_matched_type ############################################### # model request @@ -42,16 +42,17 @@ def __init__(self, metrics, output_type, source='rapl', node_type=-1, weight=Fal MODEL_SERVER_PORT = getConfig('MODEL_SERVER_PORT', MODEL_SERVER_PORT) MODEL_SERVER_PORT = int(MODEL_SERVER_PORT) -def select_best_model(valid_groupath, filters, trainer_name="", node_type=-1, weight=False): +def select_best_model(valid_groupath, filters, trainer_name="", node_type=any_node_type, weight=False): model_names = [f for f in os.listdir(valid_groupath) if \ f != CHECKPOINT_FOLDERNAME \ and not os.path.isfile(os.path.join(valid_groupath,f)) \ - and (trainer_name == "" or trainer_name in f) \ - 
and (node_type == -1 or str(node_type) in f) ] + and (trainer_name == "" or trainer_name in f)] # Load metadata of trainers best_cadidate = None best_response = None for model_name in model_names: + if not is_matched_type(model_name, node_type): + continue model_savepath = os.path.join(valid_groupath, model_name) metadata = load_json(model_savepath, METADATA_FILENAME) if metadata is None or not is_valid_model(metadata, filters) or ERROR_KEY not in metadata: diff --git a/src/util/loader.py b/src/util/loader.py index 3c2091e9..e27d9a34 100644 --- a/src/util/loader.py +++ b/src/util/loader.py @@ -21,6 +21,7 @@ default_init_pipeline_name = "Linux-4.15.0-213-generic-x86_64_v0.6" default_trainer_name = "GradientBoostingRegressorTrainer" default_node_type = "1" +any_node_type = -1 default_feature_group = FeatureGroup.KubeletOnly def load_json(path, name): @@ -122,6 +123,11 @@ def is_valid_model(metadata, filters): def get_model_name(trainer_name, node_type): return "{}_{}".format(trainer_name, node_type) +def is_matched_type(model_name, node_type): + if node_type == any_node_type: + return True + return model_name.split("_")[-1] == str(node_type) + def get_pipeline_path(model_toppath, pipeline_name=DEFAULT_PIPELINE): return os.path.join(model_toppath, pipeline_name)