diff --git a/README.md b/README.md index 49132ba..639ccc6 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,12 @@ data folder contains multiple folders and files: - use current path as `pretrained_model` variable in script files 2. Clone the repository - git clone https://github.com/EhsanMashhadi/MSR2021-ProgramRepair.git -3. Train the model with MSR data +3. Install dependencies + - pip install torch==1.4.0 + - pip install transformers==2.5.0 +4. Train the model with MSR data - bash ./scripts/codebert/train.sh -4. Evaluate the model +5. Evaluate the model - bash ./scripts/codebert/test.sh ### Running Simple LSTM Experiments diff --git a/scripts/codebert/test.sh b/scripts/codebert/test.sh index bc0eb6a..1ef0d55 100644 --- a/scripts/codebert/test.sh +++ b/scripts/codebert/test.sh @@ -13,8 +13,9 @@ test_file=$data_dir/src-test.txt,$data_dir/tgt-test.txt test_model=$output_dir/checkpoint-best-ppl/pytorch_model.bin pretrained_model=./code-bert #CodeBert model path downloaded from Huggingface pretrained_model=../../codebert-model/codebert-base +CodeBERT=../../codebert -python ../codebert/run.py \ +python $CodeBERT/run.py \ --do_test \ --model_type roberta \ --model_name_or_path $pretrained_model \ diff --git a/scripts/codebert/train.sh b/scripts/codebert/train.sh index 9a2621e..55b30d8 100755 --- a/scripts/codebert/train.sh +++ b/scripts/codebert/train.sh @@ -15,8 +15,10 @@ train_file=$data_dir/src-train.txt,$data_dir/tgt-train.txt validate_file=$data_dir/src-val.txt,$data_dir/tgt-val.txt pretrained_model=./code-bert #CodeBert model path downloaded from Huggingface pretrained_model=../../codebert-model/codebert-base +CodeBERT=../../codebert -python ../codebert/run.py \ + +python $CodeBERT/run.py \ --do_train \ --do_eval \ --model_type roberta \