new_llama_icl #15

Merged (2 commits), Jan 24, 2024
21 changes: 16 additions & 5 deletions LLMFactCheck_ABOUT.md
@@ -62,6 +62,14 @@ The accuracy results are visually represented through pie charts for both standa
<figcaption>Llama Model Accuracy on All Labeled Dataset</figcaption>
<img src="./img/llama_accuracy_on_all_labeled_df.png" alt="Llama model accuracy on all labeled dataset" style="width: 100%; max-width: 400px;">
</figure>
+ <h3 style="text-align: center">Llama-2-7B-GGUF - test size = 0.3</h3>
+ <div style="display: flex; justify-content: center; flex-wrap: wrap; gap: 20px;">
+ <!-- Llama Model Accuracy -->
+ <figure style="text-align: center; border: 1px solid lightgrey; padding: 10px;">
+ <figcaption>Llama Model Accuracy on Test Dataset</figcaption>
+ <img src="./img/llama_7B_0.3_icl.png" alt="Llama model accuracy on test dataset" style="width: 100%; max-width: 400px;">
+ </figure>
+ </div>

<!-- GPT-3.5 Turbo Model Accuracy -->
<figure style="text-align: center; border: 1px solid lightgrey; padding: 10px;">
@@ -107,15 +115,18 @@ The evaluation of model accuracies has provided the following insights:

| Model | Dataset Type | Accuracy |
|-----------------------|-----------------------|---------:|
- | Llama | Test | 44% |
+ | Llama | test size = 0.3 | 44% |
| Llama | All Labeled | 49% |
- | GPT-3.5 Turbo | Test (Standard) | 64% |
+ | Llama-2-7B-GGUF | test size = 0.3 | 71% |
+ | GPT-3.5 Turbo | test size = 0.3 | 64% |
| GPT-3.5 Turbo | All Labeled (Standard)| 58% |
- | GPT-3.5 Turbo (ICL) | Test | 73% |
- | GPT-4.0 | Test (Standard) | 48% |
+ | GPT-3.5 Turbo (ICL) | test size = 0.3 | 73% |
+ | GPT-4.0 | test size = 0.3 | 48% |
| GPT-4.0 | All Labeled (Standard)| 59% |
- | GPT-4.0 (ICL) | Test | 73% |
+ | GPT-4.0 (ICL) | test size = 0.3 | 73% |

+ >Llama-2-7B-GGUF - test size = 0.3
- **Llama Model Accuracy**:
- On the test dataset: 44%
- On the all labeled dataset: 49%
Binary file added img/llama_7B_0.3_icl.png
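A note on how the percentages above are obtained: accuracy is the share of statements for which the model's Yes/No verdict agrees with the human label. Below is a minimal sketch of that computation, assuming a hypothetical predictions CSV with `Label` (ground truth) and `Prediction` (model answer) columns; the column names and path are illustrative, not taken from this repository.

```python
import pandas as pd

def accuracy_from_predictions(path: str) -> float:
    """Fraction of rows where the model's Yes/No answer matches the label.

    Assumes illustrative columns 'Label' (0/1 or bool) and 'Prediction'
    ('Yes'/'No'); the real result files in this repo may use other names.
    """
    df = pd.read_csv(path)
    predicted_yes = df["Prediction"].str.strip().str.lower().eq("yes")
    return float((predicted_yes == df["Label"].astype(bool)).mean())

# Hypothetical usage:
# accuracy_from_predictions("results/llama_7b_icl_predictions.csv")  # e.g. 0.71
```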
19 changes: 14 additions & 5 deletions src/load_model.py
@@ -27,8 +27,8 @@
    """
    if model_type == 'llama':
        # Load a Llama model
-        model_name = "TheBloke/Llama-2-13B-chat-GGML"
-        model_path = hf_hub_download(repo_id=model_name, filename="llama-2-13b-chat.ggmlv3.q5_1.bin")
+        model_name = "TheBloke/Llama-2-7B-GGUF"
+        model_path = hf_hub_download(repo_id=model_name, filename="llama-2-7b.Q4_K_M.gguf")
        model = Llama(model_path=model_path, n_threads=2, n_batch=512, n_gpu_layers=32)
        if use_icl:
            return prepare_icl(model, model_type)
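For context, here is a standalone sketch of what the updated loading path does: download the quantized GGUF weights from the Hugging Face Hub and pass the local path to llama-cpp-python. The repo id, filename, and constructor arguments mirror the diff; the prompt and generation call at the end are purely illustrative and not part of this PR.

```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the quantized 7B GGUF weights (cached locally by huggingface_hub).
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q4_K_M.gguf",
)

# Same constructor arguments as in load_model.py.
model = Llama(model_path=model_path, n_threads=2, n_batch=512, n_gpu_layers=32)

# Illustrative single-prompt completion call (not part of the PR).
output = model("Is aspirin used to treat headaches? Answer:", max_tokens=8)
print(output["choices"][0]["text"])
```

Moving from the 13B GGML checkpoint to the 7B Q4_K_M GGUF file presumably trades some capacity for a smaller download and memory footprint, and GGUF is the format that current llama.cpp builds expect.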
@@ -60,11 +60,20 @@
    """
    df = pd.read_csv(file_path)
-    train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)
-    test_df.to_csv(os.path.join('data', f'test_df_{model_type}_icl.csv'), index=False)

+    if not os.path.exists(os.path.join('data', f'test_df_{model_type}_icl.csv')) or \
+            not os.path.exists(os.path.join('data', f'train_df_{model_type}_icl.csv')):

+        train_df, test_df = train_test_split(df, test_size=0.7, random_state=42)

+        test_df.to_csv(os.path.join('data', f'test_df_{model_type}_icl.csv'), index=False)
+        train_df.to_csv(os.path.join('data', f'train_df_{model_type}_icl.csv'), index=False)
+    else:
+        train_df = pd.read_csv(os.path.join('data', f'train_df_{model_type}_icl.csv'))

    context_entries = train_df.sample(n=10)

    context = context_entries.apply(
        lambda row: f"{row['Question']} Answer: {'Yes' if row['Label'] else 'No'}\n",
        axis=1).str.cat()
-    return model, context
+    return model, context

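To read the new prepare_icl flow outside the diff: reuse the cached train/test split if both CSVs exist, otherwise create and persist one, then sample ten labeled questions from the train split as the in-context-learning prefix. Below is a hedged sketch of that logic, with file names mirroring the diff; the function name and the usage comments are illustrative, not the repository's API.

```python
import os

import pandas as pd
from sklearn.model_selection import train_test_split

def build_icl_context(file_path: str, model_type: str, n_examples: int = 10) -> str:
    """Return a few-shot prefix built from a cached train split (sketch)."""
    test_csv = os.path.join("data", f"test_df_{model_type}_icl.csv")
    train_csv = os.path.join("data", f"train_df_{model_type}_icl.csv")

    if not (os.path.exists(test_csv) and os.path.exists(train_csv)):
        df = pd.read_csv(file_path)
        # test_size=0.7 matches the diff: only 30% of rows remain for training.
        train_df, test_df = train_test_split(df, test_size=0.7, random_state=42)
        test_df.to_csv(test_csv, index=False)
        train_df.to_csv(train_csv, index=False)
    else:
        train_df = pd.read_csv(train_csv)

    # Draw a handful of labeled questions and concatenate them as the ICL prefix.
    examples = train_df.sample(n=n_examples)
    return examples.apply(
        lambda row: f"{row['Question']} Answer: {'Yes' if row['Label'] else 'No'}\n",
        axis=1,
    ).str.cat()

# Hypothetical usage: prepend the context to each new question before calling the model.
# prompt = build_icl_context("data/labeled.csv", "llama") + new_question + " Answer:"
# answer = model(prompt, max_tokens=4)["choices"][0]["text"]
```

Note that `test_size=0.7` keeps only 30% of the labeled data available for drawing the ten in-context examples, and because the examples are sampled on every call, each run sees a slightly different prefix.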