diff --git a/LLMFactCheck_ABOUT.md b/LLMFactCheck_ABOUT.md
index dd25ed9..9f4ddc9 100644
--- a/LLMFactCheck_ABOUT.md
+++ b/LLMFactCheck_ABOUT.md
@@ -62,6 +62,14 @@ The accuracy results are visually represented through pie charts for both standa
 Llama Model Accuracy on All Labeled Dataset
 [pie chart: Llama model accuracy on all labeled dataset]
+
+Llama-2-7B-GGUF - test size = 0.3
+
+Llama Model Accuracy on Test Dataset
+
+[pie chart: Llama model accuracy on test dataset (img/llama_7B_0.3_icl.png)]
+
@@ -107,15 +115,18 @@ The evaluation of model accuracies has provided the following insights:
 
 | Model                 | Dataset Type          | Accuracy |
 |-----------------------|-----------------------|---------:|
-| Llama                 | Test                  |      44% |
+| Llama                 | test size = 0.3       |      44% |
 | Llama                 | All Labeled           |      49% |
-| GPT-3.5 Turbo         | Test (Standard)       |      64% |
+| Llama-2-7B-GGUF       | test size = 0.3       |      71% |
+| GPT-3.5 Turbo         | test size = 0.3       |      64% |
 | GPT-3.5 Turbo         | All Labeled (Standard)|      58% |
-| GPT-3.5 Turbo (ICL)   | Test                  |      73% |
-| GPT-4.0               | Test (Standard)       |      48% |
+| GPT-3.5 Turbo (ICL)   | test size = 0.3       |      73% |
+| GPT-4.0               | test size = 0.3       |      48% |
 | GPT-4.0               | All Labeled (Standard)|      59% |
-| GPT-4.0 (ICL)         | Test                  |      73% |
+| GPT-4.0 (ICL)         | test size = 0.3       |      73% |
+
+>Llama-2-7B-GGUF - test size = 0.3
 
 - **Llama Model Accuracy**:
   - On the test dataset: 44%
   - On the all labeled dataset: 49%
diff --git a/img/llama_7B_0.3_icl.png b/img/llama_7B_0.3_icl.png
new file mode 100644
index 0000000..ad26bdd
Binary files /dev/null and b/img/llama_7B_0.3_icl.png differ
diff --git a/src/load_model.py b/src/load_model.py
index a7f3f16..d28340b 100644
--- a/src/load_model.py
+++ b/src/load_model.py
@@ -27,8 +27,8 @@ def load_model(model_type, use_icl):
     """
     if model_type == 'llama':
         # Load a Llama model
-        model_name = "TheBloke/Llama-2-13B-chat-GGML"
-        model_path = hf_hub_download(repo_id=model_name, filename="llama-2-13b-chat.ggmlv3.q5_1.bin")
+        model_name = "TheBloke/Llama-2-7B-GGUF"
+        model_path = hf_hub_download(repo_id=model_name, filename="llama-2-7b.Q4_K_M.gguf")
         model = Llama(model_path=model_path, n_threads=2, n_batch=512, n_gpu_layers=32)
         if use_icl:
             return prepare_icl(model, model_type)
@@ -60,11 +60,20 @@ def prepare_icl(model, model_type):
     """
     df = pd.read_csv(file_path)
-    train_df, test_df = train_test_split(df, test_size=0.3, random_state=42)
-    test_df.to_csv(os.path.join('data', f'test_df_{model_type}_icl.csv'), index=False)
+
+    if not os.path.exists(os.path.join('data', f'test_df_{model_type}_icl.csv')) or \
+       not os.path.exists(os.path.join('data', f'train_df_{model_type}_icl.csv')):
+
+        train_df, test_df = train_test_split(df, test_size=0.7, random_state=42)
+
+        test_df.to_csv(os.path.join('data', f'test_df_{model_type}_icl.csv'), index=False)
+        train_df.to_csv(os.path.join('data', f'train_df_{model_type}_icl.csv'), index=False)
+    else:
+        train_df = pd.read_csv(os.path.join('data', f'train_df_{model_type}_icl.csv'))
+
     context_entries = train_df.sample(n=10)
     context = context_entries.apply(
         lambda row: f"{row['Question']} Answer: {'Yes' if row['Label'] else 'No'}\n", axis=1).str.cat()
-    return model, context
+    return model, context
\ No newline at end of file
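For reviewers who want to exercise the loader change in isolation: the patch swaps the 13B GGML checkpoint for a 7B GGUF one, which requires a llama-cpp-python build with GGUF support. Below is a minimal sketch of pulling and running that exact checkpoint; the `n_ctx` value, prompt, and generation parameters are illustrative assumptions, not taken from this patch.

```python
# Minimal sketch (not part of the patch): load the quantized
# Llama-2-7B GGUF checkpoint the new loader points at and run a
# single yes/no, fact-check-style completion.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Same repo and filename as in src/load_model.py after this change.
model_path = hf_hub_download(
    repo_id="TheBloke/Llama-2-7B-GGUF",
    filename="llama-2-7b.Q4_K_M.gguf",
)

# n_threads/n_batch/n_gpu_layers mirror the patched loader;
# n_ctx=2048 is an assumption, not taken from the patch.
model = Llama(model_path=model_path, n_threads=2, n_batch=512,
              n_gpu_layers=32, n_ctx=2048)

# Illustrative prompt in the same "<question> Answer:" shape the
# ICL context uses.
out = model("Is aspirin used to reduce fever? Answer:", max_tokens=4, stop=["\n"])
print(out["choices"][0]["text"].strip())
```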
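The `prepare_icl` change also persists the train/test split to disk, so the 10 sampled in-context examples are always drawn from the same training rows across runs instead of re-splitting the labeled data every time. A standalone sketch of that build-or-reuse pattern follows; the input path `data/labeled.csv` and the helper name are hypothetical, while the `Question`/`Label` columns and split parameters come from the patch.

```python
# Standalone sketch of the build-or-reuse split pattern in prepare_icl:
# create the split once, persist it, then reload it on later runs.
import os
import pandas as pd
from sklearn.model_selection import train_test_split

def load_or_create_split(file_path, model_type):
    # Hypothetical helper; the patch inlines this logic in prepare_icl.
    train_path = os.path.join('data', f'train_df_{model_type}_icl.csv')
    test_path = os.path.join('data', f'test_df_{model_type}_icl.csv')

    if not (os.path.exists(train_path) and os.path.exists(test_path)):
        df = pd.read_csv(file_path)
        # test_size=0.7 as in the patched code: 30% of rows form the
        # train pool that ICL examples are sampled from, 70% are held out.
        train_df, test_df = train_test_split(df, test_size=0.7, random_state=42)
        train_df.to_csv(train_path, index=False)
        test_df.to_csv(test_path, index=False)
    else:
        train_df = pd.read_csv(train_path)
        test_df = pd.read_csv(test_path)
    return train_df, test_df

# Build the ICL context as prepare_icl does: 10 sampled rows, each
# rendered as "<question> Answer: Yes/No" and concatenated.
train_df, _ = load_or_create_split('data/labeled.csv', 'llama')  # path is hypothetical
context = train_df.sample(n=10).apply(
    lambda row: f"{row['Question']} Answer: {'Yes' if row['Label'] else 'No'}\n",
    axis=1).str.cat()
```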