drugs_conf.py (forked from EGjoni/DRUGS)
# System prompt (left empty here; an alternative such as "You are a helpful assistant." can be substituted).
system_prompt = """
"""
# Paste the large block of source text here:
content = """Write an epic rap battle between William Rowan Hamilton and Lord Kelvin"""
# Describe what the model should do with the text above:
instruction = """Write an epic rap battle between William Rowan Hamilton and Lord Kelvin
"""
questions_list = []
# Each QA entry is a (question, list_of_answer_choices) pair consumed by generateQs() below.
QA = [
]
# Letter labels prepended to the answer choices.
q_prepend = ["A) ", "B) ", "C) ", "D) "]
def generateQs(iteration):
    """Build a question string from QA, rotating the answer-choice order by `iteration`."""
    q_string = """
Please answer the following questions about the text above.
"""
    for q, answers in QA:
        q_string += f"""
{q}
"""
        # Rotate which answer lands on each letter so the ordering shifts between iterations.
        for i in range(len(answers)):
            selected = (i + iteration) % len(answers)
            q_string += f"""
{q_prepend[i]}{answers[selected]}
"""
    return q_string
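# A minimal usage sketch (not part of the original config): the QA entries below are
# hypothetical placeholders, shown only to illustrate the (question, answer-choices)
# format generateQs() expects and how `iteration` rotates the choice order.
if __name__ == "__main__":
    QA = [
        ("Who wins the rap battle?",
         ["William Rowan Hamilton", "Lord Kelvin", "It ends in a draw", "The audience"]),
    ]
    print(generateQs(0))  # choices in their original rotation
    print(generateQs(1))  # same choices, shifted by one position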
import numpy as np
import torch
import torch.nn.functional as F
def print_top_k_logits_histogram(logits, tokenizer, top_k=10, max_width=100):
    """
    Print a console histogram of the top_k predicted next tokens' logits,
    after applying a softmax to convert logits to probabilities. The token text
    and its probability are right-aligned.

    :param logits: Logits from a language model prediction (numpy array or torch tensor),
                   shaped (batch, seq_len, vocab_size).
    :param tokenizer: The tokenizer used with the model, to map indices to tokens.
    :param top_k: Number of top logits to display in the histogram.
    :param max_width: The maximum width of the histogram in characters.
    """
    # Take the logits for the last position in the sequence
    last_logits = logits[0, -1, :]
    # Apply softmax to convert logits to probabilities (detach/cpu so .numpy() is safe)
    probabilities = F.softmax(torch.as_tensor(last_logits).detach().cpu(), dim=-1).numpy()
    # Indices of the top_k probabilities, in ascending order of probability
    top_k_indices = np.argsort(probabilities)[-top_k:]
    top_k_probs = probabilities[top_k_indices]
    # Decode tokens and find the maximum length for formatting
    tokens = [tokenizer.decode([int(idx)]) for idx in top_k_indices]
    max_token_length = max(len(token) for token in tokens)
    # Scale bar lengths so the most probable token spans max_width characters
    max_prob = max(top_k_probs)
    scaled_probs = (top_k_probs / max_prob) * max_width
    # Print the histogram with right-aligned token text and probability
    print("--- \n")
    for token, prob, raw_prob in zip(tokens, scaled_probs, top_k_probs):
        bar = '#' * int(prob)
        prob_text = f"{raw_prob * 100:.2f}%".rjust(25)  # Probability formatted to two decimal places
        print(f"{bar.ljust(max_width)} {token.rjust(max_token_length * 2)}{prob_text}")
    # The last entry is the most probable token
    print("---::::" + tokens[-1] + "::: \n")