Skip to content

Commit

Permalink
Add cost analysis functionality (#15)
Browse files Browse the repository at this point in the history
* Add requirement for token counting

* Fix formatting

* Add prototype of cost calculation using tiktoken

* Retrieves the messages, calculates total tokens and calculates the cost based on some current pricing from OpenAI

* Ask the user whether to proceed, based on the estimated cost of the action
  • Loading branch information
verovaleros authored Mar 22, 2024
1 parent ca5613c commit 72cf26c
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 3 deletions.
62 changes: 59 additions & 3 deletions hermeneisGPT.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import yaml
from dotenv import dotenv_values
from openai import OpenAI
import tiktoken
from lib.utils import get_current_commit
from lib.utils import get_file_sha256
from lib.utils import get_file_content
Expand Down Expand Up @@ -76,15 +77,64 @@ def load_and_parse_config(yaml_config_path):

return config

def calculate_cost_analysis(config, args):
    """
    Estimate the OpenAI cost of translating the messages of a channel.

    Reads the messages of ``args.channel_name`` from the SQLite database
    ``args.sqlite_db``, counts the tokens of the prompt that would be
    built for each message and logs the estimated price in US dollars.
    No request is sent to OpenAI from this function.

    Parameters:
        config: mapping providing the 'model', 'system' and 'user' entries.
        args: parsed arguments providing ``max_limit``, ``sqlite_db`` and
            ``channel_name``.

    Returns:
        None. The estimate is only reported through the logger. A
        KeyboardInterrupt is absorbed and ends the estimation early.
    """
    logger.debug("Starting cost estimation")
    limit = int(args.max_limit)
    # Loop counter driving the limit check below. It starts at 1 and is
    # bumped before each message is examined; the stopping rule is kept
    # as-is so the estimate covers the same messages as before.
    count = 1
    # Number of messages that were actually tokenized; this is what gets
    # reported, since `count` is always one ahead and includes skipped rows.
    counted_messages = 0
    total_tokens = 0
    # cost in $ per 1k tokens as per 22.3.2024
    # TODO: parametrize the cost functionality so the prices can
    #       be retrieved from OpenAI or through a configuration file
    input_price = 0.0005
    output_price = 0.0015
    # Bound before the try so the cleanup never touches an unassigned name
    # when the failure happens before the DB connection is opened.
    connection = None
    try:
        logger.debug("Initializing the tokenizer")
        model_name = config['model']
        try:
            encoding = tiktoken.encoding_for_model(model_name)
        except KeyError:
            # tiktoken raises KeyError for model names it has no mapping
            # for; a rough estimate is more useful than aborting the run.
            logger.warning("No tokenizer known for model %s, falling back to cl100k_base", model_name)
            encoding = tiktoken.get_encoding("cl100k_base")

        logger.debug("Connecting to DB: %s", args.sqlite_db)
        connection, cursor = get_db_connection(args.sqlite_db)

        logger.debug("Retrieving messages for channel: %s", args.channel_name)
        channel_messages = get_channel_messages(cursor, args.channel_name)

        for message_id, message_text in channel_messages:
            count = count + 1
            logger.debug("Processing channel %s message %s (%s bytes)", args.channel_name, message_id, len(message_text))
            if len(message_text) > 1:
                logger.debug("Creating query to OpenAI")
                translate_messages = [{"role":"system", "content": config['system']},
                                      {"role":"user", "content": config['user']+message_text}]
                # Approximation: the string form of the whole message list
                # is tokenized, not each content field separately.
                tokens = len(encoding.encode(str(translate_messages)))
                logger.debug("Tokens for message %s (+prompt): %s", message_id, tokens)
                total_tokens = total_tokens + tokens
                counted_messages = counted_messages + 1
            if count >= limit:
                # Translation quota reached
                logger.debug("Translation limit reached, stopping translation")
                break
        logger.debug("Total tokens for %s messages (+prompts): %s", counted_messages, total_tokens)

        # The estimated total cost is calculated as the sum of the cost of the input messages
        # and the cost of the output messages. These prices are per 1000 tokens.
        # The output size is unknown at this point, so it is assumed to be
        # the same number of tokens as the input.
        estimated_total_cost = ((total_tokens*input_price)/1000)+((total_tokens*output_price)/1000)
        logger.info("Estimated cost of translating %s messages: $ %.2f", counted_messages, estimated_total_cost)
        connection.commit()
    except KeyboardInterrupt:
        # Interrupted by the user: stop quietly, as before, but only touch
        # the connection if it was actually opened.
        if connection is not None:
            connection.commit()
    finally:
        # Always release the DB handle, including on unexpected errors.
        if connection is not None:
            connection.close()

def translate_mode_automatic(client, config, args):
"""
Run the LLM translation in automatic mode using a
SQLite database. Translations will be written on
the same DB.
"""
limit=int(args.max_limit)
count=1
limit = int(args.max_limit)
count = 1
translation_tool_name = os.path.basename(__file__)
translation_tool_commit = get_current_commit()
translation_model = config['model']
Expand Down Expand Up @@ -303,7 +353,13 @@ def main():
return

# Run automatic mode with sqlite db
translate_mode_automatic(client, config, args)
calculate_cost_analysis(config, args)
print("Proceeding with the following actions will incur costs. Do you wish to continue? (Y/N)")
user_input = input()

if user_input == "Y" or user_input == "y":
# Run automatic mode with sqlite db
translate_mode_automatic(client, config, args)

except Exception as err:
logger.info("Exception in main()")
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
PyYAML
openai>1.10
python-dotenv
tiktoken

0 comments on commit 72cf26c

Please sign in to comment.