Commit 67495bc

Add get_tokenizer() and add_bos() and bump version
thomasantony committed Mar 29, 2023
1 parent fd778f0 commit 67495bc
Showing 3 changed files with 10 additions and 4 deletions.

examples/simple.py (2 changes: 1 addition & 1 deletion)

@@ -13,10 +13,10 @@ def progress_callback(progress):

 prompt = "A llama is a"
 prompt_tokens = model.tokenize(prompt, True)
+model.add_bos()
 model.update_input(prompt_tokens)
 
 model.ingest_all_pending_input()
 
 print(model.system_info())
 for i in range(20):
     model.eval()
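
For context, the edited input-preparation sequence reads as follows, with comments inferred from the binding docstrings in src/llama2.cpp below; `model` is an already-constructed LlamaInference instance (its construction is outside this diff):

    prompt = "A llama is a"
    prompt_tokens = model.tokenize(prompt, True)  # "Convert the provided text into tokens"; True is the add_bos argument
    model.add_bos()                               # new in this commit: "Add BOS token to the input"
    model.update_input(prompt_tokens)             # queue the prompt tokens as pending input
    model.ingest_all_pending_input()              # consume all pending input before eval()

    print(model.system_info())
    for i in range(20):
        model.eval()                              # compute logits for the next token
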
pyproject.toml (2 changes: 1 addition & 1 deletion)

@@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"

 [project]
 name = "llamacpp"
-version = "0.1.9"
+version = "0.1.10"
 description = "Python bindings for @ggerganov's llama.cpp"
 authors = [
     {name = "Thomas Antony", email= "mail@thomasantony.com"}

src/llama2.cpp (10 changes: 8 additions & 2 deletions)

@@ -233,7 +233,11 @@ class LlamaInference {
     {
         return llama.sample();
     }
-
+    // Add BOS token to the input
+    void add_bos()
+    {
+        llama.add_bos();
+    }
     // update input using tokens
     void update_input(const std::vector<llama_token>& tokens)
     {

@@ -341,6 +345,7 @@ PYBIND11_MODULE(llamacpp, m) {
.def("update_input", py::overload_cast<const std::string&>(&LlamaInference::update_input), "Update the input with the provided text")
.def("eval", &LlamaInference::eval, "Run the llama inference to obtain the logits and probabilities for the next token",
py::call_guard<py::gil_scoped_release>())
.def("add_bos", &LlamaInference::add_bos)
.def("tokenize", &LlamaInference::tokenize, "Convert the provided text into tokens",
py::arg("text"), py::arg("add_bos"))
.def("has_unconsumed_input", &LlamaInference::has_unconsumed_input, "Check if there is unconsumed input")

@@ -354,7 +359,8 @@ PYBIND11_MODULE(llamacpp, m) {
.def("print_timings", &LlamaInference::print_timings, "Print the timings for the last call to eval()")
.def("reset_timings", &LlamaInference::reset_timings, "Reset the timings for the last call to eval()")
.def_static("system_info", &llama_print_system_info, "Print system information")
.def("sample", &LlamaInference::sample, "Sample a token from the logits");
.def("sample", &LlamaInference::sample, "Sample a token from the logits")
.def("get_tokenizer", &LlamaInference::get_tokenizer, "Get the tokenizer");


 // /* Wrapper for Tokenizer */
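
Taken together, the two new bindings can be exercised from Python roughly as below. This is a sketch rather than code from the commit: `model` is assumed to be an already-constructed LlamaInference instance, and because the Tokenizer wrapper is still commented out above, the interface of the object returned by get_tokenizer() is not established by this diff.

    tokens = model.tokenize("A llama is a", True)  # text -> token ids; True is the add_bos argument
    model.add_bos()                                # prepend a BOS token to the model's pending input
    model.update_input(tokens)                     # queue the prompt tokens
    model.ingest_all_pending_input()               # consume them before calling eval()

    tokenizer = model.get_tokenizer()              # new in this commit; the returned object's
                                                   # Python-side methods are not bound here yet
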
