-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Refactor] Introduce EBNFScriptCreator (#133)
This PR adds a class EBNFScriptCreator to handle construction of ebnf in converters from other structures to ebnf, with auto renaming.
- Loading branch information
Showing
5 changed files
with
165 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/*! | ||
* Copyright (c) 2023 by Contributors | ||
* \file xgrammar/ebnf_script_creator.cc | ||
*/ | ||
#include "ebnf_script_creator.h" | ||
|
||
#include <algorithm> | ||
#include <string> | ||
#include <unordered_set> | ||
#include <vector> | ||
|
||
#include "support/logging.h" | ||
|
||
namespace xgrammar { | ||
|
||
/*!
 * \brief Private implementation of EBNFScriptCreator.
 *
 * Keeps the added rules in insertion order together with the set of rule names that have
 * already been handed out, so that every added rule ends up with a unique name.
 */
class EBNFScriptCreator::Impl {
 public:
  Impl() = default;

  /*!
   * \brief Store a rule under a unique name derived from rule_name_hint.
   * \return The name that was actually assigned to the rule.
   */
  std::string AddRule(const std::string& rule_name_hint, const std::string& rule_body);

  /*! \brief Render all stored rules, in insertion order, as "name ::= body" lines. */
  std::string GetScript();

  /*! \brief Body of the first stored rule named rule_name, or "" when there is none. */
  std::string GetRuleContent(const std::string& rule_name);

 private:
  /*! \brief Reserve and return an unused name: the hint itself, or the hint plus "_<i>". */
  std::string GetRuleName(const std::string& rule_name_hint);

  /*! \brief (name, body) pairs in the order they were added. */
  std::vector<std::pair<std::string, std::string>> rules_;
  /*! \brief Every rule name that has been assigned so far. */
  std::unordered_set<std::string> rule_names_;
  /*!
   * \brief Number of numeric suffixes (0 .. NAME_SUFFIX_MAXIMUM - 1) tried when the hint is taken.
   *
   * A class-level constant: it is not stored per object and does not make the class
   * non-assignable the way a non-static const data member would.
   */
  static constexpr int NAME_SUFFIX_MAXIMUM = 10000;
};
|
||
std::string EBNFScriptCreator::Impl::GetRuleName(const std::string& rule_name_hint) { | ||
if (rule_names_.find(rule_name_hint) == rule_names_.end()) { | ||
rule_names_.insert(rule_name_hint); | ||
return rule_name_hint; | ||
} | ||
for (int i = 0; i < NAME_SUFFIX_MAXIMUM; ++i) { | ||
std::string rule_name = rule_name_hint + "_" + std::to_string(i); | ||
if (rule_names_.find(rule_name) == rule_names_.end()) { | ||
rule_names_.insert(rule_name); | ||
return rule_name; | ||
} | ||
} | ||
XGRAMMAR_LOG(FATAL) << "Cannot find a unique rule name for " << rule_name_hint; | ||
} | ||
|
||
std::string EBNFScriptCreator::Impl::AddRule( | ||
const std::string& rule_name_hint, const std::string& rule_body | ||
) { | ||
std::string rule_name = GetRuleName(rule_name_hint); | ||
rules_.emplace_back(rule_name, rule_body); | ||
return rule_name; | ||
} | ||
|
||
std::string EBNFScriptCreator::Impl::GetScript() { | ||
std::string script = ""; | ||
for (const auto& rule : rules_) { | ||
script += rule.first + " ::= " + rule.second + "\n"; | ||
} | ||
return script; | ||
} | ||
|
||
std::string EBNFScriptCreator::Impl::GetRuleContent(const std::string& rule_name) { | ||
auto it = std::find_if(rules_.begin(), rules_.end(), [rule_name](const auto& rule) { | ||
return rule.first == rule_name; | ||
}); | ||
if (it != rules_.end()) { | ||
return it->second; | ||
} | ||
return ""; | ||
} | ||
|
||
/*! \brief Construct a creator that owns a freshly allocated, empty implementation object. */
EBNFScriptCreator::EBNFScriptCreator(EmptyConstructorTag)
    : pimpl_(std::make_shared<Impl>()) {
}
|
||
std::string EBNFScriptCreator::AddRule( | ||
const std::string& rule_name_hint, const std::string& rule_body | ||
) { | ||
return pimpl_->AddRule(rule_name_hint, rule_body); | ||
} | ||
|
||
/*! \brief Forward to the implementation object; returns the script built from all rules. */
std::string EBNFScriptCreator::GetScript() {
  std::string script = pimpl_->GetScript();
  return script;
}
|
||
std::string EBNFScriptCreator::GetRuleContent(const std::string& rule_name) { | ||
return pimpl_->GetRuleContent(rule_name); | ||
} | ||
|
||
} // namespace xgrammar |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/*! | ||
* Copyright (c) 2024 by Contributors | ||
* \file xgrammar/ebnf_script_creator.h | ||
* \brief The header for creating EBNF scripts. | ||
*/ | ||
|
||
#ifndef XGRAMMAR_EBNF_SCRIPT_CREATOR_H_ | ||
#define XGRAMMAR_EBNF_SCRIPT_CREATOR_H_ | ||
|
||
#include <xgrammar/object.h> | ||
|
||
#include <string> | ||
|
||
namespace xgrammar { | ||
|
||
/*! | ||
* \brief A class for creating EBNF grammar scripts. | ||
* | ||
* This class helps build EBNF (Extended Backus-Naur Form) grammar scripts | ||
* by managing rules and their content. | ||
*/ | ||
class EBNFScriptCreator { | ||
public: | ||
/*! \brief Constructor using empty constructor tag pattern */ | ||
EBNFScriptCreator(EmptyConstructorTag); | ||
|
||
/*! | ||
* \brief Adds a new rule to the grammar | ||
* \param rule_name_hint Suggested name for the rule | ||
* \param rule_body The EBNF content/definition of the rule | ||
* \return The actual name assigned to the rule | ||
*/ | ||
std::string AddRule(const std::string& rule_name_hint, const std::string& rule_body); | ||
|
||
/*! | ||
* \brief Gets the complete EBNF grammar script | ||
* \return The full EBNF grammar script as a string | ||
*/ | ||
std::string GetScript(); | ||
|
||
/*! | ||
* \brief Retrieves the content/definition of a specific rule | ||
* \param rule_name The name of the rule to look up | ||
* \return The EBNF content/definition of the specified rule | ||
*/ | ||
std::string GetRuleContent(const std::string& rule_name); | ||
|
||
XGRAMMAR_DEFINE_PIMPL_METHODS(EBNFScriptCreator); | ||
}; | ||
|
||
} // namespace xgrammar | ||
|
||
#endif // XGRAMMAR_EBNF_SCRIPT_CREATOR_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters