diff --git a/pkg_src/retrain_pipelines/dataset/hf_utils/hf_utils.py b/pkg_src/retrain_pipelines/dataset/hf_utils/hf_utils.py
index 2b54018..d6cc3cd 100644
--- a/pkg_src/retrain_pipelines/dataset/hf_utils/hf_utils.py
+++ b/pkg_src/retrain_pipelines/dataset/hf_utils/hf_utils.py
@@ -43,7 +43,7 @@ def get_latest_commit(
Results:
- (dict):
- 'commit_hash', 'commit_date',
+ 'commit_hash', 'commit_datetime',
'branch_name', 'files'
"""
@@ -65,15 +65,15 @@ def get_latest_commit(
if regex_pattern.search(f)
]
if matching_files:
- commit_date = commit_data["created_at"]
+ commit_datetime = commit_data["created_at"]
if (
not latest_matching_commit
- or commit_date >
- latest_matching_commit["commit_date"]
+ or commit_datetime >
+ latest_matching_commit["commit_datetime"]
):
latest_matching_commit = {
"commit_hash": commit_hash,
- "commit_date": commit_date,
+ "commit_datetime": commit_datetime,
"branch_name": \
branch_data["branch_name"],
"files": matching_files,
@@ -100,11 +100,12 @@ def get_commit(
Particular "revision" of the dataset
to scan.
- files_filter (str):
- Only consider files matching this regex pattern.
+ Only consider files matching
+ this regex pattern.
Results:
- (dict):
- 'commit_hash', 'commit_date',
+ 'commit_hash', 'commit_datetime',
'branch_name', 'files'
"""
@@ -137,7 +138,7 @@ def get_commit(
if len(matching_files) > 0:
matching_commit = {
"commit_hash": commit_hash,
- "commit_date": \
+ "commit_datetime": \
branch_commit_data["created_at"],
"branch_name": \
branch_data["branch_name"],
@@ -200,9 +201,8 @@ def get_lazy_df(
- commit_hash (str):
gets handy when no input value
is given as input.
- - commit_date (str):
- 24hrs, UTC format.
- - lazydf (pl.lazyframe.frame.LazyFrame):
+ - commit_datetime (datetime)
+ - lazydf (pl.lazyframe.frame.LazyFrame)
"""
parquet_commit = get_commit(
@@ -234,7 +234,8 @@ def get_lazy_df(
return {
"repo_id": repo_id,
"commit_hash": parquet_commit['commit_hash'],
- "commit_utc_date_str": parquet_commit['commit_date'],
+ "commit_datetime": \
+ parquet_commit['commit_datetime'],
"lazy_df": lazy_df
}
diff --git a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme.py b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme.py
index 855fdde..53a4cfa 100644
--- a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme.py
+++ b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme.py
@@ -3,6 +3,7 @@
import json
from ast import literal_eval
+from datetime import datetime
from jinja2 import Environment, FileSystemLoader
@@ -24,7 +25,7 @@ def _dataset_readme_params(
augmentation_rate: float,
enrichment_rate: float,
version_label: str,
- utc_timestamp_str: str,
+ commit_datetime: datetime,
mf_flow_name: str,
mf_run_id: str,
engine:str = "cpu"
@@ -39,12 +40,12 @@ def _dataset_readme_params(
- hf_dataset_dict (dict):
- repo_id
- commit_hash
- - commit_utc_date_str
+ - commit_datetime
- lazy_df
- hf_enrich_dataset_dict (dict)
- repo_id
- commit_hash
- - commit_utc_date_str
+ - commit_datetime
- dataset_dict (DatasetDict):
the dataset version to be pushed
to the HF hub.
@@ -58,8 +59,8 @@ def _dataset_readme_params(
- version_label (str):
typical `retrain-pipelines`
version label are of format "major.minor"
- - utc_timestamp_str (str):
- timestampt for the new dataset version.
+ - commit_datetime (datetime):
+ timestamp for the new dataset version.
- mf_flow_name (str)
- mf_run_id (str)
- engine (str):
@@ -76,13 +77,13 @@ def _dataset_readme_params(
dataset_dict["supervised_finetuning"]["validation"].num_rows
size_category = get_size_category(records_count)
- main_commit_hash, main_commit_utc_date_str = \
+ main_commit_hash, main_commit_datetime = \
get_latest_README_commit(
repo_id=hf_dataset_dict["repo_id"],
target_commit_hash=hf_dataset_dict["commit_hash"],
repo_type="dataset"
)
- enrich_commit_hash, enrich_commit_utc_date_str = \
+ enrich_commit_hash, enrich_commit_datetime = \
get_latest_README_commit(
repo_id=hf_enrich_dataset_dict["repo_id"],
target_commit_hash=\
@@ -150,7 +151,7 @@ def _build_keys(d, parent='', output_str=''):
return {
"configs": dataset_dict_to_config_str(dataset_dict),
"new_version_label": version_label,
- "utc_timestamp": utc_timestamp_str,
+ "commit_datetime": commit_datetime,
"pretty_name": pretty_name,
@@ -162,10 +163,10 @@ def _build_keys(d, parent='', output_str=''):
"main_commit_hash": main_commit_hash,
"enrich_commit_hash": enrich_commit_hash,
- "main_commit_utc_date_str": \
- main_commit_utc_date_str,
- "enrich_commit_utc_date_str": \
- enrich_commit_utc_date_str,
+ "main_commit_datetime": \
+ main_commit_datetime,
+ "enrich_commit_datetime": \
+ enrich_commit_datetime,
"main_pretty_name": main_pretty_name,
"enrich_pretty_name": enrich_pretty_name,
@@ -198,7 +199,7 @@ def get_dataset_readme_content(
augmentation_rate: float,
enrichment_rate: float,
version_label: str,
- utc_timestamp_str: str,
+ commit_datetime: datetime,
mf_flow_name: str,
mf_run_id: str,
engine:str = "cpu"
@@ -218,12 +219,12 @@ def get_dataset_readme_content(
- hf_dataset_dict (dict):
- repo_id
- commit_hash
- - commit_utc_date_str
+ - commit_datetime
- lazy_df
- hf_enrich_dataset_dict (dict)
- repo_id
- commit_hash
- - commit_utc_date_str
+ - commit_datetime
- dataset_dict (DatasetDict):
the dataset version to be pushed
to the HF hub.
@@ -237,8 +238,8 @@ def get_dataset_readme_content(
- version_label (str):
typical `retrain-pipelines`
version label are of format "major.minor"
- - utc_timestamp_str (str):
- timestampt for the new dataset version.
+ - commit_datetime (datetime):
+ timestamp for the new dataset version.
- mf_flow_name (str)
- mf_run_id (str)
- engine (str):
@@ -255,7 +256,7 @@ def get_dataset_readme_content(
augmentation_rate=augmentation_rate,
enrichment_rate=enrichment_rate,
version_label=version_label,
- utc_timestamp_str=utc_timestamp_str,
+ commit_datetime=commit_datetime,
mf_flow_name=mf_flow_name,
mf_run_id=mf_run_id,
engine=engine
diff --git a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme_template.md b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme_template.md
index a1b9500..6186857 100644
--- a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme_template.md
+++ b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme_template.md
@@ -10,7 +10,7 @@
version: '{{ new_version_label }}'
-timestamp: {{ utc_timestamp }}
+timestamp: '{{ commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }}'
pretty_name: {{ pretty_name }}
@@ -18,7 +18,7 @@ source_datasets:
- {{ main_repo_id }}
- {{ enrich_repo_id }}
-license: {{license_label}}
+license: {{ license_label }}
language:
- en
@@ -41,14 +41,15 @@ size_categories:
# {{ pretty_name }}
-`version {{ new_version_label }}` - `{{ utc_timestamp }}`
+`version {{ new_version_label }}` - `{{ commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }}`
Source datasets :
- main :
- {{ main_pretty_name }}
`{{ main_repo_id }}`
({{ main_commit_hash[:7] }} - {{ main_commit_utc_date_str }})
+ target="_blank">{{ main_commit_hash[:7] }} -
+ {{ main_commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }})
license :
{% if main_license_label -%}
@@ -68,7 +69,8 @@ Source datasets :
- {{ enrich_pretty_name }}
`{{ enrich_repo_id }}`
({{ enrich_commit_hash[:7] }} - {{ enrich_commit_utc_date_str }})
+ target="_blank">{{ enrich_commit_hash[:7] }} -
+ {{ enrich_commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }})
license :
{% if enrich_license_label -%}
diff --git a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme.py b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme.py
index db8bd6e..922120e 100644
--- a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme.py
+++ b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme.py
@@ -3,6 +3,7 @@
import json
from ast import literal_eval
+from datetime import datetime
from jinja2 import Environment, FileSystemLoader
@@ -20,7 +21,7 @@ def _model_readme_params(
base_model_dict: dict,
training_dataset_dict: dict,
version_label: str,
- utc_timestamp_str: str,
+ commit_datetime: datetime,
mf_flow_name: str,
mf_run_id: str,
) -> dict:
@@ -34,14 +35,14 @@ def _model_readme_params(
- base_model_dict (dict)
- training_dataset_dict (dict):
- repo_id
+ - version_label
- commit_hash
- - commit_utc_date_str
- -
+ - commit_datetime
- version_label (str):
typical `retrain-pipelines`
version label are of format "major.minor"
- - utc_timestamp_str (str):
- timestampt for the new dataset version.
+ - commit_datetime (datetime):
+ timestamp for the new model version.
- mf_flow_name (str)
- mf_run_id (str)
@@ -51,7 +52,7 @@ def _model_readme_params(
pretty_name = "retrain-pipelines Function Caller"
- base_model_commit_hash, base_model_commit_utc_date_str = \
+ base_model_commit_hash, base_model_commit_datetime = \
get_latest_README_commit(
repo_id=base_model_dict["repo_id"],
target_commit_hash=base_model_dict["commit_hash"],
@@ -77,7 +78,7 @@ def _model_readme_params(
return {
"new_version_label": version_label,
- "utc_timestamp": utc_timestamp_str,
+ "commit_datetime": commit_datetime,
"pretty_name": pretty_name,
@@ -87,13 +88,13 @@ def _model_readme_params(
training_dataset_dict["version_label"],
"dataset_commit_hash": \
training_dataset_dict["commit_hash"],
- "dataset_utc_timestamp_str": \
- training_dataset_dict["utc_timestamp_str"],
+ "dataset_commit_datetime": \
+ training_dataset_dict["commit_datetime"],
"base_model_repo_id": base_model_dict["repo_id"],
"base_model_pretty_name": base_model_pretty_name,
"base_model_commit_hash": base_model_commit_hash,
- "base_model_commit_utc_date_str": base_model_commit_utc_date_str,
+ "base_model_commit_datetime": base_model_commit_datetime,
"base_model_arxiv_codes": base_model_arxiv_codes,
"base_model_license_label": base_model_license_label,
@@ -111,7 +112,7 @@ def get_model_readme_content(
training_dataset_dict: dict,
version_label: str,
- utc_timestamp_str: str,
+ commit_datetime: datetime,
mf_flow_name: str,
mf_run_id: str,
@@ -134,12 +135,12 @@ def get_model_readme_content(
- training_dataset_dict (dict)
- repo_id
- commit_hash
- - commit_utc_date_str
+ - commit_datetime
- version_label (str):
typical `retrain-pipelines`
version label are of format "major.minor"
- - utc_timestamp_str (str):
- timestampt for the new dataset version.
+ - commit_datetime (datetime):
+ timestamp for the new model version.
- mf_flow_name (str)
- mf_run_id (str)
@@ -151,7 +152,7 @@ def get_model_readme_content(
base_model_dict=base_model_dict,
training_dataset_dict=training_dataset_dict,
version_label=version_label,
- utc_timestamp_str=utc_timestamp_str,
+ commit_datetime=commit_datetime,
mf_flow_name=mf_flow_name,
mf_run_id=mf_run_id
)
diff --git a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme_template.md b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme_template.md
index 1ca94ef..b3fd103 100644
--- a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme_template.md
+++ b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme_template.md
@@ -4,7 +4,7 @@
version: '{{ new_version_label }}'
-timestamp: {{ utc_timestamp }}
+timestamp: '{{ commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }}'
model_name: {{ pretty_name }}
@@ -56,17 +56,21 @@ model-index:
---
- !!! TEMPLATE UNDER CONSTRUCTION !!!
-
# {{ pretty_name }}
-`version {{ new_version_label }}` - `{{ utc_timestamp }}`
+`version {{ new_version_label }}` - `{{ commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }}`
+(retraining
+source-code |
+pipeline-card)
-Training dataset :
+Training dataset :
{{ dataset_repo_id }}
v{{ dataset_version_label }}
({{ dataset_commit_hash[:7] }} - {{ dataset_utc_timestamp_str }})
+ target="_blank">{{ dataset_commit_hash[:7] }} -
+ {{ dataset_commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }})
Base model :
{% if base_model_arxiv_codes -%}
@@ -77,14 +81,14 @@ arxiv :
{% endfor -%}
{% endif -%}
-Source code :
-https://huggingface.co/retrain-pipelines/function_caller/tree/retrain-pipelines_source-code/{{ new_version_label }}
-
-Pipeline-card :
-https://huggingface.co/retrain-pipelines/function_caller/tree/retrain-pipelines_pipeline-card/{{ new_version_label }}
-
-
+
+
+
+ !! Section on Eval goes here !!
+
+
+