diff --git a/pkg_src/retrain_pipelines/dataset/hf_utils/hf_utils.py b/pkg_src/retrain_pipelines/dataset/hf_utils/hf_utils.py
index d6cc3cd..e0f2bd3 100644
--- a/pkg_src/retrain_pipelines/dataset/hf_utils/hf_utils.py
+++ b/pkg_src/retrain_pipelines/dataset/hf_utils/hf_utils.py
@@ -3,7 +3,6 @@
import re
import sys
import json
-import yaml
import random
import shutil
import tempfile
@@ -434,6 +433,8 @@ def dataset_dict_to_config_str(
def push_dataset_version_to_hub(
repo_id: str,
+ version_label: str,
+ timestamp_str: str,
dataset_dict: DatasetDict,
dataset_readme_content: str,
hf_token: str = None,
@@ -446,7 +447,7 @@ def push_dataset_version_to_hub(
Custom `retrain-pipelines` README.
Uploaded dataset version superseeds entirely
- any existing version (any previously file
+ any existing version (any previous file
not anymore present is excluded from
new remote dataset snapshot).
@@ -454,6 +455,12 @@ def push_dataset_version_to_hub(
- repo_id (str):
Path to the HuggingFace dataset version
(is created if needed and if authorized).
+        - version_label (str):
+            label of the version to be published
+            on the HF hub (used in the commit message).
+        - timestamp_str (str):
+            timestamp of the version to be published
+            on the HF hub (used in the commit message).
- dataset_dict (DatasetDict):
The new version to be pushed.
- dataset_readme_content (str):
@@ -492,13 +499,10 @@ def push_dataset_version_to_hub(
"w") as f:
f.write(dataset_readme_content)
- data = yaml.safe_load(
- dataset_readme_content.split('---')[1])
- version, timestamp = data['version'], data['timestamp']
- commit_message = f"v{version} - {timestamp} - " + \
- f"retrain-pipelines v{__version__} - "+ \
- "Upload multi-table dataset "+ \
- "with README."
+ commit_message = \
+ f"v{version_label} - {timestamp_str} - " + \
+ f"retrain-pipelines v{__version__} - "+ \
+ "Upload multi-table dataset with README."
print(commit_message)
dataset_version_commit_hash = \
diff --git a/pkg_src/retrain_pipelines/model/hf_utils.py b/pkg_src/retrain_pipelines/model/hf_utils.py
new file mode 100644
index 0000000..34ba9e3
--- /dev/null
+++ b/pkg_src/retrain_pipelines/model/hf_utils.py
@@ -0,0 +1,73 @@
+
+import os
+
+from retrain_pipelines import __version__
+from retrain_pipelines.utils.hf_utils import \
+ local_repo_folder_to_hub
+
+
+def push_model_version_to_hub(
+    repo_id: str,
+    version_label: str,
+    timestamp_str: str,
+    model_dir: str,
+    model_readme_content: str,
+    hf_token: str = None,
+) -> str:
+    """
+    Writes (overwrites) the `retrain-pipelines`
+    README into the locally-serialized model
+    folder and uploads that folder to the HF hub.
+
+    Uploaded model version supersedes entirely
+    any existing version (any previous file
+    no longer present is excluded from
+    new remote model snapshot).
+
+    Params:
+        - repo_id (str):
+            Path to the HuggingFace model version
+            (is created if needed and if authorized).
+        - version_label (str):
+            label of the version to be published
+            (used in the commit message).
+        - timestamp_str (str):
+            timestamp of the version to be published
+            (used in the commit message).
+        - model_dir (str):
+            Path to the serialized
+            new version to be pushed.
+        - model_readme_content (str):
+            The full content (yaml header + body)
+            of the 'README.md' to be pushed
+            alongside the model files.
+        - hf_token (Optional, str):
+            "create on namespace" permission required.
+
+    Results:
+        - (str):
+            commit_hash on the HF hub
+            for the new model version
+    """
+    # explicit utf-8 (not the platform-default encoding)
+    with open(os.path.join(model_dir, "README.md"),
+              "w", encoding="utf-8") as f:
+        f.write(model_readme_content)
+
+    commit_message = \
+        f"v{version_label} - {timestamp_str} - " + \
+        f"retrain-pipelines v{__version__} - "+ \
+        "Upload model and tokenizer with README."
+    print(commit_message)
+
+    model_version_commit_hash = \
+        local_repo_folder_to_hub(
+            repo_id=repo_id,
+            local_folder=model_dir,
+            commit_message=commit_message,
+            repo_type="model",
+            hf_token=hf_token
+        )
+
+    return model_version_commit_hash
+
diff --git a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme_template.md b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme_template.md
index 6186857..23b6b4e 100644
--- a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme_template.md
+++ b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/dataset_readme_template.md
@@ -10,7 +10,7 @@
version: '{{ new_version_label }}'
-timestamp: '{{ commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }}'
+timestamp: {{ commit_datetime.strftime('%Y%m%d_%H%M%S') ~ '%03d'|format(commit_datetime.microsecond // 1000) ~ '_UTC' }}
pretty_name: {{ pretty_name }}
@@ -29,11 +29,14 @@ task_categories:
- reinforcement-learning
tags:
+- retrain-pipelines
- function-calling
- LLM Agent
- code
- synthetic
+thumbnail: https://cdn-avatars.huggingface.co/v1/production/uploads/651e93137b2a2e027f9e55df/96hzBved0YMjCq--s0kad.png
+
size_categories:
- {{ size_category }}
diff --git a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme.py b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme.py
index 922120e..58c8a08 100644
--- a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme.py
+++ b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme.py
@@ -61,16 +61,19 @@ def _model_readme_params(
base_model_pretty_name = get_pretty_name(
repo_id=base_model_dict["repo_id"],
+ repo_type="model",
commit_hash=base_model_commit_hash
)
base_model_arxiv_codes = get_arxiv_codes(
repo_id=base_model_dict["repo_id"],
+ repo_type="model",
commit_hash=base_model_commit_hash
)
base_model_license_label = get_license_label(
repo_id=base_model_dict["repo_id"],
+ repo_type="model",
commit_hash=base_model_commit_hash
)
if not base_model_license_label:
diff --git a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme_template.md b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme_template.md
index b3fd103..8cc4e73 100644
--- a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme_template.md
+++ b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/model_readme_template.md
@@ -2,14 +2,18 @@
# @see https://github.com/huggingface/hub-docs/blob/main/modelcard.md
# @see https://huggingface.co/docs/huggingface_hub/guides/model-cards#update-metadata
+{% set timestamp_str = commit_datetime.strftime('%Y%m%d_%H%M%S') ~ '%03d'|format(commit_datetime.microsecond // 1000) ~ '_UTC' -%}
+
version: '{{ new_version_label }}'
-timestamp: '{{ commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }}'
+timestamp: '{{ timestamp_str }}'
model_name: {{ pretty_name }}
base_model: {{ base_model_repo_id }}
library_name: peft
+datasets:
+- {{ dataset_repo_id }}
license: {{ base_model_license_label }}
@@ -20,17 +24,14 @@ task_categories:
- question-answering
tags:
+- retrain-pipelines
- function-calling
- LLM Agent
- code
- Unsloth
-
-
thumbnail: https://cdn-avatars.huggingface.co/v1/production/uploads/651e93137b2a2e027f9e55df/96hzBved0YMjCq--s0kad.png
-datasets:
-- {{ dataset_repo_id }}
# @see https://huggingface.co/docs/hub/models-widgets#enabling-a-widget
widget:
@@ -61,18 +62,21 @@ model-index:
`version {{ new_version_label }}` - `{{ commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }}`
(retraining
source-code |
+ href="https://huggingface.co/retrain-pipelines/function_caller/tree/retrain-pipelines_source-code/v{{ new_version_label }}_{{ timestamp_str }}">source-code |
pipeline-card)
+ href="https://huggingface.co/retrain-pipelines/function_caller/tree/retrain-pipelines_pipeline-card/v{{ new_version_label }}_{{ timestamp_str }}">pipeline-card)
Training dataset :
- {{ dataset_repo_id }}
-v{{ dataset_version_label }}
+- {{ dataset_repo_id }} v{{ dataset_version_label }}
({{ dataset_commit_hash[:7] }} -
{{ dataset_commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }})
Base model :
+- {{ base_model_repo_id }}
+({{ base_model_commit_hash[:7] }} -
+ {{ base_model_commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }})
{% if base_model_arxiv_codes -%}
arxiv :
{%- for base_model_arxiv_code in base_model_arxiv_codes %}
@@ -85,7 +89,13 @@ arxiv :
- !! Section on Eval goes here !!
+
+
+
+ !! Section on Eval goes here !!
+
+
+
diff --git a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/pipeline_card.py b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/pipeline_card.py
index d30acd1..3998167 100644
--- a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/pipeline_card.py
+++ b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/pipeline_card.py
@@ -79,12 +79,6 @@ def get_html(
##########################
# model training #
##########################
- if not params['buckets_dict']:
- buckets_table = NONE_HTML_STR
- else:
- buckets_table = pd.DataFrame(
- [params['buckets_dict']]).to_html(classes='wide',
- escape=False, index = False)
if params['hyperparameters_dict']:
hyperparameters_table = \
pd.DataFrame([params['hyperparameters_dict']]
@@ -277,6 +271,8 @@ def get_source(self, environment, template):
dataset_commit_datetime=\
params['dataset_commit_datetime'],
dataset_commit_hash=params['dataset_commit_hash'],
+ dataset_augmentation_rate=params['dataset_augmentation_rate'],
+ dataset_enrichment_rate=params['dataset_enrichment_rate'],
model_repo_id=params['model_repo_id'],
model_commit_hash=params['model_commit_hash'],
diff --git a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/template.html b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/template.html
index b9d2456..0161e50 100644
--- a/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/template.html
+++ b/pkg_src/retrain_pipelines/pipeline_card/mf_unsloth_func_call_litserve/template.html
@@ -578,23 +578,73 @@
+ training dataset : + | ++ {{ dataset_repo_id }} + v{{ dataset_version_label }} + + {{ dataset_commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }} + + + | ++ [HuggingFace 🤗] + | +
+ Data-augmentation rate : + +{{ (dataset_augmentation_rate * 100)|round(1) ~ '%' }}, + Data-enrichment rate : + +{{ (dataset_enrichment_rate * 100)|round(1) ~ '%' }} + | +||
+ model version : + | ++ {{ model_repo_id }} + v{{ model_version_label }} + + {{ model_commit_datetime.strftime("%Y-%m-%d %H:%M:%S UTC") }} + + + | ++ [HuggingFace 🤗] + | +