Skip to content

Commit

Permalink
Disable strictness for export of llama
Browse files Browse the repository at this point in the history
Strict export validates correctness, but doing so loads the tensors into
memory. Disabling strictness improves loading speed; export now runs non-strict unless the new `--strict` flag is passed.
  • Loading branch information
rsuderman committed Sep 5, 2024
1 parent 944e358 commit 6baad65
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions sharktank/sharktank/examples/export_paged_llm_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def main():
help="Include verbose logging",
action="store_true",
)
parser.add_argument(
"--strict",
help="Enables strictness during export",
action="store_true",
)

args = cli.parse(parser)
dataset = cli.get_input_dataset(args)
Expand Down Expand Up @@ -113,6 +118,7 @@ def generate_batch_prefill(bs: int):
name=f"prefill_bs{bs}",
args=(tokens, seq_lens, seq_block_ids, cache_state),
dynamic_shapes=dynamic_shapes,
strict=args.strict,
)
def _(model, tokens, seq_lens, seq_block_ids, cache_state):
sl = tokens.shape[1]
Expand Down Expand Up @@ -170,6 +176,7 @@ def generate_batch_decode(bs: int):
cache_state,
),
dynamic_shapes=dynamic_shapes,
strict=args.strict,
)
def _(
model,
Expand Down

0 comments on commit 6baad65

Please sign in to comment.