Skip to content

Commit

Permalink
Disable strictness for export of llama
Browse files Browse the repository at this point in the history
Strict mode validates correctness during export, but doing so loads the
tensors into memory. Disabling it improves loading speed.
  • Loading branch information
rsuderman committed Sep 5, 2024
1 parent 547ced4 commit 9213468
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions sharktank/sharktank/examples/export_paged_llm_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ def main():
help="Include verbose logging",
action="store_true",
)
parser.add_argument(
"--strict",
help="Enables strictness during export",
action="store_true",
)

args = cli.parse(parser)
dataset = cli.get_input_dataset(args)
Expand Down Expand Up @@ -113,6 +118,7 @@ def generate_batch_prefill(bs: int):
name=f"prefill_bs{bs}",
args=(tokens, seq_lens, seq_block_ids, cache_state),
dynamic_shapes=dynamic_shapes,
strict=args.strict,
)
def _(model, tokens, seq_lens, seq_block_ids, cache_state):
sl = tokens.shape[1]
Expand Down Expand Up @@ -170,6 +176,7 @@ def generate_batch_decode(bs: int):
cache_state,
),
dynamic_shapes=dynamic_shapes,
strict=args.strict,
)
def _(
model,
Expand Down

0 comments on commit 9213468

Please sign in to comment.