Skip to content

Commit

Permalink
NPU Adaption for Sanna (huggingface#10409)
Browse files Browse the repository at this point in the history
* NPU Adaption for Sanna


---------

Co-authored-by: J石页 <jiangshuo9@h-partners.com>
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
  • Loading branch information
3 people authored Jan 24, 2025
1 parent 87252d8 commit 07860f9
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
15 changes: 13 additions & 2 deletions examples/dreambooth/train_dreambooth_lora_sana.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
is_wandb_available,
)
from diffusers.utils.hub_utils import load_or_create_model_card, populate_model_card
from diffusers.utils.import_utils import is_torch_npu_available
from diffusers.utils.torch_utils import is_compiled_module


Expand All @@ -74,6 +75,9 @@

logger = get_logger(__name__)

# Runs once at module import: when the torch_npu backend is available, switch
# off the NPU "internal format" option before any training code executes.
# NOTE(review): presumably this keeps tensors in standard layouts instead of
# NPU-private ones -- confirm against the torch_npu documentation.
if is_torch_npu_available():
    torch.npu.config.allow_internal_format = False


def save_model_card(
repo_id: str,
Expand Down Expand Up @@ -601,6 +605,7 @@ def parse_args(input_args=None):
)
parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank")
parser.add_argument("--enable_vae_tiling", action="store_true", help="Enabla vae tiling in log validation")
parser.add_argument("--enable_npu_flash_attention", action="store_true", help="Enabla Flash Attention for NPU")

if input_args is not None:
args = parser.parse_args(input_args)
Expand Down Expand Up @@ -924,8 +929,7 @@ def main(args):
image.save(image_filename)

del pipeline
if torch.cuda.is_available():
torch.cuda.empty_cache()
free_memory()

# Handle the repository creation
if accelerator.is_main_process:
Expand Down Expand Up @@ -988,6 +992,13 @@ def main(args):
# because Gemma2 is particularly suited for bfloat16.
text_encoder.to(dtype=torch.bfloat16)

if args.enable_npu_flash_attention:
if is_torch_npu_available():
logger.info("npu flash attention enabled.")
transformer.enable_npu_flash_attention()
else:
raise ValueError("npu flash attention requires torch_npu extensions and is supported only on npu device ")

# Initialize a text encoding pipeline and keep it to CPU for now.
text_encoding_pipeline = SanaPipeline.from_pretrained(
args.pretrained_model_name_or_path,
Expand Down
5 changes: 5 additions & 0 deletions src/diffusers/models/attention_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3154,6 +3154,11 @@ def __call__(
# scaled_dot_product_attention expects attention_mask shape to be
# (batch, heads, source_length, target_length)
attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1])
attention_mask = attention_mask.repeat(1, 1, hidden_states.shape[1], 1)
if attention_mask.dtype == torch.bool:
attention_mask = torch.logical_not(attention_mask.bool())
else:
attention_mask = attention_mask.bool()

if attn.group_norm is not None:
hidden_states = attn.group_norm(hidden_states.transpose(1, 2)).transpose(1, 2)
Expand Down

0 comments on commit 07860f9

Please sign in to comment.