Amal Jacob 2025-01-31 12:30:45 -08:00
parent 54edc865e6
commit 59b1fa70fe


@@ -1,5 +1,7 @@
 import base64
 import gzip
+import matplotlib.pyplot as plt
+import seaborn as sns
 from contextlib import contextmanager
 from dataclasses import dataclass
 from typing import Dict, Iterable, Optional, Tuple
@@ -110,7 +112,7 @@ class MultiHeadAttention(nn.Module):
             v = kv_cache[self.value]
         wv, qk = self.qkv_attention(q, k, v, mask)
         self.attn_weights = qk
         print(qk)
@@ -299,6 +301,34 @@ class Whisper(nn.Module):
         self, mel: torch.Tensor, tokens: torch.Tensor
     ) -> Dict[str, torch.Tensor]:
         return self.decoder(tokens, self.encoder(mel))
+
+    def get_attention_weights(self):
+        """Retrieve the attention weights stored in each decoder block."""
+        attn_maps = []
+        for block in self.decoder.blocks:
+            # attn_weights only exists after a forward pass, so guard with getattr
+            if getattr(block.attn, "attn_weights", None) is not None:
+                attn_maps.append(block.attn.attn_weights.detach().cpu().numpy())
+        return attn_maps  # list of per-layer arrays, each (batch, heads, n_ctx, n_ctx)
+
+    def plot_attention_on_padded(self, seq_length: int = 100):
+        """Plot attention weights, focusing on padded regions of the sequence."""
+        attn_maps = self.get_attention_weights()
+        if not attn_maps:
+            print("No attention weights found!")
+            return
+        # Average over layers, then crop to the first seq_length positions
+        avg_attn = np.mean(attn_maps, axis=0)
+        avg_attn = avg_attn[:, :, :seq_length, :seq_length]
+        plt.figure(figsize=(8, 6))
+        # Average over heads; squeeze assumes a batch size of 1
+        sns.heatmap(avg_attn.mean(axis=1).squeeze(), cmap="Blues", annot=False)
+        plt.xlabel("Input Positions")
+        plt.ylabel("Output Positions")
+        plt.title("Attention Weights on Padded Regions")
+        plt.show()

     @property
     def device(self):
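
A rough usage sketch (not part of the commit): the new helpers assume a prior forward pass, since attn_weights is only populated inside MultiHeadAttention.forward. The model-loading call and the dummy mel/token tensors below are illustrative placeholders, assuming the fork keeps the upstream whisper.load_model entry point.

import torch
import whisper  # assumes this fork is installed in place of the upstream package

model = whisper.load_model("base")         # hypothetical checkpoint choice
mel = torch.randn(1, 80, 3000)             # placeholder mel spectrogram, batch size 1
tokens = torch.randint(0, 50000, (1, 10))  # placeholder token ids, 10 positions

with torch.no_grad():
    model(mel, tokens)  # forward pass stores qk in every decoder block

attn_maps = model.get_attention_weights()
print(f"{len(attn_maps)} layers, each {attn_maps[0].shape}")
model.plot_attention_on_padded(seq_length=10)

Since the decoder self-attention here runs over the 10 supplied tokens, seq_length is capped at 10 in this sketch; with real padded inputs a larger window would show how much attention mass lands on the pad positions.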