Update app.py
app.py
CHANGED
@@ -1,75 +1,102 @@
+import gradio as gr
 import requests
 import json
-from typing import Dict, List
-import numpy as np
+from typing import Dict, List, Tuple

-
-"
-
-
-
-
-
-
-
-
+BRAND_EXAMPLES = [
+    "Nike - Just Do It. The power of determination.",
+    "Apple - Think Different. Innovation redefined.",
+    "McDonald's - I'm Lovin' It. Creating joy.",
+    "BMW - The Ultimate Driving Machine.",
+    "L'Oréal - Because You're Worth It."
+]
+
+def get_top_features(text: str, k: int = 5) -> Dict:
+    url = "https://www.neuronpedia.org/api/search-with-topk"
+    payload = {
+        "modelId": "gemma-2-2b",
+        "layer": "0-gemmascope-mlp-16k",
+        "sourceSet": "gemma-scope",
+        "text": text,
+        "k": k,
+        "maxDensity": 0.01,
+        "ignoreBos": True
    }

    response = requests.post(
-        url,
-        headers={"Content-Type": "application/json"},
-        json=
+        url,
+        headers={"Content-Type": "application/json"},
+        json=payload
    )
-    return response.json()
-
-def calculate_density(values: List[float], threshold: float = 0.5) -> float:
-    """Calculate activation density (% of tokens with activation > threshold)"""
-    return sum(1 for v in values if v > threshold) / len(values)
+    return response.json() if response.status_code == 200 else None

-def
-
-
+def format_output(data: Dict) -> Tuple[str, str, str]:
+    if not data:
+        return "Error analyzing text", "", ""
+
+    output = "# Neural Feature Analysis\n\n"

-    #
-
-
-
-
-    # Process features in batches
-    for start_idx in range(0, 16384, batch_size):
-        for feature_id in range(start_idx, min(start_idx + batch_size, 16384)):
-            result = get_activation_values(text, feature_id)
-            values = result.get('values', [])
-
-            # Calculate density and skip if too high
-            density = calculate_density(values)
-            if density > max_density:
-                continue
+    # Format token-feature analysis
+    for result in data['results']:
+        token = result['token']
+        if token == '<bos>':  # Skip BOS token
+            continue

-
-
-
-
-
-
-
-            })
+        features = result['top_features']
+        if features:
+            output += f"\n## Token: '{token}'\n"
+            for feat in features:
+                feat_index = feat['feature_index']
+                activation = feat['activation_value']
+                output += f"- **Feature {feat_index}**: activation = {activation:.2f}\n"

-    #
-
-
-        token_features[token] = token_features[token][:num_features]
+    # Get highest activation feature for dashboard
+    max_activation = 0
+    max_feature = None

-
+    for result in data['results']:
+        for feature in result['top_features']:
+            if feature['activation_value'] > max_activation:
+                max_activation = feature['activation_value']
+                max_feature = feature['feature_index']
+
+    if max_feature:
+        dashboard_url = f"https://www.neuronpedia.org/gemma-2-2b/0-gemmascope-mlp-16k/{max_feature}?embed=true&embedexplanation=true&embedplots=true&embedtest=true&height=300"
+        iframe = f'<iframe src="{dashboard_url}" width="100%" height="600px" frameborder="0" style="border:1px solid #eee;border-radius:8px;"></iframe>'
+        feature_label = f"Feature {max_feature} Dashboard (Highest Activation: {max_activation:.2f})"
+    else:
+        iframe = ""
+        feature_label = "No significant features found"
+
+    return output, iframe, feature_label

-
-
-
+def create_interface():
+    with gr.Blocks() as interface:
+        gr.Markdown("# Neural Feature Analyzer")
+        gr.Markdown("Analyze text using Gemma's interpretable neural features\n\nShows top 5 most activated features for each token with density < 1%")
+
+        with gr.Row():
+            with gr.Column():
+                input_text = gr.Textbox(
+                    lines=5,
+                    placeholder="Enter text to analyze...",
+                    label="Input Text"
+                )
+                analyze_btn = gr.Button("Analyze Neural Features", variant="primary")
+                gr.Examples(BRAND_EXAMPLES, inputs=input_text)
+
+            with gr.Column():
+                output_text = gr.Markdown()
+                feature_label = gr.Text(show_label=False)
+                dashboard = gr.HTML()
+
+        analyze_btn.click(
+            fn=lambda text: format_output(get_top_features(text)),
+            inputs=input_text,
+            outputs=[output_text, dashboard, feature_label]
+        )
+
+    return interface

-
-
-    for token, features in token_features.items():
-        if features:  # Only show tokens with active features
-            print(f"\nToken: {token}")
-            for feat in features:
-                print(f"  Feature {feat['feature_id']}: activation={feat['activation']:.3f}, density={feat['density']:.3%}")
+if __name__ == "__main__":
+    create_interface().launch()
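As a quick sanity check, the updated analysis path can be driven outside the Gradio UI. This is a minimal sketch, assuming the new app.py is importable as `app` and that the Neuronpedia endpoint used in get_top_features is reachable; the example text is taken from BRAND_EXAMPLES.

# Minimal sketch: exercise get_top_features/format_output without launching Gradio.
# Assumes this file is importable as `app` and network access to neuronpedia.org.
from app import get_top_features, format_output

data = get_top_features("Nike - Just Do It. The power of determination.", k=5)
if data is None:
    print("Request failed (non-200 response from Neuronpedia)")
else:
    markdown, iframe_html, label = format_output(data)
    print(label)     # e.g. "Feature <id> Dashboard (Highest Activation: ...)"
    print(markdown)  # per-token feature breakdown that the Space renders as Markdown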