Spaces: WordLift / brand-llms

Running

App Files Community

cyberandy committed on Nov 25, 2024

Commit

9186441

verified ·

1 Parent(s): deaf693

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -50

app.py CHANGED Viewed

@@ -48,40 +48,31 @@ MARKETING_FEATURES = [
         interpretation_guide="High activation suggests strong SEO potential",
         layer=20
     ),
-    # Add more relevant features as we discover them
 ]
 class MarketingAnalyzer:
     """Main class for analyzing marketing content using Gemma Scope"""
-    def __init__(self, model_size: str = "2b"):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self._initialize_model(model_size)
         self._load_saes()
-    def _initialize_model(self, model_size: str):
         """Initialize Gemma model and tokenizer"""
         try:
-            import os
-            model_name = f"google/gemma-{model_size}"
-            # Access HF token from environment variable
-            hf_token = os.environ.get('HF_TOKEN')
-            if not hf_token:
-                logger.warning("HF_TOKEN not found in environment variables")
-            # Initialize model and tokenizer with token
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
-                token=hf_token,
-                device_map='auto'  # Automatically handle device placement
-            )
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                model_name,
-                token=hf_token
             )
-            self.model.eval()  # Set to evaluation mode
             logger.info(f"Initialized model: {model_name}")
         except Exception as e:
@@ -100,7 +91,7 @@ class MarketingAnalyzer:
                 )
                 params = np.load(path)
                 self.saes[feature.feature_id] = {
-                    'params': {k: torch.from_numpy(v).cuda() for k, v in params.items()},
                     'feature': feature
                 }
                 logger.info(f"Loaded SAE for feature {feature.feature_id}")
@@ -135,14 +126,23 @@ class MarketingAnalyzer:
                     feature.threshold
                 )
                 # Record results
                 feature_result = {
                     'name': feature.name,
                     'category': feature.category,
-                    'activation_score': float(activations.mean()),
-                    'max_activation': float(activations.max()),
                     'interpretation': self._interpret_activation(
-                        activations,
                         feature
                     )
                 }
@@ -177,14 +177,13 @@ class MarketingAnalyzer:
     def _interpret_activation(
         self,
-        activations: torch.Tensor,
         feature: MarketingFeature
     ) -> str:
         """Interpret activation patterns for a feature"""
-        mean_activation = float(activations.mean())
-        if mean_activation > 0.8:
             return f"Very strong presence of {feature.name.lower()}"
-        elif mean_activation > 0.5:
             return f"Moderate presence of {feature.name.lower()}"
         else:
             return f"Limited presence of {feature.name.lower()}"
@@ -193,21 +192,28 @@ class MarketingAnalyzer:
         """Generate content recommendations based on analysis"""
         recommendations = []
-        # Analyze technical complexity
-        tech_score = np.mean([
-            f['activation_score'] for f in results['features'].values()
-            if f['category'] == 'technical'
-        ])
-        if tech_score > 0.8:
-            recommendations.append(
-                "Consider simplifying technical language for broader audience"
-            )
-        elif tech_score < 0.3:
-            recommendations.append(
-                "Could benefit from more specific technical details"
-            )
-        # Add more recommendation logic as needed
         return recommendations
 def create_gradio_interface():
@@ -216,7 +222,6 @@ def create_gradio_interface():
         analyzer = MarketingAnalyzer()
     except Exception as e:
         logger.error(f"Failed to initialize analyzer: {str(e)}")
-        # Provide a more graceful fallback or error message in the interface
         return gr.Interface(
             fn=lambda x: "Error: Failed to initialize model. Please check authentication.",
             inputs=gr.Textbox(),
@@ -234,8 +239,9 @@ def create_gradio_interface():
         # Overall category scores
         output += "Category Scores:\n"
         for category, features in results['categories'].items():
-            avg_score = np.mean([f['activation_score'] for f in features])
-            output += f"{category.title()}: {avg_score:.2f}\n"
         # Feature details
         output += "\nFeature Details:\n"
@@ -245,13 +251,15 @@ def create_gradio_interface():
             output += f"Interpretation: {feature['interpretation']}\n"
         # Recommendations
-        output += "\nRecommendations:\n"
-        for rec in results['recommendations']:
-            output += f"- {rec}\n"
         return output
-    iface = gr.Interface(
         fn=analyze,
         inputs=gr.Textbox(
             lines=5,
@@ -264,10 +272,16 @@ def create_gradio_interface():
             ["WordLift is an AI-powered SEO tool"],
             ["Our advanced machine learning algorithms optimize your content"],
             ["Simple and effective website optimization"]
-        ]
     )
-    return iface
 if __name__ == "__main__":
     iface = create_gradio_interface()

         interpretation_guide="High activation suggests strong SEO potential",
         layer=20
     ),
 ]
 class MarketingAnalyzer:
     """Main class for analyzing marketing content using Gemma Scope"""
+    def __init__(self):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        # Store model size as instance variable
+        self.model_size = "2b"
+        self._initialize_model()
         self._load_saes()
+    def _initialize_model(self):
         """Initialize Gemma model and tokenizer"""
         try:
+            model_name = f"google/gemma-{self.model_size}"
+            # Initialize model and tokenizer with token from environment
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
+                device_map='auto'
             )
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            self.model.eval()
             logger.info(f"Initialized model: {model_name}")
         except Exception as e:
                 )
                 params = np.load(path)
                 self.saes[feature.feature_id] = {
+                    'params': {k: torch.from_numpy(v).to(self.device) for k, v in params.items()},
                     'feature': feature
                 }
                 logger.info(f"Loaded SAE for feature {feature.feature_id}")
                     feature.threshold
                 )
+                # Skip BOS token and handle empty activations
+                activations = activations[:, 1:]  # Skip BOS token
+                if activations.numel() > 0:
+                    mean_activation = float(activations.mean())
+                    max_activation = float(activations.max())
+                else:
+                    mean_activation = 0.0
+                    max_activation = 0.0
                 # Record results
                 feature_result = {
                     'name': feature.name,
                     'category': feature.category,
+                    'activation_score': mean_activation,
+                    'max_activation': max_activation,
                     'interpretation': self._interpret_activation(
+                        mean_activation,
                         feature
                     )
                 }
     def _interpret_activation(
         self,
+        activation: float,
         feature: MarketingFeature
     ) -> str:
         """Interpret activation patterns for a feature"""
+        if activation > 0.8:
             return f"Very strong presence of {feature.name.lower()}"
+        elif activation > 0.5:
             return f"Moderate presence of {feature.name.lower()}"
         else:
             return f"Limited presence of {feature.name.lower()}"
         """Generate content recommendations based on analysis"""
         recommendations = []
+        try:
+            # Get technical features
+            tech_features = [
+                f for f in results['features'].values()
+                if f['category'] == 'technical'
+            ]
+            # Calculate average technical score if we have features
+            if tech_features:
+                tech_score = np.mean([f['activation_score'] for f in tech_features])
+                if tech_score > 0.8:
+                    recommendations.append(
+                        "Consider simplifying technical language for broader audience"
+                    )
+                elif tech_score < 0.3:
+                    recommendations.append(
+                        "Could benefit from more specific technical details"
+                    )
+        except Exception as e:
+            logger.error(f"Error generating recommendations: {str(e)}")
         return recommendations
 def create_gradio_interface():
         analyzer = MarketingAnalyzer()
     except Exception as e:
         logger.error(f"Failed to initialize analyzer: {str(e)}")
         return gr.Interface(
             fn=lambda x: "Error: Failed to initialize model. Please check authentication.",
             inputs=gr.Textbox(),
         # Overall category scores
         output += "Category Scores:\n"
         for category, features in results['categories'].items():
+            if features:  # Check if we have features for this category
+                avg_score = np.mean([f['activation_score'] for f in features])
+                output += f"{category.title()}: {avg_score:.2f}\n"
         # Feature details
         output += "\nFeature Details:\n"
             output += f"Interpretation: {feature['interpretation']}\n"
         # Recommendations
+        if results['recommendations']:
+            output += "\nRecommendations:\n"
+            for rec in results['recommendations']:
+                output += f"- {rec}\n"
         return output
+    # Create interface with custom styling
+    interface = gr.Interface(
         fn=analyze,
         inputs=gr.Textbox(
             lines=5,
             ["WordLift is an AI-powered SEO tool"],
             ["Our advanced machine learning algorithms optimize your content"],
             ["Simple and effective website optimization"]
+        ],
+        theme=gr.themes.Default().set(
+            button_primary_background_color="#3452db",
+            button_primary_text_color="white",
+            button_secondary_background_color="#f5f5f5",
+            button_secondary_text_color="#3452db",
+        )
     )
+    return interface
 if __name__ == "__main__":
     iface = create_gradio_interface()