Merge pull request #15 from leonvanbokhorst/cleanup
Update .gitignore to include .DS_Store file
leonvanbokhorst authored Dec 26, 2024
2 parents 8157cd7 + 5f8d189 commit f632027
Showing 3 changed files with 352 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -21,6 +21,7 @@ wheels/
*.egg
.venv/
cache/
.DS_Store

# Testing
.coverage
169 changes: 169 additions & 0 deletions src/response-variability/respvar_01.py
@@ -0,0 +1,169 @@
import re
from dataclasses import dataclass
from typing import List, Dict
import numpy as np
from collections import defaultdict


@dataclass
class ResponseMetrics:
"""Metrics for analyzing response variability"""

message_length: int
sentence_count: int
avg_sentence_length: float
thinking_markers: List[str]
correction_patterns: List[str]
    response_type: str  # 'direct', 'reflective', 'mixed', or 'unknown'
messiness_score: float


class ResponseAnalyzer:
def __init__(self):
        # Patterns for different aspects of natural communication
        # (the markers below target Dutch-language responses)
        self.thinking_patterns = [
            r"\b(hmm|uhm|eh|oh)\b",
            r"\.{3,}",  # ... thinking pauses
            r"\(denkt\)",  # "(thinks)"
            r"even nadenken",  # "let me think"
        ]

        self.correction_patterns = [
            r"oh wacht",  # "oh wait"
            r"laat ik dat anders",  # "let me put that differently"
            r"ik bedoel",  # "I mean"
            r"corrigeert",  # "corrects"
            r"wat ik eigenlijk bedoel",  # "what I actually mean"
        ]

        self.reflective_markers = [
            r"ik denk",  # "I think"
            r"volgens mij",  # "in my view"
            r"misschien",  # "maybe"
            r"het lijkt erop",  # "it seems"
            r"wat als we",  # "what if we"
            r"interessant",  # "interesting"
        ]

def analyze_response(self, text: str) -> ResponseMetrics:
"""Analyseer een enkele response op verschillende metrics"""
# Basis tekstanalyse
sentences = [s.strip() for s in re.split(r"[.!?]+", text) if s.strip()]
message_length = len(text)
sentence_count = len(sentences)
avg_sentence_length = (
message_length / sentence_count if sentence_count > 0 else 0
)

        # Look for thinking patterns
thinking_markers = []
for pattern in self.thinking_patterns:
matches = re.findall(pattern, text.lower())
thinking_markers.extend(matches)

correction_patterns = [
pattern
for pattern in self.correction_patterns
if re.search(pattern, text.lower())
]

        # Determine the response type
        reflective_count = sum(
            bool(re.search(pattern, text.lower()))
            for pattern in self.reflective_markers
        )
response_type = self._determine_response_type(reflective_count, len(sentences))

        # Compute the messiness score (0-1)
messiness_score = self._calculate_messiness(
len(thinking_markers), len(correction_patterns), sentence_count, text
)

return ResponseMetrics(
message_length=message_length,
sentence_count=sentence_count,
avg_sentence_length=avg_sentence_length,
thinking_markers=thinking_markers,
correction_patterns=correction_patterns,
response_type=response_type,
messiness_score=messiness_score,
)

def analyze_conversation(self, messages: List[str]) -> Dict:
"""Analyseer een hele conversatie voor patronen"""
metrics = [self.analyze_response(msg) for msg in messages]

        # Compute conversation-level statistics
length_variation = np.std([m.message_length for m in metrics])
response_types = [m.response_type for m in metrics]
messiness_trend = [m.messiness_score for m in metrics]

return {
"individual_metrics": metrics,
"length_variation": length_variation,
"response_type_distribution": self._count_response_types(response_types),
"avg_messiness": np.mean(messiness_trend),
"messiness_trend": messiness_trend,
}

def _determine_response_type(
self, reflective_count: int, sentence_count: int
) -> str:
"""Bepaal of een response direct, reflectief of gemengd is"""
if sentence_count == 0:
return "unknown"

reflective_ratio = reflective_count / sentence_count
if reflective_ratio > 0.6:
return "reflective"
elif reflective_ratio < 0.2:
return "direct"
else:
return "mixed"

def _calculate_messiness(
self, thinking_count: int, correction_count: int, sentence_count: int, text: str
) -> float:
"""Bereken een genormaliseerde messiness score"""
if sentence_count == 0:
return 0.0

        # Base components of messiness
        thinking_ratio = min(thinking_count / sentence_count, 1.0)
        correction_ratio = min(correction_count / sentence_count, 1.0)

        # Check for informal elements (repeated punctuation, Dutch interjections)
        informal_elements = len(
            re.findall(r"[!?]{2,}|\b(?:haha|nou|tja)\b", text.lower())
        )
        informal_ratio = min(informal_elements / sentence_count, 1.0)

        # Weighted average of the factors
weights = [0.4, 0.3, 0.3] # thinking, corrections, informal
components = [thinking_ratio, correction_ratio, informal_ratio]

return sum(w * c for w, c in zip(weights, components))

def _count_response_types(self, types: List[str]) -> Dict[str, int]:
"""Tel de frequentie van verschillende response types"""
counter = defaultdict(int)
for t in types:
counter[t] += 1
return dict(counter)


# Example usage
if __name__ == "__main__":
analyzer = ResponseAnalyzer()

    # Test with a single response (Dutch sample text, matching the Dutch markers above)
test_response = """
Hmm... laat me daar even over nadenken.
Oh wacht, ik zie wat je bedoelt!
Volgens mij kunnen we dit het beste aanpakken door eerst...
*corrigeert* Wat ik eigenlijk bedoel is dat we misschien moeten beginnen met een simpelere aanpak.
"""

metrics = analyzer.analyze_response(test_response)
print(f"Message length: {metrics.message_length}")
print(f"Messiness score: {metrics.messiness_score:.2f}")
print(f"Response type: {metrics.response_type}")
print(f"Thinking markers found: {metrics.thinking_markers}")
print(f"Correction patterns found: {metrics.correction_patterns}")
182 changes: 182 additions & 0 deletions src/response-variability/respvar_02.py
@@ -0,0 +1,182 @@
from typing import List, Dict, Any
import json


class LLMAnalyzer:
def __init__(self, llm_client):
self.llm = llm_client

def analyze_response(self, text: str) -> Dict[str, Any]:
"""Analyze a single response using LLM capabilities"""

analysis_prompt = f"""
Analyze the following message for communication patterns. Consider:
1. Response Style:
- Is it direct, reflective, or mixed?
- What's the emotional tone?
- How formal/informal is the language?
2. Conversational Elements:
- Are there thinking patterns (hesitations, self-corrections)?
- How does it manage attention and topic flow?
- What linguistic markers show human-like communication?
3. Structural Analysis:
- How varied is the sentence structure?
- Are there natural breaks or topic shifts?
- How does it handle complexity?
Message to analyze: "{text}"
Provide your analysis in JSON format with the following structure:
{{
"style": {{
"type": "direct|reflective|mixed",
"formality_level": 0-1,
"emotional_tone": "description"
}},
"conversational_elements": {{
"thinking_patterns": ["list", "of", "patterns"],
"attention_management": "description",
"human_markers": ["list", "of", "markers"]
}},
"structure": {{
"complexity_score": 0-1,
"natural_flow_score": 0-1,
"topic_coherence": "description"
}},
"overall_naturalness": 0-1
}}
"""

response = self.llm.analyze(analysis_prompt)
return json.loads(response)

def analyze_conversation(self, messages: List[str]) -> Dict[str, Any]:
"""Analyze a full conversation for patterns and development"""

conversation_prompt = f"""
Analyze this conversation for interaction patterns. Consider:
1. Conversation Flow:
- How natural are the topic transitions?
- Is there a good balance of initiative between participants?
- How well is rapport maintained?
2. Response Patterns:
- How do response styles vary?
- Are there consistent patterns in timing and length?
- How are attention shifts handled?
3. Relationship Development:
- How does the conversation build rapport?
- Are there signs of mutual understanding?
- How are agreements/disagreements handled?
Messages:
{json.dumps(messages, indent=2)}
Provide analysis in JSON format with:
{{
"flow": {{
"transition_naturalness": 0-1,
"initiative_balance": 0-1,
"rapport_maintenance": "description"
}},
"patterns": {{
"style_variation": 0-1,
"rhythm_naturalness": 0-1,
"attention_management": "description"
}},
"relationship": {{
"rapport_building": ["observed", "techniques"],
"understanding_indicators": ["list", "of", "indicators"],
"conflict_management": "description"
}},
"overall_conversation_quality": 0-1
}}
"""

response = self.llm.analyze(conversation_prompt)
return json.loads(response)

def benchmark_naturalness(
self, target_message: str, comparison_corpus: List[str]
) -> Dict[str, Any]:
"""Compare a message against a corpus of known natural communication"""

benchmark_prompt = f"""
Compare this message against examples of natural human communication:
Target message: "{target_message}"
Comparison examples:
{json.dumps(comparison_corpus, indent=2)}
Analyze how the target message compares in terms of:
1. Linguistic naturalness
2. Communication patterns
3. Human-like variability
4. Authenticity markers
Provide analysis in JSON format with:
{{
"naturalness_comparison": {{
"similarity_score": 0-1,
"matching_patterns": ["list", "of", "patterns"],
"missing_elements": ["list", "of", "elements"],
"improvement_suggestions": ["list", "of", "suggestions"]
}},
"believability_assessment": {{
"overall_score": 0-1,
"strengths": ["list"],
"weaknesses": ["list"]
}}
}}
"""

response = self.llm.analyze(benchmark_prompt)
return json.loads(response)


# Mock client for example usage:
class MockLLM:
    def analyze(self, prompt: str) -> str:
        # Stand-in for a real LLM API call; always returns the same canned analysis
return """
{
"style": {
"type": "mixed",
"formality_level": 0.6,
"emotional_tone": "engaged and thoughtful"
},
"conversational_elements": {
"thinking_patterns": ["self-reflection", "hesitation"],
"attention_management": "natural topic shifts with clear connections",
"human_markers": ["informal asides", "self-correction"]
},
"structure": {
"complexity_score": 0.7,
"natural_flow_score": 0.8,
"topic_coherence": "maintains focus while allowing natural digressions"
},
"overall_naturalness": 0.75
}
"""


if __name__ == "__main__":
# Example usage
llm = MockLLM()
analyzer = LLMAnalyzer(llm)

test_message = """
Hmm, interesting point... Let me think about this for a moment.
I see what you're getting at, though I wonder if we might be
overlooking something. Oh wait, actually - this reminds me of
a similar case where... no, let me rephrase that. What I mean is...
"""

result = analyzer.analyze_response(test_message)
print(json.dumps(result, indent=2))
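

# Sketch of wiring in a real backend: LLMAnalyzer only needs an object with an
# `analyze(prompt) -> str` method that returns the model's raw text reply.
# The adapter below is a hypothetical example -- `chat_fn` stands for whatever
# client call is available; it is not a real library API.
class LLMClientAdapter:
    def __init__(self, chat_fn):
        # chat_fn: any callable mapping a prompt string to the model's reply text
        self.chat_fn = chat_fn

    def analyze(self, prompt: str) -> str:
        # The analysis prompts already request JSON, so pass the reply through as-is
        return self.chat_fn(prompt)


# Usage: analyzer = LLMAnalyzer(LLMClientAdapter(my_chat_fn))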
