Commit
Merge pull request #15 from leonvanbokhorst/cleanup
Update .gitignore to include .DS_Store file
Showing 3 changed files with 352 additions and 0 deletions.
.gitignore
@@ -21,6 +21,7 @@ wheels/
 *.egg
 .venv/
 cache/
+.DS_Store

 # Testing
 .coverage
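Note that .gitignore only affects untracked files: any .DS_Store files committed before this change stay tracked and would also need to be removed from the index (for example with git rm --cached).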
@@ -0,0 +1,169 @@
import re
from dataclasses import dataclass
from typing import List, Dict
import numpy as np
from collections import defaultdict


@dataclass
class ResponseMetrics:
    """Metrics for analyzing response variability"""

    message_length: int
    sentence_count: int
    avg_sentence_length: float
    thinking_markers: List[str]
    correction_patterns: List[str]
    response_type: str  # 'direct', 'reflective', or 'mixed'
    messiness_score: float


class ResponseAnalyzer:
    def __init__(self):
        # Patterns for different aspects of natural communication
        self.thinking_patterns = [
            r"\b(hmm|uhm|eh|oh)\b",
            r"\.{3,}",  # ... thinking pauses
            r"\(denkt\)",
            r"even nadenken",
        ]

        self.correction_patterns = [
            r"oh wacht",
            r"laat ik dat anders",
            r"ik bedoel",
            r"corrigeert",
            r"wat ik eigenlijk bedoel",
        ]

        self.reflective_markers = [
            r"ik denk",
            r"volgens mij",
            r"misschien",
            r"het lijkt erop",
            r"wat als we",
            r"interessant",
        ]
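        # Note: the marker patterns above match Dutch text ("oh wacht",
        # "volgens mij", ...). A minimal sketch of extending them for English
        # input (these additions are illustrative, not part of this commit):
        # self.thinking_patterns += [r"\blet me think\b", r"\(thinks\)"]
        # self.correction_patterns += [r"\boh wait\b", r"\bi mean\b"]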

    def analyze_response(self, text: str) -> ResponseMetrics:
        """Analyze a single response on the various metrics"""
        # Basic text analysis
        sentences = [s.strip() for s in re.split(r"[.!?]+", text) if s.strip()]
        message_length = len(text)
        sentence_count = len(sentences)
        avg_sentence_length = (
            message_length / sentence_count if sentence_count > 0 else 0
        )

        # Look for thinking patterns
        thinking_markers = []
        for pattern in self.thinking_patterns:
            matches = re.findall(pattern, text.lower())
            thinking_markers.extend(matches)

        correction_patterns = [
            pattern
            for pattern in self.correction_patterns
            if re.search(pattern, text.lower())
        ]

        # Determine the response type
        reflective_count = sum(
            bool(re.search(pattern, text.lower()))
            for pattern in self.reflective_markers
        )
        response_type = self._determine_response_type(reflective_count, len(sentences))

        # Calculate the messiness score (0-1)
        messiness_score = self._calculate_messiness(
            len(thinking_markers), len(correction_patterns), sentence_count, text
        )

        return ResponseMetrics(
            message_length=message_length,
            sentence_count=sentence_count,
            avg_sentence_length=avg_sentence_length,
            thinking_markers=thinking_markers,
            correction_patterns=correction_patterns,
            response_type=response_type,
            messiness_score=messiness_score,
        )

    def analyze_conversation(self, messages: List[str]) -> Dict:
        """Analyze a whole conversation for patterns"""
        metrics = [self.analyze_response(msg) for msg in messages]

        # Calculate conversation-level statistics
        length_variation = np.std([m.message_length for m in metrics])
        response_types = [m.response_type for m in metrics]
        messiness_trend = [m.messiness_score for m in metrics]

        return {
            "individual_metrics": metrics,
            "length_variation": length_variation,
            "response_type_distribution": self._count_response_types(response_types),
            "avg_messiness": np.mean(messiness_trend),
            "messiness_trend": messiness_trend,
        }

    def _determine_response_type(
        self, reflective_count: int, sentence_count: int
    ) -> str:
        """Determine whether a response is direct, reflective, or mixed"""
        if sentence_count == 0:
            return "unknown"

        reflective_ratio = reflective_count / sentence_count
        if reflective_ratio > 0.6:
            return "reflective"
        elif reflective_ratio < 0.2:
            return "direct"
        else:
            return "mixed"

    def _calculate_messiness(
        self, thinking_count: int, correction_count: int, sentence_count: int, text: str
    ) -> float:
        """Calculate a normalized messiness score"""
        if sentence_count == 0:
            return 0.0

        # Basic components of messiness
        thinking_ratio = min(thinking_count / sentence_count, 1.0)
        correction_ratio = min(correction_count / sentence_count, 1.0)

        # Check for informal elements
        informal_elements = len(
            re.findall(r"[!?]{2,}|\b(haha|nou|tja)\b", text.lower())
        )
        informal_ratio = min(informal_elements / sentence_count, 1.0)

        # Weighted average of the different factors
        weights = [0.4, 0.3, 0.3]  # thinking, corrections, informal
        components = [thinking_ratio, correction_ratio, informal_ratio]

        return sum(w * c for w, c in zip(weights, components))
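        # Worked example (illustrative numbers, not from the commit): with
        # 2 thinking markers, 1 correction and 1 informal element over
        # 4 sentences, the ratios are 0.5, 0.25 and 0.25, so the score is
        # 0.4 * 0.5 + 0.3 * 0.25 + 0.3 * 0.25 = 0.35.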

    def _count_response_types(self, types: List[str]) -> Dict[str, int]:
        """Count the frequency of the different response types"""
        counter = defaultdict(int)
        for t in types:
            counter[t] += 1
        return dict(counter)


# Example usage
if __name__ == "__main__":
    analyzer = ResponseAnalyzer()

    # Test with a single response (the sample text is deliberately Dutch,
    # matching the patterns above)
    test_response = """
    Hmm... laat me daar even over nadenken.
    Oh wacht, ik zie wat je bedoelt!
    Volgens mij kunnen we dit het beste aanpakken door eerst...
    *corrigeert* Wat ik eigenlijk bedoel is dat we misschien moeten beginnen met een simpelere aanpak.
    """

    metrics = analyzer.analyze_response(test_response)
    print(f"Message length: {metrics.message_length}")
    print(f"Messiness score: {metrics.messiness_score:.2f}")
    print(f"Response type: {metrics.response_type}")
    print(f"Thinking markers found: {metrics.thinking_markers}")
    print(f"Correction patterns found: {metrics.correction_patterns}")
@@ -0,0 +1,182 @@
from typing import List, Dict, Any
import json


class LLMAnalyzer:
    def __init__(self, llm_client):
        self.llm = llm_client

    def analyze_response(self, text: str) -> Dict[str, Any]:
        """Analyze a single response using LLM capabilities"""

        analysis_prompt = f"""
        Analyze the following message for communication patterns. Consider:
        1. Response Style:
           - Is it direct, reflective, or mixed?
           - What's the emotional tone?
           - How formal/informal is the language?
        2. Conversational Elements:
           - Are there thinking patterns (hesitations, self-corrections)?
           - How does it manage attention and topic flow?
           - What linguistic markers show human-like communication?
        3. Structural Analysis:
           - How varied is the sentence structure?
           - Are there natural breaks or topic shifts?
           - How does it handle complexity?

        Message to analyze: "{text}"

        Provide your analysis in JSON format with the following structure:
        {{
            "style": {{
                "type": "direct|reflective|mixed",
                "formality_level": 0-1,
                "emotional_tone": "description"
            }},
            "conversational_elements": {{
                "thinking_patterns": ["list", "of", "patterns"],
                "attention_management": "description",
                "human_markers": ["list", "of", "markers"]
            }},
            "structure": {{
                "complexity_score": 0-1,
                "natural_flow_score": 0-1,
                "topic_coherence": "description"
            }},
            "overall_naturalness": 0-1
        }}
        """

        response = self.llm.analyze(analysis_prompt)
        return json.loads(response)

    def analyze_conversation(self, messages: List[str]) -> Dict[str, Any]:
        """Analyze a full conversation for patterns and development"""

        conversation_prompt = f"""
        Analyze this conversation for interaction patterns. Consider:
        1. Conversation Flow:
           - How natural are the topic transitions?
           - Is there a good balance of initiative between participants?
           - How well is rapport maintained?
        2. Response Patterns:
           - How do response styles vary?
           - Are there consistent patterns in timing and length?
           - How are attention shifts handled?
        3. Relationship Development:
           - How does the conversation build rapport?
           - Are there signs of mutual understanding?
           - How are agreements/disagreements handled?

        Messages:
        {json.dumps(messages, indent=2)}

        Provide analysis in JSON format with:
        {{
            "flow": {{
                "transition_naturalness": 0-1,
                "initiative_balance": 0-1,
                "rapport_maintenance": "description"
            }},
            "patterns": {{
                "style_variation": 0-1,
                "rhythm_naturalness": 0-1,
                "attention_management": "description"
            }},
            "relationship": {{
                "rapport_building": ["observed", "techniques"],
                "understanding_indicators": ["list", "of", "indicators"],
                "conflict_management": "description"
            }},
            "overall_conversation_quality": 0-1
        }}
        """

        response = self.llm.analyze(conversation_prompt)
        return json.loads(response)

    def benchmark_naturalness(
        self, target_message: str, comparison_corpus: List[str]
    ) -> Dict[str, Any]:
        """Compare a message against a corpus of known natural communication"""

        benchmark_prompt = f"""
        Compare this message against examples of natural human communication:

        Target message: "{target_message}"

        Comparison examples:
        {json.dumps(comparison_corpus, indent=2)}

        Analyze how the target message compares in terms of:
        1. Linguistic naturalness
        2. Communication patterns
        3. Human-like variability
        4. Authenticity markers

        Provide analysis in JSON format with:
        {{
            "naturalness_comparison": {{
                "similarity_score": 0-1,
                "matching_patterns": ["list", "of", "patterns"],
                "missing_elements": ["list", "of", "elements"],
                "improvement_suggestions": ["list", "of", "suggestions"]
            }},
            "believability_assessment": {{
                "overall_score": 0-1,
                "strengths": ["list"],
                "weaknesses": ["list"]
            }}
        }}
        """

        response = self.llm.analyze(benchmark_prompt)
        return json.loads(response)
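The json.loads calls above assume the model returns bare JSON; real models often wrap the object in prose or code fences. A minimal defensive-parsing sketch (the extract_json helper is illustrative, not part of this commit):

import json
import re

def extract_json(raw: str) -> dict:
    """Pull the first '{' through the last '}' out of an LLM reply, then parse."""
    match = re.search(r"\{.*\}", raw, re.DOTALL)
    if match is None:
        raise ValueError("No JSON object found in LLM response")
    return json.loads(match.group(0))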

# Example usage:
class MockLLM:
    def analyze(self, prompt: str) -> str:
        # This would be replaced with actual LLM API calls
        return """
        {
            "style": {
                "type": "mixed",
                "formality_level": 0.6,
                "emotional_tone": "engaged and thoughtful"
            },
            "conversational_elements": {
                "thinking_patterns": ["self-reflection", "hesitation"],
                "attention_management": "natural topic shifts with clear connections",
                "human_markers": ["informal asides", "self-correction"]
            },
            "structure": {
                "complexity_score": 0.7,
                "natural_flow_score": 0.8,
                "topic_coherence": "maintains focus while allowing natural digressions"
            },
            "overall_naturalness": 0.75
        }
        """


if __name__ == "__main__":
    # Example usage
    llm = MockLLM()
    analyzer = LLMAnalyzer(llm)

    test_message = """
    Hmm, interesting point... Let me think about this for a moment.
    I see what you're getting at, though I wonder if we might be
    overlooking something. Oh wait, actually - this reminds me of
    a similar case where... no, let me rephrase that. What I mean is...
    """

    result = analyzer.analyze_response(test_message)
    print(json.dumps(result, indent=2))
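Wiring in a real model only requires an object that exposes analyze(prompt) -> str, matching the llm_client parameter above. One possible adapter, assuming the openai Python package; the wrapper class and model name are illustrative, not part of this commit:

from openai import OpenAI

class OpenAIClient:
    """Hypothetical adapter for LLMAnalyzer's llm_client parameter."""

    def __init__(self, model: str = "gpt-4o-mini"):
        self.client = OpenAI()  # reads OPENAI_API_KEY from the environment
        self.model = model

    def analyze(self, prompt: str) -> str:
        # Single-turn chat completion; the analyzer expects raw JSON text back
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "user", "content": prompt}],
        )
        return response.choices[0].message.content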