Merge pull request #15 from leonvanbokhorst/cleanup
Update .gitignore to include .DS_Store file
leonvanbokhorst authored Dec 26, 2024
2 parents 8157cd7 + 5f8d189 commit f632027
Showing 3 changed files with 352 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -21,6 +21,7 @@ wheels/
*.egg
.venv/
cache/
.DS_Store

# Testing
.coverage
169 changes: 169 additions & 0 deletions src/response-variability/respvar_01.py
@@ -0,0 +1,169 @@
import re
from dataclasses import dataclass
from typing import List, Dict
import numpy as np
from collections import defaultdict


@dataclass
class ResponseMetrics:
"""Metrics for analyzing response variability"""

message_length: int
sentence_count: int
avg_sentence_length: float
thinking_markers: List[str]
correction_patterns: List[str]
    response_type: str  # 'direct', 'reflective', 'mixed', or 'unknown'
messiness_score: float


class ResponseAnalyzer:
def __init__(self):
        # Patterns for different aspects of natural communication
        # (the markers below target Dutch-language responses)
        self.thinking_patterns = [
            r"\b(hmm|uhm|eh|oh)\b",
            r"\.{3,}",  # ... thinking pauses
            r"\(denkt\)",  # "(thinks)"
            r"even nadenken",  # "let me think"
        ]

        self.correction_patterns = [
            r"oh wacht",  # "oh wait"
            r"laat ik dat anders",  # "let me put that differently"
            r"ik bedoel",  # "I mean"
            r"corrigeert",  # "corrects"
            r"wat ik eigenlijk bedoel",  # "what I actually mean"
        ]

        self.reflective_markers = [
            r"ik denk",  # "I think"
            r"volgens mij",  # "in my view"
            r"misschien",  # "maybe"
            r"het lijkt erop",  # "it seems"
            r"wat als we",  # "what if we"
            r"interessant",  # "interesting"
        ]

def analyze_response(self, text: str) -> ResponseMetrics:
"""Analyseer een enkele response op verschillende metrics"""
# Basis tekstanalyse
sentences = [s.strip() for s in re.split(r"[.!?]+", text) if s.strip()]
message_length = len(text)
sentence_count = len(sentences)
avg_sentence_length = (
message_length / sentence_count if sentence_count > 0 else 0
)

        # Look for thinking patterns
thinking_markers = []
for pattern in self.thinking_patterns:
matches = re.findall(pattern, text.lower())
thinking_markers.extend(matches)

correction_patterns = [
pattern
for pattern in self.correction_patterns
if re.search(pattern, text.lower())
]

        # Determine the response type
        reflective_count = sum(
            bool(re.search(pattern, text.lower()))
            for pattern in self.reflective_markers
        )
response_type = self._determine_response_type(reflective_count, len(sentences))

        # Compute the messiness score (0-1)
messiness_score = self._calculate_messiness(
len(thinking_markers), len(correction_patterns), sentence_count, text
)

return ResponseMetrics(
message_length=message_length,
sentence_count=sentence_count,
avg_sentence_length=avg_sentence_length,
thinking_markers=thinking_markers,
correction_patterns=correction_patterns,
response_type=response_type,
messiness_score=messiness_score,
)

def analyze_conversation(self, messages: List[str]) -> Dict:
"""Analyseer een hele conversatie voor patronen"""
metrics = [self.analyze_response(msg) for msg in messages]

        # Compute conversation-level statistics
length_variation = np.std([m.message_length for m in metrics])
response_types = [m.response_type for m in metrics]
messiness_trend = [m.messiness_score for m in metrics]

return {
"individual_metrics": metrics,
"length_variation": length_variation,
"response_type_distribution": self._count_response_types(response_types),
"avg_messiness": np.mean(messiness_trend),
"messiness_trend": messiness_trend,
}

def _determine_response_type(
self, reflective_count: int, sentence_count: int
) -> str:
"""Bepaal of een response direct, reflectief of gemengd is"""
if sentence_count == 0:
return "unknown"

reflective_ratio = reflective_count / sentence_count
if reflective_ratio > 0.6:
return "reflective"
elif reflective_ratio < 0.2:
return "direct"
else:
return "mixed"

def _calculate_messiness(
self, thinking_count: int, correction_count: int, sentence_count: int, text: str
) -> float:
"""Bereken een genormaliseerde messiness score"""
if sentence_count == 0:
return 0.0

        # Base components of messiness
        thinking_ratio = min(thinking_count / sentence_count, 1.0)
        correction_ratio = min(correction_count / sentence_count, 1.0)

        # Check for informal elements (repeated punctuation, Dutch interjections)
        informal_elements = len(
            re.findall(r"[!?]{2,}|\b(?:haha|nou|tja)\b", text.lower())
        )
        informal_ratio = min(informal_elements / sentence_count, 1.0)

        # Weighted average of the factors
weights = [0.4, 0.3, 0.3] # thinking, corrections, informal
components = [thinking_ratio, correction_ratio, informal_ratio]

return sum(w * c for w, c in zip(weights, components))

def _count_response_types(self, types: List[str]) -> Dict[str, int]:
"""Tel de frequentie van verschillende response types"""
counter = defaultdict(int)
for t in types:
counter[t] += 1
return dict(counter)


# Example usage
if __name__ == "__main__":
analyzer = ResponseAnalyzer()

    # Test with a single response (Dutch sample text, matching the Dutch markers above)
test_response = """
Hmm... laat me daar even over nadenken.
Oh wacht, ik zie wat je bedoelt!
Volgens mij kunnen we dit het beste aanpakken door eerst...
*corrigeert* Wat ik eigenlijk bedoel is dat we misschien moeten beginnen met een simpelere aanpak.
"""

metrics = analyzer.analyze_response(test_response)
print(f"Message length: {metrics.message_length}")
print(f"Messiness score: {metrics.messiness_score:.2f}")
print(f"Response type: {metrics.response_type}")
print(f"Thinking markers found: {metrics.thinking_markers}")
print(f"Correction patterns found: {metrics.correction_patterns}")
182 changes: 182 additions & 0 deletions src/response-variability/respvar_02.py
@@ -0,0 +1,182 @@
from typing import List, Dict, Any
import json


class LLMAnalyzer:
def __init__(self, llm_client):
self.llm = llm_client

def analyze_response(self, text: str) -> Dict[str, Any]:
"""Analyze a single response using LLM capabilities"""

analysis_prompt = f"""
Analyze the following message for communication patterns. Consider:
1. Response Style:
- Is it direct, reflective, or mixed?
- What's the emotional tone?
- How formal/informal is the language?
2. Conversational Elements:
- Are there thinking patterns (hesitations, self-corrections)?
- How does it manage attention and topic flow?
- What linguistic markers show human-like communication?
3. Structural Analysis:
- How varied is the sentence structure?
- Are there natural breaks or topic shifts?
- How does it handle complexity?
Message to analyze: "{text}"
Provide your analysis in JSON format with the following structure:
{{
"style": {{
"type": "direct|reflective|mixed",
"formality_level": 0-1,
"emotional_tone": "description"
}},
"conversational_elements": {{
"thinking_patterns": ["list", "of", "patterns"],
"attention_management": "description",
"human_markers": ["list", "of", "markers"]
}},
"structure": {{
"complexity_score": 0-1,
"natural_flow_score": 0-1,
"topic_coherence": "description"
}},
"overall_naturalness": 0-1
}}
"""

response = self.llm.analyze(analysis_prompt)
return json.loads(response)

def analyze_conversation(self, messages: List[str]) -> Dict[str, Any]:
"""Analyze a full conversation for patterns and development"""

conversation_prompt = f"""
Analyze this conversation for interaction patterns. Consider:
1. Conversation Flow:
- How natural are the topic transitions?
- Is there a good balance of initiative between participants?
- How well is rapport maintained?
2. Response Patterns:
- How do response styles vary?
- Are there consistent patterns in timing and length?
- How are attention shifts handled?
3. Relationship Development:
- How does the conversation build rapport?
- Are there signs of mutual understanding?
- How are agreements/disagreements handled?
Messages:
{json.dumps(messages, indent=2)}
Provide analysis in JSON format with:
{{
"flow": {{
"transition_naturalness": 0-1,
"initiative_balance": 0-1,
"rapport_maintenance": "description"
}},
"patterns": {{
"style_variation": 0-1,
"rhythm_naturalness": 0-1,
"attention_management": "description"
}},
"relationship": {{
"rapport_building": ["observed", "techniques"],
"understanding_indicators": ["list", "of", "indicators"],
"conflict_management": "description"
}},
"overall_conversation_quality": 0-1
}}
"""

response = self.llm.analyze(conversation_prompt)
return json.loads(response)

def benchmark_naturalness(
self, target_message: str, comparison_corpus: List[str]
) -> Dict[str, Any]:
"""Compare a message against a corpus of known natural communication"""

benchmark_prompt = f"""
Compare this message against examples of natural human communication:
Target message: "{target_message}"
Comparison examples:
{json.dumps(comparison_corpus, indent=2)}
Analyze how the target message compares in terms of:
1. Linguistic naturalness
2. Communication patterns
3. Human-like variability
4. Authenticity markers
Provide analysis in JSON format with:
{{
"naturalness_comparison": {{
"similarity_score": 0-1,
"matching_patterns": ["list", "of", "patterns"],
"missing_elements": ["list", "of", "elements"],
"improvement_suggestions": ["list", "of", "suggestions"]
}},
"believability_assessment": {{
"overall_score": 0-1,
"strengths": ["list"],
"weaknesses": ["list"]
}}
}}
"""

response = self.llm.analyze(benchmark_prompt)
return json.loads(response)


# Mock client for example usage:
class MockLLM:
    def analyze(self, prompt: str) -> str:
        # Stand-in for a real LLM API call; always returns the same canned analysis
return """
{
"style": {
"type": "mixed",
"formality_level": 0.6,
"emotional_tone": "engaged and thoughtful"
},
"conversational_elements": {
"thinking_patterns": ["self-reflection", "hesitation"],
"attention_management": "natural topic shifts with clear connections",
"human_markers": ["informal asides", "self-correction"]
},
"structure": {
"complexity_score": 0.7,
"natural_flow_score": 0.8,
"topic_coherence": "maintains focus while allowing natural digressions"
},
"overall_naturalness": 0.75
}
"""


if __name__ == "__main__":
# Example usage
llm = MockLLM()
analyzer = LLMAnalyzer(llm)

test_message = """
Hmm, interesting point... Let me think about this for a moment.
I see what you're getting at, though I wonder if we might be
overlooking something. Oh wait, actually - this reminds me of
a similar case where... no, let me rephrase that. What I mean is...
"""

result = analyzer.analyze_response(test_message)
print(json.dumps(result, indent=2))
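

# Sketch of wiring in a real backend: LLMAnalyzer only needs an object with an
# `analyze(prompt) -> str` method that returns the model's raw text reply.
# The adapter below is a hypothetical example -- `chat_fn` stands for whatever
# client call is available; it is not a real library API.
class LLMClientAdapter:
    def __init__(self, chat_fn):
        # chat_fn: any callable mapping a prompt string to the model's reply text
        self.chat_fn = chat_fn

    def analyze(self, prompt: str) -> str:
        # The analysis prompts already request JSON, so pass the reply through as-is
        return self.chat_fn(prompt)


# Usage: analyzer = LLMAnalyzer(LLMClientAdapter(my_chat_fn))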
