Skip to content

Commit

Permalink
Merge branch 'rcdvm_fact_1422' into development
Browse files Browse the repository at this point in the history
  • Loading branch information
beastoin committed Dec 6, 2024
2 parents 4bc48b6 + 9a13e64 commit 13cd421
Show file tree
Hide file tree
Showing 34 changed files with 385 additions and 119 deletions.
4 changes: 2 additions & 2 deletions app/lib/backend/http/api/facts.dart
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ Future<bool> createFact(String content, FactCategory category) async {
return response.statusCode == 200;
}

Future<List<Fact>> getFacts({int limit = 5000, int offset = 0}) async {
Future<List<Fact>> getFacts({int limit = 100, int offset = 0}) async {
var response = await makeApiCall(
url: '${Env.apiBaseUrl}v1/facts', // limit=$limit&offset=$offset
url: '${Env.apiBaseUrl}v2/facts?limit=${limit}&offset=${offset}',
headers: {},
method: 'GET',
body: '',
Expand Down
10 changes: 7 additions & 3 deletions app/lib/pages/home/page.dart
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,21 @@ import 'package:friend_private/backend/http/api/users.dart';
import 'package:friend_private/backend/preferences.dart';
import 'package:friend_private/backend/schema/geolocation.dart';
import 'package:friend_private/main.dart';
import 'package:friend_private/pages/apps/page.dart';
import 'package:friend_private/pages/chat/page.dart';
import 'package:friend_private/pages/facts/page.dart';
import 'package:friend_private/pages/home/widgets/chat_apps_dropdown_widget.dart';
import 'package:friend_private/pages/home/widgets/speech_language_sheet.dart';
import 'package:friend_private/pages/memories/page.dart';
import 'package:friend_private/pages/apps/page.dart';
import 'package:friend_private/pages/settings/page.dart';
import 'package:friend_private/providers/app_provider.dart';
import 'package:friend_private/providers/capture_provider.dart';
import 'package:friend_private/providers/connectivity_provider.dart';
import 'package:friend_private/providers/device_provider.dart';
import 'package:friend_private/providers/home_provider.dart';
import 'package:friend_private/providers/memory_provider.dart' as mp;
import 'package:friend_private/providers/memory_provider.dart';
import 'package:friend_private/providers/message_provider.dart';
import 'package:friend_private/providers/app_provider.dart';
import 'package:friend_private/services/notifications.dart';
import 'package:friend_private/utils/analytics/analytics_manager.dart';
import 'package:friend_private/utils/analytics/mixpanel.dart';
Expand Down Expand Up @@ -116,7 +117,10 @@ class _HomePageState extends State<HomePage> with WidgetsBindingObserver, Ticker
}

///Screens with respect to subpage
final Map<String, Widget> screensWithRespectToPath = {'/settings': const SettingsPage()};
final Map<String, Widget> screensWithRespectToPath = {
'/settings': const SettingsPage(),
'/facts': const FactsPage(),
};
bool? previousConnection;

void _onReceiveTaskData(dynamic data) async {
Expand Down
4 changes: 2 additions & 2 deletions app/lib/services/notifications.dart
Original file line number Diff line number Diff line change
Expand Up @@ -265,8 +265,8 @@ class NotificationUtil {

// Always ensure that all plugins was initialized
WidgetsFlutterBinding.ensureInitialized();
if (payload.containsKey('navigateTo')) {
SharedPreferencesUtil().subPageToShowFromNotification = payload['navigateTo'] ?? '';
if (payload.containsKey('navigateTo') || payload.containsKey('navigate_to')) {
SharedPreferencesUtil().subPageToShowFromNotification = payload['navigateTo'] ?? payload['navigate_to'] ?? '';
}

// Notification page
Expand Down
1 change: 1 addition & 0 deletions backend/database/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ def record_app_usage(
'timestamp': datetime.now(timezone.utc) if timestamp is None else timestamp,
'type': usage_type,
}

db.collection('plugins').document(app_id).collection('usage_history').document(memory_id or message_id).set(data)
return data

Expand Down
20 changes: 15 additions & 5 deletions backend/database/facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,24 @@ def get_facts(uid: str, limit: int = 100, offset: int = 0):
print('get_facts', uid, limit, offset)
facts_ref = db.collection('users').document(uid).collection('facts')
facts_ref = (
facts_ref.order_by('created_at', direction=firestore.Query.DESCENDING)
facts_ref.order_by('scoring', direction=firestore.Query.DESCENDING)
.order_by('created_at', direction=firestore.Query.DESCENDING)
.where(filter=FieldFilter('user_review', '!=', False))
.where(filter=FieldFilter('deleted', '==', False))
# .where(filter=FieldFilter('user_review', '!=', False))
)
facts_ref = facts_ref.limit(limit).offset(offset)
facts = [doc.to_dict() for doc in facts_ref.stream()]
result = [fact for fact in facts if fact['user_review'] is not False]
return result
return [doc.to_dict() for doc in facts_ref.stream()]

def get_non_filtered_facts(uid: str, limit: int = 100, offset: int = 0):
print('get_non_filtered_facts', uid, limit, offset)
facts_ref = db.collection('users').document(uid).collection('facts')
facts_ref = (
facts_ref.order_by('scoring', direction=firestore.Query.DESCENDING)
.order_by('created_at', direction=firestore.Query.DESCENDING)
.where(filter=FieldFilter('deleted', '==', False))
)
facts_ref = facts_ref.limit(limit).offset(offset)
return [doc.to_dict() for doc in facts_ref.stream()]


def create_fact(uid: str, data: dict):
Expand Down
1 change: 1 addition & 0 deletions backend/models/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def get_image_url(self) -> str:

class UsageHistoryType(str, Enum):
memory_created_external_integration = 'memory_created_external_integration'
transcript_processed_external_integration = 'transcript_processed_external_integration'
memory_created_prompt = 'memory_created_prompt'
chat_message_sent = 'chat_message_sent'

Expand Down
24 changes: 23 additions & 1 deletion backend/models/facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ class FactCategory(str, Enum):
other = "other"


CATEGORY_BOOSTS = {FactCategory.core.value: 1,
FactCategory.habits.value:10,
FactCategory.work.value:20,
FactCategory.skills.value:30,
FactCategory.lifestyle.value: 40,
FactCategory.hobbies.value: 40,
FactCategory.interests.value:40,
FactCategory.other.value: 50,}

class Fact(BaseModel):
content: str = Field(description="The content of the fact")
category: FactCategory = Field(description="The category of the fact", default=FactCategory.other)
Expand Down Expand Up @@ -57,10 +66,21 @@ class FactDB(Fact):
manually_added: bool = False
edited: bool = False
deleted: bool = False
scoring: Optional[str] = None

@staticmethod
def calculate_score(fact: 'FactDB') -> 'FactDB':
cat_boost = (999 - CATEGORY_BOOSTS[fact.category.value]) if fact.category.value in CATEGORY_BOOSTS else 0

user_manual_added_boost = 1
if fact.manually_added is False:
user_manual_added_boost = 0

return "{:02d}_{:02d}_{:010d}".format(user_manual_added_boost, cat_boost, int(fact.created_at.timestamp()))

@staticmethod
def from_fact(fact: Fact, uid: str, memory_id: str, memory_category: CategoryEnum) -> 'FactDB':
return FactDB(
fact_db = FactDB(
id=document_id_from_seed(fact.content),
uid=uid,
content=fact.content,
Expand All @@ -70,3 +90,5 @@ def from_fact(fact: Fact, uid: str, memory_id: str, memory_category: CategoryEnu
memory_id=memory_id,
memory_category=memory_category,
)
fact_db.scoring = FactDB.calculate_score(fact_db)
return fact_db
1 change: 1 addition & 0 deletions backend/models/notification_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class NotificationMessage(BaseModel):
type: str
notification_type: str
text: Optional[str] = ""
navigate_to: Optional[str] = None

@staticmethod
def get_message_as_dict(
Expand Down
1 change: 1 addition & 0 deletions backend/models/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def get_image_url(self) -> str:

class UsageHistoryType(str, Enum):
memory_created_external_integration = 'memory_created_external_integration'
transcript_processed_external_integration = 'transcript_processed_external_integration'
memory_created_prompt = 'memory_created_prompt'
chat_message_sent = 'chat_message_sent'

Expand Down
8 changes: 7 additions & 1 deletion backend/routers/facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def create_fact(fact: Fact, uid: str = Depends(auth.get_current_user_uid)):


@router.get('/v1/facts', tags=['facts'], response_model=List[FactDB]) # filters
def get_facts(limit: int = 5000, offset: int = 0, uid: str = Depends(auth.get_current_user_uid)):
def get_facts_v1(limit: int = 5000, offset: int = 0, uid: str = Depends(auth.get_current_user_uid)):
facts = facts_db.get_facts(uid, limit, offset)
# facts = list(filter(lambda x: x['category'] == 'skills', facts))
# TODO: consider this "$name" part if really is an issue, when changing name or smth.
Expand All @@ -45,6 +45,12 @@ def get_facts(limit: int = 5000, offset: int = 0, uid: str = Depends(auth.get_cu
# return facts


@router.get('/v2/facts', tags=['facts'], response_model=List[FactDB])
def get_facts(limit: int = 100, offset: int = 0, uid: str = Depends(auth.get_current_user_uid)):
facts = facts_db.get_facts(uid, limit, offset)
return facts


@router.delete('/v1/facts/{fact_id}', tags=['facts'])
def delete_fact(fact_id: str, uid: str = Depends(auth.get_current_user_uid)):
facts_db.delete_fact(uid, fact_id)
Expand Down
7 changes: 4 additions & 3 deletions backend/routers/pusher.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ async def receive_audio_bytes():

# Transcript
if header_type == 100:
segments = json.loads(bytes(data[4:]).decode("utf-8"))
print(f"transcript received {len(segments)}")
asyncio.run_coroutine_threadsafe(trigger_realtime_integrations(uid, segments), loop)
res = json.loads(bytes(data[4:]).decode("utf-8"))
segments = res.get('segments')
memory_id = res.get('memory_id')
asyncio.run_coroutine_threadsafe(trigger_realtime_integrations(uid, segments, memory_id), loop)
asyncio.run_coroutine_threadsafe(realtime_transcript_webhook(uid, segments), loop)
continue

Expand Down
16 changes: 13 additions & 3 deletions backend/routers/transcribe_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,14 +309,18 @@ def create_pusher_task_handler():
# Transcript
transcript_ws = None
segment_buffers = []
in_progress_memory_id = None

def transcript_send(segments):
def transcript_send(segments, memory_id):
nonlocal segment_buffers
nonlocal in_progress_memory_id
in_progress_memory_id = memory_id
segment_buffers.extend(segments)

async def transcript_consume():
nonlocal websocket_active
nonlocal segment_buffers
nonlocal in_progress_memory_id
nonlocal transcript_ws
nonlocal pusher_connected
while websocket_active or len(segment_buffers) > 0:
Expand All @@ -327,7 +331,7 @@ async def transcript_consume():
# 100|data
data = bytearray()
data.extend(struct.pack("I", 100))
data.extend(bytes(json.dumps(segment_buffers), "utf-8"))
data.extend(bytes(json.dumps({"segments":segment_buffers,"memory_id":in_progress_memory_id}), "utf-8"))
segment_buffers = [] # reset
await transcript_ws.send(data)
print(f"transcript sent {len(data)}", uid)
Expand Down Expand Up @@ -404,11 +408,15 @@ async def close(code: int = 1000):

pusher_connect, pusher_close, transcript_send, transcript_consume, audio_bytes_send, audio_bytes_consume = create_pusher_task_handler()


current_memory_id = None

async def stream_transcript_process():
nonlocal websocket_active
nonlocal realtime_segment_buffers
nonlocal websocket
nonlocal seconds_to_trim
nonlocal current_memory_id

while websocket_active or len(realtime_segment_buffers) > 0:
try:
Expand Down Expand Up @@ -448,12 +456,14 @@ async def stream_transcript_process():
# Send to external trigger
print(f"transcript send {transcript_send} {len(segments)}", uid)
if transcript_send:
transcript_send(segments)
transcript_send(segments,current_memory_id)

memory = _get_or_create_in_progress_memory(segments) # can trigger race condition? increase soniox utterance?
current_memory_id = memory.id
memories_db.update_memory_segments(uid, memory.id, [s.dict() for s in memory.transcript_segments])
memories_db.update_memory_finished_at(uid, memory.id, finished_at)


# threading.Thread(target=process_segments, args=(uid, segments)).start() # restore when plugins work
except Exception as e:
print(f'Could not process transcript: error {e}', uid)
Expand Down
33 changes: 31 additions & 2 deletions backend/scripts/rag/facts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import database.facts as facts_db
from utils.llm import new_facts_extractor, new_learnings_extractor
import threading
from typing import Tuple

Expand All @@ -8,8 +10,6 @@
from models.facts import Fact, FactDB

firebase_admin.initialize_app()
from utils.llm import new_facts_extractor, new_learnings_extractor
import database.facts as facts_db


def get_facts_from_memories(
Expand Down Expand Up @@ -85,5 +85,34 @@ def script_migrate_users():
[t.join() for t in chunk]


# migrate scoring for facts
def migration_fact_scoring_for_user(uid: str):
print('migration_fact_scoring_for_user', uid)
offset = 0
while True:
facts_data = facts_db.get_non_filtered_facts(uid, limit=400, offset=offset)
facts = [FactDB(**d) for d in facts_data]
if not facts or len(facts) == 0:
break

print('execute_for_user', uid, 'found facts', len(facts))
for fact in facts:
fact.scoring = FactDB.calculate_score(fact)
facts_db.save_facts(uid, [fact.dict() for fact in facts])
offset += len(facts)

def script_migrate_fact_scoring_users(uids: [str]):
threads = []
for uid in uids:
t = threading.Thread(target=migration_fact_scoring_for_user, args=(uid,))
threads.append(t)

chunk_size = 1
chunks = [threads[i:i + chunk_size] for i in range(0, len(threads), chunk_size)]
for i, chunk in enumerate(chunks):
[t.start() for t in chunk]
[t.join() for t in chunk]


if __name__ == '__main__':
script_migrate_users()
2 changes: 2 additions & 0 deletions backend/utils/apps.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,13 @@ def get_app_money_made(app_id: str) -> dict[str, int | float]:
type1 = len(list(filter(lambda x: x.type == UsageHistoryType.memory_created_external_integration, usage)))
type2 = len(list(filter(lambda x: x.type == UsageHistoryType.memory_created_prompt, usage)))
type3 = len(list(filter(lambda x: x.type == UsageHistoryType.chat_message_sent, usage)))
type4 = len(list(filter(lambda x: x.type == UsageHistoryType.transcript_processed_external_integration, usage)))

# tbd based on current prod stats
t1multiplier = 0.02
t2multiplier = 0.01
t3multiplier = 0.005
t4multiplier = 0.00001 # This is for transcript processed triggered for every segment, so it should be very low

money = {
'money': round((type1 * t1multiplier) + (type2 * t2multiplier) + (type3 * t3multiplier), 2),
Expand Down
2 changes: 1 addition & 1 deletion backend/utils/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ def new_facts_extractor(
user_name, facts_str = get_prompt_facts(uid)

content = TranscriptSegment.segments_as_string(segments, user_name=user_name)
if not content or len(content) < 100: # less than 20 words, probably nothing
if not content or len(content) < 25: # less than 5 words, probably nothing
return []
# TODO: later, focus a lot on user said things, rn is hard because of speech profile accuracy
# TODO: include negative facts too? Things the user doesn't like?
Expand Down
2 changes: 1 addition & 1 deletion backend/utils/memories/facts.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def get_prompt_facts(uid: str) -> str:

def get_prompt_data(uid: str) -> Tuple[str, List[Fact], List[Fact]]:
# TODO: cache this
existing_facts = facts_db.get_facts(uid, limit=250) # TODO: what the fuck to do here? too much context for llm
existing_facts = facts_db.get_facts(uid, limit=100)
user_made = [Fact(**fact) for fact in existing_facts if fact['manually_added']]
# TODO: filter only reviewed True
generated = [Fact(**fact) for fact in existing_facts if not fact['manually_added']]
Expand Down
20 changes: 20 additions & 0 deletions backend/utils/memories/process_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from models.memory import *
from models.task import Task, TaskStatus, TaskAction, TaskActionProvider
from models.trend import Trend
from models.notification_message import NotificationMessage
from utils.apps import get_available_apps
from utils.llm import obtain_emotional_message, retrieve_metadata_fields_from_transcript
from utils.llm import summarize_open_glass, get_transcript_structure, generate_embedding, \
Expand Down Expand Up @@ -128,8 +129,27 @@ def _extract_facts(uid: str, memory: Memory):
for fact in new_facts:
parsed_facts.append(FactDB.from_fact(fact, uid, memory.id, memory.structured.category))
print('_extract_facts:', fact.category.value.upper(), '|', fact.content)
if len(parsed_facts) == 0:
return

facts_db.save_facts(uid, [fact.dict() for fact in parsed_facts])

# send notification
token = notification_db.get_token_only(uid)
if token and len(token) > 0:
send_new_facts_notification(token, parsed_facts)

def send_new_facts_notification(token: str, facts: [FactDB]):
facts_str = ",".join([fact.content for fact in facts])
message = f"New facts {facts_str}"
ai_message = NotificationMessage(
text=message,
type='text',
navigate_to="/facts",
)

send_notification(token, "Omi" + ' says', message, NotificationMessage.get_message_as_dict(ai_message))


def _extract_trends(memory: Memory):
extracted_items = trends_extractor(memory)
Expand Down
Loading

0 comments on commit 13cd421

Please sign in to comment.