Skip to content

Commit

Permalink
+ content field (optional) (#61)
Browse files Browse the repository at this point in the history
* + content field (optional)

* added translated and raw_content

---------

Co-authored-by: patrick borowy <patrick.borowy@protonmail.com>
  • Loading branch information
6r17 and patrick borowy authored Jul 17, 2024
1 parent f81ecd5 commit e51bd62
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 5 deletions.
1 change: 0 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
FROM python:3.10.11


# Update and install dependencies
RUN apt-get update \
&& apt-get upgrade -y \
Expand Down
2 changes: 1 addition & 1 deletion exorde/ipfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class EnumEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, Enum):
return obj.name # Serialize Enum value as its name
return super().default(obj)
return str(super().default(obj))


async def upload_to_ipfs(
Expand Down
5 changes: 5 additions & 0 deletions exorde/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
ExternalId,
ExternalParentId,
Domain,
Content,
Item
)
from dataclasses import dataclass

Expand Down Expand Up @@ -200,6 +202,7 @@ class LiveConfiguration(dict):

class Processed(dict, metaclass=MadType):
translation: Translation
raw_content: Content
top_keywords: Keywords
classification: Classification
item: Item
Expand Down Expand Up @@ -233,6 +236,8 @@ class ProtocolItem(dict, metaclass=MadType):

created_at: CreatedAt
title: Optional[Title] # titre obligatoire si pas de contenu
raw_content: Optional[Content]
translated_content: Optional[Content]
summary: Optional[Summary] # <- description or summary available
picture: Optional[Url]
author: Optional[Author]
Expand Down
1 change: 1 addition & 0 deletions exorde/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ async def process(
raise err
return Processed(
item=item,
raw_content=item.content,
translation=translation,
top_keywords=top_keywords,
classification=classification,
Expand Down
4 changes: 3 additions & 1 deletion exorde/process_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
Gender,
Analysis
)
from exorde_data import Url
from exorde_data import Url, Content

from exorde.tag import tag
from collections import Counter
Expand Down Expand Up @@ -236,6 +236,8 @@ async def process_batch(
complete_processes: dict[int, list[ProcessedItem]] = {}
for (id, processed), analysis in zip(batch, analysis_results):
prot_item: ProtocolItem = ProtocolItem(
raw_content=Content(processed.raw_content),
translated_content=Content(processed.translation.translation),
created_at=processed.item.created_at,
domain=processed.item.domain,
url=Url(processed.item.url),
Expand Down
23 changes: 22 additions & 1 deletion exorde/spotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,15 @@
from exorde.spot_data import spot_data

from exorde.get_transaction_receipt import get_transaction_receipt
from exorde.ipfs import download_ipfs_file, upload_to_ipfs
from exorde.ipfs import download_ipfs_file, upload_to_ipfs, EnumEncoder
from exorde.models import LiveConfiguration, StaticConfiguration
from exorde.counter import AsyncItemCounter

import json
import os
import hashlib
import random
import string
import json
import logging
import argparse
Expand Down Expand Up @@ -99,6 +104,22 @@ async def count_rep_for_each_domain(
await counter.increment(f"rep_{alias}")


def save_json_to_file(data, folder_path='output_folder'):
    """Save ``data`` as indented JSON in a randomly named file.

    The file is named ``<generate_random_hash()>.json`` and is written inside
    ``folder_path``. The folder, including any missing parents, is created
    when it does not exist yet.

    Args:
        data: Object to serialize; encoding is delegated to ``EnumEncoder``.
        folder_path: Directory that receives the file.

    Returns:
        The path of the file that was written.

    Raises:
        OSError: If the folder cannot be created or the file cannot be
            opened for writing.
    """
    # exist_ok=True replaces the former "check os.path.exists, then
    # os.makedirs" sequence: that pair raises FileExistsError when another
    # task or process creates the folder between the check and the call.
    os.makedirs(folder_path, exist_ok=True)

    # Build the destination path from a random hash so files do not
    # overwrite each other.
    filename = f"{generate_random_hash()}.json"
    file_path = os.path.join(folder_path, filename)

    # Explicit encoding keeps the output independent of the platform's
    # locale default.
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4, cls=EnumEncoder)

    return file_path

async def spotting(
live_configuration: LiveConfiguration,
static_configuration: StaticConfiguration,
Expand Down
2 changes: 1 addition & 1 deletion keywords.json

Large diffs are not rendered by default.

0 comments on commit e51bd62

Please sign in to comment.