-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathipfs_lod.py
90 lines (82 loc) · 3.48 KB
/
ipfs_lod.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# -*- coding: utf-8 -*-
"""Publishes LOD datasets over IPFS based on their W3C VoID descriptions.
Assumptions (maybe not completely reasonable)
=============================================
- Datasets are described in VoID documents.
- Versioning can be discovered by looking at the dcterms:modified property.
- Actual data can be accessed via void:dataDump properties.
- Both the VoID description and the dataset are sent to IPFS.
- The VoID description is modified to include the addresses of the dumps over IPFS.
"""
import ipfsapi
import logging
from lodataset import LODatasetDescription
import os
import wget
import shutil
class IPFSLODPublisher(object):
    """Publish a LOD dataset and its VoID description to IPFS.

    Attributes:
        dataset: the dataset object currently tracked.
        dataset_id: identifier used to look the dataset up again on update.
        last_modified: the dataset's "modified" value at the last check
            (must offer ``toPython()`` returning a datetime -- presumably
            an rdflib Literal; confirm against ``lodataset``).
        api: client returned by ``ipfsapi.connect``.
        was_updated: True while there is an unpublished version pending.
        ipfs_addr: IPFS hash of the last published folder, or None if
            nothing has been published yet.
    """

    def __init__(self, dataset, client='127.0.0.1', port=5001):
        """Build the publisher from a LODataset.

        Args:
            dataset: dataset object exposing ``id``, ``desc`` and
                item access for "modified" / "dataDump".
            client: host of the IPFS API endpoint passed to
                ``ipfsapi.connect``.
            port: port of the IPFS API endpoint.
        """
        self.dataset = dataset
        self.dataset_id = dataset.id
        self.last_modified = dataset["modified"]
        self.api = ipfsapi.connect(client, port)
        # A freshly tracked dataset has never been published, so it is
        # flagged as pending right away.
        self.was_updated = True
        # Defined up front so callers can read it before the first publish.
        self.ipfs_addr = None
        logging.getLogger().setLevel(logging.INFO)
        logging.info("Dataset %s", dataset.id)
        logging.info(
            "Last modified %s",
            self.last_modified.toPython().strftime("%Y-%m-%d %H:%M:%S"))

    def update(self):
        """Reload the dataset and its description.

        The description is rebuilt from the URI and well-known flag of the
        current one. If the reloaded "modified" value is more recent than
        the stored one, the dataset is flagged for the next publish.
        """
        lod = LODatasetDescription(self.dataset.desc.uri,
                                   self.dataset.desc.well_known)
        self.dataset = lod[self.dataset_id]
        newtime = self.dataset["modified"].toPython()
        # Check if the new last modification is more recent:
        if newtime > self.last_modified.toPython():
            self.was_updated = True
            logging.info("Dataset updated.")
        else:
            logging.info("Dataset remains the same.")
        self.last_modified = self.dataset["modified"]

    def publish(self, style="folder"):
        """Publish the dataset to IPFS if an update is pending.

        Does nothing when no update is pending. On success the IPFS hash
        of the published folder is stored in ``self.ipfs_addr`` and the
        pending flag is cleared. On failure the flag is left set so the
        publication can be retried.

        Styles
        ======
        "folder" : the VOID file and dump files go in a common folder.
        "ipfsld" : a VOID file is augmented with IPFSLD links (not implemented)

        Raises:
            ValueError: if an update is pending and ``style`` is not
                supported.
        """
        if not self.was_updated:
            return
        if style != "folder":
            raise ValueError("Publishing style " + style + " not supported.")

        # Staging folder name: dataset id (slashes flattened) + timestamp.
        folder = self.dataset.id.replace("/", "_")
        folder = folder + self.last_modified.toPython().strftime(
            "%Y_%m_%d_%H:%M:%S")
        logging.info("Staging folder %s", folder)
        if not os.path.exists(folder):
            os.mkdir(folder)
        try:
            self._stage_files(folder)
            # Add to IPFS:
            res = self.api.add(folder, recursive=False)
            logging.info(res)
            # NOTE(review): ipfsapi is documented to return a single dict
            # when only one entry is added -- normalise so the loop below
            # always sees a sequence of result dicts.
            entries = [res] if isinstance(res, dict) else res
            for entry in entries:
                if entry["Name"] == folder:
                    self.ipfs_addr = entry["Hash"]
        finally:
            # Always remove the staging folder, even when publishing failed.
            shutil.rmtree(folder)
        # Only a completed publication clears the pending flag.
        self.was_updated = False

    def _stage_files(self, folder):
        """Write void.ttl and download every data dump into ``folder``."""
        previous_cwd = os.getcwd()
        # Files are written relative to the staging folder; the working
        # directory is restored no matter what happens inside.
        os.chdir(folder)
        try:
            # Serialize the VOID:
            # TODO: Include only the descriptions of the dataset, not all
            # of them.
            self.dataset.desc.g.serialize(destination='void.ttl',
                                          format='turtle')
            # Get the dumps (a single dump is not wrapped in a list):
            dumps = self.dataset["dataDump"]
            if not isinstance(dumps, list):
                dumps = [dumps]
            for dump in dumps:
                wget.download(dump)
        finally:
            os.chdir(previous_cwd)