-
Notifications
You must be signed in to change notification settings - Fork 127
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fluffy: Implement offer cache #2827
Changes from all commits
15a224a
29ed761
f0cf1fa
37c795b
39aa06f
3a93b24
2900261
09a9951
6404565
a76d712
2d69b93
24d9cdb
7a37a08
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,10 +75,16 @@ declareCounter portal_gossip_without_lookup, | |
"Portal wire protocol neighborhood gossip that did not require a node lookup", | ||
labels = ["protocol_id"] | ||
declareCounter portal_content_cache_hits, | ||
"Portal wire protocol local content lookups that hit the cache", | ||
"Portal wire protocol local content lookups that hit the content cache", | ||
labels = ["protocol_id"] | ||
declareCounter portal_content_cache_misses, | ||
"Portal wire protocol local content lookups that don't hit the cache", | ||
"Portal wire protocol local content lookups that don't hit the content cache", | ||
labels = ["protocol_id"] | ||
declareCounter portal_offer_cache_hits, | ||
"Portal wire protocol local content lookups that hit the offer cache", | ||
labels = ["protocol_id"] | ||
declareCounter portal_offer_cache_misses, | ||
"Portal wire protocol local content lookups that don't hit the offer cache", | ||
labels = ["protocol_id"] | ||
|
||
declareCounter portal_poke_offers, | ||
|
@@ -161,8 +167,16 @@ type | |
|
||
RadiusCache* = LruCache[NodeId, UInt256] | ||
|
||
# Caches content fetched from the network during lookups. | ||
# Content outside our radius is also cached in order to improve performance | ||
# of queries which may lookup data outside our radius. | ||
ContentCache = LruCache[ContentId, seq[byte]] | ||
|
||
# Caches the most recently received content/offers. | ||
# Content is only stored in this cache if it falls within our radius and similarly | ||
# the cache is only checked if the content id is within our radius. | ||
OfferCache = LruCache[ContentId, seq[byte]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I initially thought of this as a cache that only holds the content ids or the content keys (and thus could store more of them for the same data size). The cache would only be there to avoid hammering the database on a spam of offers. After checking the PR I understand that it is similar as the In general the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I remember we discussed only storing the content keys/ids and mapping to a bool in order to save space. I'm open to doing it this way which does provide some benefit for rejecting existing offers quickly but I believe that caching the recently received offers provides some other benefits because the cached offers are reusable in the other flows. The Sqlite in memory caching is limited and having some form of in memory caching per sub-network that is not specifically tied to recent queries will be useful. For example once we have the network fully synced and following the latest blocks, the most recently received offered content is likely to be looked up more frequently during normal usage of portal network by applications. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Compare the in memory caching of Sqlite to RocksDb:
For RocksDb the block cache improves the performance of recent reads and the MemTable improves the performance of reading recently written values (this is not the only purpose of the MemTable but one of its benefits). For the caching in Fluffy I was thinking we should have something similar in purpose to the RocksDb MemTable where we cache recent writes (we can keep the default size small to avoid increased memory usage). For Fluffy we have a single database/table shared across multiple logically isolated sub-networks for which the data doesn't normally need to be queried together which ideally should be stored in separate tables (but that leads to compilations with pruning). With the offer cache we at least have a separate write cache per subnetwork to alleviate load on the shared database. My motivation for this change was partly because I noticed that the performance of returning a value from a large fully populated LRU cache is dramatically faster than even doing a contains check on a small Sqlite db. |
||
|
||
ContentKV* = object | ||
contentKey*: ContentKeyByteList | ||
content*: seq[byte] | ||
|
@@ -197,6 +211,7 @@ type | |
radiusCache: RadiusCache | ||
offerQueue: AsyncQueue[OfferRequest] | ||
offerWorkers: seq[Future[void]] | ||
offerCache: OfferCache | ||
pingTimings: Table[NodeId, chronos.Moment] | ||
config*: PortalProtocolConfig | ||
|
||
|
@@ -401,6 +416,55 @@ proc handleFindNodes(p: PortalProtocol, fn: FindNodesMessage): seq[byte] = | |
let enrs = List[ByteList[2048], 32](@[]) | ||
encodeMessage(NodesMessage(total: 1, enrs: enrs)) | ||
|
||
proc storeContent*( | ||
p: PortalProtocol, | ||
contentKey: ContentKeyByteList, | ||
contentId: ContentId, | ||
content: seq[byte], | ||
cacheContent = false, | ||
cacheOffer = false, | ||
): bool {.discardable.} = | ||
if cacheContent and not p.config.disableContentCache: | ||
# We cache content regardless of whether it is in our radius or not | ||
p.contentCache.put(contentId, content) | ||
|
||
# Always re-check that the key is still in the node range to make sure only | ||
# content in range is stored. | ||
if p.inRange(contentId): | ||
p.dbPut(contentKey, contentId, content) | ||
|
||
if cacheOffer and not p.config.disableOfferCache: | ||
p.offerCache.put(contentId, content) | ||
|
||
true | ||
else: | ||
false | ||
|
||
proc getLocalContent*( | ||
p: PortalProtocol, contentKey: ContentKeyByteList, contentId: ContentId | ||
): Opt[seq[byte]] = | ||
# The cache can contain content that is not in our radius | ||
let maybeContent = p.contentCache.get(contentId) | ||
if maybeContent.isSome(): | ||
portal_content_cache_hits.inc(labelValues = [$p.protocolId]) | ||
return maybeContent | ||
|
||
portal_content_cache_misses.inc(labelValues = [$p.protocolId]) | ||
|
||
# Check first if content is in range, as this is a cheaper operation | ||
# than the database lookup. | ||
if p.inRange(contentId): | ||
let maybeContent = p.offerCache.get(contentId) | ||
if maybeContent.isSome(): | ||
portal_offer_cache_hits.inc(labelValues = [$p.protocolId]) | ||
return maybeContent | ||
|
||
portal_offer_cache_misses.inc(labelValues = [$p.protocolId]) | ||
|
||
p.dbGet(contentKey, contentId) | ||
else: | ||
Opt.none(seq[byte]) | ||
|
||
proc handleFindContent( | ||
p: PortalProtocol, fc: FindContentMessage, srcId: NodeId | ||
): seq[byte] = | ||
|
@@ -420,25 +484,21 @@ proc handleFindContent( | |
int64(logDistance), labelValues = [$p.protocolId] | ||
) | ||
|
||
# Check first if content is in range, as this is a cheaper operation | ||
if p.inRange(contentId): | ||
let contentResult = p.dbGet(fc.contentKey, contentId) | ||
if contentResult.isOk(): | ||
let content = contentResult.get() | ||
if content.len <= maxPayloadSize: | ||
return encodeMessage( | ||
ContentMessage( | ||
contentMessageType: contentType, content: ByteList[2048](content) | ||
) | ||
let contentResult = p.getLocalContent(fc.contentKey, contentId) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When handling findContent messages we should return the content from the cache if available. When following the latest blocks, we likely will see more of these queries looking up the latest received offers. |
||
if contentResult.isOk(): | ||
let content = contentResult.get() | ||
if content.len <= maxPayloadSize: | ||
return encodeMessage( | ||
ContentMessage( | ||
contentMessageType: contentType, content: ByteList[2048](content) | ||
) | ||
else: | ||
let connectionId = p.stream.addContentRequest(srcId, content) | ||
) | ||
else: | ||
let connectionId = p.stream.addContentRequest(srcId, content) | ||
|
||
return encodeMessage( | ||
ContentMessage( | ||
contentMessageType: connectionIdType, connectionId: connectionId | ||
) | ||
) | ||
return encodeMessage( | ||
ContentMessage(contentMessageType: connectionIdType, connectionId: connectionId) | ||
) | ||
|
||
# Node does not have the content, or content is not even in radius, | ||
# send closest neighbours to the requested content id. | ||
|
@@ -479,7 +539,10 @@ proc handleOffer(p: PortalProtocol, o: OfferMessage, srcId: NodeId): seq[byte] = | |
) | ||
|
||
if p.inRange(contentId): | ||
if not p.dbContains(contentKey, contentId): | ||
# Checking the offer cache first to reduce the load on the database | ||
# for the case when the offer already exists and it was recently accepted | ||
if not p.offerCache.contains(contentId) and | ||
not p.dbContains(contentKey, contentId): | ||
contentKeysBitList.setBit(i) | ||
discard contentKeys.add(contentKey) | ||
else: | ||
|
@@ -592,6 +655,8 @@ proc new*( | |
stream: stream, | ||
radiusCache: RadiusCache.init(256), | ||
offerQueue: newAsyncQueue[OfferRequest](concurrentOffers), | ||
offerCache: | ||
OfferCache.init(if config.disableOfferCache: 0 else: config.offerCacheSize), | ||
pingTimings: Table[NodeId, chronos.Moment](), | ||
config: config, | ||
) | ||
|
@@ -955,7 +1020,7 @@ proc offer( | |
if contentIdResult.isOk(): | ||
let | ||
contentId = contentIdResult.get() | ||
contentResult = p.dbGet(contentKey, contentId) | ||
contentResult = p.getLocalContent(contentKey, contentId) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In the case when offering content via the database, the cache should be checked. The only downside I can think of is that it would influence the order of the content cache which may not be desired. It might be better to just check the offer cache and not the content cache, not sure about this yet. |
||
|
||
var output = memoryOutput() | ||
if contentResult.isOk(): | ||
|
@@ -1600,44 +1665,6 @@ proc randomGossipDiscardPeers*( | |
): Future[void] {.async: (raises: [CancelledError]).} = | ||
discard await p.randomGossip(srcNodeId, contentKeys, content) | ||
|
||
proc storeContent*( | ||
p: PortalProtocol, | ||
contentKey: ContentKeyByteList, | ||
contentId: ContentId, | ||
content: seq[byte], | ||
cacheContent = false, | ||
): bool {.discardable.} = | ||
if cacheContent and not p.config.disableContentCache: | ||
# We cache content regardless of whether it is in our radius or not | ||
p.contentCache.put(contentId, content) | ||
|
||
# Always re-check that the key is still in the node range to make sure only | ||
# content in range is stored. | ||
if p.inRange(contentId): | ||
doAssert(p.dbPut != nil) | ||
p.dbPut(contentKey, contentId, content) | ||
true | ||
else: | ||
false | ||
|
||
proc getLocalContent*( | ||
p: PortalProtocol, contentKey: ContentKeyByteList, contentId: ContentId | ||
): Opt[seq[byte]] = | ||
# The cache can contain content that is not in our radius | ||
let maybeContent = p.contentCache.get(contentId) | ||
if maybeContent.isSome(): | ||
portal_content_cache_hits.inc(labelValues = [$p.protocolId]) | ||
return maybeContent | ||
|
||
portal_content_cache_misses.inc(labelValues = [$p.protocolId]) | ||
|
||
# Check first if content is in range, as this is a cheaper operation | ||
# than the database lookup. | ||
if p.inRange(contentId): | ||
p.dbGet(contentKey, contentId) | ||
else: | ||
Opt.none(seq[byte]) | ||
|
||
proc seedTable*(p: PortalProtocol) = | ||
## Seed the table with specifically provided Portal bootstrap nodes. These are | ||
## nodes that must support the wire protocol for the specific content network. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wasn't sure about whether to cache in the store call. It may not be necessary and might be better to only cache offers received over the network.