Dataset pulling algorithm
The approach is to explore the chunk graph of both sink and source in order of decreasing ref-height. As the code walks, it uses the knowledge gained about which chunks are present in the sink to both prune the source-graph-walk and build up a set of hints
that can be sent to a remote Database to aid in chunk validation.
Basic algorithm
-
let
sink
be the sink database -
let
source
be the source database -
let
snkQ
andsrcQ
be priority queues ofRef
prioritized by highestRef.height
-
let
hints
be a map ofhash => hash
-
let
reachableChunks
be a set of hashes -
let
snkHdRef
be the ref (ofCommit
) of the head of the sink dataset -
let
srcHdRef
be the ref of the sourceCommit
, which must descend from theCommit
indicated bysnkHdRef
-
let
traverseSource(srcRef, srcQ, sink, source, reachableChunks)
be- pop
srcRef
fromsrcQ
- if
!sink.has(srcRef)
- let
c
=source.batchStore().Get(srcRef.targetHash)
- let
v
=types.DecodeValue(c, source)
- insert all child refs,
cr
, fromv
intosrcQ
and into reachableRefs sink.batchStore().Put(c, srcRef.height, no hints)
- (hints will all be gathered and handed to sink.batchStore at the end)
- let
- if
- pop
-
let
traverseSink(sinkRef, snkQ, sink, hints)
be- pop
snkRef
fromsnkQ
- if
snkRef.height
> 1- let
v
=sink.readValue(snkRef.targetHash)
- insert all child refs,
cr
, fromv
intosnkQ
andhints[cr] = snkRef
- let
- pop
-
let
traverseCommon(comRef, snkHdRef, snkQ, srcQ, sink, hints)
be- pop
comRef
from bothsnkQ
andsrcQ
- if
comRef.height
> 1- if
comRef
is aRef
ofCommit
- let
v
=sink.readValue(comRef.targetHash)
- if
comRef
== snkHdRef- ignore all parent refs
- insert each other child ref
cr
fromv
intosnkQ
only, sethints[cr] = comRef
- else
- insert each child ref
cr
fromv
into bothsnkQ
andsrcQ
, sethints[cr] = comRef
- insert each child ref
- let
- if
- pop
-
let `pull(source, sink, srcHdRef, sinkHdRef)
- insert
snkHdRef
intosnkQ
andsrcHdRef
intosrcQ
- create empty
hints
andreachableChunks
- while
srcQ
is non-empty- let
srcHt
andsnkHt
be the respective heights of the topRef
in each ofsrcQ
andsnkQ
- if
srcHt
>snkHt
, for everysrcHdRef
insrcQ
which is of greater height thansnkHt
traverseSource(srcHdRef, srcQ, sink, source)
- else if
snkHt
>srcHt
, for everysnkHdRef
insnkQ
which is of greater height thansrcHt
traverseSink(snkHdRef, snkQ, sink)
- else
- for every
comRef
in which is common tosnkQ
andsrcQ
which is of heightsrcHt
(andsnkHt
)traverseCommon(comRef, snkHdRef, snkQ, srcQ, sink, hints)
- for every
ref
insrcQ
which is of heightsrcHt
traverseSource(ref, srcQ, sink, source, reachableChunks)
- for every
ref
insnkQ
which is of heightsnkHt
traverseSink(ref, snkQ, sink, hints)
- for every
- let
- for all
hash
inreachableChunks
- sink.batchStore().addHint(hints[hash])
- insert
Isomorphic, but less clear, algorithm
-
let all identifiers be as above
-
let
traverseSource
,traverseSink
, andtraverseCommon
be as above -
let
higherThan(refA, refB)
be- if refA.height == refB.height
- return refA.targetHash < refB.targetHash
- return refA.height > refB.height
- if refA.height == refB.height
-
let `pull(source, sink, srcHdRef, sinkHdRef)
- insert
snkHdRef
intosnkQ
andsrcHdRef
intosrcQ
- create empty
hints
andreachableChunks
- while
srcQ
is non-empty- if
sinkQ
is empty- pop
ref
fromsrcQ
- `traverseSource(ref, srcQ, sink, source, reachableChunks))
- pop
- else if
higherThan(head of srcQ, head of snkQ)
- pop
ref
fromsrcQ
- `traverseSource(ref, srcQ, sink, source, reachableChunks))
- pop
- else if
higherThan(head of snkQ, head of srcQ)
- pop
ref
fromsnkQ
traverseSink(ref, snkQ, sink, hints)
- pop
- else, heads of both queues are the same
- pop
comRef
fromsnkQ
andsrcQ
traverseCommon(comRef, snkHdRef, snkQ, srcQ, sink, hints)
- pop
- if
- for all
hash
inreachableChunks
- sink.batchStore().addHint(hints[hash])
- insert