diff --git a/TODO b/TODO index c7313d95..4a2e9aec 100644 --- a/TODO +++ b/TODO @@ -37,7 +37,6 @@ LATER: * send progress information via sd_notify(), so that people can wrap casync nicely in UIs * maybe turn "recursive" mode into a numeric value specifying how far to descend? * make "casync stat" work on a directory with a subpath -* tweak chunker: shift cut to last "marker". * define sane errors we can show user messages about * introduce a --best-effort mode when replaying, which means we'll ignore what we can't apply * when building the cache, also build a seed @@ -49,6 +48,5 @@ LATER: * make sure "casync list /etc/fstab" does something useful * rework CaSeed logic to use CaCache as backend, and then add a new command "casync cache" or so, to explicitly generate a cache/seed * support blake2 as hashes -* parallelize image generation: when storing chunks in the store do so in a thread * in "casync stat" output show which flags enable what * save/restore xfs/ext4 projid diff --git a/doc/casync.rst b/doc/casync.rst index 6191f716..f5a432d6 100644 --- a/doc/casync.rst +++ b/doc/casync.rst @@ -157,6 +157,8 @@ General options: --store=PATH The primary chunk store to use --extra-store= Additional chunk store to look for chunks in --chunk-size=<[MIN:]AVG[:MAX]> The minimal/average/maximum number of bytes in a chunk +--cutmark=CUTMARK Specify a cutmark +--cutmark-delta-bytes=BYTES Maximum bytes to shift cut due to cutmark --digest= Pick digest algorithm (sha512-256 or sha256) --compression= Pick compression algorithm (zstd, xz or gzip) --seed= Additional file or directory to use as seed @@ -291,3 +293,79 @@ excluded: unconditionally take precedence over lines not marked like this. Moreover, lines prefixed with ``!`` also cancel the effect of patterns in ``.caexclude`` files placed in directories further up the tree. 
+ +Cutmarks +-------- + +``casync`` cuts the stream to serialize into chunks of an average size (as +specified with ``--chunk-size=``), determining cut points using the ``buzhash`` +rolling hash function and a modulo test. Frequently, cut points determined that +way are at slightly inconvenient locations: in the middle of objects serialized +in the stream rather than before or after them, thus needlessly exploding +changes to individual objects into more than one chunk. To optimize this, +**cutmarks** may be configured. These are byte sequences (up to 8 +bytes in length) that ``casync`` automatically detects in the data stream and that should be +considered particularly good cutpoints. When cutmarks are defined, the chunking +algorithm will slightly move the cut point between two chunks to match a +cutmark if one has recently been seen in the serialization stream. + +Cutmarks may be specified with the ``--cutmark=`` option. It takes a cutmark +specification in the format ``VALUE/MASK+OFFSET`` or ``VALUE/MASK-OFFSET``. The +first part, the value, indicates the byte sequence to detect in hexadecimal +digits, up to 8 bytes (thus 16 characters) in length. Following the slash a +bitmask (also in hexadecimal) may be specified of the same size. Every 8 byte +sequence at every 1 byte granularity stream position is tested against the +value. If all bits indicated in the mask match, a cutmark is found. The third +part of the specification indicates where to place the cutmark specifically +relative to the end of the 8 byte sequence. Specify ``-8`` to cut +immediately before the cutmark sequence, and ``+0`` right after. The offset +(along with its ``+`` or ``-`` character) may be omitted, in which case the +offset is assumed to be zero, i.e. the cut is done right after the +sequence. The mask (along with its ``/`` character) may also be omitted, in +which case it is assumed to be ``FFFFFFFFFFFFFFFF``, i.e. all +bits on, matching the full specified byte sequence.
In order to match shorter +byte sequences (for example to adapt the tool to some specific file format using +shorter object or section markers) simply specify a shorter mask value and +correct the offset value. + +Examples: + + --cutmark=123456789ABCDEF0 + + +This defines a cutmark to be the 8 byte sequence 0x12, 0x34, 0x56, 0x78, 0x9A, +0xBC, 0xDE, 0xF0, and the cut is placed right after the last byte, i.e. after the +0xF0. + + + --cutmark=C0FFEE/FFFFFF-5 + + +This defines a cutmark to be the 3 byte sequence 0xC0, 0xFF, 0xEE and the cut is +placed right after the last byte, i.e. after the 0xEE. + + --cutmark=C0DECAFE/FFFFFFFF-8 + + +This defines a cutmark to be the 4 byte sequence 0xC0, 0xDE, 0xCA, 0xFE and the +cut is placed right before the first byte, i.e. before the 0xC0. + +When operating on the file system layer (i.e. when creating `.caidx` files), +the implicit cutmark of ``--cutmark=51bb5beabcfa9613+8`` is used, to increase +the chance that cutmarks are placed right before each serialized file. + +Multiple cutmarks may be defined on the same operation, simply specify +``--cutmark=`` multiple times. The parameter also takes the special values +``yes`` and ``no``. If the latter is used, any implicit cutmarks are turned off, in +particular the implicit cutmark used when generating ``.caidx`` files above. + +``casync`` will honour cutmarks only within the immediate vicinity of the cut +point the modulo test suggested. By default this is a 16K window before the +calculated cut point. This value may be altered using the +``--cutmark-delta-max=`` setting. + +Any configured cutmark (and the selected ``--cutmark-delta-max=`` value) is +also stored in the ``.caidx`` or ``.caibx`` file to ensure that such an index +file contains sufficient data for an extracting client to properly use an +existing file system tree (or block device) as seed while applying the same +chunking logic as the original image.
diff --git a/meson.build b/meson.build index 49dcaf0e..78967a07 100644 --- a/meson.build +++ b/meson.build @@ -88,6 +88,7 @@ foreach ident : [ ['copy_file_range', '''#define _GNU_SOURCE #include #include '''], + ['reallocarray', '''#include '''], ] have = cc.has_function(ident[0], args : '-D_GNU_SOURCE', prefix : ident[1]) conf.set10('HAVE_' + ident[0].to_upper(), have) @@ -312,6 +313,14 @@ test_cache = find_program(test_cache_sh) test('test-cache.sh', test_cache, timeout : 30 * 60) +test_seed_sh = configure_file( + output : 'test-seed.sh', + input : 'test/test-seed.sh.in', + configuration : substs) +test_seed = find_program(test_seed_sh) +test('test-seed.sh', test_seed, + timeout : 30 * 60) + udev_rule = configure_file( output : '75-casync.rules', input : 'src/75-casync.rules.in', @@ -325,6 +334,7 @@ test_sources = ''' test-cachunk test-cachunker test-cachunker-histogram + test-cacutmark test-cadigest test-caencoder test-calocation diff --git a/src/affinity-count.c b/src/affinity-count.c new file mode 100644 index 00000000..29f749ea --- /dev/null +++ b/src/affinity-count.c @@ -0,0 +1,39 @@ +#include "affinity-count.h" + +#include +#include +#include + +int cpus_in_affinity_mask(void) { + size_t n = 16; + int r; + + for (;;) { + cpu_set_t *c; + + c = CPU_ALLOC(n); + if (!c) + return -ENOMEM; + + if (sched_getaffinity(0, CPU_ALLOC_SIZE(n), c) >= 0) { + int k; + + k = CPU_COUNT_S(CPU_ALLOC_SIZE(n), c); + CPU_FREE(c); + + if (k <= 0) + return -EINVAL; + + return k; + } + + r = -errno; + CPU_FREE(c); + + if (r != -EINVAL) + return r; + if (n*2 < n) + return -ENOMEM; + n *= 2; + } +} diff --git a/src/affinity-count.h b/src/affinity-count.h new file mode 100644 index 00000000..0a07d098 --- /dev/null +++ b/src/affinity-count.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#ifndef fooaffinitycounthfoo +#define fooaffinitycounthfoo + +int cpus_in_affinity_mask(void); + +#endif diff --git a/src/cachunker.c b/src/cachunker.c index 35fe8ca2..853343bc 100644 --- 
a/src/cachunker.c +++ b/src/cachunker.c @@ -197,35 +197,102 @@ static bool shall_break(CaChunker *c, uint32_t v) { return (v % c->discriminator) == (c->discriminator - 1); } -size_t ca_chunker_scan(CaChunker *c, const void* p, size_t n) { +static bool CA_CHUNKER_IS_FIXED_SIZE(CaChunker *c) { + return c->chunk_size_min == c->chunk_size_avg && + c->chunk_size_max == c->chunk_size_avg; +} + +static bool is_cutmark( + uint64_t qword, + const CaCutmark *cutmarks, + size_t n_cutmarks, + int64_t *ret_delta) { + + size_t i; + + for (i = 0; i < n_cutmarks; i++) { + const CaCutmark *m = cutmarks + i; + + if (((qword ^ m->value) & m->mask) == 0) { + *ret_delta = m->delta; + return true; + } + } + + *ret_delta = 0; + return false; +} + +static void find_cutmarks(CaChunker *c, const void *p, size_t n) { const uint8_t *q = p; + + /* Find "cutmarks", i.e. special magic values that may appear in the + * stream that make particularly good places for cutting up things into + * chunks. When our chunker finds a position to cut we'll check if a + * cutmark was recently seen, and if so the cut will be moved to that + * place. */ + + if (c->n_cutmarks == 0) /* Shortcut: no cutmark magic cutmark values defined */ + return; + + for (; n > 0; q++, n--) { + /* Let's put together a qword overlapping every byte of the file, in BE ordering. */ + c->qword_be = (c->qword_be << 8) | *q; + + if (c->last_cutmark >= (ssize_t) sizeof(c->qword_be)) { + int64_t delta; + + if (is_cutmark(be64toh(c->qword_be), c->cutmarks, c->n_cutmarks, &delta)) { + c->last_cutmark = -delta; + continue; + } + } + + c->last_cutmark++; + } +} + +static void chunker_cut(CaChunker *c) { + assert(c); + + c->h = 0; + c->window_size = 0; + c->chunk_size = 0; +} + +size_t ca_chunker_scan(CaChunker *c, bool test_break, const void* p, size_t n) { + const uint8_t *q = p; + size_t k, idx; uint32_t v; - size_t k = 0, idx; assert(c); assert(p); - /* fixed size chunker */ + /* Scans the specified bytes for chunk borders. 
Returns (size_t) -1 if + * no border was discovered, otherwise the chunk size. */ - if (c->chunk_size_min == c->chunk_size_avg && c->chunk_size_max == c->chunk_size_avg) { - size_t m; - size_t fixed_size = c->chunk_size_avg; + if (CA_CHUNKER_IS_FIXED_SIZE(c)) { + /* Special case: fixed size chunker */ + size_t m, fixed_size = c->chunk_size_avg; /* Append to window to make it full */ + assert(c->chunk_size < fixed_size); m = MIN(fixed_size - c->chunk_size, n); - c->chunk_size += m; + /* Note that we don't search for cutmarks if we are in fixed + * size mode, since there's no point, we'll not move the cuts + * anyway, since the chunks shall be fixed size. */ + if (c->chunk_size < fixed_size) return (size_t) -1; - k = m; - - goto now; + chunker_cut(c); + return m; } - /* Scans the specified bytes for chunk borders. Returns (size_t) -1 if no border was discovered, otherwise the - * chunk size. */ + if (c->window_size == 0) /* Reset cutmark location after each cut */ + c->last_cutmark = c->chunk_size; if (c->window_size < CA_CHUNKER_WINDOW_SIZE) { size_t m; @@ -240,16 +307,20 @@ size_t ca_chunker_scan(CaChunker *c, const void* p, size_t n) { c->chunk_size += m; /* If the window isn't full still, return early */ - if (c->window_size < CA_CHUNKER_WINDOW_SIZE) + if (c->window_size < CA_CHUNKER_WINDOW_SIZE) { + find_cutmarks(c, p, m); return (size_t) -1; + } /* Window is full, we are now ready to go. 
*/ v = ca_chunker_start(c, c->window, c->window_size); k = m; - if (shall_break(c, v)) + if (test_break && + shall_break(c, v)) goto now; - } + } else + k = 0; idx = c->chunk_size % CA_CHUNKER_WINDOW_SIZE; @@ -258,7 +329,8 @@ size_t ca_chunker_scan(CaChunker *c, const void* p, size_t n) { c->chunk_size++; k++; - if (shall_break(c, v)) + if (test_break && + shall_break(c, v)) goto now; c->window[idx++] = *q; @@ -268,12 +340,179 @@ size_t ca_chunker_scan(CaChunker *c, const void* p, size_t n) { q++, n--; } + find_cutmarks(c, p, k); return (size_t) -1; now: - c->h = 0; - c->chunk_size = 0; - c->window_size = 0; - + find_cutmarks(c, p, k); + chunker_cut(c); return k; } + +uint64_t ca_chunker_cutmark_delta_max(CaChunker *c) { + assert(c); + + if (c->cutmark_delta_max != UINT64_MAX) + return c->cutmark_delta_max; + + /* If not specified otherwise, use a quarter of the average chunk size. (Unless reconfigured this + * happens to match the default for chunk_size_min btw). */ + return c->chunk_size_avg / 4; +} + +int ca_chunker_extract_chunk( + CaChunker *c, + ReallocBuffer *buffer, + const void **pp, + size_t *ll, + const void **ret_chunk, + size_t *ret_chunk_size) { + + const void *chunk = NULL, *p; + size_t chunk_size = 0, k; + bool indirect = false; + size_t l; + + /* Takes some input data at *p of size *l and chunks it. If this supplied data is too short for a + * cut, adds the data to the specified buffer instead and returns CA_CHUNKER_NOT_YET. If a chunk is + * generated returns CA_CHUNKER_DIRECT or CA_CHUNKER_INDIRECT and returns a ptr to the new chunk in + * *ret_chunk, and its size in *ret_chunk_size. The value CA_CHUNKER_DIRECT is returned if these + * pointers point into the memory area passed in through *p and *l. The value CA_CHUNKER_INDIRECT is + * returned if the pointers point to a memory area in the specified 'buffer' object. In the latter + * case the caller should drop the chunk from the buffer after use with realloc_buffer_advance(). 
*/ + + assert(c); + assert(buffer); + assert(pp); + assert(ll); + + p = *pp; + l = *ll; + + if (c->cut_pending != (size_t) -1) { + + /* Is there a cut pending, if so process that first. */ + + if (c->cut_pending > l) { + /* Need to read more. Let's add what we have now to the buffer hence, and continue */ + if (!realloc_buffer_append(buffer, p, l)) + return -ENOMEM; + + c->cut_pending -= l; + goto not_yet; + } + + k = c->cut_pending; + c->cut_pending = (size_t) -1; + + } else { + k = ca_chunker_scan(c, true, p, l); + if (k == (size_t) -1) { + /* No cut yet, add the stuff to the buffer, and return */ + if (!realloc_buffer_append(buffer, p, l)) + return -ENOMEM; + + goto not_yet; + } + + /* Nice, we got told by the chunker to generate a new chunk. Let's see if we have any + * suitable cutmarks we can use, so that we slightly shift the actual cut. */ + + if (c->n_cutmarks > 0) { + + /* Is there a left-hand cutmark defined that is within the area we are looking? */ + if (c->last_cutmark > 0 && + (size_t) c->last_cutmark <= ca_chunker_cutmark_delta_max(c)) { + + size_t cs; + + /* Yay, found a cutmark, to the left of the calculated cut. */ + + cs = realloc_buffer_size(buffer) + k; + + /* Does this cutmark violate of the minimal chunk size? */ + if ((size_t) c->last_cutmark < cs && + cs - (size_t) c->last_cutmark >= c->chunk_size_min) { + + /* Yay, we found a cutmark we can apply. 
Let's add everything we have + * to the full buffer, and the take out just what we need from it.*/ + + if (!realloc_buffer_append(buffer, p, k)) + return -ENOMEM; + + chunk = realloc_buffer_data(buffer); + + chunk_size = realloc_buffer_size(buffer); + assert_se(chunk_size >= (size_t) c->last_cutmark); + chunk_size -= (size_t) c->last_cutmark; + + indirect = true; + + c->cutmark_delta_sum += c->last_cutmark; + c->n_cutmarks_applied ++; + + /* Make sure the rolling hash function processes the data that remains in the buffer */ + assert_se(ca_chunker_scan(c, false, (uint8_t*) chunk + chunk_size, c->last_cutmark) == (size_t) -1); + } + + } else if (c->last_cutmark < 0 && + (size_t) -c->last_cutmark <= ca_chunker_cutmark_delta_max(c)) { + + size_t cs; + + /* Yay, found a cutmark, to the right of the calculated cut */ + + cs = realloc_buffer_size(buffer) + k; + + if (cs + (size_t) -c->last_cutmark <= c->chunk_size_max) { + + /* Remember how many more bytes to process before the cut we + * determine shall take place. Note that we don't advance anything + * here, we'll call ourselves and then do that for. */ + c->cut_pending = k + (size_t) -c->last_cutmark; + + c->cutmark_delta_sum -= c->last_cutmark; + c->n_cutmarks_applied ++; + + return ca_chunker_extract_chunk(c, buffer, pp, ll, ret_chunk, ret_chunk_size); + } + } + } + } + + if (!chunk) { + if (realloc_buffer_size(buffer) == 0) { + chunk = p; + chunk_size = k; + } else { + if (!realloc_buffer_append(buffer, p, k)) + return -ENOMEM; + + chunk = realloc_buffer_data(buffer); + chunk_size = realloc_buffer_size(buffer); + + indirect = true; + } + } + + *pp = (uint8_t*) p + k; + *ll = l - k; + + if (ret_chunk) + *ret_chunk = chunk; + if (ret_chunk_size) + *ret_chunk_size = chunk_size; + + return indirect ? 
CA_CHUNKER_INDIRECT : CA_CHUNKER_DIRECT; + +not_yet: + *pp = (uint8_t*) p + l; + *ll = 0; + + if (ret_chunk) + *ret_chunk = NULL; + if (ret_chunk_size) + *ret_chunk_size = 0; + + return CA_CHUNKER_NOT_YET; +} diff --git a/src/cachunker.h b/src/cachunker.h index 8329b9b4..d3e5312d 100644 --- a/src/cachunker.h +++ b/src/cachunker.h @@ -7,6 +7,9 @@ #include #include +#include "cacutmark.h" +#include "realloc-buffer.h" + /* The default average chunk size */ #define CA_CHUNK_SIZE_AVG_DEFAULT ((size_t) (64U*1024U)) @@ -32,6 +35,22 @@ typedef struct CaChunker { size_t discriminator; uint8_t window[CA_CHUNKER_WINDOW_SIZE]; + + const CaCutmark *cutmarks; /* List of defined cutmarks to look for */ + size_t n_cutmarks; + + ssize_t last_cutmark; /* The byte offset we have seen the last cutmark at, relative to the current byte index */ + uint64_t qword_be; /* The last 8 byte we read, always shifted through and hence in BE format. */ + + /* How many bytes to go back to search for cutmarks at most */ + uint64_t cutmark_delta_max; + + /* A cutmark was previously found, pointing to a cut in the future. This specifies how many more + * bytes to process before the cut we already determined shall take place. */ + size_t cut_pending; + + uint64_t n_cutmarks_applied; + int64_t cutmark_delta_sum; } CaChunker; /* The default initializer for the chunker. We pick an average chunk size equivalent to 64K */ @@ -41,6 +60,8 @@ typedef struct CaChunker { .chunk_size_avg = CA_CHUNK_SIZE_AVG_DEFAULT, \ .chunk_size_max = CA_CHUNK_SIZE_AVG_DEFAULT*4, \ .discriminator = CA_CHUNKER_DISCRIMINATOR_FROM_AVG(CA_CHUNK_SIZE_AVG_DEFAULT), \ + .cutmark_delta_max = UINT64_MAX, \ + .cut_pending = (size_t) -1, \ } /* Set the min/avg/max chunk size. Each parameter may be 0, in which case a default is used. */ @@ -48,10 +69,20 @@ int ca_chunker_set_size(CaChunker *c, size_t min_size, size_t avg_size, size_t m /* Scans the specified data for a chunk border. 
Returns (size_t) -1 if none was found (and the function should be * called with more data later on), or another value indicating the position of a border. */ -size_t ca_chunker_scan(CaChunker *c, const void* p, size_t n); +size_t ca_chunker_scan(CaChunker *c, bool test_break, const void* p, size_t n); /* Low-level buzhash functions. Only exported for testing purposes. */ uint32_t ca_chunker_start(CaChunker *c, const void *p, size_t n); uint32_t ca_chunker_roll(CaChunker *c, uint8_t pop_byte, uint8_t push_byte); +uint64_t ca_chunker_cutmark_delta_max(CaChunker *c); + +enum { + CA_CHUNKER_NOT_YET, /* Not enough data for chunk */ + CA_CHUNKER_DIRECT, /* Found chunk, directly in the specified *pp and *ll buffer */ + CA_CHUNKER_INDIRECT, /* Found chunk, but inside of *buffer, need to advance it afterwards */ +}; + +int ca_chunker_extract_chunk(CaChunker *c, ReallocBuffer *buffer, const void **pp, size_t *ll, const void **ret_chunk, size_t *ret_chunk_size); + #endif diff --git a/src/cacutmark.c b/src/cacutmark.c new file mode 100644 index 00000000..e06e5fb9 --- /dev/null +++ b/src/cacutmark.c @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include +#include + +#include "cacutmark.h" +#include "util.h" + +int ca_cutmark_parse(CaCutmark *c, const char *p) { + enum { + VALUE, + MASK, + DELTA_PLUS, + DELTA_MINUS, + } state = VALUE; + + uint64_t value = 0, mask = 0, udelta = 0; + int64_t delta = 0; + size_t digits = 0; + const char *q; + + /* Parsers a cutmark specification. Expects a value (in hex), + * optionally followed by a slash and a mask (in hex), optionally + * followed by +/- and a delta offset (in dec). 
*/ + + for (q = p;; q++) { + + switch (state) { + + case VALUE: + if (*q == 0) { + if (digits == 0) + return -EINVAL; + + goto done; + + } else if (*q == '/') { + if (digits == 0) + return -EINVAL; + + state = MASK; + mask = 0; + digits = 0; + + } else if (*q == '+') { + if (digits == 0) + return -EINVAL; + + state = DELTA_PLUS; + digits = 0; + + } else if (*q == '-') { + if (digits == 0) + return -EINVAL; + + state = DELTA_MINUS; + digits = 0; + + } else { + int k; + + if (digits >= 16) + return -EOVERFLOW; + + k = unhexchar(*q); + if (k < 0) + return k; + + value = (value << 4) | k; + mask = (mask << 4) | 0xFU; + digits++; + } + + break; + + case MASK: + if (*q == 0) { + if (digits == 0 || mask == 0) + return -EINVAL; + + goto done; + + } else if (*q == '+') { + if (digits == 0 || mask == 0) + return -EINVAL; + + state = DELTA_PLUS; + digits = 0; + } else if (*q == '-') { + if (digits == 0 || mask == 0) + return -EINVAL; + + state = DELTA_MINUS; + digits = 0; + } else { + int k; + + if (digits >= 16) + return -EOVERFLOW; + + k = unhexchar(*q); + if (k < 0) + return k; + + mask = (mask << 4) | k; + digits++; + } + + break; + + case DELTA_PLUS: + case DELTA_MINUS: + + if (*q == 0) { + if (digits == 0) + return -EINVAL; + + if (state == DELTA_MINUS) { + if (udelta > - (uint64_t) INT64_MIN) + return -EOVERFLOW; + + if (udelta == -(uint64_t) INT64_MIN) + delta = INT64_MIN; + else + delta = -(int64_t) udelta; + } else { + if (udelta > INT64_MAX) + return -EOVERFLOW; + + delta = (int64_t) udelta; + } + + goto done; + } else { + uint64_t d; + int k; + + k = undecchar(*q); + if (k < 0) + return k; + + d = udelta*10; + if (d < udelta) + return -EOVERFLOW; + d += k; + if (d < udelta*10) + return -EOVERFLOW; + + udelta = d; + digits ++; + } + + break; + } + } + +done: + *c = (CaCutmark) { + .value = value, + .mask = mask, + .delta = delta, + }; + + return 0; +} + +int ca_cutmark_cmp(const CaCutmark *a, const CaCutmark *b) { + int r; + + if (a == b) + return 0; + if (!a) + 
return -1; + if (!b) + return 1; + + r = CMP(a->value, b->value); + if (r != 0) + return r; + + r = CMP(a->mask, b->mask); + if (r != 0) + return r; + + return CMP(a->delta, b->delta); +} + +void ca_cutmark_sort(CaCutmark *c, size_t n) { + + if (n <= 1) + return; + + assert(c); + qsort(c, n, sizeof(CaCutmark), (__compar_fn_t) ca_cutmark_cmp); +} diff --git a/src/cacutmark.h b/src/cacutmark.h new file mode 100644 index 00000000..984cd073 --- /dev/null +++ b/src/cacutmark.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#ifndef foocutmarkhfoo +#define foocutmarkhfoo + +#include +#include +#include + +typedef struct CaCutmark { + uint64_t value; /* Numeric value of the cutmark */ + uint64_t mask; /* Mask to apply when matching the cutmark */ + int64_t delta; /* Where to cut, as an offset (possibly negative) relative to the position right after the 64bit value. */ +} CaCutmark; + +int ca_cutmark_parse(CaCutmark *c, const char *p); + +void ca_cutmark_sort(CaCutmark *c, size_t n); + +int ca_cutmark_cmp(const CaCutmark *a, const CaCutmark *b); + +#endif diff --git a/src/caencoder.c b/src/caencoder.c index 0262a36b..38eacd4e 100644 --- a/src/caencoder.c +++ b/src/caencoder.c @@ -3527,9 +3527,9 @@ int ca_encoder_current_location(CaEncoder *e, uint64_t add, CaLocation **ret) { name_table_node = node; - /* Here's a tweak: in CA_ENCODER_FILENAME state we actually encode the child's data, as we our current - * node might be the directory, but we need to serialize at which directory entry within it we - * currently are. */ + /* Here's a tweak: in CA_ENCODER_FILENAME state we actually encode the child's data, as our + * current node might be the directory, but we need to serialize at which directory entry + * within it we currently are. 
*/ node = ca_encoder_current_child_node(e); if (!node) return -EUNATCH; diff --git a/src/caformat.h b/src/caformat.h index 660f167c..acd5c474 100644 --- a/src/caformat.h +++ b/src/caformat.h @@ -72,6 +72,8 @@ enum { /* The index file format */ CA_FORMAT_INDEX = UINT64_C(0x96824d9c7b129ff9), + CA_FORMAT_CUTMARK_DELTA_MAX = UINT64_C(0x1bd67079a4939e36), + CA_FORMAT_CUTMARK = UINT64_C(0x5a218a2418b94da3), CA_FORMAT_TABLE = UINT64_C(0xe75b9e112f17417d), /* The end marker used in the TABLE object */ @@ -427,6 +429,18 @@ typedef struct CaFormatIndex { le64_t chunk_size_max; } CaFormatIndex; +typedef struct CaFormatCutmarkDeltaMax { + CaFormatHeader header; + le64_t cutmark_delta_max; +} CaFormatCutmarkDeltaMax; + +typedef struct CaFormatCutmark { + CaFormatHeader header; + le64_t value; + le64_t mask; + le64_t delta; +} CaFormatCutmark; + typedef struct CaFormatTableItem { le64_t offset; uint8_t chunk[CA_CHUNK_ID_SIZE]; diff --git a/src/caindex.c b/src/caindex.c index 1ee74b73..84e91ecc 100644 --- a/src/caindex.c +++ b/src/caindex.c @@ -18,9 +18,9 @@ /* #define EINVAL __LINE__ */ typedef enum CaIndexMode { - CA_INDEX_WRITE, /* only cooked writing */ + CA_INDEX_WRITE, /* only cooked (i.e. individual parsed fields) writing */ CA_INDEX_READ, /* only cooked reading */ - CA_INDEX_INCREMENTAL_WRITE, /* cooked writing + incremental raw reading back */ + CA_INDEX_INCREMENTAL_WRITE, /* cooked writing + incremental raw (i.e. 
raw byte-wise) reading back */ CA_INDEX_INCREMENTAL_READ, /* incremental raw writing + cooked reading back */ } CaIndexMode; @@ -48,6 +48,11 @@ struct CaIndex { uint64_t file_size; /* The size of the index file */ uint64_t blob_size; /* The size of the blob this index file describes */ + + CaCutmark *cutmarks; + size_t n_cutmarks; + + uint64_t cutmark_delta_max; }; static inline uint64_t CA_INDEX_METADATA_SIZE(CaIndex *i) { @@ -142,6 +147,8 @@ CaIndex *ca_index_unref(CaIndex *i) { if (i->fd >= 2) safe_close(i->fd); + free(i->cutmarks); + return mfree(i); } @@ -242,15 +249,8 @@ static int ca_index_open_fd(CaIndex *i) { static int ca_index_write_head(CaIndex *i) { - struct { - CaFormatIndex index; - CaFormatHeader table; - } head = { - .index.header.size = htole64(sizeof(CaFormatIndex)), - .index.header.type = htole64(CA_FORMAT_INDEX), - .table.size = htole64(UINT64_MAX), - .table.type = htole64(CA_FORMAT_TABLE), - }; + CaFormatIndex index; + CaFormatHeader table; int r; assert(i); @@ -272,19 +272,65 @@ static int ca_index_write_head(CaIndex *i) { i->chunk_size_avg <= i->chunk_size_max)) return -EINVAL; - head.index.feature_flags = htole64(i->feature_flags); + assert(i->cooked_offset == 0); - head.index.chunk_size_min = htole64(i->chunk_size_min); - head.index.chunk_size_avg = htole64(i->chunk_size_avg); - head.index.chunk_size_max = htole64(i->chunk_size_max); + index = (CaFormatIndex) { + .header.size = htole64(sizeof(CaFormatIndex)), + .header.type = htole64(CA_FORMAT_INDEX), + .feature_flags = htole64(i->feature_flags), + .chunk_size_min = htole64(i->chunk_size_min), + .chunk_size_avg = htole64(i->chunk_size_avg), + .chunk_size_max = htole64(i->chunk_size_max), + }; - assert(i->cooked_offset == 0); + r = loop_write(i->fd, &index, sizeof(index)); + if (r < 0) + return r; - r = loop_write(i->fd, &head, sizeof(head)); + i->cooked_offset = sizeof(index); + + if (i->n_cutmarks > 0) { + CaFormatCutmarkDeltaMax dm = { + .header.size = 
htole64(sizeof(CaFormatCutmarkDeltaMax)), + .header.type = htole64(CA_FORMAT_CUTMARK_DELTA_MAX), + .cutmark_delta_max = htole64(i->cutmark_delta_max), + }; + size_t j; + + r = loop_write(i->fd, &dm, sizeof(dm)); + if (r < 0) + return r; + + i->cooked_offset += sizeof(dm); + + for (j = 0; j < i->n_cutmarks; j++) { + CaFormatCutmark cm = { + .header.size = htole64(sizeof(CaFormatCutmark)), + .header.type = htole64(CA_FORMAT_CUTMARK), + .value = htole64(i->cutmarks[j].value), + .mask = htole64(i->cutmarks[j].mask), + .delta = htole64((uint64_t) i->cutmarks[j].delta), + }; + + r = loop_write(i->fd, &cm, sizeof(cm)); + if (r < 0) + return r; + + i->cooked_offset += sizeof(cm); + } + } + + table = (CaFormatHeader) { + .size = htole64(UINT64_MAX), + .type = htole64(CA_FORMAT_TABLE), + }; + + r = loop_write(i->fd, &table, sizeof(table)); if (r < 0) return r; - i->start_offset = i->cooked_offset = sizeof(head); + i->cooked_offset += sizeof(table); + i->start_offset = i->cooked_offset; return 0; } @@ -311,12 +357,29 @@ static int ca_index_enough_data(CaIndex *i, size_t n) { return 1; } +static int ca_index_progressive_read(CaIndex *i, void *p, size_t n) { + ssize_t m; + int r; + + assert(i); + assert(p || n == 0); + + r = ca_index_enough_data(i, n); + if (r < 0) + return r; + if (r == 0) + return -EAGAIN; + + m = loop_read(i->fd, p, n); + if (m < 0) + return (int) m; + if ((size_t) m != n) + return -EPIPE; + + return 0; +} + static int ca_index_read_head(CaIndex *i) { - struct { - CaFormatIndex index; - CaFormatHeader table; - } head; - ssize_t n; int r; assert(i); @@ -326,59 +389,152 @@ static int ca_index_read_head(CaIndex *i) { if (i->start_offset != 0) /* already past the head */ return 0; - assert(i->cooked_offset == 0); + if (i->cooked_offset == 0) { + CaFormatIndex index; - r = ca_index_enough_data(i, sizeof(head)); - if (r < 0) - return r; - if (r == 0) - return -EAGAIN; + r = ca_index_progressive_read(i, &index, sizeof(index)); + if (r < 0) + return r; - n = 
loop_read(i->fd, &head, sizeof(head)); - if (n < 0) - return (int) n; - if (n != sizeof(head)) - return -EPIPE; + if (le64toh(index.header.size) != sizeof(CaFormatIndex) || + le64toh(index.header.type) != CA_FORMAT_INDEX) + return -EBADMSG; - if (le64toh(head.index.header.size) != sizeof(CaFormatIndex) || - le64toh(head.index.header.type) != CA_FORMAT_INDEX) - return -EBADMSG; + r = ca_feature_flags_are_normalized(le64toh(index.feature_flags)); + if (r < 0) + return r; + if (r == 0) + return -EINVAL; - r = ca_feature_flags_are_normalized(le64toh(head.index.feature_flags)); - if (r < 0) - return r; - if (r == 0) - return -EINVAL; + if (le64toh(index.chunk_size_min) < CA_CHUNK_SIZE_LIMIT_MIN || + le64toh(index.chunk_size_min) > CA_CHUNK_SIZE_LIMIT_MAX) + return -EBADMSG; - if (le64toh(head.index.chunk_size_min) < CA_CHUNK_SIZE_LIMIT_MIN || - le64toh(head.index.chunk_size_min) > CA_CHUNK_SIZE_LIMIT_MAX) - return -EBADMSG; + if (le64toh(index.chunk_size_avg) < CA_CHUNK_SIZE_LIMIT_MIN || + le64toh(index.chunk_size_avg) > CA_CHUNK_SIZE_LIMIT_MAX) + return -EBADMSG; - if (le64toh(head.index.chunk_size_avg) < CA_CHUNK_SIZE_LIMIT_MIN || - le64toh(head.index.chunk_size_avg) > CA_CHUNK_SIZE_LIMIT_MAX) - return -EBADMSG; + if (le64toh(index.chunk_size_max) < CA_CHUNK_SIZE_LIMIT_MIN || + le64toh(index.chunk_size_max) > CA_CHUNK_SIZE_LIMIT_MAX) + return -EBADMSG; - if (le64toh(head.index.chunk_size_max) < CA_CHUNK_SIZE_LIMIT_MIN || - le64toh(head.index.chunk_size_max) > CA_CHUNK_SIZE_LIMIT_MAX) - return -EBADMSG; + if (!(le64toh(index.chunk_size_min) <= le64toh(index.chunk_size_avg) && + le64toh(index.chunk_size_avg) <= le64toh(index.chunk_size_max))) + return -EBADMSG; - if (!(le64toh(head.index.chunk_size_min) <= le64toh(head.index.chunk_size_avg) && - le64toh(head.index.chunk_size_avg) <= le64toh(head.index.chunk_size_max))) - return -EBADMSG; + i->feature_flags = le64toh(index.feature_flags); - if (le64toh(head.table.size) != UINT64_MAX || - le64toh(head.table.type) != 
CA_FORMAT_TABLE) - return -EBADMSG; + i->chunk_size_min = le64toh(index.chunk_size_min); + i->chunk_size_avg = le64toh(index.chunk_size_avg); + i->chunk_size_max = le64toh(index.chunk_size_max); + + i->cooked_offset += sizeof(index); + } + + if (i->cooked_offset == sizeof(CaFormatIndex)) { + CaFormatHeader header; - i->start_offset = i->cooked_offset = sizeof(head); + r = ca_index_progressive_read(i, &header, sizeof(header)); + if (r < 0) + return r; - i->feature_flags = le64toh(head.index.feature_flags); + if (le64toh(header.type) == CA_FORMAT_TABLE) { + /* No cutmarks defined, the table follows immediately */ - i->chunk_size_min = le64toh(head.index.chunk_size_min); - i->chunk_size_avg = le64toh(head.index.chunk_size_avg); - i->chunk_size_max = le64toh(head.index.chunk_size_max); + if (le64toh(header.size) != UINT64_MAX) + return -EBADMSG; - return 0; + i->cooked_offset += sizeof(header); + i->start_offset = i->cooked_offset; + + return 0; + } + + /* Otherwise the cutmark delata max object has to follow immediately */ + if (le64toh(header.type) != CA_FORMAT_CUTMARK_DELTA_MAX || + le64toh(header.size) != sizeof(CaFormatCutmarkDeltaMax)) + return -EBADMSG; + + i->cooked_offset += sizeof(header); + } + + if (i->cooked_offset == sizeof(CaFormatIndex) + sizeof(CaFormatHeader)) { + CaFormatCutmarkDeltaMax dm = {}; + size_t need = sizeof(CaFormatCutmarkDeltaMax) - sizeof(CaFormatHeader); + + r = ca_index_progressive_read(i, (uint8_t*) &dm + sizeof(CaFormatHeader), need); + if (r < 0) + return r; + + i->cutmark_delta_max = le64toh(dm.cutmark_delta_max); + i->cooked_offset += need; + } + + if (i->cooked_offset >= sizeof(CaFormatIndex) + sizeof(CaFormatCutmarkDeltaMax)) { + + for (;;) { + CaFormatCutmark cm = {}; + uint64_t p; + + p = i->cooked_offset - sizeof(CaFormatIndex) - sizeof(CaFormatCutmarkDeltaMax); + if (p % sizeof(CaFormatCutmark) == 0) { + + r = ca_index_progressive_read(i, &cm.header, sizeof(cm.header)); + if (r < 0) + return r; + + if 
(le64toh(cm.header.type) == CA_FORMAT_TABLE) { + /* No further cutmarks defined, the table follows now */ + + if (le64toh(cm.header.size) != UINT64_MAX) + return -EBADMSG; + + i->cooked_offset += sizeof(cm.header); + i->start_offset = i->cooked_offset; + return 0; + } + + if (le64toh(cm.header.type) != CA_FORMAT_CUTMARK || + le64toh(cm.header.size) != sizeof(CaFormatCutmark)) + return -EBADMSG; + + i->cooked_offset += sizeof(cm.header); + } else { + size_t need = sizeof(CaFormatCutmark) - sizeof(CaFormatHeader); + CaCutmark e, *a; + + assert(p % sizeof(CaFormatCutmark) == sizeof(CaFormatHeader)); + + r = ca_index_progressive_read(i, (uint8_t*) &cm + sizeof(CaFormatHeader), need); + if (r < 0) + return r; + + if (le64toh(cm.mask) == 0) + return -EBADMSG; + + e = (CaCutmark) { + .value = le64toh(cm.value), + .mask = le64toh(cm.mask), + .delta = le64toh(cm.delta), + }; + + if (i->n_cutmarks > 0 && + ca_cutmark_cmp(i->cutmarks + i->n_cutmarks - 1, &e) >= 0) + return -EBADMSG; + + a = reallocarray(i->cutmarks, i->n_cutmarks+1, sizeof(CaCutmark)); + if (!a) + return -ENOMEM; + + i->cutmarks = a; + i->cutmarks[i->n_cutmarks++] = e; + + i->cooked_offset += need; + } + } + } + + assert_not_reached("Huh, read the wrong number of bytes so far, this can't be."); } int ca_index_open(CaIndex *i) { @@ -468,6 +624,13 @@ int ca_index_write_chunk(CaIndex *i, const CaChunkID *id, uint64_t size) { return 0; } +static uint64_t index_offset(CaIndex *i) { + assert(i); + assert(i->start_offset >= sizeof(CaFormatIndex) + offsetof(CaFormatTable, items)); + + return i->start_offset - offsetof(CaFormatTable, items); +} + int ca_index_write_eof(CaIndex *i) { CaFormatTableTail tail = {}; int r; @@ -485,7 +648,7 @@ int ca_index_write_eof(CaIndex *i) { if (r < 0) return r; - tail.index_offset = htole64(sizeof(CaFormatIndex)); + tail.index_offset = htole64(index_offset(i)); tail.size = htole64(offsetof(CaFormatTable, items) + (i->item_position * sizeof(CaFormatTableItem)) + sizeof(tail)); @@
-543,7 +706,7 @@ int ca_index_read_chunk(CaIndex *i, CaChunkID *ret_id, uint64_t *ret_offset_end, if (buffer.tail.marker == htole64(CA_FORMAT_TABLE_TAIL_MARKER) && buffer.tail._zero_fill1 == 0 && buffer.tail._zero_fill2 == 0 && - buffer.tail.index_offset == htole64(sizeof(CaFormatIndex)) && + buffer.tail.index_offset == htole64(index_offset(i)) && le64toh(buffer.tail.size) == (i->cooked_offset - i->start_offset + offsetof(CaFormatTable, items) + sizeof(CaFormatTableTail))) { uint8_t final_byte; @@ -872,6 +1035,75 @@ int ca_index_get_chunk_size_max(CaIndex *i, size_t *ret) { return 0; } +int ca_index_set_cutmarks( + CaIndex *i, + const CaCutmark *cutmarks, + size_t n_cutmarks) { + + CaCutmark *m; + + if (!i) + return -EINVAL; + if (n_cutmarks > 0 && !cutmarks) + return -EINVAL; + if (!IN_SET(i->mode, CA_INDEX_WRITE, CA_INDEX_INCREMENTAL_WRITE)) + return -EROFS; + + /* We don't validate the cutmark array here, the assumption is that it is already valid */ + + m = newdup(CaCutmark, cutmarks, n_cutmarks); + if (!m) + return -ENOMEM; + + free_and_replace(i->cutmarks, m); + i->n_cutmarks = n_cutmarks; + + return 0; +} + +int ca_index_set_cutmark_delta_max( + CaIndex *i, + uint64_t cutmark_delta_max) { + + if (!i) + return -EINVAL; + if (!IN_SET(i->mode, CA_INDEX_WRITE, CA_INDEX_INCREMENTAL_WRITE)) + return -EROFS; + + i->cutmark_delta_max = cutmark_delta_max; + return 0; +} + +int ca_index_get_cutmarks(CaIndex *i, const CaCutmark **ret_cutmarks, size_t *ret_n_cutmarks) { + if (!i) + return -EINVAL; + if (!ret_cutmarks && !ret_n_cutmarks) + return -EINVAL; + + if (IN_SET(i->mode, CA_INDEX_READ, CA_INDEX_INCREMENTAL_READ) && i->start_offset == 0) + return -ENODATA; + + if (ret_cutmarks) + *ret_cutmarks = i->cutmarks; + if (ret_n_cutmarks) + *ret_n_cutmarks = i->n_cutmarks; + + return 0; +} + +int ca_index_get_cutmark_delta_max(CaIndex *i, uint64_t *ret) { + if (!i) + return -EINVAL; + if (!ret) + return -EINVAL; + + if (IN_SET(i->mode, CA_INDEX_READ, 
CA_INDEX_INCREMENTAL_READ) && i->start_offset == 0) + return -ENODATA; + + *ret = i->cutmark_delta_max; + return 0; +} + int ca_index_get_index_size(CaIndex *i, uint64_t *ret) { uint64_t size, metadata_size; int r; @@ -976,9 +1208,9 @@ static int ca_index_read_tail(CaIndex *i) { if (le64toh(buffer.tail.marker) != CA_FORMAT_TABLE_TAIL_MARKER) return -EBADMSG; - if (le64toh(buffer.tail.index_offset) != sizeof(CaFormatIndex)) + if (le64toh(buffer.tail.index_offset) != index_offset(i)) return -EBADMSG; - if (le64toh(buffer.tail.size) + sizeof(CaFormatIndex) != size) + if (index_offset(i) + le64toh(buffer.tail.size) != size) return -EBADMSG; i->blob_size = le64toh(buffer.last_item.offset); diff --git a/src/caindex.h b/src/caindex.h index a50e776c..67e833b4 100644 --- a/src/caindex.h +++ b/src/caindex.h @@ -4,6 +4,7 @@ #define foocaindexhfoo #include "cachunkid.h" +#include "cacutmark.h" #include "realloc-buffer.h" typedef struct CaIndex CaIndex; @@ -51,6 +52,12 @@ int ca_index_get_chunk_size_min(CaIndex *i, size_t *ret); int ca_index_get_chunk_size_avg(CaIndex *i, size_t *ret); int ca_index_get_chunk_size_max(CaIndex *i, size_t *ret); +int ca_index_set_cutmarks(CaIndex *i, const CaCutmark *cutmarks, size_t n_cutmarks); +int ca_index_set_cutmark_delta_max(CaIndex *i, uint64_t cutmark_delta_max); + +int ca_index_get_cutmarks(CaIndex *i, const CaCutmark **ret_cutmarks, size_t *ret_n_cutmarks); +int ca_index_get_cutmark_delta_max(CaIndex *i, uint64_t *ret); + int ca_index_get_blob_size(CaIndex *i, uint64_t *ret); int ca_index_get_index_size(CaIndex *i, uint64_t *ret); int ca_index_get_total_chunks(CaIndex *i, uint64_t *ret); diff --git a/src/caseed.c b/src/caseed.c index ce88555e..b127b67a 100644 --- a/src/caseed.c +++ b/src/caseed.c @@ -41,7 +41,7 @@ struct CaSeed { bool cache_chunks:1; ReallocBuffer buffer; - CaLocation *buffer_location; + CaOrigin *buffer_origin; CaFileRoot *root; @@ -52,23 +52,25 @@ struct CaSeed { uint64_t first_step_nsec; uint64_t last_step_nsec; + + 
CaCutmark *cutmarks; + size_t n_cutmarks; }; CaSeed *ca_seed_new(void) { CaSeed *s; - s = new0(CaSeed, 1); + s = new(CaSeed, 1); if (!s) return NULL; - s->cache_fd = -1; - s->base_fd = -1; - - s->cache_chunks = true; - - s->chunker = (CaChunker) CA_CHUNKER_INIT; - - s->feature_flags = CA_FORMAT_DEFAULT & SUPPORTED_FEATURE_MASK; + *s = (CaSeed) { + .cache_fd = -1, + .base_fd = -1, + .cache_chunks = true, + .chunker = CA_CHUNKER_INIT, + .feature_flags = CA_FORMAT_DEFAULT & SUPPORTED_FEATURE_MASK, + }; return s; } @@ -106,10 +108,12 @@ CaSeed *ca_seed_unref(CaSeed *s) { ca_digest_free(s->chunk_digest); realloc_buffer_free(&s->buffer); - ca_location_unref(s->buffer_location); + ca_origin_unref(s->buffer_origin); ca_file_root_unref(s->root); + free(s->cutmarks); + return mfree(s); } @@ -247,7 +251,8 @@ static int ca_seed_make_chunk_id(CaSeed *s, const void *p, size_t l, CaChunkID * return ca_chunk_id_make(s->chunk_digest, p, l, ret); } -static int ca_seed_write_cache_entry(CaSeed *s, CaLocation *location, const void *data, size_t l) { +static int ca_seed_write_cache_entry(CaSeed *s, CaLocation *loc, const void *data, size_t l) { + _cleanup_(ca_location_unrefp) CaLocation *location = ca_location_ref(loc); char ids[CA_CHUNK_ID_FORMAT_MAX]; const char *t, *four, *combined; CaChunkID id; @@ -258,6 +263,7 @@ static int ca_seed_write_cache_entry(CaSeed *s, CaLocation *location, const void assert(data); assert(l > 0); + /* We took our own copy above, to make sure we don't write around in the object the caller passed to us */ r = ca_location_patch_size(&location, l); if (r < 0) return r; @@ -317,7 +323,7 @@ static int ca_seed_write_cache_entry(CaSeed *s, CaLocation *location, const void } static int ca_seed_cache_chunks(CaSeed *s) { - uint64_t offset = 0; + _cleanup_(ca_location_unrefp) CaLocation *location = NULL; const void *p; size_t l; int r; @@ -333,46 +339,48 @@ static int ca_seed_cache_chunks(CaSeed *s) { if (!s->cache_chunks) return 0; - while (l > 0) { - const void 
*chunk; - size_t chunk_size, k; + r = ca_encoder_current_location(s->encoder, 0, &location); + if (r < 0) + return r; - if (!s->buffer_location) { - r = ca_encoder_current_location(s->encoder, offset, &s->buffer_location); - if (r < 0) - return r; - } + r = ca_location_patch_size(&location, l); + if (r < 0) + return r; - k = ca_chunker_scan(&s->chunker, p, l); - if (k == (size_t) -1) { - if (!realloc_buffer_append(&s->buffer, p, l)) - return -ENOMEM; + if (!s->buffer_origin) { + r = ca_origin_new(&s->buffer_origin); + if (r < 0) + return r; + } - return 0; - } + r = ca_origin_put(s->buffer_origin, location); + if (r < 0) + return r; - if (realloc_buffer_size(&s->buffer) == 0) { - chunk = p; - chunk_size = k; - } else { - if (!realloc_buffer_append(&s->buffer, p, k)) - return -ENOMEM; + while (l > 0) { + const void *chunk; + size_t chunk_size; + int verdict; - chunk = realloc_buffer_data(&s->buffer); - chunk_size = realloc_buffer_size(&s->buffer); - } + verdict = ca_chunker_extract_chunk(&s->chunker, &s->buffer, &p, &l, &chunk, &chunk_size); + if (verdict < 0) + return verdict; + if (verdict == CA_CHUNKER_NOT_YET) + return 0; - r = ca_seed_write_cache_entry(s, s->buffer_location, chunk, chunk_size); + r = ca_seed_write_cache_entry(s, ca_origin_get(s->buffer_origin, 0), chunk, chunk_size); if (r < 0) return r; - realloc_buffer_empty(&s->buffer); - s->buffer_location = ca_location_unref(s->buffer_location); - - p = (const uint8_t*) p + k; - l -= k; + if (verdict == CA_CHUNKER_INDIRECT) { + r = realloc_buffer_advance(&s->buffer, chunk_size); + if (r < 0) + return r; + } - offset += k; + r = ca_origin_advance_bytes(s->buffer_origin, chunk_size); + if (r < 0) + return r; } return 0; @@ -389,15 +397,12 @@ static int ca_seed_cache_final_chunk(CaSeed *s) { if (realloc_buffer_size(&s->buffer) == 0) return 0; - if (!s->buffer_location) - return 0; - - r = ca_seed_write_cache_entry(s, s->buffer_location, realloc_buffer_data(&s->buffer), realloc_buffer_size(&s->buffer)); + r = 
ca_seed_write_cache_entry(s, ca_origin_get(s->buffer_origin, 0), realloc_buffer_data(&s->buffer), realloc_buffer_size(&s->buffer)); if (r < 0) return 0; realloc_buffer_empty(&s->buffer); - s->buffer_location = ca_location_unref(s->buffer_location); + s->buffer_origin = ca_origin_unref(s->buffer_origin); return 0; } @@ -852,6 +857,32 @@ int ca_seed_set_chunk_size(CaSeed *s, size_t cmin, size_t cavg, size_t cmax) { return 0; } +int ca_seed_set_cutmarks(CaSeed *s, const CaCutmark *cutmarks, size_t n_cutmarks) { + CaCutmark *copy; + + if (!s) + return -EINVAL; + if (!cutmarks && n_cutmarks > 0) + return -EINVAL; + + copy = newdup(CaCutmark, cutmarks, n_cutmarks); + if (!copy) + return -ENOMEM; + + s->chunker.cutmarks = s->cutmarks = copy; + s->chunker.n_cutmarks = s->n_cutmarks = n_cutmarks; + + return 0; +} + +int ca_seed_set_cutmark_delta_max(CaSeed *s, int64_t delta) { + if (!s) + return -EINVAL; + + s->chunker.cutmark_delta_max = delta; + return 0; +} + int ca_seed_get_file_root(CaSeed *s, CaFileRoot **ret) { int r; diff --git a/src/caseed.h b/src/caseed.h index bdaeb25e..903509b4 100644 --- a/src/caseed.h +++ b/src/caseed.h @@ -39,6 +39,8 @@ int ca_seed_current_mode(CaSeed *seed, mode_t *ret); int ca_seed_set_feature_flags(CaSeed *s, uint64_t flags); int ca_seed_set_chunk_size(CaSeed *s, size_t cmin, size_t cavg, size_t cmax); +int ca_seed_set_cutmarks(CaSeed *s, const CaCutmark *cutmarks, size_t n_cutmarks); +int ca_seed_set_cutmark_delta_max(CaSeed *s, int64_t delta); int ca_seed_set_hardlink(CaSeed *s, bool b); int ca_seed_set_chunks(CaSeed *s, bool b); diff --git a/src/castore.c b/src/castore.c index c290dfd9..612b17eb 100644 --- a/src/castore.c +++ b/src/castore.c @@ -3,9 +3,12 @@ #include #include #include +#include +#include #include #include +#include "affinity-count.h" #include "castore.h" #include "def.h" #include "dirent-util.h" @@ -19,6 +22,8 @@ /* #undef EBADMSG */ /* #define EBADMSG __LINE__ */ +#define WORKER_THREADS_MAX 64U + struct CaStore { char 
*root; bool is_cache:1; @@ -34,6 +39,10 @@ struct CaStore { uint64_t n_requests; uint64_t n_request_bytes; + + pthread_t worker_threads[WORKER_THREADS_MAX]; + size_t n_worker_threads, n_worker_threads_max; + int worker_thread_socket[2]; }; struct CaStoreIterator { @@ -47,14 +56,17 @@ struct CaStoreIterator { CaStore* ca_store_new(void) { CaStore *store; - store = new0(CaStore, 1); + store = new(CaStore, 1); if (!store) return NULL; - store->digest_type = _CA_DIGEST_TYPE_INVALID; - - store->compression = CA_CHUNK_COMPRESSED; - store->compression_type = CA_COMPRESSION_DEFAULT; + *store = (CaStore) { + .digest_type = _CA_DIGEST_TYPE_INVALID, + .compression = CA_CHUNK_COMPRESSED, + .compression_type = CA_COMPRESSION_DEFAULT, + .worker_thread_socket = { -1, -1}, + .n_worker_threads_max = (size_t) -1, + }; return store; } @@ -62,13 +74,18 @@ CaStore* ca_store_new(void) { CaStore *ca_store_new_cache(void) { CaStore *s; - s = new0(CaStore, 1); + s = new(CaStore, 1); if (!s) return NULL; - s->is_cache = true; - s->compression = CA_CHUNK_AS_IS; - s->compression_type = CA_COMPRESSION_DEFAULT; + *s = (CaStore) { + .is_cache = true, + .compression = CA_CHUNK_AS_IS, + .compression_type = CA_COMPRESSION_DEFAULT, + + .worker_thread_socket = { -1, -1 }, + .n_worker_threads_max = (size_t) -1, + }; return s; } @@ -77,6 +94,8 @@ CaStore* ca_store_unref(CaStore *store) { if (!store) return NULL; + (void) ca_store_finalize(store); + if (store->is_cache && store->root) (void) rm_rf(store->root, REMOVE_ROOT|REMOVE_PHYSICAL); @@ -240,6 +259,203 @@ int ca_store_has(CaStore *store, const CaChunkID *chunk_id) { return ca_chunk_file_test(AT_FDCWD, store->root, chunk_id); } +struct queue_entry { + CaChunkID chunk_id; + CaChunkCompression effective_compression; + void *data; + size_t size; +}; + +static void* worker_thread(void *p) { + CaStore *store = p; + int ret = 0, r; + + assert(store); + assert(store->worker_thread_socket[1] >= 0); + + (void) pthread_setname_np(pthread_self(), 
"worker-thread"); + + for (;;) { + struct queue_entry e; + ssize_t n; + + n = recv(store->worker_thread_socket[0], &e, sizeof(e), 0); + if (n < 0) { + if (errno == EINTR) + continue; + + log_debug_errno(errno, "Failed to read from thread pool socket: %m"); + return INT_TO_PTR(errno); + } + if (n == 0) /* Either EOF or zero-sized datagram (Linux doesn't really allow us to + * distinguish that), we take both as an indication to exit the worker thread. */ + break; + + assert(n == sizeof(e)); + + r = ca_chunk_file_save( + AT_FDCWD, store->root, + &e.chunk_id, + e.effective_compression, store->compression, + store->compression_type, + e.data, e.size); + free(e.data); + + if (r < 0) { + log_debug_errno(r, "Failed to store chunk in store: %m"); + + if (r != -EEXIST) + ret = r; + } + } + + return INT_TO_PTR(ret); +} + +static int determine_worker_threads_max(CaStore *store) { + const char *e; + int r; + + assert(store); + + if (store->n_worker_threads_max != (size_t) -1) + return 0; + + e = getenv("CASYNC_WORKER_THREADS"); + if (e) { + unsigned u; + + r = safe_atou(e, &u); + if (r < 0) + log_debug_errno(r, "Failed to parse $CASYNC_WORKER_THREADS, ignoring: %s", e); + else if (u > WORKER_THREADS_MAX) { + log_debug("$CASYNC_WORKER_THREADS out of range, clamping to %zu: %s", (size_t) WORKER_THREADS_MAX, e); + store->n_worker_threads_max = WORKER_THREADS_MAX; + } else { + store->n_worker_threads_max = u; + return 0; + } + } + + r = cpus_in_affinity_mask(); + if (r < 0) + return log_debug_errno(r, "Failed to determine CPUs in affinity mask: %m"); + + store->n_worker_threads_max = MIN((size_t) r, WORKER_THREADS_MAX); + return 0; +} + +static int start_worker_thread(CaStore *store) { + int r; + + assert(store); + + r = determine_worker_threads_max(store); + if (r < 0) + return r; + + if (store->n_worker_threads >= (size_t) store->n_worker_threads_max) + return 0; + + if (store->worker_thread_socket[0] < 0) + if (socketpair(AF_UNIX, SOCK_SEQPACKET|SOCK_CLOEXEC, 0, 
store->worker_thread_socket) < 0) + return -errno; + + r = pthread_create(store->worker_threads + store->n_worker_threads, NULL, worker_thread, store); + if (r != 0) + return -r; + + store->n_worker_threads++; + + log_debug("Started store worker thread %zu.", store->n_worker_threads); + return 0; +} + +static int submit_to_worker_thread( + CaStore *store, + const CaChunkID *chunkid, + CaChunkCompression effective_compression, + const void *p, + uint64_t l) { + + struct queue_entry e; + void *copy = NULL; + ssize_t n; + int r; + + assert(store); + + /* If there's no need to compress/decompress, then let's do things client side, since the operation + * is likely IO bound, not CPU bound */ + if (store->compression == CA_CHUNK_AS_IS || + store->compression == effective_compression) + return -ENOANO; + + /* Before we submit the chunk for compression, let's see if it exists already. If so, let's return + * -EEXIST right away, so that the caller can count reused chunks. Note that this is a bit racy + * currently, as submitted but not yet processed chunks are not considered. 
*/ + r = ca_store_has(store, chunkid); + if (r < 0) + return r; + if (r > 0) + return -EEXIST; + + /* Let's start a new worker thread each time we have a new job to process, until we reached all + * worker threads we need */ + (void) start_worker_thread(store); + + /* If there are no worker threads, do things client side */ + if (store->n_worker_threads <= 0 || + store->worker_thread_socket[1] < 0) + return -ENETDOWN; + + copy = memdup(p, l); + if (!copy) + return -ENOMEM; + + e = (struct queue_entry) { + .chunk_id = *chunkid, + .effective_compression = effective_compression, + .data = copy, + .size = l, + }; + + n = send(store->worker_thread_socket[1], &e, sizeof(e), 0); + if (n < 0) { + free(copy); + return -errno; + } + + assert(n == sizeof(e)); + return 0; +} + +int ca_store_finalize(CaStore *store) { + int ret = 0, r; + size_t i; + + assert(store); + + /* Trigger EOF in all worker threads */ + store->worker_thread_socket[1] = safe_close(store->worker_thread_socket[1]); + + for (i = 0; i < store->n_worker_threads; i++) { + void *p; + r = pthread_join(store->worker_threads[i], &p); + if (r != 0) + ret = -r; + if (p != NULL) + ret = -PTR_TO_INT(p); + } + + store->n_worker_threads = 0; + store->worker_thread_socket[0] = safe_close(store->worker_thread_socket[0]); + + /* Propagate errors we ran into while processing store requests. This is useful for callers to + * determine whether the worker threads ran into any problems. 
*/ + return ret; +} + int ca_store_put( CaStore *store, const CaChunkID *chunk_id, @@ -273,6 +489,14 @@ int ca_store_put( store->mkdir_done = true; } + r = submit_to_worker_thread( + store, + chunk_id, + effective_compression, + data, size); + if (r >= 0) + return 0; + return ca_chunk_file_save( AT_FDCWD, store->root, chunk_id, diff --git a/src/castore.h b/src/castore.h index 003bb955..0d2ee5c3 100644 --- a/src/castore.h +++ b/src/castore.h @@ -25,6 +25,8 @@ int ca_store_get(CaStore *store, const CaChunkID *chunk_id, CaChunkCompression d int ca_store_has(CaStore *store, const CaChunkID *chunk_id); int ca_store_put(CaStore *store, const CaChunkID *chunk_id, CaChunkCompression effective_compression, const void *data, uint64_t size); +int ca_store_finalize(CaStore *store); + int ca_store_get_requests(CaStore *s, uint64_t *ret); int ca_store_get_request_bytes(CaStore *s, uint64_t *ret); diff --git a/src/casync-tool.c b/src/casync-tool.c index f9206886..cb7ff19b 100644 --- a/src/casync-tool.c +++ b/src/casync-tool.c @@ -73,6 +73,10 @@ static bool arg_uid_shift_apply = false; static bool arg_mkdir = true; static CaDigestType arg_digest = CA_DIGEST_DEFAULT; static CaCompressionType arg_compression = CA_COMPRESSION_DEFAULT; +static CaCutmark *arg_cutmarks = NULL; +static size_t arg_n_cutmarks = 0; +static bool arg_auto_cutmarks = true; +static uint64_t arg_cutmark_delta_max = 0; static void help(void) { printf("%1$s [OPTIONS...] 
make [ARCHIVE|ARCHIVE_INDEX|BLOB_INDEX] [PATH]\n" @@ -99,6 +103,9 @@ static void help(void) { " --chunk-size=[MIN:]AVG[:MAX]\n" " The minimal/average/maximum number of bytes in a\n" " chunk\n" + " --cutmark=CUTMARK Specify a cutmark\n" + " --cutmark-delta-max=BYTES\n" + " Maximum bytes to shift cut due to cutmark\n" " --digest=DIGEST Pick digest algorithm (sha512-256 or sha256)\n" " --compression=COMPRESSION\n" " Pick compression algorithm (zstd, xz or gzip)\n" @@ -347,6 +354,8 @@ static int parse_argv(int argc, char *argv[]) { ARG_DIGEST, ARG_COMPRESSION, ARG_VERSION, + ARG_CUTMARK, + ARG_CUTMARK_DELTA_MAX, }; static const struct option options[] = { @@ -380,6 +389,8 @@ static int parse_argv(int argc, char *argv[]) { { "mkdir", required_argument, NULL, ARG_MKDIR }, { "digest", required_argument, NULL, ARG_DIGEST }, { "compression", required_argument, NULL, ARG_COMPRESSION }, + { "cutmark", required_argument, NULL, ARG_CUTMARK }, + { "cutmark-delta-max", required_argument, NULL, ARG_CUTMARK_DELTA_MAX }, {} }; @@ -684,6 +695,47 @@ static int parse_argv(int argc, char *argv[]) { break; } + case ARG_CUTMARK: + r = parse_boolean(optarg); + if (r < 0) { + CaCutmark *n; + + n = reallocarray(arg_cutmarks, sizeof(CaCutmark), arg_n_cutmarks + 1); + if (!n) + return log_oom(); + + arg_cutmarks = n; + + r = ca_cutmark_parse(arg_cutmarks + arg_n_cutmarks, optarg); + if (r < 0) + return log_error_errno(r, "Failed to parse cutmark specification: %m"); + + arg_n_cutmarks++; + arg_auto_cutmarks = false; + } else { + arg_auto_cutmarks = r; + + arg_cutmarks = mfree(arg_cutmarks); + arg_n_cutmarks = 0; + } + + break; + + case ARG_CUTMARK_DELTA_MAX: { + uint64_t u; + + r = parse_size(optarg, &u); + if (r < 0) + return log_error_errno(r, "Failed to parse cutmark delta: %s", optarg); + if (u == 0) { + log_error("Cutmark delta cannot be zero."); + return -EINVAL; + } + + arg_cutmark_delta_max = u; + break; + } + case '?': return -EINVAL; @@ -810,8 +862,10 @@ static int 
load_feature_flags(CaSync *s, uint64_t default_with_flags) { return 0; } -static int load_chunk_size(CaSync *s) { +static int load_chunk_size(CaSync *s, bool is_catar) { uint64_t cavg, cmin, cmax; + const CaCutmark *cutmarks; + size_t n_cutmarks; int r; if (arg_chunk_size_avg != 0) { @@ -832,6 +886,22 @@ static int load_chunk_size(CaSync *s) { return log_error_errno(r, "Failed to set maximum chunk size to %zu: %m", arg_chunk_size_max); } + if (arg_auto_cutmarks && is_catar) { + r = ca_sync_set_cutmarks_catar(s); + if (r < 0) + return log_error_errno(r, "Failed to set automatic cutmarks: %m"); + } else { + r = ca_sync_set_cutmarks(s, arg_cutmarks, arg_n_cutmarks); + if (r < 0) + return log_error_errno(r, "Failed to set manual cutmarks: %m"); + } + + if (arg_cutmark_delta_max != 0) { + r = ca_sync_set_cutmark_delta_max(s, arg_cutmark_delta_max); + if (r < 0) + return log_error_errno(r, "Failed to set cutmark delta: %m"); + } + if (!arg_verbose) return 1; @@ -848,6 +918,31 @@ static int load_chunk_size(CaSync *s) { return log_error_errno(r, "Failed to read maximum chunk size: %m"); log_info("Selected chunk sizes: min=%" PRIu64 "..avg=%" PRIu64 "..max=%" PRIu64, cmin, cavg, cmax); + + r = ca_sync_get_cutmarks(s, &cutmarks, &n_cutmarks); + if (r < 0) + return log_error_errno(r, "Failed to acquire cutmarks: %m"); + + if (n_cutmarks == 0) + log_info("No cutmarks defined."); + else { + uint64_t delta; + size_t i; + + for (i = 0; i < n_cutmarks; i++) + log_info("Cutmark: %016" PRIx64 "/%016" PRIx64 "%c%"PRIu64, + cutmarks[i].value, + cutmarks[i].mask, + cutmarks[i].delta < 0 ? '-' : '+', + (uint64_t) (cutmarks[i].delta < 0 ? 
-cutmarks[i].delta : cutmarks[i].delta)); + + r = ca_sync_get_cutmark_delta_max(s, &delta); + if (r < 0) + return log_error_errno(r, "Failed to determine cutmark delta: %m"); + + log_info("Maximum cutmark delta: %" PRIu64, delta); + } + return 1; } @@ -904,7 +999,9 @@ static int verbose_print_path(CaSync *s, const char *verb) { static int verbose_print_done_make(CaSync *s) { uint64_t n_chunks = UINT64_MAX, size = UINT64_MAX, n_reused = UINT64_MAX, covering, - n_cache_hits = UINT64_MAX, n_cache_misses = UINT64_MAX, n_cache_invalidated = UINT64_MAX, n_cache_added = UINT64_MAX; + n_cache_hits = UINT64_MAX, n_cache_misses = UINT64_MAX, n_cache_invalidated = UINT64_MAX, n_cache_added = UINT64_MAX, + n_cutmarks_applied = UINT64_MAX; + int64_t cutmark_delta_sum = INT64_MAX; char buffer[FORMAT_BYTES_MAX]; int r; @@ -989,6 +1086,19 @@ static int verbose_print_done_make(CaSync *s) { if (n_cache_hits != UINT64_MAX && n_cache_misses != UINT64_MAX && n_cache_invalidated != UINT64_MAX && n_cache_added != UINT64_MAX) log_info("Cache hits: %" PRIu64 ", misses: %" PRIu64 ", invalidated: %" PRIu64 ", added: %" PRIu64, n_cache_hits, n_cache_misses, n_cache_invalidated, n_cache_added); + r = ca_sync_current_cutmarks_applied(s, &n_cutmarks_applied); + if (r < 0 && r != -ENODATA) + return log_error_errno(r, "Failed to read number of cutmarks: %m"); + if (n_cutmarks_applied != UINT64_MAX) + log_info("Cutmarks applied: %"PRIu64, n_cutmarks_applied); + + r = ca_sync_current_cutmark_delta_sum(s, &cutmark_delta_sum); + if (r < 0 && r != -ENODATA) + return log_error_errno(r, "Failed to read cutmark delta: %m"); + if (cutmark_delta_sum != INT64_MAX) + log_info("Average cutmark delta for all chunks: %0.1f", + n_chunks > 0 ? 
(double) cutmark_delta_sum / n_chunks : 0.0); + return 1; } @@ -1314,7 +1424,7 @@ static int verb_make(int argc, char *argv[]) { if (!s) return log_oom(); - r = load_chunk_size(s); + r = load_chunk_size(s, IN_SET(operation, MAKE_ARCHIVE, MAKE_ARCHIVE_INDEX)); if (r < 0) return r; @@ -2242,7 +2352,7 @@ static int verb_list(int argc, char *argv[]) { if (!s) return log_oom(); - r = load_chunk_size(s); + r = load_chunk_size(s, true); if (r < 0) return r; @@ -2549,7 +2659,7 @@ static int verb_digest(int argc, char *argv[]) { if (!s) return log_oom(); - r = load_chunk_size(s); + r = load_chunk_size(s, IN_SET(operation, DIGEST_ARCHIVE, DIGEST_ARCHIVE_INDEX, DIGEST_DIRECTORY)); if (r < 0) return r; @@ -4013,6 +4123,7 @@ int main(int argc, char *argv[]) { free(arg_cache); strv_free(arg_extra_stores); strv_free(arg_seeds); + free(arg_cutmarks); /* fprintf(stderr, PID_FMT ": exiting with error code: %m", getpid()); */ diff --git a/src/casync.c b/src/casync.c index 9fee77f7..a3daef30 100644 --- a/src/casync.c +++ b/src/casync.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ +#include #include #include #include @@ -152,6 +153,10 @@ struct CaSync { uint64_t first_chunk_request_nsec; uint64_t last_chunk_request_nsec; + + /* List of defined cutmarks */ + CaCutmark *cutmarks; + size_t n_cutmarks; }; #define CA_SYNC_IS_STARTED(s) ((s)->start_nsec != 0) @@ -508,6 +513,8 @@ CaSync *ca_sync_unref(CaSync *s) { ca_digest_free(s->chunk_digest); + free(s->cutmarks); + return mfree(s); } @@ -1079,7 +1086,7 @@ int ca_sync_add_store_path(CaSync *s, const char *path) { return r; } - array = realloc_multiply(s->rstores, sizeof(CaStore*), s->n_rstores+1); + array = reallocarray(s->rstores, sizeof(CaStore*), s->n_rstores+1); if (!array) { ca_store_unref(store); return -ENOMEM; @@ -1110,7 +1117,7 @@ int ca_sync_add_store_remote(CaSync *s, const char *url) { return r; } - array = realloc_multiply(s->remote_rstores, sizeof(CaRemote*), s->n_remote_rstores+1); + array = 
reallocarray(s->remote_rstores, sizeof(CaRemote*), s->n_remote_rstores+1); if (!array) { ca_remote_unref(remote); return -ENOMEM; @@ -1145,7 +1152,7 @@ static int ca_sync_extend_seeds_array(CaSync *s) { assert(s); - new_seeds = realloc_multiply(s->seeds, sizeof(CaSeed*), s->n_seeds+1); + new_seeds = reallocarray(s->seeds, sizeof(CaSeed*), s->n_seeds+1); if (!new_seeds) return -ENOMEM; @@ -1549,6 +1556,14 @@ static int ca_sync_start(CaSync *s) { if (r < 0) return r; + r = ca_index_set_cutmarks(s->index, s->chunker.cutmarks, s->chunker.n_cutmarks); + if (r < 0) + return r; + + r = ca_index_set_cutmark_delta_max(s->index, ca_chunker_cutmark_delta_max(&s->chunker)); + if (r < 0) + return r; + if (s->make_mode != (mode_t) -1) { r = ca_index_set_make_mode(s->index, s->make_mode); if (r < 0 && r != -ENOTTY) @@ -1697,50 +1712,39 @@ static int ca_sync_write_chunks(CaSync *s, const void *p, size_t l, CaLocation * while (l > 0) { _cleanup_(ca_origin_unrefp) CaOrigin *chunk_origin = NULL; + size_t chunk_size; const void *chunk; - size_t chunk_size, k; + int verdict; - k = ca_chunker_scan(&s->chunker, p, l); - if (k == (size_t) -1) { - if (!realloc_buffer_append(&s->buffer, p, l)) - return -ENOMEM; + verdict = ca_chunker_extract_chunk(&s->chunker, &s->buffer, &p, &l, &chunk, &chunk_size); + if (verdict < 0) + return verdict; + if (verdict == CA_CHUNKER_NOT_YET) return 0; - } - - if (realloc_buffer_size(&s->buffer) == 0) { - chunk = p; - chunk_size = k; - } else { - if (!realloc_buffer_append(&s->buffer, p, k)) - return -ENOMEM; - - chunk = realloc_buffer_data(&s->buffer); - chunk_size = realloc_buffer_size(&s->buffer); - } if (s->buffer_origin) { - if (chunk_size == ca_origin_bytes(s->buffer_origin)) { - chunk_origin = s->buffer_origin; - s->buffer_origin = NULL; - } else { - r = ca_origin_extract_bytes(s->buffer_origin, chunk_size, &chunk_origin); - if (r < 0) - return r; - - r = ca_origin_advance_bytes(s->buffer_origin, chunk_size); - if (r < 0) - return r; - } + r = 
ca_origin_extract_bytes(s->buffer_origin, chunk_size, &chunk_origin); + if (r < 0) + return r; } r = ca_sync_write_one_chunk(s, chunk, chunk_size, chunk_origin); if (r < 0) return r; - realloc_buffer_empty(&s->buffer); + /* If the verdict was "indirect", then chunk/chunk_size don't point into p/l but into the + * temporary buffer "buffer". In that case, we need to advance it after using it. */ + if (verdict == CA_CHUNKER_INDIRECT) { + r = realloc_buffer_advance(&s->buffer, chunk_size); + if (r < 0) + return r; + } - p = (const uint8_t*) p + k; - l -= k; + if (s->buffer_origin) { + r = ca_origin_advance_bytes(s->buffer_origin, chunk_size); + if (r < 0) + return r; + } } return 0; @@ -2058,6 +2062,14 @@ static int ca_sync_step_encode(CaSync *s) { if (r < 0) return r; + if (s->wstore) { + /* Make sure the store ends all worker threads and pick up any pending errors from + * it */ + r = ca_store_finalize(s->wstore); + if (r < 0) + return r; + } + r = ca_sync_install_archive(s); if (r < 0) return r; @@ -3058,7 +3070,16 @@ static int ca_sync_propagate_flags_to_stores(CaSync *s, uint64_t flags) { return 0; } -static int ca_sync_propagate_flags_to_seeds(CaSync *s, uint64_t flags, size_t cmin, size_t cavg, size_t cmax) { +static int ca_sync_propagate_flags_to_seeds( + CaSync *s, + uint64_t flags, + size_t cmin, + size_t cavg, + size_t cmax, + const CaCutmark *cutmarks, + size_t n_cutmarks, + uint64_t cutmark_delta_max) { + size_t i; int r; @@ -3073,6 +3094,16 @@ static int ca_sync_propagate_flags_to_seeds(CaSync *s, uint64_t flags, size_t cm r = ca_seed_set_chunk_size(s->seeds[i], cmin, cavg, cmax); if (r < 0) return r; + + if (n_cutmarks > 0) { + r = ca_seed_set_cutmarks(s->seeds[i], cutmarks, n_cutmarks); + if (r < 0) + return r; + + r = ca_seed_set_cutmark_delta_max(s->seeds[i], cutmark_delta_max); + if (r < 0) + return r; + } } return 0; @@ -3124,8 +3155,9 @@ static int ca_sync_propagate_flags_to_decoder(CaSync *s, uint64_t flags) { } static int 
ca_sync_propagate_index_flags(CaSync *s) { - size_t cmin, cavg, cmax; - uint64_t flags; + size_t cmin, cavg, cmax, n_cutmarks; + uint64_t flags, cutmark_delta_max; + const CaCutmark *cutmarks; int r; assert(s); @@ -3155,11 +3187,19 @@ static int ca_sync_propagate_index_flags(CaSync *s) { if (r < 0) return r; + r = ca_index_get_cutmarks(s->index, &cutmarks, &n_cutmarks); + if (r < 0) + return r; + + r = ca_index_get_cutmark_delta_max(s->index, &cutmark_delta_max); + if (r < 0) + return r; + r = ca_sync_propagate_flags_to_stores(s, flags); if (r < 0) return r; - r = ca_sync_propagate_flags_to_seeds(s, flags, cmin, cavg, cmax); + r = ca_sync_propagate_flags_to_seeds(s, flags, cmin, cavg, cmax, cutmarks, n_cutmarks, cutmark_delta_max); if (r < 0) return r; @@ -4514,3 +4554,145 @@ int ca_sync_current_cache_added(CaSync *s, uint64_t *ret) { *ret = s->n_cache_added; return 0; } + +int ca_sync_set_cutmarks(CaSync *s, const CaCutmark *c, size_t n) { + if (!s) + return -EINVAL; + + if (n == 0) { + s->cutmarks = mfree(s->cutmarks); + s->n_cutmarks = 0; + } else { + _cleanup_free_ CaCutmark *copy = NULL; + size_t i; + + if (!c) + return -EINVAL; + + copy = newdup(CaCutmark, c, n); + if (!copy) + return -ENOMEM; + + /* Bring into a defined order */ + ca_cutmark_sort(copy, n); + + if (copy[0].mask == 0) + return -EINVAL; + + /* Refuse duplicate and bad entries */ + for (i = 1; i < n; i++) { + if (ca_cutmark_cmp(copy + i - 1, copy + i) == 0) + return -ENOTUNIQ; + + if (copy[i].mask == 0) + return -EINVAL; + } + + free_and_replace(s->cutmarks, copy); + s->n_cutmarks = n; + } + + /* Propagate to chunker */ + s->chunker.cutmarks = s->cutmarks; + s->chunker.n_cutmarks = s->n_cutmarks; + + return 0; +} + +int ca_sync_set_cutmarks_catar(CaSync *s) { + + /* Set the cutmarks in a way suitable for catar streams */ + + static const CaCutmark catar_cutmarks[] = { + { + /* Cut before each new directory entry */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + .value = CA_FORMAT_ENTRY, +#else + .value 
= __builtin_bswap64(CA_FORMAT_ENTRY), +#endif + .mask = UINT64_C(0xFFFFFFFFFFFFFFFF), + .delta = -8, + }, + }; + + if (!s) + return -EINVAL; + + s->cutmarks = mfree(s->cutmarks); + s->n_cutmarks = 0; + + s->chunker.cutmarks = catar_cutmarks; + s->chunker.n_cutmarks = ELEMENTSOF(catar_cutmarks); + + return 0; +} + +int ca_sync_set_cutmark_delta_max(CaSync *s, uint64_t m) { + if (!s) + return -EINVAL; + if (m == 0) + return -EINVAL; + + s->chunker.cutmark_delta_max = m; + return 0; +} + +int ca_sync_get_cutmarks(CaSync *s, const CaCutmark **ret_cutmarks, size_t *ret_n) { + if (!s) + return -EINVAL; + if (!ret_cutmarks) + return -EINVAL; + if (!ret_n) + return -EINVAL; + + /* We return the cutmarks configured in the chunker instead of the ones configured in the CaSync + * object, since the latter are not set if we use the 'catar' default cutmarks. */ + + *ret_cutmarks = s->chunker.cutmarks; + *ret_n = s->chunker.n_cutmarks; + return 0; +} + +int ca_sync_get_cutmark_delta_max(CaSync *s, uint64_t *ret) { + if (!s) + return -EINVAL; + if (!ret) + return -EINVAL; + + *ret = ca_chunker_cutmark_delta_max(&s->chunker); + return 0; +} + +int ca_sync_current_cutmark_delta_sum(CaSync *s, int64_t *ret) { + if (!s) + return -EINVAL; + if (!ret) + return -EINVAL; + + if (s->direction != CA_SYNC_ENCODE) + return -ENODATA; + if (!s->wstore && !s->cache_store && !s->index) + return -ENODATA; + + *ret = s->chunker.cutmark_delta_sum; + return 0; +} + +int ca_sync_current_cutmarks_applied(CaSync *s, uint64_t *ret) { + if (!s) + return -EINVAL; + if (!ret) + return -EINVAL; + + if (s->direction != CA_SYNC_ENCODE) + return -ENODATA; + if (!s->wstore && !s->cache_store && !s->index) + return -ENODATA; + + if (s->chunker.n_cutmarks == 0) + return -ENODATA; + + *ret = s->chunker.n_cutmarks_applied; + return 0; +} diff --git a/src/casync.h b/src/casync.h index 982f67e5..01392ad9 100644 --- a/src/casync.h +++ b/src/casync.h @@ -9,6 +9,7 @@ #include "cachunk.h" #include "cachunkid.h" #include 
"cacommon.h" +#include "cacutmark.h" #include "caorigin.h" typedef struct CaSync CaSync; @@ -123,6 +124,16 @@ int ca_sync_set_chunk_size_min(CaSync *s, uint64_t v); int ca_sync_set_chunk_size_avg(CaSync *s, uint64_t v); int ca_sync_set_chunk_size_max(CaSync *s, uint64_t v); +int ca_sync_set_cutmarks(CaSync *s, const CaCutmark *c, size_t n); +int ca_sync_set_cutmarks_catar(CaSync *s); +int ca_sync_set_cutmark_delta_max(CaSync *s, uint64_t m); + +int ca_sync_get_cutmarks(CaSync *s, const CaCutmark **ret_cutmarks, size_t *ret_n); +int ca_sync_get_cutmark_delta_max(CaSync *s, uint64_t *ret); + +int ca_sync_current_cutmark_delta_sum(CaSync *s, int64_t *ret); +int ca_sync_current_cutmarks_applied(CaSync *s, uint64_t *ret); + int ca_sync_get_chunk_size_avg(CaSync *s, uint64_t *ret); int ca_sync_get_chunk_size_min(CaSync *s, uint64_t *ret); int ca_sync_get_chunk_size_max(CaSync *s, uint64_t *ret); diff --git a/src/meson.build b/src/meson.build index 5b1ce782..2672c27d 100644 --- a/src/meson.build +++ b/src/meson.build @@ -10,6 +10,8 @@ util_sources = files(''' '''.split()) libshared_sources = files(''' + affinity-count.c + affinity-count.h cacache.c cacache.h cachunk.c @@ -21,6 +23,8 @@ libshared_sources = files(''' cacommon.h cacompression.c cacompression.h + cacutmark.c + cacutmark.h cadecoder.c cadecoder.h cadigest.c diff --git a/src/util.c b/src/util.c index 299e4bba..64922341 100644 --- a/src/util.c +++ b/src/util.c @@ -429,6 +429,14 @@ int unhexchar(char c) { return -EINVAL; } +int undecchar(char c) { + + if (c >= '0' && c <= '9') + return c - '0'; + + return -EINVAL; +} + char *hexmem(const void *p, size_t l) { const uint8_t *x; char *r, *z; @@ -919,7 +927,7 @@ int strv_push(char ***l, char *value) { if (m < n) return -ENOMEM; - c = realloc_multiply(*l, sizeof(char*), m); + c = reallocarray(*l, sizeof(char*), m); if (!c) return -ENOMEM; diff --git a/src/util.h b/src/util.h index dc9d03e4..f4b62914 100644 --- a/src/util.h +++ b/src/util.h @@ -24,10 +24,32 @@ #include 
"gcc-macro.h" #include "log.h" -#define new(t, n) ((t*) malloc((n) * sizeof(t))) -#define new0(t, n) ((t*) calloc((n), sizeof(t))) +/* If for some reason more than 4M are allocated on the stack, let's abort immediately. It's better than + * proceeding and smashing the stack limits. Note that by default RLIMIT_STACK is 8M on Linux. */ +#define ALLOCA_MAX (4U*1024U*1024U) -#define newa(t, n) ((t*) alloca((n) * sizeof(t))) +#define new(t, n) ((t*) malloc_multiply(sizeof(t), (n))) +#define new0(t, n) ((t*) calloc((n) ?: 1, sizeof(t))) + +#define newa(t, n) \ + ({ \ + size_t _n_ = n; \ + assert(!size_multiply_overflow(sizeof(t), _n_)); \ + assert(sizeof(t)*_n_ <= ALLOCA_MAX); \ + (t*) alloca(sizeof(t)*_n_); \ + }) + +#define newa0(t, n) \ + ({ \ + size_t _n_ = n; \ + assert(!size_multiply_overflow(sizeof(t), _n_)); \ + assert(sizeof(t)*_n_ <= ALLOCA_MAX); \ + (t*) alloca0(sizeof(t)*_n_); \ + }) + +#define newdup(t, p, n) ((t*) memdup_multiply(p, sizeof(t), (n))) + +#define malloc0(n) (calloc(1, (n))) #define XCONCATENATE(x, y) x ## y #define CONCATENATE(x, y) XCONCATENATE(x, y) @@ -63,8 +85,6 @@ ((_A) > (_B)) ? 
(_A) : (_B), \ (void)0)) - - int loop_write(int fd, const void *p, size_t l); int loop_write_block(int fd, const void *p, size_t l); ssize_t loop_read(int fd, void *p, size_t l); @@ -226,6 +246,7 @@ static inline uint64_t random_u64(void) { char hexchar(int x); int unhexchar(char c); +int undecchar(char c); char octchar(int x); char *hexmem(const void *p, size_t l); @@ -330,23 +351,40 @@ char *strv_find(char **l, const char *name) _pure_; #define strv_contains(l, s) (!!strv_find((l), (s))) static inline bool size_multiply_overflow(size_t size, size_t need) { - return need != 0 && size > (SIZE_MAX / need); + return _unlikely_(need != 0 && size > (SIZE_MAX / need)); } _malloc_ _alloc_(1, 2) static inline void *malloc_multiply(size_t size, size_t need) { - if (_unlikely_(size_multiply_overflow(size, need))) + if (size_multiply_overflow(size, need)) + return NULL; + + return malloc(size * need ?: 1); +} + +#if !HAVE_REALLOCARRAY +_alloc_(2, 3) static inline void *reallocarray(void *p, size_t need, size_t size) { + if (size_multiply_overflow(size, need)) return NULL; - return malloc(size * need); + return realloc(p, size * need ?: 1); } +#endif -_alloc_(2, 3) static inline void *realloc_multiply(void *p, size_t size, size_t need) { - if (_unlikely_(size_multiply_overflow(size, need))) +_alloc_(2, 3) static inline void *memdup_multiply(const void *p, size_t size, size_t need) { + if (size_multiply_overflow(size, need)) return NULL; - return realloc(p, size * need); + return memdup(p, size * need); } +#define free_and_replace(a, b) \ + ({ \ + free(a); \ + (a) = (b); \ + (b) = NULL; \ + 0; \ + }) + #define STRV_FOREACH(s, l) \ for ((s) = (l); (s) && *(s); (s)++) @@ -780,4 +818,13 @@ int read_line(FILE *f, size_t limit, char **ret); char *delete_trailing_chars(char *s, const char *bad); char *strstrip(char *s); +#define CMP(a, b) __CMP(UNIQ, (a), UNIQ, (b)) +#define __CMP(aq, a, bq, b) \ + ({ \ + const typeof(a) UNIQ_T(A, aq) = (a); \ + const typeof(b) UNIQ_T(B, bq) = (b); 
\ + UNIQ_T(A, aq) < UNIQ_T(B, bq) ? -1 : \ + UNIQ_T(A, aq) > UNIQ_T(B, bq) ? 1 : 0; \ + }) + #endif diff --git a/test/test-cachunker-histogram.c b/test/test-cachunker-histogram.c index 5f82232a..2b393fb0 100644 --- a/test/test-cachunker-histogram.c +++ b/test/test-cachunker-histogram.c @@ -41,7 +41,7 @@ static void* process(void *q) { for (;;) { size_t n; - n = ca_chunker_scan(&t->chunker, p, l); + n = ca_chunker_scan(&t->chunker, true, p, l); if (n == (size_t) -1) { previous += l; break; diff --git a/test/test-cachunker.c b/test/test-cachunker.c index abacf37b..0ae75fed 100644 --- a/test/test-cachunker.c +++ b/test/test-cachunker.c @@ -1,8 +1,9 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ #include -#include +#include #include +#include #include "cachunker.h" #include "util.h" @@ -62,7 +63,7 @@ static void test_chunk(void) { for (;;) { size_t k; - k = ca_chunker_scan(&x, p, n); + k = ca_chunker_scan(&x, true, p, n); if (k == (size_t) -1) { acc += n; break; @@ -142,11 +143,100 @@ static int test_set_size(void) { return 0; } +static const ssize_t CUTMARK_BUFFER_SIZE = (1024*1024); + +static void test_cutmarks(void) { + struct CaChunker x = CA_CHUNKER_INIT; + struct CaCutmark marks[] = { + { + .value = htole64(UINT64_C(0xEBCDABCDABCDABCF)), + .mask = htole64(UINT64_C(0xFFFFFFFFFFFFFFFF)), + .delta = -8, + }, + { + .value = htole64(UINT64_C(0x00EFFEFFEE000000)), + .mask = htole64(UINT64_C(0x00FFFFFFFF000000)), + .delta = -2, + }, + { + .value = htole64(UINT64_C(0x1122113311441155)), + .mask = htole64(UINT64_C(0xFFFFFFFFFFFFFFFF)), + .delta = 3, + }, + + }; + + _cleanup_free_ uint8_t *buffer = NULL; + uint8_t *p; + size_t n, offset = 0, i; + unsigned found = 0; + uint32_t z; + + x.cutmarks = marks; + x.n_cutmarks = ELEMENTSOF(marks); + + buffer = new0(uint8_t, CUTMARK_BUFFER_SIZE); + assert_se(buffer); + + /* Fill an array with (constant) rubbish */ + srand(0); + for (i = 0; i < (size_t) CUTMARK_BUFFER_SIZE; i++) + buffer[i] = (uint8_t) rand(); + + /* Insert the cutmarks 
at five places */ + z = le64toh(marks[1].value) >> 24; + memcpy(buffer + 444, &marks[0].value, 8); + memcpy(buffer + CUTMARK_BUFFER_SIZE/3-9, &z, 4); + memcpy(buffer + CUTMARK_BUFFER_SIZE/2+7, &marks[0].value, 8); + memcpy(buffer + CUTMARK_BUFFER_SIZE/3*2+5, &marks[2].value, 8); + memcpy(buffer + CUTMARK_BUFFER_SIZE - 333, &z, 4); + + p = buffer; + n = CUTMARK_BUFFER_SIZE; + + for (;;) { + size_t k; + ssize_t cm; + + k = ca_chunker_scan(&x, true, p, n); + + if (k == (size_t) -1) { + offset += n; + p += n; + n = 0; + } else { + offset += k; + p += k; + n -= k; + } + + cm = offset - x.last_cutmark; + + if (cm == 444) + found |= 1; + else if (cm == CUTMARK_BUFFER_SIZE/3-9+3) + found |= 2; + else if (cm == CUTMARK_BUFFER_SIZE/2+7) + found |= 4; + else if (cm == CUTMARK_BUFFER_SIZE/3*2+5+8+3) + found |= 8; + else if (cm == CUTMARK_BUFFER_SIZE - 333+3) + found |= 16; + + if (n == 0) + break; + } + + /* Make sure we found all five cutmarks */ + assert_se(found == 31); +} + int main(int argc, char *argv[]) { test_rolling(); test_chunk(); test_set_size(); + test_cutmarks(); return 0; } diff --git a/test/test-cacutmark.c b/test/test-cacutmark.c new file mode 100644 index 00000000..973b6ee1 --- /dev/null +++ b/test/test-cacutmark.c @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ +
+#include "cacutmark.h" +#include "util.h" + +static void test_cutmark_parse_one(const char *s, int ret, uint64_t value, uint64_t mask, int64_t delta) { + CaCutmark c = {}; + + assert_se(ca_cutmark_parse(&c, s) == ret); + + assert_se(c.value == value); + assert_se(c.mask == mask); + assert_se(c.delta == delta); +} + +static void test_cutmark_parse(void) { + test_cutmark_parse_one("aaaaa", 0, 0xaaaaaU, 0xfffffU, 0); + test_cutmark_parse_one("0/1", 0, 0, 1, 0); + test_cutmark_parse_one("ff/ff+99", 0, 0xffU, 0xffU, 99); + test_cutmark_parse_one("ff/ff-99", 0, 0xffU, 0xffU, -99); + test_cutmark_parse_one("abc+99", 0, 0xabcU, 0xfffU, 99); + test_cutmark_parse_one("abc-99", 0, 0xabcU, 0xfffU, 
-99); + test_cutmark_parse_one("abc/eee", 0, 0xabcU, 0xeeeU, 0); + test_cutmark_parse_one("abcdef0123456789/123456789abcdef0+2147483647", 0, 0xabcdef0123456789U, 0x123456789abcdef0U, 2147483647); + test_cutmark_parse_one("abcdef0123456789/123456789abcdef0-2147483648", 0, 0xabcdef0123456789U, 0x123456789abcdef0U, -2147483648); + test_cutmark_parse_one("abcdef0123456789/123456789abcdef0+2147483647", 0, 0xabcdef0123456789U, 0x123456789abcdef0U, 2147483647); + test_cutmark_parse_one("abcdef0123456789/123456789abcdef0+9223372036854775807", 0, 0xabcdef0123456789U, 0x123456789abcdef0U, INT64_MAX); + test_cutmark_parse_one("abcdef0123456789/123456789abcdef0-9223372036854775808", 0, 0xabcdef0123456789U, 0x123456789abcdef0U, INT64_MIN); + test_cutmark_parse_one("1000000000000000/1000000000000000+0", 0, 0x1000000000000000U, 0x1000000000000000U, 0); + test_cutmark_parse_one("1000000000000000/1000000000000000-0", 0, 0x1000000000000000U, 0x1000000000000000U, 0); + + test_cutmark_parse_one("", -EINVAL, 0, 0, 0); + test_cutmark_parse_one("fg", -EINVAL, 0, 0, 0); + test_cutmark_parse_one("/", -EINVAL, 0, 0, 0); + test_cutmark_parse_one("/f", -EINVAL, 0, 0, 0); + test_cutmark_parse_one("+", -EINVAL, 0, 0, 0); + test_cutmark_parse_one("+1", -EINVAL, 0, 0, 0); + test_cutmark_parse_one("-", -EINVAL, 0, 0, 0); + test_cutmark_parse_one("-1", -EINVAL, 0, 0, 0); + test_cutmark_parse_one("0/0", -EINVAL, 0, 0, 0); + + test_cutmark_parse_one("10000000000000000", -EOVERFLOW, 0, 0, 0); + test_cutmark_parse_one("0/10000000000000000", -EOVERFLOW, 0, 0, 0); + test_cutmark_parse_one("abcdef0123456789/123456789abcdef0+9223372036854775808", -EOVERFLOW, 0, 0, 0); + test_cutmark_parse_one("abcdef0123456789/123456789abcdef0-9223372036854775809", -EOVERFLOW, 0, 0, 0); +} + +int main(int argc, char *argv[]) { + + test_cutmark_parse(); + + return 0; +} diff --git a/test/test-seed.sh.in b/test/test-seed.sh.in new file mode 100755 index 00000000..a059371f --- /dev/null +++ b/test/test-seed.sh.in @@ -0,0 
+1,26 @@ +#!/bin/bash + +set -ex + +SCRATCH_DIR=${TMPDIR:-/var/tmp}/test-casync.$RANDOM +mkdir -p $SCRATCH_DIR/src + +mkdir $SCRATCH_DIR/src/casync +if [ $UID = 0 ]; then + cp -a @top_srcdir@/{test-files,src} $SCRATCH_DIR/src/casync/ +else + # If we lack privileges we use rsync rather than cp to copy, as it will just skip over device nodes + rsync -a --exclude=.cacac @top_srcdir@/{test-files,src} $SCRATCH_DIR/src/casync/ +fi + +cd $SCRATCH_DIR/src + +@top_builddir@/casync make -v $SCRATCH_DIR/test.caidx +@top_builddir@/casync extract -v $SCRATCH_DIR/test.caidx $SCRATCH_DIR/first +@top_builddir@/casync extract -v --seed=$SCRATCH_DIR/src $SCRATCH_DIR/test.caidx $SCRATCH_DIR/second + +# Now, let's flush out the chunk store. If everything works correctly, the seed +# should be sufficient as a source for chunks +rm -rf $SCRATCH_DIR/default.castr/* + +@top_builddir@/casync extract -v --seed=$SCRATCH_DIR/src $SCRATCH_DIR/test.caidx $SCRATCH_DIR/third