PERSISTENT
DATA STRUCTURES
PERSISTENT
values persist after modification
DATA STRUCTURES
IMMUTABLE
DATA STRUCTURES
            
               #include <immer/vector.hpp>

               const auto a = immer::vector<int>{1, 2, 3};
            
        
            
               #include <immer/vector.hpp>

               const auto a = immer::vector<int>{1, 2, 3};
               const auto b = a.push_back(4);

               assert(a.size() == 3 && b.size() == 4);
            
        
            
               #include <immer/vector.hpp>

               const auto a = immer::vector<int>{1, 2, 3};
               const auto b = a.push_back(4);
               const auto c = b.set(0, 42);

               assert(a.size() == 3 && b.size() == 4);
               assert(a[0] == 1 && c[0] == 42);
            
        
            
               template <typename T>
               class immer::vector {
                  std::vector<T> data;

                  vector push_back(this vector self, T value) {
                      self.data.push_back(value);
                      return self;
                  }

                  vector set(this vector self, size_t idx, T value) {
                      self.data[idx] = std::move(value);
                      return self;
                  }
                  ...
               };
            
          

structural sharing

Radix Balanced Tree

  • branching factor: M = 2B
  • B = 5 [M = 32]   ⇒   effective O(1)
  • cache efficient
    iteration time comparable to std::vector
  • relaxed RRB   ⇒   O(log(n)) concatenation
    all new kinds of parallel algorithms
  • not amortized   ⇒   constant latency

Hash Array Mapped Tries

  • efficient tree structure for unordered maps
  • characterized also by B = 5 [M = 32]
  • no rehashing   ⇒   constant latency
  • compact
    50% smaller than std::unordered_map

VALUE SEMANTICS
AT SCALE

  1. concurrent programming
  2. reasoning about change
  3. time travel
              
              auto doc = document{};
              auto mutex = std::mutex{};

              std::thread{[&doc, &mutex] {
                 std::lock_guard<std::mutex> lock{mutex};
                 save_to_disk(doc, "file.doc");
              });
              
            
  1. concurrent programming
  2. reasoning about change
  3. time travel
              
              auto doc = document{};

              std::thread{[doc] {
                 save_to_disk(doc, "file.doc");
              });
              
            
  1. concurrent programming
  2. reasoning about change
  3. time travel
  1. concurrent programming
  2. reasoning about change
  3. time travel
              
              auto doc = document{};
              doc.on_foo_changed = [] { ... };
              doc.on_bar_changed = [] { ... };

              doc.set_bar(100);

              // doesn't compose
              void complex_operation(document& doc) {
                  doc.set_foo(42); // trigger on_foo_changed
                  doc.set_bar(69); // trigger on_bar_changed
              }

              // broken invariants, wasteful updates!
              complex_operation(doc);
              
            
  1. concurrent programming
  2. reasoning about change
  3. time travel
              
              // value semantics do compose
              document complex_operation(document doc) {
                  doc.foo = 42;
                  doc.bar = 69;
                  return doc;
              }

              auto curr = document{};
              auto next = complex_operation(curr);

              // curr remains valid, it "persisted"!
              // also: comparing vectors is cheap!
              if (curr.foo != next.foo) {
                  ...
              }
              
            
  1. concurrent programming
  2. reasoning about change
  3. time travel
              
              // HAMT's support diffing in O(|change|))
              auto a = immer::map<int, std::string>{
                 {0, "foo"},
                 {1, "bar"}
              };

              auto b = a
                 .set(2, "baz")
                 .update(1, [](auto x) { return x + x; });

              immer::diff(a, b, [](auto x) {
                  print("added: {}", x);
              }, [](auto x) {
                  print("removed: {}", x);
              }, [](auto x, auto y) {
                  print("changed: {} -> {}", x, y);
              });
              
            
  1. concurrent programming
  2. reasoning about change
  3. time travel
  1. concurrent programming
  2. reasoning about change
  3. time travel
              
              struct undo_history {
                  vector<document> steps;
                  size_t current = 0;
              };
              
            

for purely educational purposes,
we turn our beautiful radix balanced trees into binary trees

            
              
            
          
              
               struct document {
                   vector<string> data;
               };

               // we'll use Cereal for serialization
               // https://github.com/USCiLab/cereal

               template <class Archive>
               void serialize(Archive& ar, document& ar) {
                   ar(cereal::make_nvp("data", data));
               }
              
            
              
              template <typename Archive, typename T>
              void load(Archive& ar, vector<T>& m) {
                  auto size = cereal::size_type{};
                  ar(cereal::make_size_tag(size));
                  m = {};
                  for (auto i = size_type{}; i < size; ++i) {
                      T x;
                      ar(x);
                      m = std::move(m).push_back(std::move(x));
                  }
              }

              template <typename Archive, typename T>
              void save(Archive& ar, const vector<T>& m) {
                  auto size = static_cast<size_type>(m.size());
                  ar(cereal::make_size_tag(size));
                  immer::for_each(m, ar);
              }
              
            
              
    const auto v1 = vector<string>{"a", "b", "c", "d"};
    const auto v2 = v1.push_back("e").push_back("f");
    const auto v3 = v2;

    const auto hist = vector<document>{{v1}, {v2}, {v3}};

    {
        auto ar = cereal::JSONOutputArchive{std::cout};
        ar(hist);
    }
              
            
              
{
  "value0": [{
      "data": ["a", "b", "c", "d"]
    }, {
      "data": ["a", "b", "c", "d", "e", "f"]
    }, {
      "data": ["a", "b", "c", "d", "e", "f"]
    }]
}
              
            

immer ::
persist

              
               struct document {
                   vector<std::string> data;
               };

               // Louis Dionne's Boost.Hana for reflection
               BOOST_HANA_ADAPT_STRUCT(document, data);

               // C++20 alternative: boost::prf
               // C++26 alternative: std::meta::info
              
            
              
               template <typename Archive, typename T>
                   requires boost::hana::Struct<T>::value
               void serialize(Archive& ar, T& v)
               {
                   using namespace boost;
                   hana::for_each(hana::keys(v), [&](auto&& k) {
                       ar(cereal::make_nvp(k.c_str(),
                                           hana::at_key(v, k)));
                   });
               }
              
            
              
    const auto v1 = vector<string>{"a", "b", "c", "d"};
    const auto v2 = v1.push_back("e").push_back("f");
    const auto v3 = v2;

    const auto hist = vector<document>{{v1}, {v2}, {v3}};

    using namespace immer::persist;
    cereal_save_with_pools<cereal::JSONOutputArchive>(
        std::cout,
        hist,
        hana_struct_auto_member_name_policy(hist));
              
            
              
{
  "value0": [{ "data": 0 },
             { "data": 1 },
             { "data": 1 }],
  "pools": {
    "data": {
      "B": 5, "BL": 1,
      "leaves": [
        [ 1, [ "c", "d" ] ],
        [ 2, [ "a", "b" ] ],
        [ 4, [ "e", "f" ] ],
      ],
      "inners": [
        [ 0, { "children": [ 2 ], "relaxed": false }],
        [ 3, { "children": [ 2, 1 ], "relaxed": false }]
      ],
      "vectors": [
        { "root": 0, "tail": 1 },
        { "root": 3, "tail": 4 }
      ]
    }
  }
}
              
            
            
struct my_policy {
    template <class T>
    auto get_pool_types(const T&) const {
        return boost::hana::tuple_t<vector<string>>;
    }

    auto get_pool_name(const vector<string>&) const {
        return "vectorz_of_stringz";
    }

    template <class Archive>
    void save(Archive& ar, const vector<document>& v) const {
        ar(cereal::make_nvp("history", v));
    }

    template <class Archive>
    void load(Archive& ar, vector<document>& v) const {
        ar(cereal::make_nvp("history", v));
    }
};
            
          
              
               namespace v1 {
               struct document {
                   vector<std::string> data;
               };
               }

               namespace v2 {
               struct document {
                   vector<char> data;
               };
               }

               BOOST_HANA_ADAPT_STRUCT(
                   v1::document, data);
               BOOST_HANA_ADAPT_STRUCT(
                   v2::document, data);
              
            
              
        namespace hana = boost::hana;

        const auto xform = hana::make_map(
            hana::make_pair(
                hana::type_c<vector<std::string>>,
                [] (std::string v) -> char {
                     return v.empty() ? '\0' : v[0];
                }));
              
            
              
        const auto v1 = vector<string>{"a", "b", "c", "d"};
        const auto v2 = v1.push_back("e").push_back("f");
        const auto v3 = v2;
        const auto history = vector<v1::document>{{v1}, {v2}, {v3}};

        const auto p0 = get_output_pools(history);
        auto p1       = transform_output_pool(p0, xform);

        auto view = history | std::views::transform([&](v1::document x) {
           auto data = convert_container(p0, p1, x.data);
           return v2::document{data};
        });

        return vector<v2::document>{view.begin(), view.end()};
              
            
              
{
  "value0": [{ "data": 0 },
             { "data": 1 },
             { "data": 1 }],
  "pools": {
    "data": {
      "B": 5, "BL": 1,
      "leaves": [
        [ 1, [ 99, 100 ] ],
        [ 2, [ 97, 98 ] ],
        [ 4, [ 101, 102 ] ],
      ],
      "inners": [
        [ 0, { "children": [ 2 ], "relaxed": false }],
        [ 3, { "children": [ 2, 1 ], "relaxed": false }]
      ],
      "vectors": [
        { "root": 0, "tail": 1 },
        { "root": 3, "tail": 4 }
      ]
    }
  }
}
              
            

More stuff

  • Hash Array Mapped Tries work
    • use a portable hashing function for serialization (immer::persist::xx_hash)
    • converting key types supported, but be careful!
  • Complex data models work
    • All containers of same type get consolidated in a single pool
    • Recursive data models work!

TO-DO

  • Incremental processing
  • Binary formats
  • Compact representation
  • Cereal independence
  • Build time, object size

Thanks Alex!

Alex Shabalin
linkedin.com/in/a-shabalin

takeaways

structural sharing
enable value semantics
at scale

immer::persist
serialize and transform shared structure

HELP US!

CppCon'17
Postmodern immutable data structures
https://youtu.be/sPhpelUfu8Q

CppCon'19
The most valuable values
https://youtu.be/_oBx_NbLghY

CppCon'21
Squaring the circle
https://youtu.be/e2-FRFEx8CA

Thank you!