Created
May 16, 2026 18:17
-
-
Save mhenrixon/633db800344402d5ccfb305fac6d88f5 to your computer and use it in GitHub Desktop.
Dalli string_fastpath/compression flag collision repro (petergoldstein/dalli#1086)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # Reproduces the Dalli string_fastpath / compression bitflag collision | |
| # (https://github.com/petergoldstein/dalli/pull/1086). | |
| # | |
| # Mechanism (read directly from dalli-5.0.2 source): | |
| # | |
| # ValueSerializer::FLAG_UTF8 = 0x2 | |
| # ValueCompressor::FLAG_COMPRESSED = 0x2 # SAME BIT. | |
| # | |
| # Trigger: any caller passes `string_fastpath: true` AND the value is a String | |
| # AND compression is enabled (Dalli's default >=4.0 for values over min_size). | |
| # On store, the UTF8 flag and COMPRESSED flag are OR'd into the same bit, so | |
| # the bit ends up set whether or not compression actually happened. On read | |
| # the decompression path sees the bit and either: | |
| # (a) tries to decompress raw UTF-8 bytes -> Zlib::DataError -> | |
| # Dalli::UnmarshalError ("Unable to uncompress value: incorrect header check") | |
| # (b) skips decompression on real compressed bytes -> mojibake to the app | |
| # | |
| # Gemfile (parameterized by env var): | |
| # | |
| # source "https://rubygems.org" | |
| # | |
| # if ENV["DALLI_VERSION"] == "byroot-fix" | |
| # gem "dalli", git: "https://github.com/byroot/dalli.git", branch: "fix-string-fastpath" | |
| # else | |
| # gem "dalli", ENV.fetch("DALLI_VERSION", "5.0.2") | |
| # end | |
| # gem "connection_pool" | |
| # | |
| # Run against any local memcached >=1.5 (tested on 1.6.41): | |
| # | |
| # DALLI_VERSION=3.2.8 bundle update dalli --quiet && bundle exec ruby repro.rb | |
| # DALLI_VERSION=4.2.0 bundle update dalli --quiet && bundle exec ruby repro.rb | |
| # DALLI_VERSION=5.0.2 bundle update dalli --quiet && bundle exec ruby repro.rb | |
| # DALLI_VERSION=byroot-fix bundle update dalli --quiet && bundle exec ruby repro.rb | |
| require "bundler/setup" | |
| require "dalli" | |
| require "zlib" | |
# Report which Dalli build is under test, followed by a blank separator line.
puts "Dalli version: #{Dalli::VERSION}", ""

# Options shared by every client in this script: compression is on and
# applies to values of at least 1024 bytes.
opts = {
  compress: true,
  compression_min_size: 1024,
  silence_marshal_warning: true
}

# Two independent clients against the same local server, so each scenario can
# compare a same-client read with a cross-client read.
writer, reader = Array.new(2) { Dalli::Client.new("localhost:11211", **opts) }

# NOTE: this flushes the memcached instance at localhost:11211 as a whole, not
# only the keys this script writes -- do not point it at a shared server.
writer.flush
# Short pause after the flush (presumably to let it settle before the first
# write; the reason is not stated in this script).
sleep 0.1

# Running tallies, updated by every scenario and read by the final report.
failures = total = 0
# Runs the given block, compares its result with +expected+ and prints a
# one-line verdict for the scenario.
#
# @param label [String] name shown in the report line
# @param expected [Object] value the block must return (compared with ==)
# @yieldreturn [Object] the value actually read back
# @return [Boolean] true when the result equals +expected+; false on a
#   mismatch or when the block raises a StandardError
def check(label, expected)
  got =
    begin
      yield
    rescue => e
      # A raise counts as a failure; the message is capped at 100 characters.
      puts " FAIL #{label}: RAISED #{e.class}: #{e.message[0, 100]}"
      return false
    end

  ok = got == expected
  detail =
    if ok
      # Only Strings have a byte size. A matching non-String value (nil,
      # Integer, Hash, ...) is described by its class instead of raising
      # NoMethodError on #bytesize.
      got.is_a?(String) ? "#{got.bytesize}B" : got.class.to_s
    elsif got.nil?
      "got nil (silent miss)"
    elsif got.is_a?(String)
      # Show size, encoding and the first six bytes so corrupted payloads can
      # be told apart at a glance.
      head = got.byteslice(0, 6).bytes.inspect
      # A leading 0x78 byte is the usual first byte of a zlib stream, i.e. the
      # value came back still compressed.
      zlib = got.bytesize >= 2 && got.getbyte(0) == 0x78 ? " RAW ZLIB STREAM" : ""
      "got #{got.bytesize}B enc=#{got.encoding} head=#{head}#{zlib}"
    else
      "got #{got.class}: #{got.inspect[0, 80]}"
    end

  puts " #{ok ? 'OK ' : 'FAIL'} #{label}: #{detail}"
  ok
end
# ---------------------------------------------------------------------------
# Scenario 1: a short UTF-8 string stored with string_fastpath, well below the
# 1024-byte compression threshold.
# Expected failure mode on affected versions: the fastpath marks the value
# with FLAG_UTF8 (0x2), the reading side's compressor interprets 0x2 as
# "compressed" and tries to inflate plain UTF-8 bytes, which raises.
# ---------------------------------------------------------------------------
puts "Scenario 1: small UTF-8 + string_fastpath (under compress threshold)"
small_utf8 = "hello-world-string"
writer.set("s1", small_utf8, 60, string_fastpath: true)

# Read the key back through the writing client first, then through the
# second client; each read is one counted operation.
{ "s1 same-client" => writer, "s1 cross-client" => reader }.each do |label, client|
  total += 1
  failures += 1 unless check(label, small_utf8) { client.get("s1") }
end
# ---------------------------------------------------------------------------
# Scenario 2: a large UTF-8 string stored with string_fastpath, big enough to
# be compressed.
# Both flags land on bit 0x2. A same-client round trip can still "pass"
# because the faulty logic is symmetric, but the flag on the wire is
# ambiguous.
# ---------------------------------------------------------------------------
puts "Scenario 2: large UTF-8 + string_fastpath + compress"
# Multi-byte characters repeated 600 times; also reused by scenarios 4 and 5.
big_utf8 = ("héllo-вселенная-🌍 " * 600).b.force_encoding("UTF-8").freeze
writer.set("s2", big_utf8, 60, string_fastpath: true)

# One counted read per client: the writer itself, then the second client.
{ "s2 same-client" => writer, "s2 cross-client" => reader }.each do |label, client|
  total += 1
  failures += 1 unless check(label, big_utf8) { client.get("s2") }
end
# ---------------------------------------------------------------------------
# Scenario 3: a large BINARY string stored with string_fastpath, big enough
# to be compressed.
# Expected failure mode on affected versions: the fastpath leaves the bit at
# 0x0 for BINARY data and the compressor sets 0x2; on read the compressor
# consumes the bit, the serializer sees it cleared and takes the wrong default
# branch, so the bytes come back tagged UTF-8 instead of BINARY -- a silent
# encoding corruption.
# ---------------------------------------------------------------------------
puts "Scenario 3: large BINARY + string_fastpath + compress"
# 0xFF is not valid UTF-8, so a wrong encoding tag makes the comparison fail.
big_bin = ("\xFFpayload" * 2000).b.freeze
writer.set("s3", big_bin, 60, string_fastpath: true)

# One counted read per client: the writer itself, then the second client.
{ "s3 same-client" => writer, "s3 cross-client" => reader }.each do |label, client|
  total += 1
  failures += 1 unless check(label, big_bin) { client.get("s3") }
end
# ---------------------------------------------------------------------------
# Scenario 4: rolling-deploy mismatch. The writer opts into string_fastpath on
# its set call; the value is then read by a newly created client that never
# passes string_fastpath. The new client uses the same compression options as
# the writer.
# ---------------------------------------------------------------------------
puts "Scenario 4: writer uses fastpath, reader is default (rolling-deploy)"
plain_reader = Dalli::Client.new("localhost:11211", **opts)
writer.set("s4", big_utf8, 60, string_fastpath: true)

total += 1
fresh_read_ok = check("s4 fresh-client read", big_utf8) { plain_reader.get("s4") }
failures += 1 unless fresh_read_ok
# ---------------------------------------------------------------------------
# Scenario 5: NOT A DALLI BUG -- kept as a sanity reminder.
# `raw: true` skips serialization on write, so the value round-trips through
# Dalli but is not a Marshal payload. The Marshal step is expected to fail on
# every version; it stays in so that patched and unpatched runs print the
# same output except for the collision rows.
# ---------------------------------------------------------------------------
puts "Scenario 5: raw: true round-trip + Marshal sanity (not a Dalli bug)"
writer.set("s5", big_utf8, 60, raw: true)

# The block stores the fetched value so the Marshal step below can reuse it.
got_raw = nil
total += 1
raw_round_trip_ok = check("s5 raw round-trip", big_utf8) { got_raw = writer.get("s5") }
failures += 1 unless raw_round_trip_ok

if got_raw
  begin
    # The bytes were written by this script a moment ago; loading them is
    # expected to raise, which is reported but not counted as a failure.
    Marshal.load(got_raw.to_s)
  rescue => e
    puts " -- s5 marshal: #{e.class} (expected; raw bytes are not Marshal payloads)"
  end
end
# ---------------------------------------------------------------------------
# Scenario 6: 8 threads, 50 set/get pairs each. Even-numbered threads write
# with string_fastpath, odd-numbered threads write without it.
# Shows how the two write paths interact under concurrency.
# ---------------------------------------------------------------------------
puts "Scenario 6: 8 threads x 50 ops, half use fastpath"

# Each thread counts its own bad operations; the per-thread counts are added
# up once every thread has finished (Thread#value joins the thread).
workers = Array.new(8) do |i|
  Thread.new do
    write_opts = i.even? ? { string_fastpath: true } : {}
    50.times.count do |j|
      key = "c:#{i}:#{j}"
      val = ("payload-#{i}-#{j}-" * 400).freeze
      begin
        writer.set(key, val, 60, **write_opts)
        # A value that differs from what was written is a mismatch.
        reader.get(key) != val
      rescue
        # An operation that raises is counted as a mismatch as well.
        true
      end
    end
  end
end
mismatches = workers.sum(&:value)

total += 400
failures += mismatches
puts " #{mismatches.zero? ? 'OK ' : 'FAIL'} concurrent: #{mismatches}/400 mismatches"
# Final summary: a blank line, then the verdict framed by two 64-character
# rules. The verdict depends only on the accumulated failure count.
rule = "=" * 64
verdict =
  if failures.zero?
    "RESULT: PASS (#{total} ops, 0 failures) -- Dalli #{Dalli::VERSION} safe"
  else
    "RESULT: FAIL (#{failures}/#{total} broken) -- Dalli #{Dalli::VERSION} BUGGY"
  end
puts "", rule, verdict, rule
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment