之前排查过Redisson RMapCache无法读取缓存bug,在这里记录一下当时排查的思路和具体原因
测试代码
测试redisson版本为3.13.1,先看测试代码
@Slf4j
@SpringBootTest
public class RedissonTest {
@Autowired
private RedissonClient redissonClient;
@Test
public void test() {
RMapCache<String, String> testRMapCache = redissonClient.getMapCache("testRMapCache");
testRMapCache.fastPut("key", "value", 3, TimeUnit.SECONDS);
try {
//等缓存过期
Thread.sleep(3100);
} catch (InterruptedException e) {
e.printStackTrace();
}
// 往缓存写入另外一个值
testRMapCache.put("key", "anotherValue");
String result = testRMapCache.get("key");
// 尝试立刻读缓存,有可能获取到一个null,非必现
System.out.println("result: " + result);
}
}
本地测试还挺容易复现的,最后一步重新读取缓存的时候,直接读取到一个null
redis monitor 监听
这里先开启redis monitor 先看看具体执行了什么脚本(单次运行完需要清理redis中所有关联key,避免redisson定时任务的干扰)
127.0.0.1:6379> MONITOR
OK
// fastput执行的脚本
1746173814.962993 [0 192.168.188.1:39952] "EVAL" "local insertable = false; local value = redis.call('hget', KEYS[1], ARGV[5]); local t, val;if value == false then insertable = true; else t, val = struct.unpack('dLc0', value); local expireDate = 92233720368547758; local expireDateScore = redis.call('zscore', KEYS[2], ARGV[5]); if expireDateScore ~= false then expireDate = tonumber(expireDateScore) end; if t ~= 0 then local expireIdle = redis.call('zscore', KEYS[3], ARGV[5]); if expireIdle ~= false then expireDate = math.min(expireDate, tonumber(expireIdle)) end; end; if expireDate <= tonumber(ARGV[1]) then insertable = true; end; end; if tonumber(ARGV[2]) > 0 then redis.call('zadd', KEYS[2], ARGV[2], ARGV[5]); else redis.call('zrem', KEYS[2], ARGV[5]); end; if tonumber(ARGV[3]) > 0 then redis.call('zadd', KEYS[3], ARGV[3], ARGV[5]); else redis.call('zrem', KEYS[3], ARGV[5]); end; local maxSize = tonumber(redis.call('hget', KEYS[8], 'max-size')); if maxSize ~= nil and maxSize ~= 0 then local currentTime = tonumber(ARGV[1]); local lastAccessTimeSetName = KEYS[6]; redis.call('zadd', lastAccessTimeSetName, currentTime, ARGV[5]); local cacheSize = tonumber(redis.call('hlen', KEYS[1])); if cacheSize >= maxSize then local lruItems = redis.call('zrange', lastAccessTimeSetName, 0, cacheSize - maxSize); for index, lruItem in ipairs(lruItems) do if lruItem and lruItem ~= ARGV[5] then local lruItemValue = redis.call('hget', KEYS[1], lruItem); redis.call('hdel', KEYS[1], lruItem); redis.call('zrem', KEYS[2], lruItem); redis.call('zrem', KEYS[3], lruItem); redis.call('zrem', lastAccessTimeSetName, lruItem); if lruItemValue ~= false then local removedChannelName = KEYS[7]; local msg = struct.pack('Lc0Lc0', string.len(lruItem), lruItem, string.len(lruItemValue), lruItemValue); redis.call('publish', removedChannelName, msg); end; end; end; end; end; local value = struct.pack('dLc0', ARGV[4], string.len(ARGV[6]), ARGV[6]); redis.call('hset', KEYS[1], ARGV[5], value); if insertable == true then local msg = struct.pack('Lc0Lc0', string.len(ARGV[5]), ARGV[5], string.len(ARGV[6]), ARGV[6]); redis.call('publish', KEYS[4], msg); return 1;else local msg = struct.pack('Lc0Lc0Lc0', string.len(ARGV[5]), ARGV[5], string.len(ARGV[6]), ARGV[6], string.len(val), val); redis.call('publish', KEYS[5], msg); return 0;end;" "8" "testRMapCache" "redisson__timeout__set:{testRMapCache}" "redisson__idle__set:{testRMapCache}" "redisson_map_cache_created:{testRMapCache}" "redisson_map_cache_updated:{testRMapCache}" "redisson__map_cache__last_access__set:{testRMapCache}" "redisson_map_cache_removed:{testRMapCache}" "{testRMapCache}:redisson_options" "1746173814893" "1746173817870" "0" "0" "\x04>\x03key" "\x04>\x05value"
1746173814.963279 [0 lua] "hget" "testRMapCache" "\x04>\x03key"
1746173814.963295 [0 lua] "zadd" "redisson__timeout__set:{testRMapCache}" "1746173817870" "\x04>\x03key"
1746173814.963315 [0 lua] "zrem" "redisson__idle__set:{testRMapCache}" "\x04>\x03key"
1746173814.963346 [0 lua] "hget" "{testRMapCache}:redisson_options" "max-size"
1746173814.963361 [0 lua] "hset" "testRMapCache" "\x04>\x03key" "\x00\x00\x00\x00\x00\x00\x00\x00\b\x00\x00\x00\x00\x00\x00\x00\x04>\x05value"
1746173814.963373 [0 lua] "publish" "redisson_map_cache_created:{testRMapCache}" "\x06\x00\x00\x00\x00\x00\x00\x00\x04>\x03key\b\x00\x00\x00\x00\x00\x00\x00\x04>\x05value"
// put运行的脚本
1746173818.078906 [0 192.168.188.1:39914] "EVAL" "local v = redis.call('hget', KEYS[1], ARGV[2]);local exists = false;if v ~= false then local t, val = struct.unpack('dLc0', v); local expireDate = 92233720368547758; local expireDateScore = redis.call('zscore', KEYS[2], ARGV[2]); if expireDateScore ~= false then expireDate = tonumber(expireDateScore) end; if t ~= 0 then local expireIdle = redis.call('zscore', KEYS[3], ARGV[2]); if expireIdle ~= false then expireDate = math.min(expireDate, tonumber(expireIdle)) end; end; if expireDate > tonumber(ARGV[1]) then exists = true; end;end;local value = struct.pack('dLc0', 0, string.len(ARGV[3]), ARGV[3]);redis.call('hset', KEYS[1], ARGV[2], value);local currentTime = tonumber(ARGV[1]);local lastAccessTimeSetName = KEYS[6];local maxSize = tonumber(redis.call('hget', KEYS[8], 'max-size'));if exists == false then if maxSize ~= nil and maxSize ~= 0 then redis.call('zadd', lastAccessTimeSetName, currentTime, ARGV[2]); local cacheSize = tonumber(redis.call('hlen', KEYS[1])); if cacheSize > maxSize then local lruItems = redis.call('zrange', lastAccessTimeSetName, 0, cacheSize - maxSize - 1); for index, lruItem in ipairs(lruItems) do if lruItem and lruItem ~= ARGV[2] then local lruItemValue = redis.call('hget', KEYS[1], lruItem); redis.call('hdel', KEYS[1], lruItem); redis.call('zrem', KEYS[2], lruItem); redis.call('zrem', KEYS[3], lruItem); redis.call('zrem', lastAccessTimeSetName, lruItem); if lruItemValue ~= false then local removedChannelName = KEYS[7]; local msg = struct.pack('Lc0Lc0', string.len(lruItem), lruItem, string.len(lruItemValue), lruItemValue); redis.call('publish', removedChannelName, msg);end; end; end end; end; local msg = struct.pack('Lc0Lc0', string.len(ARGV[2]), ARGV[2], string.len(ARGV[3]), ARGV[3]); redis.call('publish', KEYS[4], msg); return nil;else if maxSize ~= nil and maxSize ~= 0 then redis.call('zadd', lastAccessTimeSetName, currentTime, ARGV[2]); end;end;local t, val = struct.unpack('dLc0', v);local msg = struct.pack('Lc0Lc0Lc0', string.len(ARGV[2]), ARGV[2], string.len(ARGV[3]), ARGV[3], string.len(val), val);redis.call('publish', KEYS[5], msg);return val;" "8" "testRMapCache" "redisson__timeout__set:{testRMapCache}" "redisson__idle__set:{testRMapCache}" "redisson_map_cache_created:{testRMapCache}" "redisson_map_cache_updated:{testRMapCache}" "redisson__map_cache__last_access__set:{testRMapCache}" "redisson_map_cache_removed:{testRMapCache}" "{testRMapCache}:redisson_options" "1746173818075" "\x04>\x03key" "\x04>\x0canotherValue"
1746173818.079148 [0 lua] "hget" "testRMapCache" "\x04>\x03key"
1746173818.079162 [0 lua] "zscore" "redisson__timeout__set:{testRMapCache}" "\x04>\x03key"
1746173818.079181 [0 lua] "hset" "testRMapCache" "\x04>\x03key" "\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x04>\x0canotherValue"
1746173818.079190 [0 lua] "hget" "{testRMapCache}:redisson_options" "max-size"
1746173818.079197 [0 lua] "publish" "redisson_map_cache_created:{testRMapCache}" "\x06\x00\x00\x00\x00\x00\x00\x00\x04>\x03key\x0f\x00\x00\x00\x00\x00\x00\x00\x04>\x0canotherValue"
// get运行的脚本
1746173818.088388 [0 192.168.188.1:39882] "EVAL" "local value = redis.call('hget', KEYS[1], ARGV[2]); if value == false then return nil; end; local t, val = struct.unpack('dLc0', value); local expireDate = 92233720368547758; local expireDateScore = redis.call('zscore', KEYS[2], ARGV[2]); if expireDateScore ~= false then expireDate = tonumber(expireDateScore) end; if t ~= 0 then local expireIdle = redis.call('zscore', KEYS[3], ARGV[2]); if expireIdle ~= false then if tonumber(expireIdle) > tonumber(ARGV[1]) then redis.call('zadd', KEYS[3], t + tonumber(ARGV[1]), ARGV[2]); end; expireDate = math.min(expireDate, tonumber(expireIdle)) end; end; if expireDate <= tonumber(ARGV[1]) then return nil; end; local maxSize = tonumber(redis.call('hget', KEYS[5], 'max-size')); if maxSize ~= nil and maxSize ~= 0 then redis.call('zadd', KEYS[4], tonumber(ARGV[1]), ARGV[2]); end; return val; " "5" "testRMapCache" "redisson__timeout__set:{testRMapCache}" "redisson__idle__set:{testRMapCache}" "redisson__map_cache__last_access__set:{testRMapCache}" "{testRMapCache}:redisson_options" "1746173818080" "\x04>\x03key"
1746173818.088480 [0 lua] "hget" "testRMapCache" "\x04>\x03key"
1746173818.088491 [0 lua] "zscore" "redisson__timeout__set:{testRMapCache}" "\x04>\x03key"分析一下lua脚本
fastput 核心lua脚本
KEYS[1] = "testRMapCache" -- redis hash key
KEYS[2] = "redisson__timeout__set:{testRMapCache}" -- TTL 过期时间有序集合
KEYS[3] = "redisson__idle__set:{testRMapCache}" -- Idle 过期时间有序集合
KEYS[4] = "redisson_map_cache_created:{testRMapCache}" -- 创建事件频道
KEYS[5] = "redisson_map_cache_updated:{testRMapCache}" -- 更新事件频道
KEYS[6] = "redisson__map_cache__last_access__set:{testRMapCache}" -- 最后访问时间集合(LRU)
KEYS[7] = "redisson_map_cache_removed:{testRMapCache}" -- 移除事件频道
KEYS[8] = "{testRMapCache}:redisson_options" -- 缓存配置(如 max-size)
ARGV[1] = "1746173814893" -- 当前时间戳
ARGV[2] = "1746173817870" -- TTL 过期时间
ARGV[3] = "0" -- Idle 过期时间
ARGV[4] = "0" -- 最大闲置时间
ARGV[5] = "\x04>\x03key" -- 键名
ARGV[6] = "\x04>\x05value" -- 值
-- 是否是新增的,用于后面的pubsub通知
local insertable = false;
local value = redis.call('hget', KEYS[1], ARGV[5]);
local t, val;
if value == false then
insertable = true;
else
t, val = struct.unpack('dLc0', value);
local expireDate = 92233720368547758;
local expireDateScore = redis.call('zscore', KEYS[2], ARGV[5]);
if expireDateScore ~= false then expireDate = tonumber(expireDateScore) end
if t ~= 0 then
local expireIdle = redis.call('zscore', KEYS[3], ARGV[5]);
if expireIdle ~= false then
expireDate = math.min(expireDate, tonumber(expireIdle))
end
end
if expireDate <= tonumber(ARGV[1]) then insertable = true; end
end
-- 写入ttl过期时间
if tonumber(ARGV[2]) > 0 then
redis.call('zadd', KEYS[2], ARGV[2], ARGV[5]);
else
redis.call('zrem', KEYS[2], ARGV[5]);
end
-- 写入idle过期时间
if tonumber(ARGV[3]) > 0 then
redis.call('zadd', KEYS[3], ARGV[3], ARGV[5]);
else
redis.call('zrem', KEYS[3], ARGV[5]);
end
-- 测试代码不涉及最大容量,这部分lru淘汰策略不执行,和bug无关
local maxSize = tonumber(redis.call('hget', KEYS[8], 'max-size'));
if maxSize ~= nil and maxSize ~= 0 then
local currentTime = tonumber(ARGV[1]);
local lastAccessTimeSetName = KEYS[6];
redis.call('zadd', lastAccessTimeSetName, currentTime, ARGV[5]);
local cacheSize = tonumber(redis.call('hlen', KEYS[1]));
if cacheSize >= maxSize then
local lruItems = redis.call('zrange', lastAccessTimeSetName, 0,
cacheSize - maxSize);
for index, lruItem in ipairs(lruItems) do
if lruItem and lruItem ~= ARGV[5] then
local lruItemValue = redis.call('hget', KEYS[1], lruItem);
redis.call('hdel', KEYS[1], lruItem);
redis.call('zrem', KEYS[2], lruItem);
redis.call('zrem', KEYS[3], lruItem);
redis.call('zrem', lastAccessTimeSetName, lruItem);
if lruItemValue ~= false then
local removedChannelName = KEYS[7];
local msg = struct.pack('Lc0Lc0', string.len(lruItem),
lruItem, string.len(lruItemValue),
lruItemValue);
redis.call('publish', removedChannelName, msg);
end
end
end
end
end
-- 写入value,value前面拼接了一个idle过期时间
local value = struct.pack('dLc0', ARGV[4], string.len(ARGV[6]), ARGV[6]);
redis.call('hset', KEYS[1], ARGV[5], value);
-- pubsub 给listener用的,和bug没什么关系
if insertable == true then
local msg = struct.pack('Lc0Lc0', string.len(ARGV[5]), ARGV[5],
string.len(ARGV[6]), ARGV[6]);
redis.call('publish', KEYS[4], msg);
return 1;
else
local msg = struct.pack('Lc0Lc0Lc0', string.len(ARGV[5]), ARGV[5],
string.len(ARGV[6]), ARGV[6], string.len(val), val);
redis.call('publish', KEYS[5], msg);
return 0;
end
put 核心lua脚本
KEYS[1] = "testRMapCache" -- redis hash key
KEYS[2] = "redisson__timeout__set:{testRMapCache}" -- TTL 过期时间zset
KEYS[3] = "redisson__idle__set:{testRMapCache}" -- Idle 过期时间zset
KEYS[4] = "redisson_map_cache_created:{testRMapCache}" -- 创建事件频道
KEYS[5] = "redisson_map_cache_updated:{testRMapCache}" -- 更新事件频道
KEYS[6] = "redisson__map_cache__last_access__set:{testRMapCache}" -- 最后访问时间集合(LRU)
KEYS[7] = "redisson_map_cache_removed:{testRMapCache}" -- 移除事件频道
KEYS[8] = "{testRMapCache}:redisson_options" -- 缓存配置(max-size)
ARGV[1] = "1746173818075" -- 当前时间戳
ARGV[2] = "\x04>\x03key" -- key
ARGV[3] = "\x04>\x0canotherValue" -- value
local v = redis.call('hget', KEYS[1], ARGV[2]);
local exists = false;
-- 判断是否已存在,主要是给后面lru淘汰策略使用,和bug暂时无关
if v ~= false then
local t, val = struct.unpack('dLc0', v);
local expireDate = 92233720368547758;
local expireDateScore = redis.call('zscore', KEYS[2], ARGV[2]);
if expireDateScore ~= false then expireDate = tonumber(expireDateScore) end
if t ~= 0 then
local expireIdle = redis.call('zscore', KEYS[3], ARGV[2]);
if expireIdle ~= false then
expireDate = math.min(expireDate, tonumber(expireIdle))
end
end
if expireDate > tonumber(ARGV[1]) then exists = true; end
end
-- value前面拼接了过期时间,不过这个地方不涉及过期时间就拼接了一个0上去
local value = struct.pack('dLc0', 0, string.len(ARGV[3]), ARGV[3]);
redis.call('hset', KEYS[1], ARGV[2], value);
local currentTime = tonumber(ARGV[1]);
local lastAccessTimeSetName = KEYS[6];
local maxSize = tonumber(redis.call('hget', KEYS[8], 'max-size'));
-- 测试代码不涉及最大容量,这部分lru淘汰策略不执行,和bug无关
if exists == false then
if maxSize ~= nil and maxSize ~= 0 then
redis.call('zadd', lastAccessTimeSetName, currentTime, ARGV[2]);
local cacheSize = tonumber(redis.call('hlen', KEYS[1]));
if cacheSize > maxSize then
local lruItems = redis.call('zrange', lastAccessTimeSetName, 0,
cacheSize - maxSize - 1);
for index, lruItem in ipairs(lruItems) do
if lruItem and lruItem ~= ARGV[2] then
local lruItemValue = redis.call('hget', KEYS[1], lruItem);
redis.call('hdel', KEYS[1], lruItem);
redis.call('zrem', KEYS[2], lruItem);
redis.call('zrem', KEYS[3], lruItem);
redis.call('zrem', lastAccessTimeSetName, lruItem);
if lruItemValue ~= false then
local removedChannelName = KEYS[7];
local msg = struct.pack('Lc0Lc0', string.len(lruItem),
lruItem,
string.len(lruItemValue),
lruItemValue);
redis.call('publish', removedChannelName, msg);
end
end
end
end
end
local msg = struct.pack('Lc0Lc0', string.len(ARGV[2]), ARGV[2],
string.len(ARGV[3]), ARGV[3]);
redis.call('publish', KEYS[4], msg);
return nil;
else
-- 刷新一下 最近访问时间
if maxSize ~= nil and maxSize ~= 0 then
redis.call('zadd', lastAccessTimeSetName, currentTime, ARGV[2]);
end
end
-- pubsub通知,和bug无关
local t, val = struct.unpack('dLc0', v);
local msg = struct.pack('Lc0Lc0Lc0', string.len(ARGV[2]), ARGV[2],
string.len(ARGV[3]), ARGV[3], string.len(val), val);
redis.call('publish', KEYS[5], msg);
return val;
get 核心lua脚本
KEYS[1] testRMapCache -- redis hash key
KEYS[2] redisson__timeout__set:{testRMapCache} -- TTL 过期时间 zset
KEYS[3] redisson__idle__set:{testRMapCache} -- Idle 过期时间 zset
KEYS[4] redisson__map_cache__last_access__set:{testRMapCache} -- 最后访问时间集合(LRU)
KEYS[5] {testRMapCache}:redisson_options -- 缓存配置(如 max-size)
ARGV[1] 1746173818080 -- 当前时间戳
ARGV[2] \x04>\x03key -- key
-- 查询具体值
local value = redis.call('hget', KEYS[1], ARGV[2]);
if value == false then return nil; end
local t, val = struct.unpack('dLc0', value);
-- 计算过期时间
local expireDate = 92233720368547758;
local expireDateScore = redis.call('zscore', KEYS[2], ARGV[2]);
if expireDateScore ~= false then expireDate = tonumber(expireDateScore) end
if t ~= 0 then
local expireIdle = redis.call('zscore', KEYS[3], ARGV[2]);
if expireIdle ~= false then
if tonumber(expireIdle) > tonumber(ARGV[1]) then
redis.call('zadd', KEYS[3], t + tonumber(ARGV[1]), ARGV[2]);
end
expireDate = math.min(expireDate, tonumber(expireIdle))
end
end
-- 如果过期了则直接返回null
if expireDate <= tonumber(ARGV[1]) then return nil; end
local maxSize = tonumber(redis.call('hget', KEYS[5], 'max-size'));
if maxSize ~= nil and maxSize ~= 0 then
redis.call('zadd', KEYS[4], tonumber(ARGV[1]), ARGV[2]);
end
return val;
问题定位
问题看出来了,重新分析一下测试代码
public void test() {
RMapCache<String, String> testRMapCache = redissonClient.getMapCache("testRMapCache");
// 往testRMapCache这个hash中写入了 key-value
// ttl zset中也会写入 3s 后的时间戳
testRMapCache.fastPut("key", "value", 3, TimeUnit.SECONDS);
try {
//等缓存过期
Thread.sleep(3100);
} catch (InterruptedException e) {
e.printStackTrace();
}
// 往缓存写入另外一个值
testRMapCache.put("key", "anotherValue");
// 上面一行代码写入值后,此时ttl zset中的过期时间仍然存在,再去拿肯定拿不出来
// 然后解释一下为什么是偶现
// RMapCache有一个定时任务会去清理无效数据(间隔时间最短5s),在上面等待缓存过期期间如果数据被清理了,那么就一切正常
String result = testRMapCache.get("key");
System.out.println("result: " + result);// null
}解决方案
上面的代码写法有点不对称,先写入一个带过期时间的key-value,后面又写入不带过期时间的key-value,这里可以根据业务需求直接改成统一带过期时间,或者都不带过期时间的
升级redisson到3.13.5以上的版本
在redisson 3.13.5版本中,这个bug被修复,现在再看看redisson官方是怎么修改这个bug的
Fixed - fail to insert key with TTL = 0 if the same key was previousl… · redisson/redisson@ccfa4ed
这段代码就是在执行put的时候,直接干掉idle 和 ttl 过期时间对应zset中的数据
小结
本文主要分析了特定用法下redisson 缓存读不出来的bug,其实分析的方式并不是特别好,在抓redis monitor日志的时候,经常会被redisson自带的定时任务给干扰,有这个排查干扰的时间,不如直接去看源码,这样估计能更快定位出来,这次就当是踩坑了,后面有时间再写几篇文章继续分析一下RMapCache这个工具。