]> git.saurik.com Git - redis.git/blob - src/redis-trib.rb
2f5d94bbef5ab6b783ef2050a9d9a46c3172f57a
[redis.git] / src / redis-trib.rb
1 #!/usr/bin/env ruby
2
# TODO (temporarily kept here; we'll move this into the GitHub issues once the
# initial redis-trib implementation is completed).
#
# - Make sure that if the rehashing fails in the middle, redis-trib will try to recover.
# - When redis-trib performs a cluster check, if it detects a slot move in progress it
#   should prompt the user to continue the move from where it stopped.
9
10 require 'rubygems'
11 require 'redis'
12
# Number of hash slots that redis-trib distributes across the cluster nodes.
13 ClusterHashSlots = 4096
14
# Print +s+ to standard output without a trailing newline and flush right
# away, so that progress messages are visible before a slow operation ends.
#
# The string is written verbatim: "%" characters in +s+ are NOT interpreted
# as format directives (which is what printf would do).
def xputs(s)
  print s
  STDOUT.flush
end
19
# A single Redis Cluster node as seen by redis-trib: its address, the
# connection used to talk to it, and a local view of the hash slots assigned
# to it. Slots added locally are kept as "new" until flush_node_config sends
# them to the node.
class ClusterNode
  # Info hashes of the other nodes listed by this node. Only filled by
  # load_info when called with :getfriends => true.
  attr_reader :friends

  # Hash with everything known about this node: :host, :port, :slots and,
  # after load_info, the fields reported by the node itself.
  attr_reader :info

  # The Redis connection object, or nil when not connected.
  attr_reader :r

  # addr - node address in "host:port" form. The process exits with status 1
  #        if the address does not have exactly two ":"-separated parts.
  def initialize(addr)
    parts = addr.split(":")
    if parts.length != 2
      puts "Invalid node name #{addr}"
      exit 1
    end
    @r = nil
    @info = { :host => parts[0], :port => parts[1], :slots => {} }
    @dirty = false # True if we need to flush slots info into node.
    @friends = []
  end

  # Hash of slot number => state (:new until flushed, then true).
  def slots
    @info[:slots]
  end

  # The node address as "host:port".
  def to_s
    "#{@info[:host]}:#{@info[:port]}"
  end

  # Connect to the node and verify the link with a PING. Does nothing if a
  # connection already exists.
  #
  # o - options hash; with :abort => true the process exits with status 1 on
  #     failure. Otherwise the failure is reported and the node is left
  #     disconnected (r returns nil).
  def connect(o={})
    return if @r
    xputs "Connecting to node #{self}: "
    begin
      @r = Redis.new(:host => @info[:host], :port => @info[:port])
      @r.ping
    rescue
      puts "ERROR"
      puts "Sorry, can't connect to node #{self}"
      exit 1 if o[:abort]
      @r = nil
      # Do not fall through: printing "OK" after "ERROR" would be misleading.
      return
    end
    puts "OK"
  end

  # Exit with status 1 unless the node reports cluster_enabled in INFO.
  def assert_cluster
    enabled = @r.info["cluster_enabled"]
    if !enabled || enabled.to_i == 0
      puts "Error: Node #{self} is not configured as a cluster node."
      exit 1
    end
  end

  # Exit with status 1 unless the node knows only itself and INFO reports no
  # db0 entry (no keys in database 0).
  def assert_empty
    knows_only_itself =
      @r.cluster("info").split("\r\n").include?("cluster_known_nodes:1")
    if !knows_only_itself || @r.info['db0']
      puts "Error: Node #{self} is not empty. Either the node already knows other nodes (check with nodes-info) or contains some key in database 0."
      exit 1
    end
  end

  # Refresh the local view of this node from CLUSTER NODES / CLUSTER INFO.
  #
  # o - options hash; with :getfriends => true the other nodes listed by
  #     CLUSTER NODES are collected into friends.
  #
  # Returns without loading anything if the node cannot be reached.
  def load_info(o={})
    connect
    return unless @r # connect already reported the failure.
    @r.cluster("nodes").each_line do |line|
      # name addr flags role ping_sent ping_recv link_status slots
      name, addr, flags, role, ping_sent, ping_recv, link_status, slots =
        line.split(" ")
      next if flags.nil? # Blank or truncated line.
      node = {
        :name => name,
        :addr => addr,
        :flags => flags.split(","),
        :role => role,
        :ping_sent => ping_sent.to_i,
        :ping_recv => ping_recv.to_i,
        :link_status => link_status
      }
      if node[:flags].include?("myself")
        @info = @info.merge(node)
        @info[:slots] = {}
        load_slots(slots) if slots
        # The slots above come from the node itself: nothing to flush.
        @dirty = false
        load_cluster_state
      elsif o[:getfriends]
        @friends << node
      end
    end
  end

  # Assign every slot in +slots+ (any object responding to each, e.g. a
  # Range) to this node locally and mark the node as needing a flush.
  def add_slots(slots)
    slots.each { |slot| @info[:slots][slot] = :new }
    @dirty = true
  end

  # Send the locally added slots to the node with CLUSTER ADDSLOTS and mark
  # them as flushed. Does nothing when the node is not dirty.
  def flush_node_config
    return unless @dirty
    pending = []
    @info[:slots].each { |slot, state| pending << slot if state == :new }
    # CLUSTER ADDSLOTS requires at least one slot argument.
    @r.cluster("addslots", *pending) unless pending.empty?
    pending.each { |slot| @info[:slots][slot] = true }
    @dirty = false
  end

  # One-line description of the node: cluster state, name, address and the
  # assigned slots shown as ranges, like in "1-5,8-9,20-25,30".
  #
  # Nodes whose info was never loaded are reported with state UNKNOWN.
  def info_string
    # Collapse the sorted slot numbers into [first, last] pairs of adjacent
    # slots in a single pass.
    ranges = []
    @info[:slots].keys.sort.each do |slot|
      if !ranges.empty? && ranges[-1][1] + 1 == slot
        ranges[-1][1] = slot
      else
        ranges << [slot, slot]
      end
    end

    # A range with just one element is shown as a number, a real range in
    # start-end format; everything is joined using the comma as separator.
    slots = ranges.map { |first, last|
      first == last ? first.to_s : "#{first}-#{last}"
    }.join(",")

    state = (@info[:cluster_state] || "unknown").to_s.upcase
    "[#{state}] #{@info[:name]} #{self.to_s.ljust(25)} slots:#{slots}"
  end

  # True if slots were added locally and not yet flushed to the node.
  def is_dirty?
    @dirty
  end

  private

  # Parse a slots field such as "0-100,200,300-400" and add every slot.
  def load_slots(spec)
    spec.split(",").each do |item|
      if item.index("-")
        start, stop = item.split("-")
        add_slots((start.to_i)..(stop.to_i))
      else
        add_slots((item.to_i)..(item.to_i))
      end
    end
  end

  # Copy the "key:value" lines of CLUSTER INFO into @info. Every value is
  # stored as an integer except :cluster_state, which is kept as a string.
  def load_cluster_state
    @r.cluster("info").each_line do |line|
      # strip removes the trailing "\r" (or "\n") only when it is present.
      key, value = line.strip.split(":", 2)
      next if key.nil? || key.empty? || value.nil?
      key = key.to_sym
      @info[key] = (key == :cluster_state) ? value : value.to_i
    end
  end
end
189
# Implements the redis-trib subcommands (create, check, reshard) on top of a
# list of cluster node objects.
class RedisTrib
  def initialize
    @nodes = []
  end

  # Validate the number of command line arguments and exit with status 1 on
  # mismatch.
  #
  # req_args - a positive value means "exactly that many arguments", a
  #            negative value means "at least abs(req_args) arguments".
  # num_args - the number of arguments actually given.
  def check_arity(req_args, num_args)
    if (req_args > 0 && num_args != req_args) ||
       (req_args < 0 && num_args < req_args.abs)
      puts "Wrong number of arguments for specified sub command"
      exit 1
    end
  end

  # Register a node as part of the cluster being handled.
  def add_node(node)
    @nodes << node
  end

  # Return the known node whose name equals +name+ (lowercased), or nil.
  def get_node_by_name(name)
    wanted = name.downcase
    @nodes.find { |n| n.info[:name] == wanted }
  end

  # Print the nodes and verify that, together, they cover every hash slot.
  # Returns the array of error messages found (empty when all is fine).
  def check_cluster
    puts "Performing Cluster Check (using node #{@nodes[0]})"
    errors = []
    show_nodes
    # Check if all the slots are covered
    covered = {}
    @nodes.each { |n| covered.merge!(n.slots) }
    if covered.length == ClusterHashSlots
      puts "[OK] All #{ClusterHashSlots} slots covered."
    else
      errors << "[ERR] Not all #{ClusterHashSlots} slots are covered by nodes."
      puts errors[-1]
    end
    errors
  end

  # Split the hash slots space evenly among the nodes. The last node also
  # receives the remainder of the division.
  def alloc_slots
    slots_per_node = ClusterHashSlots / @nodes.length
    @nodes.each_with_index do |n, i|
      first = i * slots_per_node
      last = first + slots_per_node - 1
      last = ClusterHashSlots - 1 if i == @nodes.length - 1
      n.add_slots first..last
    end
  end

  # Send the pending slots configuration to every node.
  def flush_nodes_config
    @nodes.each { |n| n.flush_node_config }
  end

  # Print one description line per node.
  def show_nodes
    @nodes.each { |n| puts n.info_string }
  end

  def join_cluster
    # We use a brute force approach to make sure the node will meet
    # each other, that is, sending CLUSTER MEET messages to all the nodes
    # about the very same node.
    # Thanks to gossip this information should propagate across all the
    # cluster in a matter of seconds.
    return if @nodes.empty?
    first = @nodes.first.info
    @nodes.drop(1).each do |n|
      n.r.cluster("meet", first[:host], first[:port])
    end
  end

  # Ask for confirmation and exit with status 1 unless the user types "yes".
  # End of input counts as a refusal.
  def yes_or_die(msg)
    print "#{msg} (type 'yes' to accept): "
    STDOUT.flush
    if STDIN.gets.to_s.chomp.downcase != "yes"
      puts "Aborting..."
      exit 1
    end
  end

  # Connect to the node at +nodeaddr+ ("host:port"), load its view of the
  # cluster, then connect to and load every other node it lists.
  def load_cluster_info_from_node(nodeaddr)
    node = ClusterNode.new(nodeaddr)
    node.connect(:abort => true)
    node.assert_cluster
    node.load_info(:getfriends => true)
    add_node(node)
    node.friends.each do |f|
      fnode = ClusterNode.new(f[:addr])
      fnode.connect()
      # An unreachable node has already been reported by connect; it cannot
      # be queried, so leave it out instead of crashing on a nil connection.
      next unless fnode.r
      fnode.load_info()
      add_node(fnode)
    end
  end

  # Given a list of source nodes return a "resharding plan"
  # with what slots to move in order to move "numslots" slots to another
  # instance.
  #
  # Every source contributes a number of slots proportional to the slots it
  # owns, relative to the slots owned by all the sources together. The plan
  # is an array of {:source => node, :slot => number} hashes and never holds
  # more slots than the sources own.
  def compute_reshard_table(sources, numslots)
    # Sort from bigger to smaller instance, so that in the case of not
    # perfect divisibility the extra slots are taken from the biggest
    # instances. Like we have 3 nodes and need to get 10 slots, we take
    # 4 from the first, and 3 from the rest.
    sources = sources.sort { |a, b| b.slots.length <=> a.slots.length }
    total = sources.inject(0) { |sum, s| sum + s.slots.length }
    return [] if total == 0 || numslots <= 0

    wanted = [numslots, total].min
    # Integer division: the floor of the proportional share.
    quotas = sources.map { |s| wanted * s.slots.length / total }
    leftover = wanted - quotas.inject(0) { |sum, q| sum + q }
    # Hand out what the rounding left behind, one slot per source, starting
    # from the biggest source.
    sources.each_index do |i|
      break if leftover == 0
      next if quotas[i] >= sources[i].slots.length
      quotas[i] += 1
      leftover -= 1
    end

    moved = []
    sources.each_with_index do |s, i|
      s.slots.keys.sort.first(quotas[i]).each do |slot|
        moved << {:source => s, :slot => slot}
      end
    end
    moved
  end

  # Print the resharding plan computed by compute_reshard_table.
  def show_reshard_table(table)
    table.each do |e|
      puts " Moving slot #{e[:slot]} from #{e[:source].info[:name]}"
    end
  end

  # Move +slot+, with all its keys, from +source+ to +target+.
  #
  # o - options hash; with :verbose => true a dot is printed per moved key.
  def move_slot(source,target,slot,o={})
    # We start marking the slot as importing in the destination node,
    # and the slot as migrating in the source node. Note that the order of
    # the operations is important, as otherwise a client may be redirected to
    # the target node that does not yet know it is importing this slot.
    print "Moving slot #{slot}: "; STDOUT.flush
    target.r.cluster("setslot", slot, "importing", source.info[:name])
    # MIGRATING takes the ID of the node the slot is migrating TO.
    source.r.cluster("setslot", slot, "migrating", target.info[:name])
    # Migrate all the keys from source to target using the MIGRATE command
    loop do
      keys = source.r.cluster("getkeysinslot", slot, 10)
      break if keys.length == 0
      keys.each do |key|
        source.r.migrate(target.info[:host], target.info[:port], key, 0, 1)
        print "." if o[:verbose]
        STDOUT.flush
      end
    end
    puts
    # Set the new node as the owner of the slot in all the known nodes.
    @nodes.each do |n|
      n.r.cluster("setslot", slot, "node", target.info[:name])
    end
  end

  # redis-trib subcommands implementations

  # "check" subcommand: load the cluster from the given node and check it.
  def check_cluster_cmd
    load_cluster_info_from_node(ARGV[1])
    check_cluster
  end

  # "reshard" subcommand: interactively ask how many slots to move, the
  # receiving node and the source nodes, show the plan and, once confirmed,
  # move the slots one by one.
  def reshard_cluster_cmd
    load_cluster_info_from_node(ARGV[1])
    errors = check_cluster
    if errors.length != 0
      puts "Please fix your cluster problems before resharding."
      exit 1
    end
    numslots = ask_slots_count
    target = ask_target_node
    sources = ask_source_nodes(target)
    puts "\nReady to move #{numslots} slots."
    puts " Source nodes:"
    sources.each { |s| puts " " + s.info_string }
    puts " Destination node:"
    puts " #{target.info_string}"
    reshard_table = compute_reshard_table(sources, numslots)
    puts " Resharding plan:"
    show_reshard_table(reshard_table)
    print "Do you want to proceed with the proposed reshard plan (yes/no)? "
    exit(1) if read_input_line != "yes"
    reshard_table.each do |e|
      move_slot(e[:source], target, e[:slot], :verbose => true)
    end
  end

  # "create" subcommand: check that every given node is an empty cluster
  # node, allocate the slots among them and join them into a cluster.
  def create_cluster_cmd
    puts "Creating cluster"
    ARGV[1..-1].each do |n|
      node = ClusterNode.new(n)
      node.connect(:abort => true)
      node.assert_cluster
      # Load the node name and cluster state, shown later by show_nodes.
      node.load_info
      node.assert_empty
      add_node(node)
    end
    puts "Performing hash slots allocation on #{@nodes.length} nodes..."
    alloc_slots
    show_nodes
    yes_or_die "Can I set the above configuration?"
    flush_nodes_config
    puts "** Nodes configuration updated"
    puts "** Sending CLUSTER MEET messages to join the cluster"
    join_cluster
    check_cluster
  end

  private

  # Read one line from standard input, without surrounding whitespace.
  # Exits with status 1 at end of input, so that prompts cannot loop forever.
  def read_input_line
    STDOUT.flush
    line = STDIN.gets
    if line.nil?
      puts
      puts "Aborting..."
      exit 1
    end
    line.strip
  end

  # Keep asking until the user gives a valid number of slots to move.
  def ask_slots_count
    numslots = 0
    while numslots <= 0 || numslots > ClusterHashSlots
      print "How many slots do you want to move (from 1 to #{ClusterHashSlots})? "
      numslots = read_input_line.to_i
    end
    numslots
  end

  # Keep asking until the user gives the ID of a known node.
  def ask_target_node
    loop do
      print "What is the receiving node ID? "
      target = get_node_by_name(read_input_line)
      return target if target
      puts "The specified node is not known, please retry."
    end
  end

  # Ask for the source nodes; returns a non-empty array without +target+ and
  # without duplicates.
  def ask_source_nodes(target)
    sources = []
    puts "Please enter all the source node IDs."
    puts " Type 'all' to use all the nodes as source nodes for the hash slots."
    puts " Type 'done' once you entered all the source nodes IDs."
    loop do
      print "Source node ##{sources.length+1}:"
      line = read_input_line
      if line == "done"
        if sources.empty?
          puts "No source nodes given, operation aborted"
          exit 1
        end
        return sources
      elsif line == "all"
        return @nodes.reject { |n| n.info[:name] == target.info[:name] }
      end
      src = get_node_by_name(line)
      if !src
        puts "The specified node is not known, please retry."
      elsif src.info[:name] == target.info[:name]
        puts "It is not possible to use the target node as source node."
      elsif sources.include?(src)
        puts "The specified node is already a source node."
      else
        sources << src
      end
    end
  end
end
445
# Supported subcommands: name => [RedisTrib method, arity, usage text].
# A positive arity is the exact number of command line arguments required
# (subcommand name included); a negative one is the minimum number.
COMMANDS = {
  "create" => ["create_cluster_cmd", -2, "host1:port host2:port ... hostN:port"],
  "check" => ["check_cluster_cmd", 2, "host:port"],
  "reshard" => ["reshard_cluster_cmd", 2, "host:port"]
}

# Without arguments just show the usage and the list of subcommands.
if ARGV.empty?
  puts "Usage: redis-trib <command> <arguments ...>"
  puts
  COMMANDS.each_pair do |name, spec|
    puts " #{name.ljust(20)} #{spec[2]}"
  end
  puts
  exit 1
end

rt = RedisTrib.new
cmd_spec = COMMANDS[ARGV[0].downcase]
if cmd_spec.nil?
  puts "Unknown redis-trib subcommand '#{ARGV[0]}'"
  exit 1
end
handler, arity = cmd_spec
rt.check_arity(arity, ARGV.length)

# Run the method implementing the requested subcommand.
rt.send(handler)