]>
git.saurik.com Git - redis.git/blob - src/redis-trib.rb
3 # TODO (temporary here, we'll move this into the Github issues once
4 # redis-trib initial implementation is completed).
6 # - Make sure that if the rehashing fails in the middle redis-trib will try
8 # - When redis-trib performs a cluster check, if it detects a slot move in
9 # progress it should prompt the user to continue the move from where it
11 # - Gracefully handle Ctrl+C in move_slot, prompting the user whether to really stop
12 # while rehashing, and performing the best cleanup possible if the user
14 # - When doing "fix" set a global Fix to true, and prompt the user to
15 # fix the problem if automatically fixable every time there is something
16 # to fix. For instance:
17 # 1) If there is a node that pretends to receive a slot, or to migrate a
18 # slot, but has no entries in that slot, fix it.
19 # 2) If there is a node having keys in slots that are not owned by it
20 # fix this condition moving the entries in the same node.
21 # 3) Perform more possibly slow tests about the state of the cluster.
22 # 4) When aborted slot migration is detected, fix it.
27 ClusterHashSlots
= 4096
38 puts
"Invalid node name #{addr}"
46 @dirty = false # True if we need to flush slots info into node.
59 "#{@info[:host]}:#{@info[:port]}"
64 xputs
"Connecting to node #{self}: "
66 @r = Redis
.new(:host => @info[:host], :port => @info[:port])
70 puts
"Sorry, can't connect to node #{self}"
79 if !info
["cluster_enabled"] || info
["cluster_enabled"].to_i
== 0
80 puts
"Error: Node #{self} is not configured as a cluster node."
86 if !
(@r.cluster("info").split("\r\n").index("cluster_known_nodes:1")) ||
88 puts
"Error: Node #{self} is not empty. Either the node already knows other nodes (check with nodes-info) or contains some key in database 0."
95 nodes
= @r.cluster("nodes").split("\n")
97 # name addr flags role ping_sent ping_recv link_status slots
99 name
,addr
,flags
,role
,ping_sent
,ping_recv
,link_status
= split
[0..6]
104 :flags => flags
.split(","),
106 :ping_sent => ping_sent
.to_i
,
107 :ping_recv => ping_recv
.to_i
,
108 :link_status => link_status
110 if info
[:flags].index("myself")
111 @info = @info.merge(info
)
115 # Fixme: for now skipping migration entries
117 start
,stop
= s
.split("-")
118 self.add_slots((start
.to_i
)..(stop
.to_i
))
120 self.add_slots((s
.to_i
)..(s
.to_i
))
124 @r.cluster("info").split("\n").each
{|e
|
128 if k !
= :cluster_state
142 @info[:slots][s
] = :new
147 def flush_node_config
150 @info[:slots].each
{|s
,val
|
153 @info[:slots][s
] = true
156 @r.cluster("addslots",*new
)
161 # We want to display the hash slots assigned to this node
162 # as ranges, like in: "1-5,8-9,20-25,30"
164 # Note: this could be easily written without side effects,
165 # we use 'slots' just to split the computation into steps.
167 # First step: we want an increasing array of integers
168 # for instance: [1,2,3,4,5,8,9,20,21,22,23,24,25,30]
169 slots
= @info[:slots].keys
.sort
171 # As we want to aggregate adjacent slots we convert all the
172 # slot integers into ranges (with just one element)
173 # So we have something like [1..1,2..2, ... and so forth.
176 # Finally we group ranges with adjacent elements.
177 slots
= slots
.reduce([]) {|a
,b
|
178 if !a
.empty
? && b
.first
== (a
[-1].last
)+
1
179 a
[0..-2] +
[(a
[-1].first
)..(b
.last
)]
185 # Now our task is easy, we just convert ranges with just one
186 # element into a number, and a real range into a start-end format.
187 # Finally we join the array using the comma as separator.
188 slots
= slots
.map
{|x
|
189 x
.count
== 1 ? x
.first
.to_s
: "#{x.first}-#{x.last}"
192 "[#{@info[:cluster_state].upcase}] #{self.info[:name]} #{self.to_s} slots:#{slots} (#{self.slots.length} slots)"
213 def check_arity(req_args
, num_args
)
214 if ((req_args
> 0 and num_args !
= req_args
) ||
215 (req_args
< 0 and num_args
< req_args
.abs
))
216 puts
"Wrong number of arguments for specified sub command"
225 def get_node_by_name(name
)
227 return n
if n
.info
[:name] == name
.downcase
233 puts
"Performing Cluster Check (using node #{@nodes[0]})"
236 # Check if all the slots are covered
239 slots
= slots
.merge(n
.slots
)
241 if slots
.length
== 4096
242 puts
"[OK] All 4096 slots covered."
244 errors
<< "[ERR] Not all 4096 slots are covered by nodes."
251 slots_per_node
= ClusterHashSlots
/@nodes.length
254 first
= i
*slots_per_node
255 last
= first+slots_per_node-1
256 last
= ClusterHashSlots-1
if i
== @nodes.length-1
257 n
.add_slots first
..last
262 def flush_nodes_config
275 # We use a brute force approach to make sure the nodes will meet
276 # each other, that is, sending CLUSTER MEET messages to all the nodes
277 # about the very same node.
278 # Thanks to gossip this information should propagate across all the
279 # cluster in a matter of seconds.
282 if !first
then first
= n
.info
; next; end # Skip the first node
283 n
.r
.cluster("meet",first
[:host],first
[:port])
288 print
"#{msg} (type 'yes' to accept): "
290 if !
(STDIN.gets
.chomp
.downcase
== "yes")
296 def load_cluster_info_from_node(nodeaddr
)
297 node
= ClusterNode
.new(ARGV[1])
298 node
.connect(:abort => true)
300 node
.load_info(:getfriends => true)
302 node
.friends
.each
{|f
|
303 fnode
= ClusterNode
.new(f
[:addr])
310 # Given a list of source nodes return a "resharding plan"
311 # with what slots to move in order to move "numslots" slots to another
313 def compute_reshard_table(sources
,numslots
)
315 # Sort from bigger to smaller instance, for two reasons:
316 # 1) If we take fewer slots than instances it is better to start getting from
317 # the biggest instances.
318 # 2) We take one slot more from the first instance in the case of not perfect
319 # divisibility. Like we have 3 nodes and need to get 10 slots, we take
320 # 4 from the first, and 3 from the rest. So the biggest is always the first.
321 sources
= sources
.sort
{|a
,b
| b
.slots
.length
<=> a
.slots
.length
}
322 source_tot_slots
= sources
.inject(0) {|sum
,source
| sum+source
.slots
.length
}
323 sources
.each_with_index
{|s
,i
|
324 # Every node will provide a number of slots proportional to the
325 # slots it has assigned.
326 n
= (numslots
.to_f
/source_tot_slots
*s
.slots
.length
)
332 s
.slots
.keys
.sort
[(0...n
)].each
{|slot
|
333 if moved
.length
< numslots
334 moved
<< {:source => s
, :slot => slot
}
341 def show_reshard_table(table
)
343 puts
" Moving slot #{e[:slot]} from #{e[:source].info[:name]}"
347 def move_slot(source
,target
,slot
,o
={})
348 # We start marking the slot as importing in the destination node,
349 # and the slot as migrating in the source host. Note that the order of
350 # the operations is important, as otherwise a client may be redirected to
351 # the target node that does not yet know it is importing this slot.
352 print
"Moving slot #{slot} from #{source.info_string}: "; STDOUT.flush
353 target
.r
.cluster("setslot",slot
,"importing",source
.info
[:name])
354 source
.r
.cluster("setslot",slot
,"migrating",source
.info
[:name])
355 # Migrate all the keys from source to target using the MIGRATE command
357 keys
= source
.r
.cluster("getkeysinslot",slot
,10)
358 break if keys
.length
== 0
360 source
.r
.migrate(target
.info
[:host],target
.info
[:port],key
,0,1)
361 print
"." if o
[:verbose]
366 # Set the new node as the owner of the slot in all the known nodes.
368 n
.r
.cluster("setslot",slot
,"node",target
.info
[:name])
372 # redis-trib subcommands implementations
374 def check_cluster_cmd
375 load_cluster_info_from_node(ARGV[1])
379 def reshard_cluster_cmd
380 load_cluster_info_from_node(ARGV[1])
381 errors
= check_cluster
382 if errors
.length !
= 0
383 puts
"Please fix your cluster problems before resharding."
387 while numslots
<= 0 or numslots
> 4096
388 print
"How many slots do you want to move (from 1 to 4096)? "
389 numslots
= STDIN.gets
.to_i
393 print
"What is the receiving node ID? "
394 target
= get_node_by_name(STDIN.gets
.chop
)
396 puts
"The specified node is not known, please retry."
400 puts
"Please enter all the source node IDs."
401 puts
" Type 'all' to use all the nodes as source nodes for the hash slots."
402 puts
" Type 'done' once you entered all the source nodes IDs."
404 print
"Source node ##{sources.length+1}:"
405 line
= STDIN.gets
.chop
406 src
= get_node_by_name(line
)
408 if sources
.length
== 0
409 puts
"No source nodes given, operation aborted"
416 next if n
.info
[:name] == target
.info
[:name]
421 puts
"The specified node is not known, please retry."
422 elsif src
.info
[:name] == target
.info
[:name]
423 puts
"It is not possible to use the target node as source node."
428 puts
"\nReady to move #{numslots} slots."
429 puts
" Source nodes:"
430 sources
.each
{|s
| puts
" "+s
.info_string
}
431 puts
" Destination node:"
432 puts
" #{target.info_string}"
433 reshard_table
= compute_reshard_table(sources
,numslots
)
434 puts
" Resharding plan:"
435 show_reshard_table(reshard_table
)
436 print
"Do you want to proceed with the proposed reshard plan (yes/no)? "
437 yesno
= STDIN.gets
.chop
438 exit(1) if (yesno !
= "yes")
439 reshard_table
.each
{|e
|
440 move_slot(e
[:source],target
,e
[:slot],:verbose=>true)
444 def create_cluster_cmd
445 puts
"Creating cluster"
447 node
= ClusterNode
.new(n
)
448 node
.connect(:abort => true)
454 puts
"Performing hash slots allocation on #{@nodes.length} nodes..."
457 yes_or_die
"Can I set the above configuration?"
459 puts
"** Nodes configuration updated"
460 puts
"** Sending CLUSTER MEET messages to join the cluster"
467 "create" => ["create_cluster_cmd", -2, "host1:port host2:port ... hostN:port"],
468 "check" => ["check_cluster_cmd", 2, "host:port"],
469 "reshard" => ["reshard_cluster_cmd", 2, "host:port"]
474 puts
"Usage: redis-trib <command> <arguments ...>"
477 puts
" #{k.ljust(20)} #{v[2]}"
484 cmd_spec
= COMMANDS
[ARGV[0].downcase
]
486 puts
"Unknown redis-trib subcommand '#{ARGV[0]}'"
489 rt
.check_arity(cmd_spec
[1],ARGV.length
)