".devcontainer/library-script/provide-user.sh" did not exist on "6114d45af5c9b91856bf7d3605a7841d8aa752a4"
Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/python
import os,sys,getopt,pprint
# Program name interpolated into the usage lines printed by help().
SCRIPT='fix_pgs.py'
def help():
    """Print the command-line usage summary to stdout.

    One line is printed per short option handled by main(). The function
    name shadows the builtin help(); it is kept because main() calls it by
    this name.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('%s -m    show the active ceph-osd ids found on each host' % SCRIPT)
    print('%s -p    show the failed_repair PGs and their OSD ids' % SCRIPT)
    print('%s -f    restart the OSDs of each failed PG and run pg repair' % SCRIPT)
    print('%s -h    show this help' % SCRIPT)
def get_pg_failed(health_output=None):
    """Return the PGs flagged ``failed_repair`` together with their OSD ids.

    Args:
        health_output: optional text with one ``<pgid> [<osd>,<osd>,...]``
            pair per line, i.e. the format emitted by the shell pipeline
            below. When ``None`` (the default) the pipeline is run against
            the local ``ceph`` command.

    Returns:
        dict mapping PG id (str) to a list of OSD ids (list of str). Lines
        that do not carry both fields are ignored.
    """
    if health_output is None:
        pipe = os.popen(
            "ceph health detail | grep 'pg ' | grep 'failed_repair'"
            " | awk '{print $2\" \"$6}'")
        try:
            health_output = pipe.read()
        finally:
            # Close explicitly so the shell pipeline is reaped right away.
            pipe.close()

    pgs_dict = {}
    # splitlines() does not depend on the output ending with a newline.
    for line in health_output.splitlines():
        fields = line.split()
        # Skip blank or truncated lines (e.g. awk printed an empty $6).
        if len(fields) < 2:
            continue
        # Second field looks like "[21,25,30]": drop the brackets, split ids.
        osd_list = [o for o in fields[1].strip("[]").split(",") if o]
        if osd_list:
            pgs_dict[fields[0]] = osd_list
    return pgs_dict
def get_host_osds():
    """Map every host listed by ``ceph osd tree`` to its active ceph-osd ids.

    Host names are read from the ``host`` rows of ``ceph osd tree``; each
    host is then queried over ssh for its active systemd units whose name
    contains ``ceph-osd``.

    Returns:
        dict mapping host name (str) to a list of OSD ids (list of str).
    """
    def run(command):
        # Run a shell pipeline and return its non-empty output lines.
        # Closing the pipe explicitly reaps the child process, and
        # splitlines() does not depend on a trailing newline.
        pipe = os.popen(command)
        try:
            return [ln for ln in pipe.read().splitlines() if ln.strip()]
        finally:
            pipe.close()

    hosts_dict = {}
    for h in run("ceph osd tree | grep host | awk '{print $4}'"):
        units = run(
            "ssh %s systemctl list-units --type=service --state=active"
            " | grep ceph-osd | awk '{print $1}'" % h)
        # Unit names look like "ceph-osd@12.service"; keep only the "12".
        hosts_dict[h] = [u.split(".")[0].split("@")[-1] for u in units]
    return hosts_dict
def get_osd_hosts():
    """Map every active ceph-osd id to the host it runs on.

    This is the inverse of the mapping returned by get_host_osds(), which
    performs the actual ``ceph osd tree`` and ssh queries; reusing it keeps
    the host discovery and unit-name parsing in a single place.

    Returns:
        dict mapping OSD id (str) to host name (str).
    """
    osds_dict = {}
    for host, osd_ids in get_host_osds().items():
        for osd in osd_ids:
            osds_dict[osd] = host
    return osds_dict
def build_commands(pgs, osds):
    """Build the shell commands that cycle a PG's OSDs and then repair it.

    Args:
        pgs: dict mapping PG id to the list of OSD ids to restart for it.
        osds: dict mapping OSD id to the host that runs it.

    Returns:
        list of command strings. For every OSD of a PG there is a
        stop / flush-journal / start triple executed over ssh, followed by
        one ``ceph pg repair`` for that PG.

    Raises:
        KeyError: if an OSD id listed in ``pgs`` is missing from ``osds``.
    """
    commands = []
    for p in pgs:
        # Progress output; parenthesised so it runs on Python 2 and 3.
        print(p)
        for o in pgs[p]:
            host = osds[o]
            commands.append("ssh %s systemctl stop ceph-osd@%s.service" % (host, o))
            commands.append("ssh %s ceph-osd -i %s --flush-journal" % (host, o))
            commands.append("ssh %s systemctl start ceph-osd@%s.service" % (host, o))
        # NOTE(review): the repair is queued once per PG, after all of its
        # OSDs have been restarted -- confirm this is the intended nesting.
        commands.append("ceph pg repair %s" % p)
    return commands
# stop the OSD that has the wrong object responsible for that PG
# flush the journal (ceph-osd -i <id> --flush-journal)
# move the bad object to another location
# start the OSD again
# call ceph pg repair 17.1c1
def fix_pgs():
    """Run the repair procedure for every PG flagged ``failed_repair``.

    Collects the failed PGs and the OSD-to-host map, builds the command
    list with build_commands() and executes the commands one at a time, in
    order. A command that exits non-zero is reported on stderr; the
    remaining commands are still executed.
    """
    pgs = get_pg_failed()
    osds_dict = get_osd_hosts()
    # Build the list once; a second call would only repeat the work and the
    # per-PG progress output.
    commands = build_commands(pgs, osds_dict)
    for c in commands:
        pipe = os.popen(c)
        try:
            # Drain the output so the child can never block on a full pipe.
            pipe.read()
        finally:
            # close() waits for the command to finish, so stop / flush /
            # start / repair run strictly one after another. It returns
            # None when the command exited with status 0.
            status = pipe.close()
        if status is not None:
            sys.stderr.write("command failed (status %s): %s\n" % (status, c))
def main(argv):
    """Parse the command line and run the requested action(s).

    Args:
        argv: argument list without the program name (``sys.argv[1:]``).

    Options (none of them takes an argument):
        -h               print the usage text and exit
        -m, --machine    print the host -> OSD ids map
        -p, --pg_failed  print the failed_repair PGs and their OSD ids
        -f, --fix_pg     restart the affected OSDs and repair the PGs

    Exits with status 2 on an invalid command line or when no option is
    given.
    """
    try:
        # Each long option needs its own list entry; a single
        # "machine=pg_failed" string declares one oddly named option and
        # none of the long names tested below.
        opts, args = getopt.getopt(
            argv, "hmpf", ["machine", "pg_failed", "fix_pg"])
    except getopt.GetoptError:
        help()
        sys.exit(2)
    if not opts:
        # Nothing was requested: show the usage instead of exiting silently.
        help()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            help()
            sys.exit()
        elif opt in ("-m", "--machine"):
            osds = get_host_osds()
            pprint.pprint(osds)
        elif opt in ("-p", "--pg_failed"):
            pg = get_pg_failed()
            pprint.pprint(pg)
        elif opt in ("-f", "--fix_pg"):
            fix_pgs()
        else:
            help()
            sys.exit()
if __name__ == "__main__":
    # Run the CLI only when executed as a script, not when imported.
    main(sys.argv[1:])