2019-07-08 12:28:04 +02:00
|
|
|
---
|
|
|
|
title: RAID data scrubbing
|
2021-09-13 19:05:02 +02:00
|
|
|
tags: [tutorial, python, raid-partition, scrubbing, data, systemd, timer]
|
|
|
|
updated: 2021-09-13 17:05:00
|
2019-07-08 12:28:04 +02:00
|
|
|
description: A simple RAID scrubbing python script and its systemd service and timer.
|
|
|
|
---
|
|
|
|
|
|
|
|
## Introduction
|
|
|
|
|
|
|
|
RAID scrubbing checks and fixes errors in RAID arrays.
|
|
|
|
|
|
|
|
<!--more-->
|
|
|
|
|
2019-08-11 19:47:07 +02:00
|
|
|
*Please note that the source code and the steps described here are included in the
|
|
|
|
[automated-tasks](https://github.com/frnmst/automated-tasks) repository.*
|
|
|
|
|
2021-07-09 19:35:50 +02:00
|
|
|
While I was reading an [Arch Wiki page](https://wiki.archlinux.org/index.php?title=RAID&oldid=572096#Scrubbing),
|
|
|
|
I found an [AUR package](https://aur.archlinux.org/packages/raid-check-systemd/)
|
2019-07-08 12:28:04 +02:00
|
|
|
that claims to run periodic RAID scrubbings on the hard drives.
|
2021-07-09 19:35:50 +02:00
|
|
|
The [original script and configuration](https://centos.pkgs.org/7/centos-x86_64/mdadm-4.1-rc1_2.el7.x86_64.rpm.html)
|
2019-07-08 12:28:04 +02:00
|
|
|
are quite confusing, so I decided to write my own.
|
|
|
|
It lacks some features compared to the original one, but it does the job.
|
|
|
|
|
|
|
|
## Script
|
|
|
|
|
|
|
|
The only copyright notice I found is the one I have included in the script
|
|
|
|
and I took it from the CentOS package at `/usr/share/doc/mdadm-4.1/mdcheck`.
|
|
|
|
The original script that I translated into Python is at `/usr/sbin/raid-check`.
|
|
|
|
|
|
|
|
The license of the original scripts and programs is undoubtedly GPL2+.
|
|
|
|
See also the `COPYING` file in the rpm package.
|
|
|
|
|
|
|
|
```python
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
# Copyright (C) 2014-2017 Neil Brown <neilb@suse.de>
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# Author: Neil Brown
|
|
|
|
# Email: <neilb@suse.com>
|
|
|
|
#
|
|
|
|
# Copyright (C) 2019 Franco Masotti <franco.masotti@live.com>
|
|
|
|
|
|
|
|
import configparser
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
import os
|
|
|
|
import multiprocessing
|
|
|
|
import pathlib
|
|
|
|
import collections
|
|
|
|
|
|
|
|
# Status strings compared against /proc/mdstat and the md sysfs files.
STATUS_CLEAN = 'clean'
STATUS_ACTIVE = 'active'
STATUS_IDLE = 'idle'
|
|
|
|
|
|
|
|
class UserNotRoot(Exception):
    """Raised when the script is run by a user other than root."""
|
|
|
|
|
|
|
|
class NoAvailableArrays(Exception):
    """Raised when no RAID array is available on the system."""
|
|
|
|
|
|
|
|
class NoSelectedArraysPresent(Exception):
    """Raised when none of the arrays named in the configuration file exists."""
|
|
|
|
|
|
|
|
def get_active_arrays():
    """Return the names of the arrays that /proc/mdstat lists as active.

    An array line is expected to look like
    ``md0 : active raid1 sdb1[1] sda1[0]``: the state field is compared for
    equality because a substring test would also select ``inactive`` arrays.

    :returns: list of device names as written in /proc/mdstat (for example
        ``md0``), in file order.
    """
    active_arrays = []
    with open('/proc/mdstat', 'r') as f:
        for line in f:
            fields = line.split()
            # Expected layout: <name> ':' <state> ...
            if (len(fields) >= 3
                    and fields[1] == ':'
                    and fields[2] == STATUS_ACTIVE):
                active_arrays.append(fields[0])

    return active_arrays
|
|
|
|
|
|
|
|
def get_array_state(array: str):
    """Return the content of /sys/block/<array>/md/array_state.

    :param array: RAID device name without the ``/dev/`` prefix.
    :returns: the file content with trailing whitespace removed.
    """
    # Use a context manager so that the file handle is always closed.
    with open('/sys/block/' + array + '/md/array_state', 'r') as f:
        return f.read().rstrip()
|
|
|
|
|
|
|
|
def get_sync_action(array: str):
    """Return the content of /sys/block/<array>/md/sync_action.

    :param array: RAID device name without the ``/dev/`` prefix.
    :returns: the file content with trailing whitespace removed.
    """
    # Use a context manager so that the file handle is always closed.
    with open('/sys/block/' + array + '/md/sync_action', 'r') as f:
        return f.read().rstrip()
|
|
|
|
|
|
|
|
def run_action(array: str, action: str):
    """Write ``action`` to /sys/block/<array>/md/sync_action.

    :param array: RAID device name without the ``/dev/`` prefix.
    :param action: the string to write to the file.
    """
    sync_action_file = '/sys/block/{}/md/sync_action'.format(array)
    with open(sync_action_file, 'w') as control:
        control.write(action)
|
|
|
|
|
|
|
|
def main_action(array: str):
    """Run the configured action on ``array`` as soon as it is idle.

    The action is read from the module-level ``devices`` mapping and the
    polling interval, in seconds, from the module-level
    ``timeout_idle_check``. Both are set by the script entry point.

    :param array: RAID device name without the ``/dev/`` prefix.
    :raises KeyError: if ``array`` has no entry in ``devices``.
    """
    action = devices[array]

    # Poll until no other sync action is reported for the array.
    while get_sync_action(array) != STATUS_IDLE:
        print('waiting ' + array + ' to be idle...')
        time.sleep(timeout_idle_check)

    pid = str(os.getpid())
    print('running ' + action + ' on /dev/' + array + '. pid: ' + pid)
    run_action(array, action)
    # NOTE(review): this is printed as soon as the action has been written
    # to sync_action; nothing here waits for the array to become idle again.
    print('finished pid: ' + pid)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Entry point: read the configuration, select the arrays to work on and
    # start main_action() for each one, max_concurrent_checks at a time.
    if os.getuid() != 0:
        raise UserNotRoot

    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' CONFIGURATION_FILE')
    configuration_file = sys.argv[1]

    config = configparser.ConfigParser()
    # ConfigParser.read() skips files it cannot open and returns the list of
    # the ones it parsed: fail here instead of with a KeyError further down.
    if not config.read(configuration_file):
        raise OSError('cannot read configuration file: ' + configuration_file)

    max_concurrent_checks = int(config['DEFAULT']['max concurrent checks'])
    timeout_idle_check = int(config['DEFAULT']['timeout idle check'])
    if max_concurrent_checks < 1:
        # With zero workers per batch the queue below would never drain.
        raise ValueError("'max concurrent checks' must be at least 1")

    # main_action() reads devices and timeout_idle_check as module globals.
    devices = {dev: config['devices'][dev] for dev in config['devices']}

    active_arrays = get_active_arrays()
    if not active_arrays:
        raise NoAvailableArrays

    accepted_states = (STATUS_CLEAN, STATUS_ACTIVE, STATUS_IDLE)
    dev_queue = collections.deque()
    for dev in active_arrays:
        if not pathlib.Path('/sys/block/' + dev + '/md/sync_action').is_file():
            continue
        if get_array_state(dev) not in accepted_states:
            continue
        # Arrays missing from the configuration are skipped exactly like the
        # ones marked 'ignore'.
        if devices.get(dev, 'ignore') != 'ignore':
            dev_queue.append(dev)

    if not dev_queue:
        raise NoSelectedArraysPresent

    # The workers rely on the globals assigned above, which a child process
    # only sees when it is forked from this one, so request 'fork' explicitly
    # instead of depending on the platform default start method.
    mp_context = multiprocessing.get_context('fork')

    while dev_queue:
        batch = []
        for _ in range(max_concurrent_checks):
            if not dev_queue:
                break
            ready = dev_queue.popleft()
            p = mp_context.Process(target=main_action, args=(ready,))
            p.start()
            batch.append(p)

        # Wait for every process of the batch, not only the last one started.
        for p in batch:
            p.join()
|
|
|
|
```
|
|
|
|
|
|
|
|
### Configuration file
|
|
|
|
|
|
|
|
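The configuration file is self-explanatory: global options go in `[DEFAULT]` and one entry per RAID device goes in `[devices]`.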
|
|
|
|
|
|
|
|
```
|
|
|
|
[DEFAULT]
|
|
|
|
# The maximum number of concurrent processes.
|
|
|
|
max concurrent checks = 2
|
|
|
|
|
|
|
|
# In seconds.
|
|
|
|
timeout idle check = 10
|
|
|
|
|
|
|
|
# key = RAID device name without the '/dev/' prefix.
|
|
|
|
# value = 'check', 'repair', 'idle', 'ignore'.
|
|
|
|
# The special value of 'ignore' will make the script skip the device.
|
|
|
|
# Absent devices are ignored.
|
|
|
|
[devices]
|
|
|
|
md1 = check
|
|
|
|
md2 = ignore
|
|
|
|
md3 = check
|
|
|
|
md4 = check
|
|
|
|
md5 = check
|
|
|
|
md6 = check
|
|
|
|
md10 = check
|
|
|
|
md21 = ignore
|
|
|
|
```
|
|
|
|
|
|
|
|
## Systemd
|
|
|
|
|
|
|
|
The python script needs to be run by `root`. Follow the instructions reported
|
|
|
|
in the [previous post]({{ site.baseurl}}/notes/from-crontabs-to-systemd-timers.html).
|
|
|
|
Save the python script as `/home/jobs/scripts/by-user/root/mdadm_check.py` and its
|
|
|
|
configuration file as `/home/jobs/scripts/by-user/root/mdadm_check.conf`.
|
|
|
|
|
|
|
|
### Service unit file
|
|
|
|
|
|
|
|
```
|
|
|
|
[Unit]
|
|
|
|
Description=mdadm check
|
|
|
|
|
|
|
|
[Service]
|
|
|
|
Type=simple
|
|
|
|
ExecStart=-/home/jobs/scripts/by-user/root/mdadm_check.py /home/jobs/scripts/by-user/root/mdadm_check.conf
|
|
|
|
User=root
|
|
|
|
Group=root
|
|
|
|
|
|
|
|
[Install]
|
|
|
|
WantedBy=multi-user.target
|
|
|
|
```
|
|
|
|
|
|
|
|
### Timer unit file
|
|
|
|
|
|
|
|
See the [previous post]({{ site.baseurl}}/notes/from-crontabs-to-systemd-timers.html).
|
|
|
|
|
|
|
|
```
|
|
|
|
[Unit]
|
|
|
|
Description=Once a month check mdadm arrays
|
|
|
|
|
|
|
|
[Timer]
|
|
|
|
OnCalendar=Monthly
|
|
|
|
Persistent=true
|
|
|
|
|
|
|
|
[Install]
|
|
|
|
WantedBy=timers.target
|
|
|
|
```
|
|
|
|
|
|
|
|
~
|
|
|
|
|
|
|
|
Have fun :)
|