Official read-only mirror of the smartmontools project SVN https://www.smartmontools.org/browser
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
smartmontools/smartmontools/smartd.cpp

5912 lines
199 KiB

/*
* Home page of code is: https://www.smartmontools.org
*
* Copyright (C) 2002-11 Bruce Allen
* Copyright (C) 2008-22 Christian Franke
* Copyright (C) 2000 Michael Cornwell <cornwell@acm.org>
* Copyright (C) 2008 Oliver Bock <brevilo@users.sourceforge.net>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "config.h"
#define __STDC_FORMAT_MACROS 1 // enable PRI* for C++
// unconditionally included files
#include <inttypes.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h> // umask
#include <signal.h>
#include <fcntl.h>
#include <string.h>
#include <syslog.h>
#include <stdarg.h>
#include <stdlib.h>
#include <errno.h>
#include <time.h>
#include <limits.h>
#include <getopt.h>
#include <algorithm> // std::replace()
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
// conditionally included files
#ifndef _WIN32
#include <sys/wait.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef _WIN32
#include "os_win32/popen.h" // popen_as_rstr_user(), pclose()
#ifdef _MSC_VER
#pragma warning(disable:4761) // "conversion supplied"
typedef unsigned short mode_t;
typedef int pid_t;
#endif
#include <io.h> // umask()
#include <process.h> // getpid()
#endif // _WIN32
#ifdef __CYGWIN__
#include <io.h> // setmode()
#endif // __CYGWIN__
#ifdef HAVE_LIBCAP_NG
#include <cap-ng.h>
#endif // LIBCAP_NG
#ifdef HAVE_LIBSYSTEMD
#include <systemd/sd-daemon.h>
#endif // HAVE_LIBSYSTEMD
// locally included files
#include "atacmds.h"
#include "dev_interface.h"
#include "knowndrives.h"
#include "scsicmds.h"
#include "nvmecmds.h"
#include "utility.h"
#ifdef HAVE_POSIX_API
#include "popen_as_ugid.h"
#endif
#ifdef _WIN32
// fork()/signal()/initd simulation for native Windows
#include "os_win32/daemon_win32.h" // daemon_main/detach/signal()
#define strsignal daemon_strsignal
#define sleep daemon_sleep
// SIGQUIT does not exist, CONTROL-Break signals SIGBREAK.
#define SIGQUIT SIGBREAK
#define SIGQUIT_KEYNAME "CONTROL-Break"
#else // _WIN32
#define SIGQUIT_KEYNAME "CONTROL-\\"
#endif // _WIN32
// Version identification string of this source file: the "$Id$" keyword
// literal concatenated with CONFIG_H_CVSID (a string literal macro defined
// outside this file, presumably by config.h - TODO confirm).
const char * smartd_cpp_cvsid = "$Id$"
CONFIG_H_CVSID;
extern "C" {
  // Pointer to a signal handler function (C linkage).
  typedef void (*signal_handler_type)(int);
}

// Install 'handler' for signal 'sig', but leave the signal alone if its
// current disposition is SIG_IGN. Which API is used depends on the platform.
static void set_signal_if_not_ignored(int sig, signal_handler_type handler)
{
#if defined(_WIN32)
  // signal() emulation
  daemon_signal(sig, handler);

#elif defined(HAVE_SIGACTION)
  // SVr4, POSIX.1-2001, POSIX.1-2008
  // Query the current disposition first
  struct sigaction act;
  act.sa_handler = SIG_DFL;
  sigaction(sig, (struct sigaction *)0, &act);

  if (act.sa_handler != SIG_IGN) {
    memset(&act, 0, sizeof(act));
    act.sa_handler = handler;
    act.sa_flags = SA_RESTART; // BSD signal() semantics
    sigaction(sig, &act, (struct sigaction *)0);
  }

#elif defined(HAVE_SIGSET)
  // SVr4, POSIX.1-2001, obsoleted in POSIX.1-2008
  // Install first, then restore SIG_IGN if that was the previous setting
  const bool was_ignored = (sigset(sig, handler) == SIG_IGN);
  if (was_ignored)
    sigset(sig, SIG_IGN);

#else
  // POSIX.1-2001, POSIX.1-2008, C89, C99, undefined semantics.
  // Important: BSD semantics is required. Traditional signal()
  // resets the handler to SIG_DFL after the first signal is caught.
  const bool was_ignored = (signal(sig, handler) == SIG_IGN);
  if (was_ignored)
    signal(sig, SIG_IGN);
#endif
}
using namespace smartmontools;
// smartd exit codes
// Status values for the failure conditions described on each line.
// Note that the numbering is not contiguous (7, 9 and 11-15 are unused here).
#define EXIT_BADCMD 1 // command line did not parse
#define EXIT_BADCONF 2 // syntax error in config file
#define EXIT_STARTUP 3 // problem forking daemon
#define EXIT_PID 4 // problem creating pid file
#define EXIT_NOCONF 5 // config file does not exist
#define EXIT_READCONF 6 // config file exists but cannot be read
#define EXIT_NOMEM 8 // out of memory
#define EXIT_BADCODE 10 // internal error - should NEVER happen
#define EXIT_BADDEV 16 // we can't monitor this device
#define EXIT_NODEV 17 // no devices to monitor
#define EXIT_SIGNAL 254 // abort on signal
// File-scope settings. Most of them are filled in from the command line
// (see the individual comments).

// command-line: 1=debug mode, 2=print presets
static unsigned char debugmode = 0;
// command-line: how long to sleep between checks
// NOTE(review): unit is presumably seconds (1800 = 30 minutes) - confirm.
static constexpr int default_checktime = 1800;
static int checktime = default_checktime;
static int checktime_min = 0; // Minimum individual check time, 0 if none
// command-line: name of PID file (empty for no pid file)
static std::string pid_file;
// command-line: path prefix of persistent state file, empty if no persistence.
// The build may provide a default via SMARTMONTOOLS_SAVESTATES.
static std::string state_path_prefix
#ifdef SMARTMONTOOLS_SAVESTATES
= SMARTMONTOOLS_SAVESTATES
#endif
;
// command-line: path prefix of attribute log file, empty if no logs.
// The build may provide a default via SMARTMONTOOLS_ATTRIBUTELOG.
static std::string attrlog_path_prefix
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
= SMARTMONTOOLS_ATTRIBUTELOG
#endif
;
// configuration file name
static const char * configfile;
// configuration file "name" if read from stdin
static const char * const configfile_stdin = "<stdin>";
// path of alternate configuration file
static std::string configfile_alt;
// warning script file
static std::string warning_script;
#ifdef HAVE_POSIX_API
// run warning script as non-privileged user
static bool warn_as_user;
static uid_t warn_uid;
static gid_t warn_gid;
static std::string warn_uname, warn_gname;
#elif defined(_WIN32)
// run warning script as restricted user
static bool warn_as_restr_user;
#endif
// command-line: when should we exit?
enum quit_t {
QUIT_NODEV, QUIT_NODEVSTARTUP, QUIT_NEVER, QUIT_ONECHECK,
QUIT_SHOWTESTS, QUIT_ERRORS
};
// Default exit policy is QUIT_NODEV.
static quit_t quit = QUIT_NODEV;
// NOTE(review): meaning of quit_nodev0 is not visible in this part of the
// file - presumably a variant of the QUIT_NODEV policy; confirm against
// the option parser.
static bool quit_nodev0 = false;
// command-line; this is the default syslog(3) log facility to use.
static int facility=LOG_DAEMON;
#ifndef _WIN32
// command-line: fork into background?
static bool do_fork=true;
#endif
// TODO: This smartctl only variable is also used in some os_*.cpp
unsigned char failuretest_permissive = 0;
// Flags written by the signal handlers in this file (USR1handler,
// USR2handler, HUPhandler, sighandler). All start at 0.

// set to one if we catch a USR1 (check devices now)
static volatile int caughtsigUSR1=0;
#ifdef _WIN32
// set to one if we catch a USR2 (toggle debug mode)
static volatile int caughtsigUSR2=0;
#endif
// set to one if we catch a HUP (reload config file). In debug mode,
// set to two, if we catch INT (also reload config file).
static volatile int caughtsigHUP=0;
// set to signal value if we catch INT, QUIT, or TERM
static volatile int caughtsigEXIT=0;
// This function prints either to stdout or to the syslog as needed.
// Forward declaration: 'fmt' is a printf-style format string (argument 2),
// its arguments start at position 3 (checked via the format attribute macro).
static void PrintOut(int priority, const char *fmt, ...)
__attribute_format_printf(2, 3);
#ifdef HAVE_LIBSYSTEMD
// systemd notify support

// True once notify_init() has found NOTIFY_SOCKET in the environment.
static bool notify_enabled = false;
// True once notify_wait() has issued its first status message.
static bool notify_ready = false;

// Enable notify support if the environment variable NOTIFY_SOCKET exists.
static inline void notify_init()
{
  if (getenv("NOTIFY_SOCKET"))
    notify_enabled = true;
}
// Check that the notify setup is usable after the options are known.
// Returns false (after logging a critical message) if notify support is
// enabled while smartd is configured to fork, true otherwise.
static inline bool notify_post_init()
{
  if (notify_enabled && do_fork) {
    PrintOut(LOG_CRIT, "Option -n (--no-fork) is required if 'Type=notify' is set.\n");
    return false;
  }
  return true;
}
static inline void notify_extend_timeout()
{
if (!notify_enabled)
return;
if (notify_ready)
return;
const char * notify = "EXTEND_TIMEOUT_USEC=20000000"; // typical drive spinup time is 20s tops
if (debugmode) {
pout("sd_notify(0, \"%s\")\n", notify);
return;
}
sd_notify(0, notify);
}
// Send status text 'msg', prefixed with "READY=1" if 'ready' is true.
// Does nothing if notify support is disabled. In debug mode the call is
// only printed (with the newline shown in escaped form).
static void notify_msg(const char * msg, bool ready = false)
{
  if (!notify_enabled)
    return;

  if (!debugmode) {
    sd_notifyf(0, "%sSTATUS=%s", (ready ? "READY=1\n" : ""), msg);
    return;
  }

  pout("sd_notify(0, \"%sSTATUS=%s\")\n", (ready ? "READY=1\\n" : ""), msg);
}
// Report the start of a check cycle for 'numdev' devices as status message.
static void notify_check(int numdev)
{
  if (!notify_enabled)
    return;

  const char * plural = (numdev == 1 ? "" : "s");
  char text[32];
  snprintf(text, sizeof(text), "Checking %d device%s ...", numdev, plural);
  notify_msg(text);
}
// Report the local time of the next check cycle as status message.
// The first call also signals READY=1 and sets notify_ready.
static void notify_wait(time_t wakeuptime, int numdev)
{
  if (!notify_enabled)
    return;

  // Format wakeup time as HH:MM:SS
  struct tm tmbuf;
  char when[16] = "";
  strftime(when, sizeof(when), "%H:%M:%S", time_to_tm_local(&tmbuf, wakeuptime));

  char text[64];
  snprintf(text, sizeof(text), "Next check of %d device%s will start at %s",
           numdev, (numdev == 1 ? "" : "s"), when);

  const bool first_call = !notify_ready;
  notify_msg(text, first_call); // first call notifies READY=1
  notify_ready = true;
}
// Report a status message which describes exit status 'status'.
static void notify_exit(int status)
{
  if (!notify_enabled)
    return;

  // Generic text for all codes without a dedicated message
  const char * text = "Error (see SYSLOG)";

  if (status == 0)
    text = "Exiting ...";
  else if (status == EXIT_BADCMD)
    text = "Error in command line (see SYSLOG)";
  else if (status == EXIT_BADCONF || status == EXIT_NOCONF || status == EXIT_READCONF)
    text = "Error in config file (see SYSLOG)";
  else if (status == EXIT_BADDEV)
    text = "Unable to register a device (see SYSLOG)";
  else if (status == EXIT_NODEV)
    text = "No devices to monitor";

  notify_msg(text);
}
#else // HAVE_LIBSYSTEMD
// No systemd notify support
// On Linux, refuse to continue if NOTIFY_SOCKET is set in the environment:
// log a critical message and return false. Returns true in all other cases.
static inline bool notify_post_init()
{
#ifdef __linux__
if (getenv("NOTIFY_SOCKET")) {
PrintOut(LOG_CRIT, "This version of smartd was build without 'Type=notify' support.\n");
return false;
}
#endif
return true;
}
// The remaining notify functions are empty in this configuration so that
// callers do not need their own conditional compilation.
static inline void notify_init() { }
static inline void notify_extend_timeout() { }
static inline void notify_msg(const char *) { }
static inline void notify_check(int) { }
static inline void notify_wait(time_t, int) { }
static inline void notify_exit(int) { }
#endif // HAVE_LIBSYSTEMD
// Attribute monitoring flags.
// See monitor_attr_flags below.
// Each value is a distinct bit, so several flags can be OR-ed together
// into one 'unsigned char' entry of class attribute_flags.
enum {
MONITOR_IGN_FAILUSE = 0x01,
MONITOR_IGNORE = 0x02,
MONITOR_RAW_PRINT = 0x04,
MONITOR_RAW = 0x08,
MONITOR_AS_CRIT = 0x10,
MONITOR_RAW_AS_CRIT = 0x20,
};
// Flag byte for each attribute ID.
// Only IDs 1..255 are usable, ID 0 and out of range IDs never hold flags.
class attribute_flags
{
public:
  // Return true if 'id' is usable and any bit of 'flag' is set for it.
  bool is_set(int id, unsigned char flag) const
  {
    if (!usable_id(id))
      return false;
    return ((m_flags[id] & flag) != 0);
  }

  // Add the bits of 'flags' to the entry of 'id'.
  // Requests for unusable IDs are silently ignored.
  void set(int id, unsigned char flags)
  {
    if (!usable_id(id))
      return;
    m_flags[id] |= flags;
  }

private:
  // True if 'id' is a valid index other than 0.
  static bool usable_id(int id)
    { return (id > 0 && id < (int)sizeof(m_flags)); }

  unsigned char m_flags[256]{};
};
/// Configuration data for a device. Read from smartd.conf.
/// Supports copy & assignment and is compatible with STL containers.
/// All scalar members are value-initialized (0 / false) by their default
/// member initializers; the string members start empty.
struct dev_config
{
int lineno{}; // Line number of entry in file
std::string name; // Device name (with optional extra info)
std::string dev_name; // Device name (plain, for SMARTD_DEVICE variable)
std::string dev_type; // Device type argument from -d directive, empty if none
std::string dev_idinfo; // Device identify info for warning emails
std::string state_file; // Path of the persistent state file, empty if none
std::string attrlog_file; // Path of the persistent attrlog file, empty if none
int checktime{}; // Individual check interval, 0 if none
bool ignore{}; // Ignore this entry
bool id_is_unique{}; // True if dev_idinfo is unique (includes S/N or WWN)
bool smartcheck{}; // Check SMART status
bool usagefailed{}; // Check for failed Usage Attributes
bool prefail{}; // Track changes in Prefail Attributes
bool usage{}; // Track changes in Usage Attributes
bool selftest{}; // Monitor number of selftest errors
bool errorlog{}; // Monitor number of ATA errors
bool xerrorlog{}; // Monitor number of ATA errors (Extended Comprehensive error log)
bool offlinests{}; // Monitor changes in offline data collection status
bool offlinests_ns{}; // Disable auto standby if in progress
bool selfteststs{}; // Monitor changes in self-test execution status
bool selfteststs_ns{}; // Disable auto standby if in progress
bool permissive{}; // Ignore failed SMART commands
char autosave{}; // 1=disable, 2=enable Autosave Attributes
char autoofflinetest{}; // 1=disable, 2=enable Auto Offline Test
firmwarebug_defs firmwarebugs; // -F directives from drivedb or smartd.conf
bool ignorepresets{}; // Ignore database of -v options
bool showpresets{}; // Show database entry for this device
bool removable{}; // Device may disappear (not be present)
char powermode{}; // skip check, if disk in idle or standby mode
bool powerquiet{}; // skip powermode 'skipping checks' message
int powerskipmax{}; // how many times can be check skipped
unsigned char tempdiff{}; // Track Temperature changes >= this limit
unsigned char tempinfo{}, tempcrit{}; // Track Temperatures >= these limits as LOG_INFO, LOG_CRIT+mail
regular_expression test_regex; // Regex for scheduled testing
unsigned test_offset_factor{}; // Factor for staggering of scheduled tests
// Configuration of email warning messages
// (MailWarning() returns early if both emailcmdline and emailaddress are empty)
std::string emailcmdline; // script to execute, empty if no messages
std::string emailaddress; // email address, or empty
unsigned char emailfreq{}; // Emails once (1) daily (2) diminishing (3)
bool emailtest{}; // Send test email?
// ATA ONLY
int dev_rpm{}; // rotation rate, 0 = unknown, 1 = SSD, >1 = HDD
int set_aam{}; // disable(-1), enable(1..255->0..254) Automatic Acoustic Management
int set_apm{}; // disable(-1), enable(2..255->1..254) Advanced Power Management
int set_lookahead{}; // disable(-1), enable(1) read look-ahead
int set_standby{}; // set(1..255->0..254) standby timer
bool set_security_freeze{}; // Freeze ATA security
int set_wcache{}; // disable(-1), enable(1) write cache
int set_dsn{}; // disable(0x2), enable(0x1) DSN
bool sct_erc_set{}; // set SCT ERC to:
unsigned short sct_erc_readtime{}; // ERC read time (deciseconds)
unsigned short sct_erc_writetime{}; // ERC write time (deciseconds)
unsigned char curr_pending_id{}; // ID of current pending sector count, 0 if none
unsigned char offl_pending_id{}; // ID of offline uncorrectable sector count, 0 if none
bool curr_pending_incr{}, offl_pending_incr{}; // True if current/offline pending values increase
bool curr_pending_set{}, offl_pending_set{}; // True if '-C', '-U' set in smartd.conf
attribute_flags monitor_attr_flags; // MONITOR_* flags for each attribute
ata_vendor_attr_defs attribute_defs; // -v options
};
// Number of allowed mail message types
static constexpr int SMARTD_NMAIL = 13;
// Type for '-M test' mails (state not persistent)
static constexpr int MAILTYPE_TEST = 0;
// TODO: Add const or enum for all mail types.

// Bookkeeping for one mail message type. All members start at 0.
struct mailinfo {
  int logged = 0;       // number of times an email has been sent
  time_t firstsent = 0; // time first email was sent, as defined by time(2)
  time_t lastsent = 0;  // time last email was sent, as defined by time(2)
};
/// Persistent state data for a device.
/// This is the part of the device state which read_dev_state() and
/// write_dev_state() transfer from/to the state file. All scalar members
/// are value-initialized to 0.
struct persistent_dev_state
{
unsigned char tempmin{}, tempmax{}; // Min/Max Temperatures
unsigned char selflogcount{}; // total number of self-test errors
unsigned short selfloghour{}; // lifetime hours of last self-test error
time_t scheduled_test_next_check{}; // Time of next check for scheduled self-tests
uint64_t selective_test_last_start{}; // Start LBA of last scheduled selective self-test
uint64_t selective_test_last_end{}; // End LBA of last scheduled selective self-test
mailinfo maillog[SMARTD_NMAIL]; // log info on when mail sent
// ATA ONLY
int ataerrorcount{}; // Total number of ATA errors
// Persistent part of ata_smart_values:
// (one entry per slot of the SMART attribute table, id 0 marks an unused slot)
struct ata_attribute {
unsigned char id{};
unsigned char val{};
unsigned char worst{}; // Byte needed for 'raw64' attribute only.
uint64_t raw{};
unsigned char resvd{};
};
ata_attribute ata_attributes[NUMBER_ATA_SMART_ATTRIBUTES];
// SCSI ONLY
// One entry per error counter page; write_dev_attrlog() labels the
// three entries "read", "write" and "verify".
struct scsi_error_counter_t {
struct scsiErrorCounter errCounter{};
unsigned char found{};
};
scsi_error_counter_t scsi_error_counters[3];
struct scsi_nonmedium_error_t {
struct scsiNonMediumError nme{};
unsigned char found{};
};
scsi_nonmedium_error_t scsi_nonmedium_error;
// NVMe only
uint64_t nvme_err_log_entries{};
};
/// Non-persistent state data for a device.
/// All members are value-initialized (0 / false) by their default member
/// initializers.
struct temp_dev_state
{
bool must_write{}; // true if persistent part should be written
bool skip{}; // skip during next check cycle
time_t wakeuptime{}; // next wakeup time, 0 if unknown or global
bool not_cap_offline{}; // true == not capable of offline testing
// NOTE(review): the next four flags presumably have the same meaning for
// conveyance, short, long and selective self-tests - confirm.
bool not_cap_conveyance{};
bool not_cap_short{};
bool not_cap_long{};
bool not_cap_selective{};
unsigned char temperature{}; // last recorded Temperature (in Celsius)
time_t tempmin_delay{}; // time where Min Temperature tracking will start
bool removed{}; // true if open() failed for removable device
bool powermodefail{}; // true if power mode check failed
int powerskipcnt{}; // Number of checks skipped due to idle or standby mode
int lastpowermodeskipped{}; // the last power mode that was skipped
bool attrlog_dirty{}; // true if persistent part has new attr values that
// need to be written to attrlog
// SCSI ONLY
// TODO: change to bool
unsigned char SmartPageSupported{}; // has log sense IE page (0x2f)
unsigned char TempPageSupported{}; // has log sense temperature page (0xd)
unsigned char ReadECounterPageSupported{};
unsigned char WriteECounterPageSupported{};
unsigned char VerifyECounterPageSupported{};
unsigned char NonMediumErrorPageSupported{};
unsigned char SuppressReport{}; // minimize nuisance reports
unsigned char modese_len{}; // mode sense/select cmd len: 0 (don't
// know yet) 6 or 10
// ATA ONLY
uint64_t num_sectors{}; // Number of sectors
ata_smart_values smartval{}; // SMART data
ata_smart_thresholds_pvt smartthres{}; // SMART thresholds
bool offline_started{}; // true if offline data collection was started
bool selftest_started{}; // true if self-test was started
};
/// Runtime state data for a device.
/// Combines the persistent and the non-persistent part of the state.
struct dev_state
: public persistent_dev_state,
  public temp_dev_state
{
  /// Copy ATA attributes from smartval to ata_attributes.
  void update_persistent_state();
  /// Copy ATA attributes from ata_attributes to smartval.
  void update_temp_state();
};

/// Container for configuration info for each device.
using dev_config_vector = std::vector<dev_config>;

/// Container for state info for each device.
using dev_state_vector = std::vector<dev_state>;
// Copy ATA attributes to persistent state.
void dev_state::update_persistent_state()
{
for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
const ata_smart_attribute & ta = smartval.vendor_attributes[i];
ata_attribute & pa = ata_attributes[i];
pa.id = ta.id;
if (ta.id == 0) {
pa.val = pa.worst = 0; pa.raw = 0;
continue;
}
pa.val = ta.current;
pa.worst = ta.worst;
pa.raw = ta.raw[0]
| ( ta.raw[1] << 8)
| ( ta.raw[2] << 16)
| ((uint64_t)ta.raw[3] << 24)
| ((uint64_t)ta.raw[4] << 32)
| ((uint64_t)ta.raw[5] << 40);
pa.resvd = ta.reserv;
}
}
// Copy ATA from persistent to temp state.
void dev_state::update_temp_state()
{
for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
const ata_attribute & pa = ata_attributes[i];
ata_smart_attribute & ta = smartval.vendor_attributes[i];
ta.id = pa.id;
if (pa.id == 0) {
ta.current = ta.worst = 0;
memset(ta.raw, 0, sizeof(ta.raw));
continue;
}
ta.current = pa.val;
ta.worst = pa.worst;
ta.raw[0] = (unsigned char) pa.raw;
ta.raw[1] = (unsigned char)(pa.raw >> 8);
ta.raw[2] = (unsigned char)(pa.raw >> 16);
ta.raw[3] = (unsigned char)(pa.raw >> 24);
ta.raw[4] = (unsigned char)(pa.raw >> 32);
ta.raw[5] = (unsigned char)(pa.raw >> 40);
ta.reserv = pa.resvd;
}
}
// Parse a line from a state file.
// A valid line has the form "KEY = NUMBER" where KEY is one of the
// alternatives of the regular expression below. NUMBER is converted with
// strtoull() and stored in the member of 'state' which belongs to KEY.
// Values are cast to the member type without a range check.
// Returns false if the line does not match or if a mail or attribute
// index is out of range, true otherwise. Lines for mail type
// MAILTYPE_TEST are accepted but not stored.
static bool parse_dev_state_line(const char * line, persistent_dev_state & state)
{
// The trailing comments give the number of each capture group.
// The code below depends on these numbers.
static const regular_expression regex(
"^ *"
"((temperature-min)" // (1 (2)
"|(temperature-max)" // (3)
"|(self-test-errors)" // (4)
"|(self-test-last-err-hour)" // (5)
"|(scheduled-test-next-check)" // (6)
"|(selective-test-last-start)" // (7)
"|(selective-test-last-end)" // (8)
"|(ata-error-count)" // (9)
"|(mail\\.([0-9]+)\\." // (10 (11)
"((count)" // (12 (13)
"|(first-sent-time)" // (14)
"|(last-sent-time)" // (15)
")" // 12)
")" // 10)
"|(ata-smart-attribute\\.([0-9]+)\\." // (16 (17)
"((id)" // (18 (19)
"|(val)" // (20)
"|(worst)" // (21)
"|(raw)" // (22)
"|(resvd)" // (23)
")" // 18)
")" // 16)
"|(nvme-err-log-entries)" // (24)
")" // 1)
" *= *([0-9]+)[ \n]*$" // (25)
);
// Entry 0 is the whole match, entries 1..25 are the capture groups
const int nmatch = 1+25;
regular_expression::match_range match[nmatch];
if (!regex.execute(line, nmatch, match))
return false;
// The last group (25) is the numeric value
if (match[nmatch-1].rm_so < 0)
return false;
uint64_t val = strtoull(line + match[nmatch-1].rm_so, (char **)0, 10);
// 'm' walks through the capture group numbers in the order of the
// alternatives. Each '++m' below selects the next group, so the order
// of the 'else if' chain must match the regular expression.
int m = 1;
if (match[++m].rm_so >= 0)
state.tempmin = (unsigned char)val;
else if (match[++m].rm_so >= 0)
state.tempmax = (unsigned char)val;
else if (match[++m].rm_so >= 0)
state.selflogcount = (unsigned char)val;
else if (match[++m].rm_so >= 0)
state.selfloghour = (unsigned short)val;
else if (match[++m].rm_so >= 0)
state.scheduled_test_next_check = (time_t)val;
else if (match[++m].rm_so >= 0)
state.selective_test_last_start = val;
else if (match[++m].rm_so >= 0)
state.selective_test_last_end = val;
else if (match[++m].rm_so >= 0)
state.ataerrorcount = (int)val;
// m is 9 here: skip group 10, group 11 is the mail type index
else if (match[m+=2].rm_so >= 0) {
int i = atoi(line+match[m].rm_so);
if (!(0 <= i && i < SMARTD_NMAIL))
return false;
if (i == MAILTYPE_TEST) // Don't suppress test mails
return true;
// Skip group 12, groups 13..15 select the mailinfo member
if (match[m+=2].rm_so >= 0)
state.maillog[i].logged = (int)val;
else if (match[++m].rm_so >= 0)
state.maillog[i].firstsent = (time_t)val;
else if (match[++m].rm_so >= 0)
state.maillog[i].lastsent = (time_t)val;
else
return false;
}
// m is 11 here (mail alternative did not match): skip groups 12..16,
// group 17 is the attribute table index
else if (match[m+=5+1].rm_so >= 0) {
int i = atoi(line+match[m].rm_so);
if (!(0 <= i && i < NUMBER_ATA_SMART_ATTRIBUTES))
return false;
// Skip group 18, groups 19..23 select the ata_attribute member
if (match[m+=2].rm_so >= 0)
state.ata_attributes[i].id = (unsigned char)val;
else if (match[++m].rm_so >= 0)
state.ata_attributes[i].val = (unsigned char)val;
else if (match[++m].rm_so >= 0)
state.ata_attributes[i].worst = (unsigned char)val;
else if (match[++m].rm_so >= 0)
state.ata_attributes[i].raw = val;
else if (match[++m].rm_so >= 0)
state.ata_attributes[i].resvd = (unsigned char)val;
else
return false;
}
// m is 17 here: group 24 is "nvme-err-log-entries"
else if (match[m+7].rm_so >= 0)
state.nvme_err_log_entries = val;
else
return false;
return true;
}
// Read a state file.
// Parses the file 'path' line by line into a fresh persistent_dev_state
// and assigns it to 'state' on success, so values missing in the file
// end up as 0. Blank lines and lines starting with '#' are skipped.
// Returns false if the file cannot be opened (a message is printed unless
// the file simply does not exist) or if it contains invalid lines only.
// If at least one line is valid, invalid lines are reported and ignored.
static bool read_dev_state(const char * path, persistent_dev_state & state)
{
  stdio_file f(path, "r");
  if (!f) {
    if (errno != ENOENT)
      pout("Cannot read state file \"%s\"\n", path);
    return false;
  }
#ifdef __CYGWIN__
  setmode(fileno(f), O_TEXT); // Allow files with \r\n
#endif

  persistent_dev_state new_state;
  int good = 0, bad = 0;
  char line[256];
  while (fgets(line, sizeof(line), f)) {
    // fgets() keeps the line terminator. It must be part of the skipped
    // characters, otherwise a blank line would never be detected as empty
    // and would be counted as an invalid line.
    const char * s = line + strspn(line, " \t\r\n");
    if (!*s || *s == '#')
      continue;
    if (!parse_dev_state_line(line, new_state))
      bad++;
    else
      good++;
  }

  if (bad) {
    if (!good) {
      pout("%s: format error\n", path);
      return false;
    }
    pout("%s: %d invalid line(s) ignored\n", path, bad);
  }

  // This sets the values missing in the file to 0.
  state = new_state;
  return true;
}
// Write the line "NAME = VAL" to 'f'. Nothing is written if 'val' is 0.
static void write_dev_state_line(FILE * f, const char * name, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s = %" PRIu64 "\n", name, val);
}

// Write the line "NAME1.ID.NAME2 = VAL" to 'f'. Nothing is written if
// 'val' is 0.
static void write_dev_state_line(FILE * f, const char * name1, int id, const char * name2, uint64_t val)
{
  if (val == 0)
    return;
  fprintf(f, "%s.%d.%s = %" PRIu64 "\n", name1, id, name2, val);
}
// Write a state file
// The previous file 'path' (if any) is kept as backup "path~", then all
// nonzero values of 'state' are written to a new file 'path'.
// Mail info of type MAILTYPE_TEST and of types which were never sent, and
// unused ATA attribute slots (id 0) are omitted.
// Returns false (after printing a message) if the file cannot be created
// or if writing fails, true otherwise.
static bool write_dev_state(const char * path, const persistent_dev_state & state)
{
  // Rename old "file" to "file~"
  // (best effort: both calls may fail, e.g. if there is no old file)
  std::string pathbak = path; pathbak += '~';
  unlink(pathbak.c_str());
  rename(path, pathbak.c_str());

  stdio_file f(path, "w");
  if (!f) {
    pout("Cannot create state file \"%s\"\n", path);
    return false;
  }

  fprintf(f, "# smartd state file\n");
  write_dev_state_line(f, "temperature-min", state.tempmin);
  write_dev_state_line(f, "temperature-max", state.tempmax);
  write_dev_state_line(f, "self-test-errors", state.selflogcount);
  write_dev_state_line(f, "self-test-last-err-hour", state.selfloghour);
  write_dev_state_line(f, "scheduled-test-next-check", state.scheduled_test_next_check);
  write_dev_state_line(f, "selective-test-last-start", state.selective_test_last_start);
  write_dev_state_line(f, "selective-test-last-end", state.selective_test_last_end);

  for (int i = 0; i < SMARTD_NMAIL; i++) {
    if (i == MAILTYPE_TEST) // Don't suppress test mails
      continue;
    const mailinfo & mi = state.maillog[i];
    if (!mi.logged)
      continue;
    write_dev_state_line(f, "mail", i, "count", mi.logged);
    write_dev_state_line(f, "mail", i, "first-sent-time", mi.firstsent);
    write_dev_state_line(f, "mail", i, "last-sent-time", mi.lastsent);
  }

  // ATA ONLY
  write_dev_state_line(f, "ata-error-count", state.ataerrorcount);

  for (int i = 0; i < NUMBER_ATA_SMART_ATTRIBUTES; i++) {
    const auto & pa = state.ata_attributes[i];
    if (!pa.id)
      continue;
    write_dev_state_line(f, "ata-smart-attribute", i, "id", pa.id);
    write_dev_state_line(f, "ata-smart-attribute", i, "val", pa.val);
    write_dev_state_line(f, "ata-smart-attribute", i, "worst", pa.worst);
    write_dev_state_line(f, "ata-smart-attribute", i, "raw", pa.raw);
    write_dev_state_line(f, "ata-smart-attribute", i, "resvd", pa.resvd);
  }

  // NVMe only
  write_dev_state_line(f, "nvme-err-log-entries", state.nvme_err_log_entries);

  // Flush explicitly and check the stream state so that write errors
  // (e.g. disk full) are reported to the caller instead of going unnoticed.
  FILE * fp = f;
  if (fflush(fp) != 0 || ferror(fp)) {
    pout("Cannot write state file \"%s\"\n", path);
    return false;
  }
  return true;
}
// Write to the attrlog file
static bool write_dev_attrlog(const char * path, const dev_state & state)
{
stdio_file f(path, "a");
if (!f) {
pout("Cannot create attribute log file \"%s\"\n", path);
return false;
}
time_t now = time(nullptr);
struct tm tmbuf, * tms = time_to_tm_local(&tmbuf, now);
fprintf(f, "%d-%02d-%02d %02d:%02d:%02d;",
1900+tms->tm_year, 1+tms->tm_mon, tms->tm_mday,
tms->tm_hour, tms->tm_min, tms->tm_sec);
// ATA ONLY
for (const auto & pa : state.ata_attributes) {
if (!pa.id)
continue;
fprintf(f, "\t%d;%d;%" PRIu64 ";", pa.id, pa.val, pa.raw);
}
// SCSI ONLY
const struct scsiErrorCounter * ecp;
const char * pageNames[3] = {"read", "write", "verify"};
for (int k = 0; k < 3; ++k) {
if ( !state.scsi_error_counters[k].found ) continue;
ecp = &state.scsi_error_counters[k].errCounter;
fprintf(f, "\t%s-corr-by-ecc-fast;%" PRIu64 ";"
"\t%s-corr-by-ecc-delayed;%" PRIu64 ";"
"\t%s-corr-by-retry;%" PRIu64 ";"
"\t%s-total-err-corrected;%" PRIu64 ";"
"\t%s-corr-algorithm-invocations;%" PRIu64 ";"
"\t%s-gb-processed;%.3f;"
"\t%s-total-unc-errors;%" PRIu64 ";",
pageNames[k], ecp->counter[0],
pageNames[k], ecp->counter[1],
pageNames[k], ecp->counter[2],
pageNames[k], ecp->counter[3],
pageNames[k], ecp->counter[4],
pageNames[k], (ecp->counter[5] / 1000000000.0),
pageNames[k], ecp->counter[6]);
}
if(state.scsi_nonmedium_error.found && state.scsi_nonmedium_error.nme.gotPC0) {
fprintf(f, "\tnon-medium-errors;%" PRIu64 ";", state.scsi_nonmedium_error.nme.counterPC0);
}
// write SCSI current temperature if it is monitored
if (state.temperature)
fprintf(f, "\ttemperature;%d;", state.temperature);
// end of line
fprintf(f, "\n");
return true;
}
// Write all state files. If write_always is false, don't write
// unless must_write is set.
static void write_all_dev_states(const dev_config_vector & configs,
dev_state_vector & states,
bool write_always = true)
{
for (unsigned i = 0; i < states.size(); i++) {
const dev_config & cfg = configs.at(i);
if (cfg.state_file.empty())
continue;
dev_state & state = states[i];
if (!write_always && !state.must_write)
continue;
if (!write_dev_state(cfg.state_file.c_str(), state))
continue;
state.must_write = false;
if (write_always || debugmode)
PrintOut(LOG_INFO, "Device: %s, state written to %s\n",
cfg.name.c_str(), cfg.state_file.c_str());
}
}
// Write to all attrlog files
static void write_all_dev_attrlogs(const dev_config_vector & configs,
dev_state_vector & states)
{
for (unsigned i = 0; i < states.size(); i++) {
const dev_config & cfg = configs.at(i);
if (cfg.attrlog_file.empty())
continue;
dev_state & state = states[i];
if (state.attrlog_dirty) {
write_dev_attrlog(cfg.attrlog_file.c_str(), state);
state.attrlog_dirty = false;
}
}
}
extern "C" { // signal handlers require C-linkage

// Note if we catch a SIGUSR1
static void USR1handler(int sig)
{
  if (sig == SIGUSR1)
    caughtsigUSR1 = 1;
}

#ifdef _WIN32
// Note if we catch a SIGUSR2
static void USR2handler(int sig)
{
  if (sig == SIGUSR2)
    caughtsigUSR2 = 1;
}
#endif

// Note if we catch a HUP (or INT in debug mode)
// The flag is set to 1 for SIGHUP and to 2 for any other signal.
static void HUPhandler(int sig)
{
  caughtsigHUP = (sig == SIGHUP ? 1 : 2);
}

// signal handler for TERM, QUIT, and INT (if not in debug mode)
// Only the first signal caught is recorded.
static void sighandler(int sig)
{
  if (caughtsigEXIT == 0)
    caughtsigEXIT = sig;
}

} // extern "C"
#ifdef HAVE_LIBCAP_NG
// capabilities(7) support
static int capabilities_mode /* = 0 */; // 1=enabled, 2=mail
static void capabilities_drop_now()
{
if (!capabilities_mode)
return;
capng_clear(CAPNG_SELECT_BOTH);
capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
CAP_SYS_ADMIN, CAP_MKNOD, CAP_SYS_RAWIO, -1);
if (warn_as_user && (warn_uid || warn_gid)) {
// For popen_as_ugid()
capng_updatev(CAPNG_ADD, (capng_type_t)(CAPNG_EFFECTIVE|CAPNG_PERMITTED),
CAP_SETGID, CAP_SETUID, -1);
}
if (capabilities_mode > 1) {
// For exim MTA
capng_updatev(CAPNG_ADD, CAPNG_BOUNDING_SET,
CAP_SETGID, CAP_SETUID, CAP_CHOWN, CAP_FOWNER, CAP_DAC_OVERRIDE, -1);
}
capng_apply(CAPNG_SELECT_BOTH);
}
static void capabilities_log_error_hint()
{
if (!capabilities_mode)
return;
PrintOut(LOG_INFO, "If mail notification does not work with '--capabilities%s\n",
(capabilities_mode == 1 ? "', try '--capabilities=mail'"
: "=mail', please inform " PACKAGE_BUGREPORT));
}
#else // HAVE_LIBCAP_NG
// No capabilities(7) support
// Empty replacements so that callers do not need conditional compilation.
static inline void capabilities_drop_now() { }
static inline void capabilities_log_error_hint() { }
#endif // HAVE_LIBCAP_NG
// a replacement for setenv() which is not available on all platforms.
// Note that the string passed to putenv must not be freed or made
// invalid, since a pointer to it is kept by putenv(). This means that
// it must either be a static buffer or allocated off the heap. The
// string can be freed if the environment variable is redefined via
// another call to putenv(). There is no portable way to unset a variable
// with putenv(). So we manage the buffer in a static object.
// Using setenv() if available is not considered because some
// implementations may produce memory leaks.
class env_buffer
{
public:
  env_buffer() = default;
  env_buffer(const env_buffer &) = delete;
  void operator=(const env_buffer &) = delete;

  // Set environment variable 'name' to 'value'.
  // Each object must always be used with the same 'name'.
  // Throws std::runtime_error if putenv() fails.
  void set(const char * name, const char * value);

private:
  // "NAME=VALUE" string currently referenced by the environment.
  // Intentionally not freed on destruction because the environment
  // still points to it.
  char * m_buf = nullptr;
};

void env_buffer::set(const char * name, const char * value)
{
  // Room for "NAME=VALUE" and the terminating 0
  const size_t size = strlen(name) + 1 + strlen(value) + 1;
  char * newbuf = new char[size];
  snprintf(newbuf, size, "%s=%s", name, value);

  if (putenv(newbuf)) {
    // The environment did not take over the string, so release it here
    delete [] newbuf;
    throw std::runtime_error("putenv() failed");
  }

  // This assumes that the same NAME is passed on each call
  // (the environment no longer references the old string then)
  delete [] m_buf;
  m_buf = newbuf;
}
#define EBUFLEN 1024
static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
__attribute_format_printf(4, 5);
// If either address or executable path is non-null then send and log
// a warning email, or execute executable
// Send a warning (or test) message for device CFG by running the warning script.
//
// WHICH selects the warning type: it indexes state.maillog[] and the
// SMARTD_FAILTYPE name table below, 0 is the email test.
// FMT and the following arguments form a printf()-style message which is
// exported to the script as SMARTD_MESSAGE.
//
// Nothing is done if neither a mail address nor a mail command is configured,
// or if cfg.emailfreq says it is too early for another message of this type:
//   1: only one message, 2: at most one message per day,
//   3: interval doubles with each message sent (1, 2, 4, ... days).
// The script result is only logged; the per-type mail counter is incremented
// whenever the script was attempted.
static void MailWarning(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
{
  // Values for SMARTD_FAILTYPE, indexed by WHICH
  static const char * const whichfail[] = {
    "EmailTest",                  // 0
    "Health",                     // 1
    "Usage",                      // 2
    "SelfTest",                   // 3
    "ErrorCount",                 // 4
    "FailedHealthCheck",          // 5
    "FailedReadSmartData",        // 6
    "FailedReadSmartErrorLog",    // 7
    "FailedReadSmartSelfTestLog", // 8
    "FailedOpenDevice",           // 9
    "CurrentPendingSector",       // 10
    "OfflineUncorrectableSector", // 11
    "Temperature"                 // 12
  };

  // See if user wants us to send mail
  if (cfg.emailaddress.empty() && cfg.emailcmdline.empty())
    return;

  std::string address = cfg.emailaddress;
  const char * executable = cfg.emailcmdline.c_str();

  // checks for sanity
  if (cfg.emailfreq<1 || cfg.emailfreq>3) {
    PrintOut(LOG_CRIT,"internal error in MailWarning(): cfg.mailwarn->emailfreq=%d\n",cfg.emailfreq);
    return;
  }
  if (which<0 || which>=SMARTD_NMAIL || sizeof(whichfail)!=SMARTD_NMAIL*sizeof(char *)) {
    PrintOut(LOG_CRIT,"Contact " PACKAGE_BUGREPORT "; internal error in MailWarning(): which=%d, size=%d\n",
             which, (int)sizeof(whichfail));
    return;
  }

  // which type of mail are we sending?
  // The pointer is formed only after WHICH has been range checked.
  mailinfo * mail = state.maillog + which;

  // Return if a single warning mail has been sent.
  if ((cfg.emailfreq==1) && mail->logged)
    return;

  // Return if this is an email test and one has already been sent.
  if (which == 0 && mail->logged)
    return;

  // To decide if to send mail, we need to know what time it is.
  time_t epoch = time(nullptr);

  // Return if less than one day has gone by
  const int day = 24*3600;
  if (cfg.emailfreq==2 && mail->logged && epoch<(mail->lastsent+day))
    return;

  // Keep power-of-two exponents derived from the mail counter in a range
  // which can neither produce an invalid shift nor overflow the result.
  const auto clamp_exponent = [](int n) { return (n < 0 ? 0 : (n > 30 ? 30 : n)); };

  // Return if less than 2^(logged-1) days have gone by
  if (cfg.emailfreq==3 && mail->logged) {
    const int64_t wait = (int64_t)day << clamp_exponent(mail->logged - 1);
    if ((int64_t)epoch < (int64_t)mail->lastsent + wait)
      return;
  }

  // record the time of this mail message, and the first mail message
  if (!mail->logged)
    mail->firstsent=epoch;
  mail->lastsent=epoch;

  // print warning string into message
  // Note: Message length may reach ~300 characters as device names may be
  // very long on certain platforms (macOS ~230 characters).
  // Message length must not exceed email line length limit, see RFC 5322:
  // "... MUST be no more than 998 characters, ... excluding the CRLF."
  char message[512];
  va_list ap;
  va_start(ap, fmt);
  vsnprintf(message, sizeof(message), fmt, ap);
  va_end(ap);

  // replace commas by spaces to separate recipients
  std::replace(address.begin(), address.end(), ',', ' ');

  // Export information in environment variables that will be useful
  // for user scripts
  static env_buffer env[13];
  env[0].set("SMARTD_MAILER", executable);
  env[1].set("SMARTD_MESSAGE", message);
  char dates[DATEANDEPOCHLEN];
  snprintf(dates, sizeof(dates), "%d", mail->logged);
  env[2].set("SMARTD_PREVCNT", dates);
  dateandtimezoneepoch(dates, mail->firstsent);
  env[3].set("SMARTD_TFIRST", dates);
  // Print the full time_t value, do not truncate it to int
  snprintf(dates, sizeof(dates), "%" PRId64, (int64_t)mail->firstsent);
  env[4].set("SMARTD_TFIRSTEPOCH", dates);
  env[5].set("SMARTD_FAILTYPE", whichfail[which]);
  env[6].set("SMARTD_ADDRESS", address.c_str());
  env[7].set("SMARTD_DEVICESTRING", cfg.name.c_str());

  // Allow 'smartctl ... -d $SMARTD_DEVICETYPE $SMARTD_DEVICE'
  env[8].set("SMARTD_DEVICETYPE",
             (!cfg.dev_type.empty() ? cfg.dev_type.c_str() : "auto"));
  env[9].set("SMARTD_DEVICE", cfg.dev_name.c_str());

  env[10].set("SMARTD_DEVICEINFO", cfg.dev_idinfo.c_str());

  // Days until the next message of this type: empty for test mails and
  // for emailfreq 1, "1" for emailfreq 2, 2^logged for emailfreq 3.
  dates[0] = 0;
  if (which) switch (cfg.emailfreq) {
    case 2: dates[0] = '1'; dates[1] = 0; break;
    case 3: snprintf(dates, sizeof(dates), "%d", 1 << clamp_exponent(mail->logged));
  }
  env[11].set("SMARTD_NEXTDAYS", dates);
  // Avoid false positive recursion detection by smartd_warning.{sh,cmd}
  env[12].set("SMARTD_SUBJECT", "");

  // now construct a command to send this as EMAIL
  // (the names below are only used for log messages)
  if (!*executable)
    executable = "<mail>";
  const char * newadd = (!address.empty()? address.c_str() : "<nomailer>");
  const char * newwarn = (which? "Warning via" : "Test of");

  char command[256];
#ifdef _WIN32
  // Path may contain spaces
  snprintf(command, sizeof(command), "\"%s\" 2>&1", warning_script.c_str());
#else
  snprintf(command, sizeof(command), "%s 2>&1", warning_script.c_str());
#endif

  // tell SYSLOG what we are about to do...
  PrintOut(LOG_INFO,"%s %s to %s%s ...\n",
           (which ? "Sending warning via" : "Executing test of"), executable, newadd,
           (
#ifdef HAVE_POSIX_API
             warn_as_user ?
             strprintf(" (uid=%u(%s) gid=%u(%s))",
                       (unsigned)warn_uid, warn_uname.c_str(),
                       (unsigned)warn_gid, warn_gname.c_str() ).c_str() :
#elif defined(_WIN32)
             warn_as_restr_user ? " (restricted user)" :
#endif
             ""
           )
  );

  // issue the command to send mail or to run the user's executable
  errno=0;
  FILE * pfp;

#ifdef HAVE_POSIX_API
  if (warn_as_user) {
    pfp = popen_as_ugid(command, "r", warn_uid, warn_gid);
  } else
#endif
  {
#ifdef _WIN32
    pfp = popen_as_restr_user(command, "r", warn_as_restr_user);
#else
    pfp = popen(command, "r");
#endif
  }

  if (!pfp)
    // failed to popen() mail process
    PrintOut(LOG_CRIT,"%s %s to %s: failed (fork or pipe failed, or no memory) %s\n",
             newwarn, executable, newadd, errno?strerror(errno):"");
  else {
    // pipe succeeded!
    int len;
    char buffer[EBUFLEN];

    // if unexpected output on stdout/stderr, null terminate, print, and flush
    if ((len=fread(buffer, 1, EBUFLEN, pfp))) {
      int count=0;
      // Reserve the last byte for the terminator if the buffer is full
      int newlen = len<EBUFLEN ? len : EBUFLEN-1;
      buffer[newlen]='\0';
      PrintOut(LOG_CRIT,"%s %s to %s produced unexpected output (%s%d bytes) to STDOUT/STDERR: \n%s\n",
               newwarn, executable, newadd, len!=newlen?"here truncated to ":"", newlen, buffer);

      // flush pipe if needed
      while (fread(buffer, 1, EBUFLEN, pfp) && count<EBUFLEN)
        count++;

      // tell user that pipe was flushed, or that something is really wrong
      if (count && count<EBUFLEN)
        PrintOut(LOG_CRIT,"%s %s to %s: flushed remaining STDOUT/STDERR\n",
                 newwarn, executable, newadd);
      else if (count)
        PrintOut(LOG_CRIT,"%s %s to %s: more than 1 MB STDOUT/STDERR flushed, breaking pipe\n",
                 newwarn, executable, newadd);
    }

    // if something went wrong with mail process, print warning
    errno=0;
    int status;

#ifdef HAVE_POSIX_API
    if (warn_as_user) {
      status = pclose_as_ugid(pfp);
    } else
#endif
    {
      status = pclose(pfp);
    }

    if (status == -1)
      PrintOut(LOG_CRIT,"%s %s to %s: pclose(3) failed %s\n", newwarn, executable, newadd,
               errno?strerror(errno):"");
    else {
      // mail process apparently succeeded. Check and report exit status
      if (WIFEXITED(status)) {
        // exited 'normally' (but perhaps with nonzero status)
        int status8 = WEXITSTATUS(status);
        if (status8>128)
          PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d) perhaps caught signal %d [%s]\n",
                   newwarn, executable, newadd, status, status8, status8-128, strsignal(status8-128));
        else if (status8) {
          PrintOut(LOG_CRIT,"%s %s to %s: failed (32-bit/8-bit exit status: %d/%d)\n",
                   newwarn, executable, newadd, status, status8);
          capabilities_log_error_hint();
        }
        else
          PrintOut(LOG_INFO,"%s %s to %s: successful\n", newwarn, executable, newadd);
      }

      if (WIFSIGNALED(status))
        PrintOut(LOG_INFO,"%s %s to %s: exited because of uncaught signal %d [%s]\n",
                 newwarn, executable, newadd, WTERMSIG(status), strsignal(WTERMSIG(status)));

      // this branch is probably not possible. If subprocess is
      // stopped then pclose() should not return.
      if (WIFSTOPPED(status))
        PrintOut(LOG_CRIT,"%s %s to %s: process STOPPED because it caught signal %d [%s]\n",
                 newwarn, executable, newadd, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
    }
  }

  // increment mail sent counter
  mail->logged++;
}
static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
__attribute_format_printf(4, 5);
static void reset_warning_mail(const dev_config & cfg, dev_state & state, int which, const char *fmt, ...)
{
if (!(0 <= which && which < SMARTD_NMAIL))
return;
// Return if no mail sent yet
mailinfo & mi = state.maillog[which];
if (!mi.logged)
return;
// Format & print message
char msg[256];
va_list ap;
va_start(ap, fmt);
vsnprintf(msg, sizeof(msg), fmt, ap);
va_end(ap);
PrintOut(LOG_INFO, "Device: %s, %s, warning condition reset after %d email%s\n", cfg.name.c_str(),
msg, mi.logged, (mi.logged==1 ? "" : "s"));
// Clear mail counter and timestamps
mi = mailinfo();
state.must_write = true;
}
#ifndef _WIN32

// Format the message and pass each non-empty line of it to a separate
// syslog(3) call.
__attribute_format_printf(2, 0)
static void vsyslog_lines(int priority, const char * fmt, va_list ap)
{
  char buf[512+EBUFLEN]; // enough space for exec cmd output in MailWarning()
  vsnprintf(buf, sizeof(buf), fmt, ap);

  char * line = buf;
  while (line && *line) {
    // Terminate the current line and remember where the next one starts
    char * next = strchr(line, '\n');
    if (next)
      *next++ = 0;

    if (*line)
      syslog(priority, "%s\n", line);

    line = next;
  }
}

#else  // _WIN32
// os_win32/syslog_win32.cpp supports multiple lines.
#define vsyslog_lines vsyslog
#endif // _WIN32
// Printing function used by the ATA/SCSI command and drive database code.
// Depending on the debug settings the output is printed to the console,
// sent to syslog, or dropped.
// [From GLIBC Manual: Since the prototype doesn't specify types for
// optional arguments, in a call to a variadic function the default
// argument promotions are performed on the optional argument
// values. This means the objects of type char or short int (whether
// signed or not) are promoted to either int or unsigned int, as
// appropriate.]
void pout(const char *fmt, ...)
{
  // get the correct time in syslog()
  FixGlibcTimeZoneBug();

  // debugmode other than 0 or 2: print to the console
  const bool to_console = (debugmode && debugmode != 2);
  // debugmode 2 or any command debugging: print to syslog
  const bool to_syslog = (!to_console && (debugmode==2 || ata_debugmode || scsi_debugmode));

  va_list args;
  va_start(args, fmt);

  if (to_console) {
    FILE * out = stdout;
#ifdef _WIN32
    // With facility LOG_LOCAL1 the output goes to stderr instead
    if (facility == LOG_LOCAL1)
      out = stderr;
#endif
    vfprintf(out, fmt, args);
    fflush(out);
  }
  else if (to_syslog) {
    openlog("smartd", LOG_PID, facility);
    vsyslog_lines(LOG_INFO, fmt, args);
    closelog();
  }

  va_end(args);
}
// Print a printf()-style message with syslog PRIORITY: to the console if
// debugmode is set, to syslog otherwise.
static void PrintOut(int priority, const char *fmt, ...)
{
  // get the correct time in syslog()
  FixGlibcTimeZoneBug();

  va_list args;
  va_start(args, fmt);

  if (!debugmode) {
    openlog("smartd", LOG_PID, facility);
    vsyslog_lines(priority, fmt, args);
    closelog();
  }
  else {
    FILE * out = stdout;
#ifdef _WIN32
    // With facility LOG_LOCAL1 the output goes to stderr instead
    if (facility == LOG_LOCAL1)
      out = stderr;
#endif
    vfprintf(out, fmt, args);
    fflush(out);
  }

  va_end(args);
}
// Used to warn users about invalid checksums. Called from atacmds.cpp.
// STRING is inserted verbatim into the warning text which is then handed
// to pout(), so whether it is shown depends on the debug settings.
void checksumwarning(const char * string)
{
pout("Warning! %s error: invalid SMART checksum.\n", string);
}
#ifndef _WIN32

// Wait until the PID file exists so that a calling program knows that the
// daemon is really up and running and has a PID to kill it.
// Returns true immediately if no PID file is configured or in debug mode,
// false if the file did not show up after 10 checks one second apart.
static bool WaitForPidFile()
{
  if (pid_file.empty() || debugmode)
    return true;

  const int max_wait = 10;
  struct stat st;
  int attempts = 0;
  while (attempts < max_wait) {
    if (stat(pid_file.c_str(), &st) == 0)
      return true;
    sleep(1);
    ++attempts;
  }
  return false;
}

#endif // _WIN32
// Forks new process if needed, closes ALL file descriptors,
// redirects stdin, stdout, and stderr. Not quite daemon().
// See https://www.linuxjournal.com/article/2335
// for a good description of why we do things this way.
//
// Return value:
//   -1            no error, the caller continues as the daemon process
//    0            this is one of the intermediate parent processes, exit cleanly
//   EXIT_STARTUP  fork, redirection or detach failed, or the PID file
//                 did not show up
static int daemon_init()
{
#ifndef _WIN32
// flush all buffered streams. Else we might get two copies of open
// streams since both parent and child get copies of the buffers.
fflush(nullptr);
if (do_fork) {
pid_t pid;
if ((pid=fork()) < 0) {
// unable to fork!
PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
return EXIT_STARTUP;
}
if (pid) {
// we are the parent process, wait for pid file, then exit cleanly
if(!WaitForPidFile()) {
PrintOut(LOG_CRIT,"PID file %s didn't show up!\n", pid_file.c_str());
return EXIT_STARTUP;
}
return 0;
}
// from here on, we are the child process.
// Start a new session
setsid();
// Fork one more time to avoid any possibility of having terminals
if ((pid=fork()) < 0) {
// unable to fork!
PrintOut(LOG_CRIT,"smartd unable to fork daemon process!\n");
return EXIT_STARTUP;
}
if (pid)
// we are the parent process -- exit cleanly
return 0;
// Now we are the child's child...
}
// close any open file descriptors
// (this is also done if do_fork is not set)
for (int i = sysconf(_SC_OPEN_MAX); --i >= 0; )
close(i);
// redirect any IO attempts to /dev/null and change to root directory
// The check relies on open() and dup() returning the lowest free
// descriptors 0, 1 and 2 because everything was closed above.
int fd = open("/dev/null", O_RDWR);
if (!(fd == 0 && dup(fd) == 1 && dup(fd) == 2 && !chdir("/"))) {
PrintOut(LOG_CRIT, "smartd unable to redirect to /dev/null or to chdir to root!\n");
return EXIT_STARTUP;
}
umask(0022);
if (do_fork)
PrintOut(LOG_INFO, "smartd has fork()ed into background mode. New PID=%d.\n", (int)getpid());
#else // _WIN32
// No fork() on native Win32
// Detach this process from console
fflush(nullptr);
if (daemon_detach("smartd")) {
PrintOut(LOG_CRIT,"smartd unable to detach from console!\n");
return EXIT_STARTUP;
}
// stdin/out/err now closed if not redirected
#endif // _WIN32
// No error, continue in main_worker()
return -1;
}
// Create the PID file containing the current process id.
// Returns true on success or if no PID file is configured, false if the
// file could not be written.
static bool write_pid_file()
{
  // Nothing to do without a PID file name
  if (pid_file.empty())
    return true;

  const pid_t pid = getpid();
  const char * path = pid_file.c_str();

  // Restrict the file permissions only while the file is created
#ifndef __CYGWIN__
  mode_t old_umask = umask(0077); // rwx------
#else
  // Cygwin: smartd service runs on system account, ensure PID file can be read by admins
  mode_t old_umask = umask(0033); // rwxr--r--
#endif
  stdio_file f(path, "w");
  umask(old_umask);

  const bool written = (f && fprintf(f, "%d\n", (int)pid) > 0 && f.close());
  if (!written) {
    PrintOut(LOG_CRIT, "unable to write PID file %s - exiting.\n", path);
    return false;
  }

  PrintOut(LOG_INFO, "file %s written containing PID %d\n", path, (int)pid);
  return true;
}
// Prints header identifying version of code and home
// The text is the result of format_version_info("smartd") and is
// printed via PrintOut() with priority LOG_INFO.
static void PrintHead()
{
PrintOut(LOG_INFO, "%s\n", format_version_info("smartd").c_str());
}
// prints help info for configuration file Directives
// The whole help text is printed with a single PrintOut() call.
// Its three '%s' placeholders are filled, in this order, with the
// configuration file name, the list of valid device types and the list
// of valid '-F' firmware bug arguments.
static void Directives()
{
PrintOut(LOG_INFO,
"Configuration file (%s) Directives (after device name):\n"
" -d TYPE Set the device type: auto, ignore, removable,\n"
" %s\n"
" -T TYPE Set the tolerance to one of: normal, permissive\n"
" -o VAL Enable/disable automatic offline tests (on/off)\n"
" -S VAL Enable/disable attribute autosave (on/off)\n"
" -n MODE No check if: never, sleep[,N][,q], standby[,N][,q], idle[,N][,q]\n"
" -H Monitor SMART Health Status, report if failed\n"
" -s REG Do Self-Test at time(s) given by regular expression REG\n"
" -l TYPE Monitor SMART log or self-test status:\n"
" error, selftest, xerror, offlinests[,ns], selfteststs[,ns]\n"
" -l scterc,R,W Set SCT Error Recovery Control\n"
" -e Change device setting: aam,[N|off], apm,[N|off], dsn,[on|off],\n"
" lookahead,[on|off], security-freeze, standby,[N|off], wcache,[on|off]\n"
" -f Monitor 'Usage' Attributes, report failures\n"
" -m ADD Send email warning to address ADD\n"
" -M TYPE Modify email warning behavior (see man page)\n"
" -p Report changes in 'Prefailure' Attributes\n"
" -u Report changes in 'Usage' Attributes\n"
" -t Equivalent to -p and -u Directives\n"
" -r ID Also report Raw values of Attribute ID with -p, -u or -t\n"
" -R ID Track changes in Attribute ID Raw value with -p, -u or -t\n"
" -i ID Ignore Attribute ID for -f Directive\n"
" -I ID Ignore Attribute ID for -p, -u or -t Directive\n"
" -C ID[+] Monitor [increases of] Current Pending Sectors in Attribute ID\n"
" -U ID[+] Monitor [increases of] Offline Uncorrectable Sectors in Attribute ID\n"
" -W D,I,C Monitor Temperature D)ifference, I)nformal limit, C)ritical limit\n"
" -v N,ST Modifies labeling of Attribute N (see man page) \n"
" -P TYPE Drive-specific presets: use, ignore, show, showall\n"
" -a Default: -H -f -t -l error -l selftest -l selfteststs -C 197 -U 198\n"
" -F TYPE Use firmware bug workaround:\n"
" %s\n"
" -c i=N Set interval between disk checks to N seconds\n"
" # Comment: text after a hash sign is ignored\n"
" \\ Line continuation character\n"
"Attribute ID is a decimal integer 1 <= ID <= 255\n"
"Use ID = 0 to turn off -C and/or -U Directives\n"
"Example: /dev/sda -a\n",
configfile,
smi()->get_valid_dev_types_str().c_str(),
get_valid_firmwarebug_args());
}
/* Returns a pointer to a static string containing a formatted list of the valid
   arguments to the option opt or nullptr on failure.
   Which entries exist for 'u' and 'C' depends on the build configuration. */
static const char *GetValidArgList(char opt)
{
  struct arg_help {
    char opt;          // option character
    const char * text; // list of valid arguments
  };

  static const arg_help table[] = {
    {'A', "<PATH_PREFIX>, -"},
    {'s', "<PATH_PREFIX>, -"},
    {'B', "[+]<FILE_NAME>"},
    {'c', "<FILE_NAME>, -"},
    {'l', "daemon, local0, local1, local2, local3, local4, local5, local6, local7"},
    {'q', "nodev[0], errors[,nodev0], nodev[0]startup, never, onecheck, showtests"},
    {'r', "ioctl[,N], ataioctl[,N], scsiioctl[,N], nvmeioctl[,N]"},
    {'p', "<FILE_NAME>"},
    {'w', "<FILE_NAME>"},
    {'i', "<INTEGER_SECONDS>"},
#ifdef HAVE_POSIX_API
    {'u', "<USER>[:<GROUP>], -"},
#elif defined(_WIN32)
    {'u', "restricted, unchanged"},
#endif
#ifdef HAVE_LIBCAP_NG
    {'C', "mail, <no_argument>"},
#endif
  };

  for (const arg_help & entry : table) {
    if (entry.opt == opt)
      return entry.text;
  }

  // Unknown option
  return nullptr;
}
/* prints help information for command syntax */
// Each option is printed with separate PrintOut() calls at priority LOG_INFO.
// Which options and defaults are shown depends on the build configuration
// (see the preprocessor conditions below).
static void Usage()
{
PrintOut(LOG_INFO,"Usage: smartd [options]\n\n");
// The '|-' alternative is only shown if a default prefix is compiled in
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
PrintOut(LOG_INFO," -A PREFIX|-, --attributelog=PREFIX|-\n");
#else
PrintOut(LOG_INFO," -A PREFIX, --attributelog=PREFIX\n");
#endif
PrintOut(LOG_INFO," Log attribute information to {PREFIX}MODEL-SERIAL.TYPE.csv\n");
#ifdef SMARTMONTOOLS_ATTRIBUTELOG
PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_ATTRIBUTELOG "MODEL-SERIAL.TYPE.csv]\n");
#endif
PrintOut(LOG_INFO,"\n");
PrintOut(LOG_INFO," -B [+]FILE, --drivedb=[+]FILE\n");
PrintOut(LOG_INFO," Read and replace [add] drive database from FILE\n");
PrintOut(LOG_INFO," [default is +%s", get_drivedb_path_add());
#ifdef SMARTMONTOOLS_DRIVEDBDIR
PrintOut(LOG_INFO,"\n");
PrintOut(LOG_INFO," and then %s", get_drivedb_path_default());
#endif
PrintOut(LOG_INFO,"]\n\n");
PrintOut(LOG_INFO," -c NAME|-, --configfile=NAME|-\n");
PrintOut(LOG_INFO," Read configuration file NAME or stdin\n");
PrintOut(LOG_INFO," [default is %s]\n\n", configfile);
#ifdef HAVE_LIBCAP_NG
PrintOut(LOG_INFO," -C, --capabilities[=mail]\n");
PrintOut(LOG_INFO," Drop unneeded Linux process capabilities.\n"
" Warning: Mail notification may not work when used.\n\n");
#endif
PrintOut(LOG_INFO," -d, --debug\n");
PrintOut(LOG_INFO," Start smartd in debug mode\n\n");
PrintOut(LOG_INFO," -D, --showdirectives\n");
PrintOut(LOG_INFO," Print the configuration file Directives and exit\n\n");
PrintOut(LOG_INFO," -h, --help, --usage\n");
PrintOut(LOG_INFO," Display this help and exit\n\n");
PrintOut(LOG_INFO," -i N, --interval=N\n");
PrintOut(LOG_INFO," Set interval between disk checks to N seconds, where N >= 10\n\n");
PrintOut(LOG_INFO," -l local[0-7], --logfacility=local[0-7]\n");
#ifndef _WIN32
PrintOut(LOG_INFO," Use syslog facility local0 - local7 or daemon [default]\n\n");
#else
PrintOut(LOG_INFO," Log to \"./smartd.log\", stdout, stderr [default is event log]\n\n");
#endif
// '-n' is not offered on native Windows
#ifndef _WIN32
PrintOut(LOG_INFO," -n, --no-fork\n");
PrintOut(LOG_INFO," Do not fork into background\n");
#ifdef HAVE_LIBSYSTEMD
PrintOut(LOG_INFO," (systemd 'Type=notify' is assumed if $NOTIFY_SOCKET is set)\n");
#endif // HAVE_LIBSYSTEMD
PrintOut(LOG_INFO,"\n");
#endif // WIN32
PrintOut(LOG_INFO," -p NAME, --pidfile=NAME\n");
PrintOut(LOG_INFO," Write PID file NAME\n\n");
PrintOut(LOG_INFO," -q WHEN, --quit=WHEN\n");
PrintOut(LOG_INFO," Quit on one of: %s\n\n", GetValidArgList('q'));
PrintOut(LOG_INFO," -r, --report=TYPE\n");
PrintOut(LOG_INFO," Report transactions for one of: %s\n\n", GetValidArgList('r'));
// The '|-' alternative is only shown if a default prefix is compiled in
#ifdef SMARTMONTOOLS_SAVESTATES
PrintOut(LOG_INFO," -s PREFIX|-, --savestates=PREFIX|-\n");
#else
PrintOut(LOG_INFO," -s PREFIX, --savestates=PREFIX\n");
#endif
PrintOut(LOG_INFO," Save disk states to {PREFIX}MODEL-SERIAL.TYPE.state\n");
#ifdef SMARTMONTOOLS_SAVESTATES
PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SAVESTATES "MODEL-SERIAL.TYPE.state]\n");
#endif
PrintOut(LOG_INFO,"\n");
PrintOut(LOG_INFO," -w NAME, --warnexec=NAME\n");
PrintOut(LOG_INFO," Run executable NAME on warnings\n");
#ifndef _WIN32
PrintOut(LOG_INFO," [default is " SMARTMONTOOLS_SMARTDSCRIPTDIR "/smartd_warning.sh]\n\n");
#else
PrintOut(LOG_INFO," [default is %s/smartd_warning.cmd]\n\n", get_exe_dir().c_str());
#endif
// '-u' has a different meaning on POSIX systems and on Windows
#ifdef HAVE_POSIX_API
PrintOut(LOG_INFO," -u USER[:GROUP], --warn-as-user=USER[:GROUP]\n");
PrintOut(LOG_INFO," Run warning script as non-privileged USER\n\n");
#elif defined(_WIN32)
PrintOut(LOG_INFO," -u MODE, --warn-as-user=MODE\n");
PrintOut(LOG_INFO," Run warning script with modified access token: %s\n\n", GetValidArgList('u'));
#endif
#ifdef _WIN32
PrintOut(LOG_INFO," --service\n");
PrintOut(LOG_INFO," Running as windows service (see man page), install with:\n");
PrintOut(LOG_INFO," smartd install [options]\n");
PrintOut(LOG_INFO," Remove service with:\n");
PrintOut(LOG_INFO," smartd remove\n\n");
#endif // _WIN32
PrintOut(LOG_INFO," -V, --version, --license, --copyright\n");
PrintOut(LOG_INFO," Print License, Copyright, and version information\n");
}
// Close DEVICE and log a message for NAME if this fails.
// Returns 0 on success, 1 on failure.
static int CloseDevice(smart_device * device, const char * name)
{
  if (device->close())
    // device successfully closed
    return 0;

  PrintOut(LOG_INFO,"Device: %s, %s, close() failed\n", name, device->get_errmsg());
  return 1;
}
// Replace invalid characters in cfg.dev_idinfo
// Every character outside of the printable ASCII range ' '..'~' is replaced
// by '?'.  A '~' as first character is also replaced.
// Returns true if S was changed.
static bool sanitize_dev_idinfo(std::string & s)
{
  // The range check below assumes ASCII
  static_assert(' ' == 0x20 && '~' == 0x07e, "ASCII character set required");

  bool changed = false;
  bool first = true;
  for (char & c : s) {
    const bool printable = (' ' <= c && c <= '~');
    // Don't pass possible command escapes ('~! COMMAND') to the 'mail' command.
    const bool leading_tilde = (first && c == '~');
    first = false;

    if (printable && !leading_tilde)
      continue;
    c = '?';
    changed = true;
  }
  return changed;
}
// Return true if a char is not allowed in a state file name.
// Only the ASCII digits and letters are allowed.
static bool not_allowed_in_filename(char c)
{
  if ('0' <= c && c <= '9')
    return false;
  if ('A' <= c && c <= 'Z')
    return false;
  if ('a' <= c && c <= 'z')
    return false;
  return true;
}
// Read error count from the Summary SMART error log or, if EXTENDED is
// set, from the Extended Comprehensive SMART error log.
// Returns 0 if the log does not point to an entry, -1 on read error.
static int read_ata_error_count(ata_device * device, const char * name,
                                firmwarebug_defs firmwarebugs, bool extended)
{
  if (extended) {
    ata_smart_exterrlog xlog;
    if (!ataReadExtErrorLog(device, &xlog, 0, 1 /*first sector only*/, firmwarebugs)) {
      PrintOut(LOG_INFO,"Device: %s, Read Extended Comprehensive SMART Error Log failed\n",name);
      return -1;
    }

    // Some disks use the reserved byte as index, see ataprint.cpp.
    if (xlog.error_log_index || xlog.reserved1)
      return xlog.device_error_count;
    return 0;
  }

  ata_smart_errorlog slog;
  if (ataReadErrorLog(device, &slog, firmwarebugs)){
    PrintOut(LOG_INFO,"Device: %s, Read Summary SMART Error Log failed\n",name);
    return -1;
  }

  if (!slog.error_log_pointer)
    return 0;
  return slog.ata_error_count;
}
// Count the failed self-tests in the SMART self-test log.
// Returns <0 if problem. Otherwise, bottom 8 bits are the self test
// error count, and top bits are the power-on hours of the last error.
static int SelfTestErrorCount(ata_device * device, const char * name,
                              firmwarebug_defs firmwarebugs)
{
  struct ata_smart_selftestlog log;

  if (ataReadSelfTestLog(device, &log, firmwarebugs)){
    PrintOut(LOG_INFO,"Device: %s, Read SMART Self Test Log Failed\n",name);
    return -1;
  }

  // No tests logged
  if (!log.mostrecenttest)
    return 0;

  // Walk through the 21 log entries, offsets 20 down to 0 relative to
  // 'mostrecenttest', and count the failed self-tests
  int failed = 0, last_fail_hours = 0;
  for (int step = 0; step <= 20; step++) {
    const int idx = ((20 - step) + log.mostrecenttest) % 21;
    const ata_smart_selftestlog_struct & entry = log.selftest_struct[idx];

    // Skip unused entries
    if (!nonempty(&entry, sizeof(entry)))
      continue;

    const int result = entry.selfteststatus >> 4;

    // First successful extended self-test, stop count
    const bool extended_ok = (result == 0x0 && (entry.selftestnumber & 0x7f) == 0x02);
    if (extended_ok)
      break;

    // Only results 0x3..0x8 are errors
    if (result < 0x3 || result > 0x8)
      continue;

    failed++;
    // Keep track of time of most recent error
    if (!last_fail_hours)
      last_fail_hours = entry.timestamp;
  }

  return ((last_fail_hours << 8) | failed);
}
// Extract the parts of the value returned by SelfTestErrorCount():
// bits 0-7 are the error count, bits 8-23 the power-on hours.
// The argument is parenthesized so that any expression may be passed.
#define SELFTEST_ERRORCOUNT(x) ((x) & 0xff)
#define SELFTEST_ERRORHOURS(x) (((x) >> 8) & 0xffff)
// Check offline data collection status:
// true if the lower 7 bits of STATUS are 0x03, bit 7 is ignored.
static inline bool is_offl_coll_in_progress(unsigned char status)
{
  const int code = status & 0x7f;
  return (code == 0x03);
}
// Check self-test execution status:
// true if the upper 4 bits of STATUS are 0xf, the lower 4 bits are ignored.
static inline bool is_self_test_in_progress(unsigned char status)
{
  const int code = status >> 4;
  return (code == 0xf);
}
// Log offline data collection status of device NAME.
// The lower 7 bits of STATUS select the message, bit 7 adds " (auto:on)".
static void log_offline_data_coll_status(const char * name, unsigned char status)
{
  // Messages indexed by status code, nullptr if the code is unknown
  static const char * const text[] = {
    "was never started",                                  // 0x00
    nullptr,                                              // 0x01
    "was completed without error",                        // 0x02
    "is in progress",                                     // 0x03
    "was suspended by an interrupting command from host", // 0x04
    "was aborted by an interrupting command from host",   // 0x05
    "was aborted by the device with a fatal error"        // 0x06
  };

  const unsigned code = status & 0x7f;
  const char * msg = (code <= 0x06 ? text[code] : nullptr);

  if (!msg) {
    PrintOut(LOG_INFO, "Device: %s, unknown offline data collection status 0x%02x\n",
             name, status);
    return;
  }

  // Only the fatal error is logged as critical
  const int priority = (code == 0x06 ? LOG_CRIT : LOG_INFO);
  const char * autotag = ((status & 0x80) ? " (auto:on)" : "");
  PrintOut(priority, "Device: %s, offline data collection %s%s\n", name, msg, autotag);
}
// Log self-test execution status of device NAME.
// The upper 4 bits of STATUS select the message.  For an active self-test
// (0xf) the lower 4 bits are printed as tens of percent remaining.
static void log_self_test_exec_status(const char * name, unsigned char status)
{
  // Messages for the results 0x0..0x8 of the previous self-test
  static const char * const text[] = {
    "completed without error",                            // 0x0
    "was aborted by the host",                            // 0x1
    "was interrupted by the host with a reset",           // 0x2
    "could not complete due to a fatal or unknown error", // 0x3
    "completed with error (unknown test element)",        // 0x4
    "completed with error (electrical test element)",     // 0x5
    "completed with error (servo/seek test element)",     // 0x6
    "completed with error (read test element)",           // 0x7
    "completed with error (handling damage?)"             // 0x8
  };

  const unsigned code = status >> 4;

  if (code <= 0x8) {
    // Results 0x4 and above are logged as critical
    const int priority = (code >= 0x4 ? LOG_CRIT : LOG_INFO);
    PrintOut(priority, "Device: %s, previous self-test %s\n", name, text[code]);
  }
  else if (code == 0xf) {
    PrintOut(LOG_INFO, "Device: %s, self-test in progress, %u0%% remaining\n",
             name, status & 0x0f);
  }
  else {
    PrintOut(LOG_INFO, "Device: %s, unknown self-test status 0x%02x\n",
             name, status);
  }
}
// Check pending sector count id (-C, -U directives).
// Returns false and logs a message using MSG as the name of the count if
// Attribute ID does not exist or if its raw value is implausible.
static bool check_pending_id(const dev_config & cfg, const dev_state & state,
                             unsigned char id, const char * msg)
{
  const char * devname = cfg.name.c_str();

  // The attribute must be present
  const int idx = ata_find_attr_index(id, state.smartval);
  if (idx < 0) {
    PrintOut(LOG_INFO, "Device: %s, can't monitor %s count - no Attribute %d\n",
             devname, msg, id);
    return false;
  }

  // The raw value must be below the number of sectors of the device,
  // or below 0xffffffff if the number of sectors is not known
  const uint64_t raw = ata_get_attr_raw_value(state.smartval.vendor_attributes[idx],
                                              cfg.attribute_defs);
  uint64_t limit = state.num_sectors;
  if (!limit)
    limit = 0xffffffffULL;

  if (raw < limit)
    return true;

  PrintOut(LOG_INFO, "Device: %s, ignoring %s count - bogus Attribute %d value %" PRIu64 " (0x%" PRIx64 ")\n",
           devname, msg, id, raw, raw);
  return false;
}
// Called by ATA/SCSI/NVMeDeviceScan() after successful device check.
// Fills in the settings which depend on the result of the scan.
static void finish_device_scan(dev_config & cfg, dev_state & state)
{
  // Set cfg.emailfreq if user hasn't set it
  const bool mail_wanted = !(cfg.emailaddress.empty() && cfg.emailcmdline.empty());
  if (mail_wanted && !cfg.emailfreq) {
    // Avoid that emails are suppressed forever due to state persistence
    if (!cfg.state_file.empty())
      cfg.emailfreq = 2; // '-M daily'
    else
      cfg.emailfreq = 1; // '-M once'
  }

  // Start self-test regex check now if time was not read from state file
  const bool regex_given = !cfg.test_regex.empty();
  if (regex_given && !state.scheduled_test_next_check)
    state.scheduled_test_next_check = time(nullptr);
}
// Common function to format result message for ATA setting.
// Appends NAME to MSG, separated by ", " if MSG is not empty, followed by:
//   ":--"   if OK is false,
//   ":off"  if SET_OPTION is negative,
//   ":N"    with N = SET_OPTION-1 if HAS_VALUE is set,
//   ":on"   if SET_OPTION is positive,
//   nothing otherwise.
static void format_set_result_msg(std::string & msg, const char * name, bool ok,
                                  int set_option = 0, bool has_value = false)
{
  if (!msg.empty())
    msg += ", ";
  msg += name;

  if (!ok) {
    msg += ":--";
    return;
  }
  if (set_option < 0) {
    msg += ":off";
    return;
  }
  if (has_value) {
    msg += strprintf(":%d", set_option-1);
    return;
  }
  if (set_option > 0)
    msg += ":on";
}
// Return true and print message if CFG.dev_idinfo is already in PREV_CFGS.
// Only entries with id_is_unique set are taken into account.
static bool is_duplicate_dev_idinfo(const dev_config & cfg, const dev_config_vector & prev_cfgs)
{
  // An identity which is not unique cannot be a duplicate
  if (!cfg.id_is_unique)
    return false;

  for (const auto & other : prev_cfgs) {
    if (other.id_is_unique && other.dev_idinfo == cfg.dev_idinfo) {
      PrintOut(LOG_INFO, "Device: %s, same identity as %s, ignored\n",
               cfg.dev_name.c_str(), other.dev_name.c_str());
      return true;
    }
  }

  return false;
}
// TODO: Add '-F swapid' directive
// Passed to ata_read_identity() by ATADeviceScan(); always false as long
// as there is no directive to change it.
const bool fix_swapped_id = false;
// scan to see what ata devices there are, and if they support SMART
static int ATADeviceScan(dev_config & cfg, dev_state & state, ata_device * atadev,
const dev_config_vector * prev_cfgs)
{
int supported=0;
struct ata_identify_device drive;
const char *name = cfg.name.c_str();
int retid;
// Device must be open
// Get drive identity structure
if ((retid = ata_read_identity(atadev, &drive, fix_swapped_id))) {
if (retid<0)
// Unable to read Identity structure
PrintOut(LOG_INFO,"Device: %s, not ATA, no IDENTIFY DEVICE Structure\n",name);
else
PrintOut(LOG_INFO,"Device: %s, packet devices [this device %s] not SMART capable\n",
name, packetdevicetype(retid-1));
CloseDevice(atadev, name);
return 2;
}
// Get drive identity, size and rotation rate (HDD/SSD)
char model[40+1], serial[20+1], firmware[8+1];
ata_format_id_string(model, drive.model, sizeof(model)-1);
ata_format_id_string(serial, drive.serial_no, sizeof(serial)-1);
ata_format_id_string(firmware, drive.fw_rev, sizeof(firmware)-1);
ata_size_info sizes;
ata_get_size_info(&drive, sizes);
state.num_sectors = sizes.sectors;
cfg.dev_rpm = ata_get_rotation_rate(&drive);
char wwn[64]; wwn[0] = 0;
unsigned oui = 0; uint64_t unique_id = 0;
int naa = ata_get_wwn(&drive, oui, unique_id);
if (naa >= 0)
snprintf(wwn, sizeof(wwn), "WWN:%x-%06x-%09" PRIx64 ", ", naa, oui, unique_id);
// Format device id string for warning emails
char cap[32];
cfg.dev_idinfo = strprintf("%s, S/N:%s, %sFW:%s, %s", model, serial, wwn, firmware,
format_capacity(cap, sizeof(cap), sizes.capacity, "."));
cfg.id_is_unique = true; // TODO: Check serial?
if (sanitize_dev_idinfo(cfg.dev_idinfo))
cfg.id_is_unique = false;
PrintOut(LOG_INFO, "Device: %s, %s\n", name, cfg.dev_idinfo.c_str());
// Check for duplicates
if (prev_cfgs && is_duplicate_dev_idinfo(cfg, *prev_cfgs)) {
CloseDevice(atadev, name);
return 1;
}
// Show if device in database, and use preset vendor attribute
// options unless user has requested otherwise.
if (cfg.ignorepresets)
PrintOut(LOG_INFO, "Device: %s, smartd database not searched (Directive: -P ignore).\n", name);
else {
// Apply vendor specific presets, print warning if present
std::string dbversion;
const drive_settings * dbentry = lookup_drive_apply_presets(
&drive, cfg.attribute_defs, cfg.firmwarebugs, dbversion);
if (!dbentry)
PrintOut(LOG_INFO, "Device: %s, not found in smartd database%s%s.\n", name,
(!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""));
else {
PrintOut(LOG_INFO, "Device: %s, found in smartd database%s%s%s%s\n",
name, (!dbversion.empty() ? " " : ""), (!dbversion.empty() ? dbversion.c_str() : ""),
(*dbentry->modelfamily ? ": " : "."), (*dbentry->modelfamily ? dbentry->modelfamily : ""));
if (*dbentry->warningmsg)
PrintOut(LOG_CRIT, "Device: %s, WARNING: %s\n", name, dbentry->warningmsg);
}
}
// Check for ATA Security LOCK
unsigned short word128 = drive.words088_255[128-88];
bool locked = ((word128 & 0x0007) == 0x0007); // LOCKED|ENABLED|SUPPORTED
if (locked)
PrintOut(LOG_INFO, "Device: %s, ATA Security is **LOCKED**\n", name);
// Set default '-C 197[+]' if no '-C ID' is specified.
if (!cfg.curr_pending_set)
cfg.curr_pending_id = get_unc_attr_id(false, cfg.attribute_defs, cfg.curr_pending_incr);
// Set default '-U 198[+]' if no '-U ID' is specified.
if (!cfg.offl_pending_set)
cfg.offl_pending_id = get_unc_attr_id(true, cfg.attribute_defs, cfg.offl_pending_incr);
// If requested, show which presets would be used for this drive
if (cfg.showpresets) {
int savedebugmode=debugmode;
PrintOut(LOG_INFO, "Device %s: presets are:\n", name);
if (!debugmode)
debugmode=2;
show_presets(&drive);
debugmode=savedebugmode;
}
// see if drive supports SMART
supported=ataSmartSupport(&drive);
if (supported!=1) {
if (supported==0)
// drive does NOT support SMART
PrintOut(LOG_INFO,"Device: %s, lacks SMART capability\n",name);
else
// can't tell if drive supports SMART
PrintOut(LOG_INFO,"Device: %s, ATA IDENTIFY DEVICE words 82-83 don't specify if SMART capable.\n",name);
// should we proceed anyway?
if (cfg.permissive) {
PrintOut(LOG_INFO,"Device: %s, proceeding since '-T permissive' Directive given.\n",name);
}
else {
PrintOut(LOG_INFO,"Device: %s, to proceed anyway, use '-T permissive' Directive.\n",name);
CloseDevice(atadev, name);
return 2;
}
}
if (ataEnableSmart(atadev)) {
// Enable SMART command has failed
PrintOut(LOG_INFO,"Device: %s, could not enable SMART capability\n",name);
if (ataIsSmartEnabled(&drive) <= 0) {
if (!cfg.permissive) {
PrintOut(LOG_INFO, "Device: %s, to proceed anyway, use '-T permissive' Directive.\n", name);
CloseDevice(atadev, name);
return 2;
}
PrintOut(LOG_INFO, "Device: %s, proceeding since '-T permissive' Directive given.\n", name);
}
else {
PrintOut(LOG_INFO, "Device: %s, proceeding since SMART is already enabled\n", name);
}
}
// disable device attribute autosave...
if (cfg.autosave==1) {
if (ataDisableAutoSave(atadev))
PrintOut(LOG_INFO,"Device: %s, could not disable SMART Attribute Autosave.\n",name);
else
PrintOut(LOG_INFO,"Device: %s, disabled SMART Attribute Autosave.\n",name);
}
// or enable device attribute autosave
if (cfg.autosave==2) {
if (ataEnableAutoSave(atadev))
PrintOut(LOG_INFO,"Device: %s, could not enable SMART Attribute Autosave.\n",name);
else
PrintOut(LOG_INFO,"Device: %s, enabled SMART Attribute Autosave.\n",name);
}
// capability check: SMART status
if (cfg.smartcheck && ataSmartStatus2(atadev) == -1) {
PrintOut(LOG_INFO,"Device: %s, not capable of SMART Health Status check\n",name);
cfg.smartcheck = false;
}
// capability check: Read smart values and thresholds. Note that
// smart values are ALSO needed even if we ONLY want to know if the
// device is self-test log or error-log capable! After ATA-5, this
// information was ALSO reproduced in the IDENTIFY DEVICE response,
// but sadly not for ATA-5. Sigh.
// do we need to get SMART data?
bool smart_val_ok = false;
if ( cfg.autoofflinetest || cfg.selftest
|| cfg.errorlog || cfg.xerrorlog
|| cfg.offlinests || cfg.selfteststs
|| cfg.usagefailed || cfg.prefail || cfg.usage
|| cfg.tempdiff || cfg.tempinfo || cfg.tempcrit
|| cfg.curr_pending_id || cfg.offl_pending_id ) {
if (ataReadSmartValues(atadev, &state.smartval)) {
PrintOut(LOG_INFO, "Device: %s, Read SMART Values failed\n", name);
cfg.usagefailed = cfg.prefail = cfg.usage = false;
cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
cfg.curr_pending_id = cfg.offl_pending_id = 0;
}
else {
smart_val_ok = true;
if (ataReadSmartThresholds(atadev, &state.smartthres)) {
PrintOut(LOG_INFO, "Device: %s, Read SMART Thresholds failed%s\n",
name, (cfg.usagefailed ? ", ignoring -f Directive" : ""));
cfg.usagefailed = false;
// Let ata_get_attr_state() return ATTRSTATE_NO_THRESHOLD:
memset(&state.smartthres, 0, sizeof(state.smartthres));
}
}
// see if the necessary Attribute is there to monitor offline or
// current pending sectors or temperature
if ( cfg.curr_pending_id
&& !check_pending_id(cfg, state, cfg.curr_pending_id,
"Current_Pending_Sector"))
cfg.curr_pending_id = 0;
if ( cfg.offl_pending_id
&& !check_pending_id(cfg, state, cfg.offl_pending_id,
"Offline_Uncorrectable"))
cfg.offl_pending_id = 0;
if ( (cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)
&& !ata_return_temperature_value(&state.smartval, cfg.attribute_defs)) {
PrintOut(LOG_INFO, "Device: %s, can't monitor Temperature, ignoring -W %d,%d,%d\n",
name, cfg.tempdiff, cfg.tempinfo, cfg.tempcrit);
cfg.tempdiff = cfg.tempinfo = cfg.tempcrit = 0;
}
// Report ignored '-r' or '-R' directives
for (int id = 1; id <= 255; id++) {
if (cfg.monitor_attr_flags.is_set(id, MONITOR_RAW_PRINT)) {
char opt = (!cfg.monitor_attr_flags.is_set(id, MONITOR_RAW) ? 'r' : 'R');
const char * excl = (cfg.monitor_attr_flags.is_set(id,
(opt == 'r' ? MONITOR_AS_CRIT : MONITOR_RAW_AS_CRIT)) ? "!" : "");
int idx = ata_find_attr_index(id, state.smartval);
if (idx < 0)
PrintOut(LOG_INFO,"Device: %s, no Attribute %d, ignoring -%c %d%s\n", name, id, opt, id, excl);
else {
bool prefail = !!ATTRIBUTE_FLAGS_PREFAILURE(state.smartval.vendor_attributes[idx].flags);
if (!((prefail && cfg.prefail) || (!prefail && cfg.usage)))
PrintOut(LOG_INFO,"Device: %s, not monitoring %s Attributes, ignoring -%c %d%s\n", name,
(prefail ? "Prefailure" : "Usage"), opt, id, excl);
}
}
}
}
// enable/disable automatic on-line testing
if (cfg.autoofflinetest) {
// is this an enable or disable request?
const char *what=(cfg.autoofflinetest==1)?"disable":"enable";
if (!smart_val_ok)
PrintOut(LOG_INFO,"Device: %s, could not %s SMART Automatic Offline Testing.\n",name, what);
else {
// if command appears unsupported, issue a warning...
if (!isSupportAutomaticTimer(&state.smartval))
PrintOut(LOG_INFO,"Device: %s, SMART Automatic Offline Testing unsupported...\n",name);
// ... but then try anyway
if ((cfg.autoofflinetest==1)?ataDisableAutoOffline(atadev):ataEnableAutoOffline(atadev))
PrintOut(LOG_INFO,"Device: %s, %s SMART Automatic Offline Testing failed.\n", name, what);
else
PrintOut(LOG_INFO,"Device: %s, %sd SMART Automatic Offline Testing.\n", name, what);
}
}
// Read log directories if required for capability check
ata_smart_log_directory smart_logdir, gp_logdir;
bool smart_logdir_ok = false, gp_logdir_ok = false;
if ( isGeneralPurposeLoggingCapable(&drive)
&& (cfg.errorlog || cfg.selftest)
&& !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
if (!ataReadLogDirectory(atadev, &smart_logdir, false))
smart_logdir_ok = true;
}
if (cfg.xerrorlog && !cfg.firmwarebugs.is_set(BUG_NOLOGDIR)) {
if (!ataReadLogDirectory(atadev, &gp_logdir, true))
gp_logdir_ok = true;
}
// capability check: self-test-log
state.selflogcount = 0; state.selfloghour = 0;
if (cfg.selftest) {
int retval;
if (!( cfg.permissive
|| ( smart_logdir_ok && smart_logdir.entry[0x06-1].numsectors)
|| (!smart_logdir_ok && smart_val_ok && isSmartTestLogCapable(&state.smartval, &drive)))) {
PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest (override with -T permissive)\n", name);
cfg.selftest = false;
}
else if ((retval = SelfTestErrorCount(atadev, name, cfg.firmwarebugs)) < 0) {
PrintOut(LOG_INFO, "Device: %s, no SMART Self-test Log, ignoring -l selftest\n", name);
cfg.selftest = false;
}
else {
state.selflogcount=SELFTEST_ERRORCOUNT(retval);
state.selfloghour =SELFTEST_ERRORHOURS(retval);
}
}
// capability check: ATA error log
state.ataerrorcount = 0;
if (cfg.errorlog) {
int errcnt1;
if (!( cfg.permissive
|| ( smart_logdir_ok && smart_logdir.entry[0x01-1].numsectors)
|| (!smart_logdir_ok && smart_val_ok && isSmartErrorLogCapable(&state.smartval, &drive)))) {
PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error (override with -T permissive)\n", name);
cfg.errorlog = false;
}
else if ((errcnt1 = read_ata_error_count(atadev, name, cfg.firmwarebugs, false)) < 0) {
PrintOut(LOG_INFO, "Device: %s, no SMART Error Log, ignoring -l error\n", name);
cfg.errorlog = false;
}
else
state.ataerrorcount = errcnt1;
}
if (cfg.xerrorlog) {
int errcnt2;
if (!( cfg.permissive || cfg.firmwarebugs.is_set(BUG_NOLOGDIR)
|| (gp_logdir_ok && gp_logdir.entry[0x03-1].numsectors) )) {
PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror (override with -T permissive)\n",
name);
cfg.xerrorlog = false;
}
else if ((errcnt2 = read_ata_error_count(atadev, name, cfg.firmwarebugs, true)) < 0) {
PrintOut(LOG_INFO, "Device: %s, no Extended Comprehensive SMART Error Log, ignoring -l xerror\n", name);
cfg.xerrorlog = false;
}
else if (cfg.errorlog && state.ataerrorcount != errcnt2) {
PrintOut(LOG_INFO, "Device: %s, SMART Error Logs report different error counts: %d != %d\n",
name, state.ataerrorcount, errcnt2);
// Record max error count
if (errcnt2 > state.ataerrorcount)
state.ataerrorcount = errcnt2;
}
else
state.ataerrorcount = errcnt2;
}
// capability check: self-test and offline data collection status
if (cfg.offlinests || cfg.selfteststs) {
if (!(cfg.permissive || (smart_val_ok && state.smartval.offline_data_collection_capability))) {
if (cfg.offlinests)
PrintOut(LOG_INFO, "Device: %s, no SMART Offline Data Collection capability, ignoring -l offlinests (override with -T permissive)\n", name);
if (cfg.selfteststs)
PrintOut(LOG_INFO, "Device: %s, no SMART Self-test capability, ignoring -l selfteststs (override with -T permissive)\n", name);
cfg.offlinests = cfg.selfteststs = false;
}
}
// capabilities check -- does it support powermode?
if (cfg.powermode) {
int powermode = ataCheckPowerMode(atadev);
if (-1 == powermode) {
PrintOut(LOG_CRIT, "Device: %s, no ATA CHECK POWER STATUS support, ignoring -n Directive\n", name);
cfg.powermode=0;
}
else if (powermode!=0x00 && powermode!=0x01
&& powermode!=0x40 && powermode!=0x41
&& powermode!=0x80 && powermode!=0x81 && powermode!=0x82 && powermode!=0x83
&& powermode!=0xff) {
PrintOut(LOG_CRIT, "Device: %s, CHECK POWER STATUS returned %d, not ATA compliant, ignoring -n Directive\n",
name, powermode);
cfg.powermode=0;
}
}
// Apply ATA settings
std::string msg;
if (cfg.set_aam)
format_set_result_msg(msg, "AAM", (cfg.set_aam > 0 ?
ata_set_features(atadev, ATA_ENABLE_AAM, cfg.set_aam-1) :
ata_set_features(atadev, ATA_DISABLE_AAM)), cfg.set_aam, true);
if (cfg.set_apm)
format_set_result_msg(msg, "APM", (cfg.set_apm > 0 ?
ata_set_features(atadev, ATA_ENABLE_APM, cfg.set_apm-1) :
ata_set_features(atadev, ATA_DISABLE_APM)), cfg.set_apm, true);
if (cfg.set_lookahead)
format_set_result_msg(msg, "Rd-ahead", ata_set_features(atadev,
(cfg.set_lookahead > 0 ? ATA_ENABLE_READ_LOOK_AHEAD : ATA_DISABLE_READ_LOOK_AHEAD)),
cfg.set_lookahead);
if (cfg.set_wcache)
format_set_result_msg(msg, "Wr-cache", ata_set_features(atadev,
(cfg.set_wcache > 0? ATA_ENABLE_WRITE_CACHE : ATA_DISABLE_WRITE_CACHE)), cfg.set_wcache);
if (cfg.set_dsn)
format_set_result_msg(msg, "DSN", ata_set_features(atadev,
ATA_ENABLE_DISABLE_DSN, (cfg.set_dsn > 0 ? 0x1 : 0x2)));
if (cfg.set_security_freeze)
format_set_result_msg(msg, "Security freeze",
ata_nodata_command(atadev, ATA_SECURITY_FREEZE_LOCK));
if (cfg.set_standby)
format_set_result_msg(msg, "Standby",
ata_nodata_command(atadev, ATA_IDLE, cfg.set_standby-1), cfg.set_standby, true);
// Report as one log entry
if (!msg.empty())
PrintOut(LOG_INFO, "Device: %s, ATA settings applied: %s\n", name, msg.c_str());
// set SCT Error Recovery Control if requested
if (cfg.sct_erc_set) {
if (!isSCTErrorRecoveryControlCapable(&drive))
PrintOut(LOG_INFO, "Device: %s, no SCT Error Recovery Control support, ignoring -l scterc\n",
name);
else if (locked)
PrintOut(LOG_INFO, "Device: %s, no SCT support if ATA Security is LOCKED, ignoring -l scterc\n",
name);
else if ( ataSetSCTErrorRecoveryControltime(atadev, 1, cfg.sct_erc_readtime, false, false )
|| ataSetSCTErrorRecoveryControltime(atadev, 2, cfg.sct_erc_writetime, false, false))
PrintOut(LOG_INFO, "Device: %s, set of SCT Error Recovery Control failed\n", name);
else
PrintOut(LOG_INFO, "Device: %s, SCT Error Recovery Control set to: Read: %u, Write: %u\n",
name, cfg.sct_erc_readtime, cfg.sct_erc_writetime);
}
// If no tests available or selected, return
if (!( cfg.smartcheck || cfg.selftest
|| cfg.errorlog || cfg.xerrorlog
|| cfg.offlinests || cfg.selfteststs
|| cfg.usagefailed || cfg.prefail || cfg.usage
|| cfg.tempdiff || cfg.tempinfo || cfg.tempcrit)) {
CloseDevice(atadev, name);
return 3;
}
// tell user we are registering device
PrintOut(LOG_INFO,"Device: %s, is SMART capable. Adding to \"monitor\" list.\n",name);
// close file descriptor
CloseDevice(atadev, name);
if (!state_path_prefix.empty() || !attrlog_path_prefix.empty()) {
// Build file name for state file
std::replace_if(model, model+strlen(model), not_allowed_in_filename, '_');
std::replace_if(serial, serial+strlen(serial), not_allowed_in_filename, '_');
if (!state_path_prefix.empty()) {
cfg.state_file = strprintf("%s%s-%s.ata.state", state_path_prefix.c_str(), model, serial);
// Read previous state
if (read_dev_state(cfg.state_file.c_str(), state)) {
PrintOut(LOG_INFO, "Device: %s, state read from %s\n", name, cfg.state_file.c_str());
// Copy ATA attribute values to temp state
state.update_temp_state();
}
}
if (!attrlog_path_prefix.empty())
cfg.attrlog_file = strprintf("%s%s-%s.ata.csv", attrlog_path_prefix.c_str(), model, serial);
}
finish_device_scan(cfg, state);
return 0;
}
// Returns 0 on success and a value >0 on failure.
// Must never return a negative value.
static int SCSIDeviceScan(dev_config & cfg, dev_state & state, scsi_device * scsidev,
const dev_config_vector * prev_cfgs)
{
int err, req_len, avail_len, version, len;
const char *device = cfg.name.c_str();
struct scsi_iec_mode_page iec;
uint8_t tBuf[64];
uint8_t inqBuf[96];
uint8_t vpdBuf[252];
char lu_id[64], serial[256], vendor[40], model[40];
// Device must be open
memset(inqBuf, 0, 96);
req_len = 36;
if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
/* Marvell controllers fail on a 36 bytes StdInquiry, but 64 suffices */
req_len = 64;
if ((err = scsiStdInquiry(scsidev, inqBuf, req_len))) {
PrintOut(LOG_INFO, "Device: %s, Both 36 and 64 byte INQUIRY failed; "
"skip device\n", device);
return 2;
}
}
version = (inqBuf[2] & 0x7f); /* Accept old ISO/IEC 9316:1995 variants */
avail_len = inqBuf[4] + 5;
len = (avail_len < req_len) ? avail_len : req_len;
if (len < 36) {
PrintOut(LOG_INFO, "Device: %s, INQUIRY response less than 36 bytes; "
"skip device\n", device);
return 2;
}
int pdt = inqBuf[0] & 0x1f;
switch (pdt) {
case SCSI_PT_DIRECT_ACCESS:
case SCSI_PT_WO:
case SCSI_PT_CDROM:
case SCSI_PT_OPTICAL:
case SCSI_PT_RBC: /* Reduced Block commands */
case SCSI_PT_HOST_MANAGED: /* Zoned disk */
break;
default:
PrintOut(LOG_INFO, "Device: %s, not a disk like device [PDT=0x%x], "
"skip\n", device, pdt);
return 2;
}
if (supported_vpd_pages_p) {
delete supported_vpd_pages_p;
supported_vpd_pages_p = nullptr;
}
supported_vpd_pages_p = new supported_vpd_pages(scsidev);
lu_id[0] = '\0';
if (version >= 0x3) {
/* SPC to SPC-5, assume SPC-6 is version==8 or higher */
if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_DEVICE_IDENTIFICATION,
vpdBuf, sizeof(vpdBuf))) {
len = vpdBuf[3];
scsi_decode_lu_dev_id(vpdBuf + 4, len, lu_id, sizeof(lu_id), nullptr);
}
}
serial[0] = '\0';
if (0 == scsiInquiryVpd(scsidev, SCSI_VPD_UNIT_SERIAL_NUMBER,
vpdBuf, sizeof(vpdBuf))) {
len = vpdBuf[3];
vpdBuf[4 + len] = '\0';
scsi_format_id_string(serial, &vpdBuf[4], len);
}
char si_str[64];
struct scsi_readcap_resp srr;
uint64_t capacity = scsiGetSize(scsidev, scsidev->use_rcap16(), &srr);
if (capacity)
format_capacity(si_str, sizeof(si_str), capacity, ".");
else
si_str[0] = '\0';
// Format device id string for warning emails