Date: Sat, 28 May 2016 17:43:41 +0000 (UTC) From: Alan Somers <asomers@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r300906 - in head: cddl/usr.sbin cddl/usr.sbin/zfsd cddl/usr.sbin/zfsd/tests etc/defaults etc/mtree etc/rc.d lib lib/libdevdctl lib/libdevdctl/tests share/mk sys/cddl/contrib/opensolari... Message-ID: <201605281743.u4SHhfNo019820@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: asomers Date: Sat May 28 17:43:40 2016 New Revision: 300906 URL: https://svnweb.freebsd.org/changeset/base/300906 Log: zfsd(8), the ZFS fault management daemon Add zfsd, which deals with hard drive faults in ZFS pools. It manages hotspares and replements in drive slots that publish physical paths. cddl/usr.sbin/zfsd Add zfsd(8) and its unit tests cddl/usr.sbin/Makefile Add zfsd to the build lib/libdevdctl A C++ library that helps devd clients process events lib/Makefile share/mk/bsd.libnames.mk share/mk/src.libnames.mk Add libdevdctl to the build. It's a private library, unusable by out-of-tree software. etc/defaults/rc.conf By default, set zfsd_enable to NO etc/mtree/BSD.include.dist Add a directory for libdevdctl's include files etc/mtree/BSD.tests.dist Add a directory for zfsd's unit tests etc/mtree/BSD.var.dist Add /var/db/zfsd/cases, where zfsd stores case files while it's shut down. etc/rc.d/Makefile etc/rc.d/zfsd Add zfsd's rc script sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c Fix the resource.fs.zfs.statechange message. It had a number of problems: It was only being emitted on a transition to the HEALTHY state. That made it impossible for zfsd to take actions based on drives getting sicker. It compared the new state to vdev_prevstate, which is the state that the vdev had the last time it was opened. That doesn't make sense, because a vdev can change state multiple times without being reopened. vdev_set_state contains logic that will change the device's new state based on various conditions. However, the statechange event was being posted _before_ that logic took effect. Now it's being posted after. Submitted by: gibbs, asomers, mav, allanjude Reviewed by: mav, delphij Relnotes: yes Sponsored by: Spectra Logic Corp, iX Systems Differential Revision: https://reviews.freebsd.org/D6564 Added: head/cddl/usr.sbin/zfsd/ head/cddl/usr.sbin/zfsd/Makefile (contents, props changed) head/cddl/usr.sbin/zfsd/Makefile.common (contents, props changed) head/cddl/usr.sbin/zfsd/callout.cc (contents, props changed) head/cddl/usr.sbin/zfsd/callout.h (contents, props changed) head/cddl/usr.sbin/zfsd/case_file.cc (contents, props changed) head/cddl/usr.sbin/zfsd/case_file.h (contents, props changed) head/cddl/usr.sbin/zfsd/tests/ head/cddl/usr.sbin/zfsd/tests/Makefile (contents, props changed) head/cddl/usr.sbin/zfsd/tests/libmocks.c (contents, props changed) head/cddl/usr.sbin/zfsd/tests/libmocks.h (contents, props changed) head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.cc (contents, props changed) head/cddl/usr.sbin/zfsd/tests/zfsd_unittest.supp (contents, props changed) head/cddl/usr.sbin/zfsd/vdev.cc (contents, props changed) head/cddl/usr.sbin/zfsd/vdev.h (contents, props changed) head/cddl/usr.sbin/zfsd/vdev_iterator.cc (contents, props changed) head/cddl/usr.sbin/zfsd/vdev_iterator.h (contents, props changed) head/cddl/usr.sbin/zfsd/zfsd.8 (contents, props changed) head/cddl/usr.sbin/zfsd/zfsd.cc (contents, props changed) head/cddl/usr.sbin/zfsd/zfsd.h (contents, props changed) head/cddl/usr.sbin/zfsd/zfsd_event.cc (contents, props changed) head/cddl/usr.sbin/zfsd/zfsd_event.h (contents, props changed) head/cddl/usr.sbin/zfsd/zfsd_exception.cc (contents, props changed) head/cddl/usr.sbin/zfsd/zfsd_exception.h (contents, props changed) head/cddl/usr.sbin/zfsd/zfsd_main.cc (contents, props changed) head/cddl/usr.sbin/zfsd/zpool_list.cc (contents, props changed) head/cddl/usr.sbin/zfsd/zpool_list.h (contents, props changed) head/etc/rc.d/zfsd (contents, props changed) head/lib/libdevdctl/ head/lib/libdevdctl/Makefile (contents, props changed) head/lib/libdevdctl/consumer.cc (contents, props changed) head/lib/libdevdctl/consumer.h (contents, props changed) head/lib/libdevdctl/event.cc (contents, props changed) head/lib/libdevdctl/event.h (contents, props changed) head/lib/libdevdctl/event_factory.cc (contents, props changed) head/lib/libdevdctl/event_factory.h (contents, props changed) head/lib/libdevdctl/exception.cc (contents, props changed) head/lib/libdevdctl/exception.h (contents, props changed) head/lib/libdevdctl/guid.cc (contents, props changed) head/lib/libdevdctl/guid.h (contents, props changed) head/lib/libdevdctl/tests/ head/lib/libdevdctl/tests/Makefile (contents, props changed) head/lib/libdevdctl/tests/libdevdctl_unittest.cc (contents, props changed) Modified: head/cddl/usr.sbin/Makefile head/etc/defaults/rc.conf head/etc/mtree/BSD.include.dist head/etc/mtree/BSD.tests.dist head/etc/mtree/BSD.var.dist head/etc/rc.d/Makefile head/lib/Makefile head/share/mk/bsd.libnames.mk head/share/mk/src.libnames.mk head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c Modified: head/cddl/usr.sbin/Makefile ============================================================================== --- head/cddl/usr.sbin/Makefile Sat May 28 16:38:09 2016 (r300905) +++ head/cddl/usr.sbin/Makefile Sat May 28 17:43:40 2016 (r300906) @@ -7,6 +7,7 @@ SUBDIR= ${_dtrace} \ ${_plockstat} \ ${_tests} \ ${_zdb} \ + ${_zfsd} \ ${_zhack} .if ${MK_TESTS} != "no" @@ -18,6 +19,9 @@ _tests= tests _zdb= zdb _zhack= zhack .endif +. if ${MK_CXX} != "no" +_zfsd= zfsd +. endif .endif .if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "i386" Added: head/cddl/usr.sbin/zfsd/Makefile ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/Makefile Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,13 @@ +# $FreeBSD$ + +SRCDIR=${.CURDIR}/../../.. +.include "Makefile.common" + +PROG_CXX= zfsd +MAN= zfsd.8 + +.include <bsd.prog.mk> + +# The unittests require devel/googletest and devel/googlemock from ports. +# Don't automatically build them. +SUBDIR= Added: head/cddl/usr.sbin/zfsd/Makefile.common ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/Makefile.common Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,42 @@ +# $FreeBSD$ + +SRCS= callout.cc \ + case_file.cc \ + zfsd_event.cc \ + vdev.cc \ + vdev_iterator.cc \ + zfsd.cc \ + zfsd_exception.cc \ + zpool_list.cc \ + zfsd_main.cc + +WARNS?= 3 + +# Ignore warnings about Solaris specific pragmas. +IGNORE_PRAGMA= YES + +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzpool/common +INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/include +INCFLAGS+= -I${SRCDIR}/cddl/compat/opensolaris/lib/libumem +INCFLAGS+= -I${SRCDIR}/sys/cddl/compat/opensolaris +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/head +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libuutil/common +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libumem/common +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs_core/common +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libzfs/common +INCFLAGS+= -I${SRCDIR}/cddl/contrib/opensolaris/lib/libnvpair +INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/common/zfs +INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common +INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs +INCFLAGS+= -I${SRCDIR}/sys/cddl/contrib/opensolaris/uts/common/sys + +CFLAGS= -g -DNEED_SOLARIS_BOOLEAN ${INCFLAGS} + +DPADD= ${LIBDEVDCTL} ${LIBZFS} ${LIBZFS_CORE} ${LIBUTIL} ${LIBGEOM} \ + ${LIBBSDXML} ${LIBSBUF} ${LIBNVPAIR} ${LIBUUTIL} +LIBADD= devdctl zfs zfs_core util geom bsdxml sbuf nvpair uutil + +cscope: + find ${.CURDIR} -type f -a \( -name "*.[ch]" -o -name "*.cc" \) \ + > ${.CURDIR}/cscope.files + cd ${.CURDIR} && cscope -buq ${INCFLAGS} Added: head/cddl/usr.sbin/zfsd/callout.cc ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/callout.cc Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,219 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file callout.cc + * + * \brief Implementation of the Callout class - multi-client + * timer services built on top of the POSIX interval timer. + */ + +#include <sys/time.h> + +#include <signal.h> +#include <syslog.h> + +#include <climits> +#include <list> +#include <map> +#include <string> + +#include <devdctl/guid.h> +#include <devdctl/event.h> +#include <devdctl/event_factory.h> +#include <devdctl/consumer.h> +#include <devdctl/exception.h> + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd.h" +#include "zfsd_exception.h" + +std::list<Callout *> Callout::s_activeCallouts; +bool Callout::s_alarmFired(false); + +void +Callout::Init() +{ + signal(SIGALRM, Callout::AlarmSignalHandler); +} + +bool +Callout::Stop() +{ + if (!IsPending()) + return (false); + + for (std::list<Callout *>::iterator it(s_activeCallouts.begin()); + it != s_activeCallouts.end(); it++) { + if (*it != this) + continue; + + it = s_activeCallouts.erase(it); + if (it != s_activeCallouts.end()) { + + /* + * Maintain correct interval for the + * callouts that follow the just removed + * entry. + */ + timeradd(&(*it)->m_interval, &m_interval, + &(*it)->m_interval); + } + break; + } + m_pending = false; + return (true); +} + +bool +Callout::Reset(const timeval &interval, CalloutFunc_t *func, void *arg) +{ + bool cancelled(false); + + if (!timerisset(&interval)) + throw ZfsdException("Callout::Reset: interval of 0"); + + cancelled = Stop(); + + m_interval = interval; + m_func = func; + m_arg = arg; + m_pending = true; + + std::list<Callout *>::iterator it(s_activeCallouts.begin()); + for (; it != s_activeCallouts.end(); it++) { + + if (timercmp(&(*it)->m_interval, &m_interval, <=)) { + /* + * Decrease our interval by those that come + * before us. + */ + timersub(&m_interval, &(*it)->m_interval, &m_interval); + } else { + /* + * Account for the time between the newly + * inserted event and those that follow. + */ + timersub(&(*it)->m_interval, &m_interval, + &(*it)->m_interval); + break; + } + } + s_activeCallouts.insert(it, this); + + + if (s_activeCallouts.front() == this) { + itimerval timerval = { {0, 0}, m_interval }; + + setitimer(ITIMER_REAL, &timerval, NULL); + } + + return (cancelled); +} + +void +Callout::AlarmSignalHandler(int) +{ + s_alarmFired = true; + ZfsDaemon::WakeEventLoop(); +} + +void +Callout::ExpireCallouts() +{ + if (!s_alarmFired) + return; + + s_alarmFired = false; + if (s_activeCallouts.empty()) { + /* Callout removal/SIGALRM race was lost. */ + return; + } + + /* + * Expire the first callout (the one we used to set the + * interval timer) as well as any callouts following that + * expire at the same time (have a zero interval from + * the callout before it). + */ + do { + Callout *cur(s_activeCallouts.front()); + s_activeCallouts.pop_front(); + cur->m_pending = false; + cur->m_func(cur->m_arg); + } while (!s_activeCallouts.empty() + && timerisset(&s_activeCallouts.front()->m_interval) == 0); + + if (!s_activeCallouts.empty()) { + Callout *next(s_activeCallouts.front()); + itimerval timerval = { { 0, 0 }, next->m_interval }; + + setitimer(ITIMER_REAL, &timerval, NULL); + } +} + +timeval +Callout::TimeRemaining() const +{ + /* + * Outline: Add the m_interval for each callout in s_activeCallouts + * ahead of this, except for the first callout. Add to that the result + * of getitimer (That's because the first callout stores its original + * interval setting while the timer is ticking). + */ + itimerval timervalToAlarm; + timeval timeToExpiry; + std::list<Callout *>::iterator it; + + if (!IsPending()) { + timeToExpiry.tv_sec = INT_MAX; + timeToExpiry.tv_usec = 999999; /*maximum normalized value*/ + return (timeToExpiry); + } + + timerclear(&timeToExpiry); + getitimer(ITIMER_REAL, &timervalToAlarm); + timeval& timeToAlarm = timervalToAlarm.it_value; + timeradd(&timeToExpiry, &timeToAlarm, &timeToExpiry); + + it =s_activeCallouts.begin(); + it++; /*skip the first callout in the list*/ + for (; it != s_activeCallouts.end(); it++) { + timeradd(&timeToExpiry, &(*it)->m_interval, &timeToExpiry); + if ((*it) == this) + break; + } + return (timeToExpiry); +} Added: head/cddl/usr.sbin/zfsd/callout.h ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/callout.h Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,185 @@ +/*- + * Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + * + * $FreeBSD$ + */ + +/** + * \file callout.h + * + * \brief Interface for timer based callback services. + * + * Header requirements: + * + * #include <sys/time.h> + * + * #include <list> + */ + +#ifndef _CALLOUT_H_ +#define _CALLOUT_H_ + +/** + * \brief Type of the function callback from a Callout. + */ +typedef void CalloutFunc_t(void *); + +/** + * \brief Interface to a schedulable one-shot timer with the granularity + * of the system clock (see setitimer(2)). + * + * Determination of callback expiration is triggered by the SIGALRM + * signal. Callout callbacks are always delivered from Zfsd's event + * processing loop. + * + * Periodic actions can be triggered via the Callout mechanisms by + * resetting the Callout from within its callback. + */ +class Callout +{ +public: + + /** + * Initialize the Callout subsystem. + */ + static void Init(); + + /** + * Function called (via SIGALRM) when our interval + * timer expires. + */ + static void AlarmSignalHandler(int); + + /** + * Execute callbacks for all callouts that have the same + * expiration time as the first callout in the list. + */ + static void ExpireCallouts(); + + /** Constructor. */ + Callout(); + + /** + * Returns true if callout has not been stopped, + * or deactivated since the last time the callout was + * reset. + */ + bool IsActive() const; + + /** + * Returns true if callout is still waiting to expire. + */ + bool IsPending() const; + + /** + * Disestablish a callout. + */ + bool Stop(); + + /** + * \brief Establish or change a timeout. + * + * \param interval Timeval indicating the time which must elapse + * before this callout fires. + * \param func Pointer to the callback funtion + * \param arg Argument pointer to pass to callback function + * + * \return Cancellation status. + * true: The previous callback was pending and therefore + * was cancelled. + * false: The callout was not pending at the time of this + * reset request. + * In all cases, a new callout is established. + */ + bool Reset(const timeval &interval, CalloutFunc_t *func, void *arg); + + /** + * \brief Calculate the remaining time until this Callout's timer + * expires. + * + * The return value will be slightly greater than the actual time to + * expiry. + * + * If the callout is not pending, returns INT_MAX. + */ + timeval TimeRemaining() const; + +private: + /** + * All active callouts sorted by expiration time. The callout + * with the nearest expiration time is at the head of the list. + */ + static std::list<Callout *> s_activeCallouts; + + /** + * The interval timer has expired. This variable is set from + * signal handler context and tested from Zfsd::EventLoop() + * context via ExpireCallouts(). + */ + static bool s_alarmFired; + + /** + * Time, relative to others in the active list, until + * this callout is fired. + */ + timeval m_interval; + + /** Callback function argument. */ + void *m_arg; + + /** + * The callback function associated with this timer + * entry. + */ + CalloutFunc_t *m_func; + + /** State of this callout. */ + bool m_pending; +}; + +//- Callout public const methods ---------------------------------------------- +inline bool +Callout::IsPending() const +{ + return (m_pending); +} + +//- Callout public methods ---------------------------------------------------- +inline +Callout::Callout() + : m_arg(0), + m_func(NULL), + m_pending(false) +{ + timerclear(&m_interval); +} + +#endif /* CALLOUT_H_ */ Added: head/cddl/usr.sbin/zfsd/case_file.cc ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/cddl/usr.sbin/zfsd/case_file.cc Sat May 28 17:43:40 2016 (r300906) @@ -0,0 +1,1104 @@ +/*- + * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + * + * Authors: Justin T. Gibbs (Spectra Logic Corporation) + */ + +/** + * \file case_file.cc + * + * We keep case files for any leaf vdev that is not in the optimal state. + * However, we only serialize to disk those events that need to be preserved + * across reboots. For now, this is just a log of soft errors which we + * accumulate in order to mark a device as degraded. + */ +#include <sys/cdefs.h> +#include <sys/time.h> + +#include <sys/fs/zfs.h> + +#include <dirent.h> +#include <iomanip> +#include <fstream> +#include <functional> +#include <sstream> +#include <syslog.h> +#include <unistd.h> + +#include <libzfs.h> + +#include <list> +#include <map> +#include <string> + +#include <devdctl/guid.h> +#include <devdctl/event.h> +#include <devdctl/event_factory.h> +#include <devdctl/exception.h> +#include <devdctl/consumer.h> + +#include "callout.h" +#include "vdev_iterator.h" +#include "zfsd_event.h" +#include "case_file.h" +#include "vdev.h" +#include "zfsd.h" +#include "zfsd_exception.h" +#include "zpool_list.h" + +__FBSDID("$FreeBSD$"); + +/*============================ Namespace Control =============================*/ +using std::auto_ptr; +using std::hex; +using std::ifstream; +using std::stringstream; +using std::setfill; +using std::setw; + +using DevdCtl::Event; +using DevdCtl::EventBuffer; +using DevdCtl::EventFactory; +using DevdCtl::EventList; +using DevdCtl::Guid; +using DevdCtl::ParseException; + +/*--------------------------------- CaseFile ---------------------------------*/ +//- CaseFile Static Data ------------------------------------------------------- + +CaseFileList CaseFile::s_activeCases; +const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases"; +const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/}; + +//- CaseFile Static Public Methods --------------------------------------------- +CaseFile * +CaseFile::Find(Guid poolGUID, Guid vdevGUID) +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + + if ((*curCase)->PoolGUID() != poolGUID + || (*curCase)->VdevGUID() != vdevGUID) + continue; + + /* + * We only carry one active case per-vdev. + */ + return (*curCase); + } + return (NULL); +} + +CaseFile * +CaseFile::Find(const string &physPath) +{ + CaseFile *result = NULL; + + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) { + + if ((*curCase)->PhysicalPath() != physPath) + continue; + + if (result != NULL) { + syslog(LOG_WARNING, "Multiple casefiles found for " + "physical path %s. " + "This is most likely a bug in zfsd", + physPath.c_str()); + } + result = *curCase; + } + return (result); +} + + +void +CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event) +{ + CaseFileList::iterator casefile; + for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){ + CaseFileList::iterator next = casefile; + next++; + if (poolGUID == (*casefile)->PoolGUID()) + (*casefile)->ReEvaluate(event); + casefile = next; + } +} + +CaseFile & +CaseFile::Create(Vdev &vdev) +{ + CaseFile *activeCase; + + activeCase = Find(vdev.PoolGUID(), vdev.GUID()); + if (activeCase == NULL) + activeCase = new CaseFile(vdev); + + return (*activeCase); +} + +void +CaseFile::DeSerialize() +{ + struct dirent **caseFiles; + + int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles, + DeSerializeSelector, /*compar*/NULL)); + + if (numCaseFiles == -1) + return; + if (numCaseFiles == 0) { + free(caseFiles); + return; + } + + for (int i = 0; i < numCaseFiles; i++) { + + DeSerializeFile(caseFiles[i]->d_name); + free(caseFiles[i]); + } + free(caseFiles); +} + +void +CaseFile::LogAll() +{ + for (CaseFileList::iterator curCase = s_activeCases.begin(); + curCase != s_activeCases.end(); curCase++) + (*curCase)->Log(); +} + +void +CaseFile::PurgeAll() +{ + /* + * Serialize casefiles before deleting them so that they can be reread + * and revalidated during BuildCaseFiles. + * CaseFiles remove themselves from this list on destruction. + */ + while (s_activeCases.size() != 0) { + CaseFile *casefile = s_activeCases.front(); + casefile->Serialize(); + delete casefile; + } + +} + +//- CaseFile Public Methods ---------------------------------------------------- +bool +CaseFile::RefreshVdevState() +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front()); + if (casePool == NULL) + return (false); + + Vdev vd(casePool, CaseVdev(casePool)); + if (vd.DoesNotExist()) + return (false); + + m_vdevState = vd.State(); + m_vdevPhysPath = vd.PhysicalPath(); + return (true); +} + +bool +CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev) +{ + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front()); + + if (pool == NULL || !RefreshVdevState()) { + /* + * The pool or vdev for this case file is no longer + * part of the configuration. This can happen + * if we process a device arrival notification + * before seeing the ZFS configuration change + * event. + */ + syslog(LOG_INFO, + "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. " + "Closing\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + Close(); + + /* + * Since this event was not used to close this + * case, do not report it as consumed. + */ + return (/*consumed*/false); + } + + if (VdevState() > VDEV_STATE_CANT_OPEN) { + /* + * For now, newly discovered devices only help for + * devices that are missing. In the future, we might + * use a newly inserted spare to replace a degraded + * or faulted device. + */ + syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored", + PoolGUIDString().c_str(), VdevGUIDString().c_str()); + return (/*consumed*/false); + } + + if (vdev != NULL + && vdev->PoolGUID() == m_poolGUID + && vdev->GUID() == m_vdevGUID) { + + zpool_vdev_online(pool, vdev->GUIDString().c_str(), + ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, + &m_vdevState); + syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n", + zpool_get_name(pool), vdev->GUIDString().c_str(), + devPath.c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + + /* + * Check the vdev state post the online action to see + * if we can retire this case. + */ + CloseIfSolved(); + + return (/*consumed*/true); + } + + /* + * If the auto-replace policy is enabled, and we have physical + * path information, try a physical path replacement. + */ + if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): AutoReplace not set. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (/*consumed*/false); + } + + if (PhysicalPath().empty()) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): No physical path information. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (/*consumed*/false); + } + + if (physPath != PhysicalPath()) { + syslog(LOG_INFO, + "CaseFile(%s:%s:%s): Physical path mismatch. " + "Ignoring device insertion.\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str(), + zpool_state_to_name(VdevState(), VDEV_AUX_NONE)); + return (/*consumed*/false); + } + + /* Write a label on the newly inserted disk. */ + if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) { + syslog(LOG_ERR, + "Replace vdev(%s/%s) by physical path (label): %s: %s\n", + zpool_get_name(pool), VdevGUIDString().c_str(), + libzfs_error_action(g_zfsHandle), + libzfs_error_description(g_zfsHandle)); + return (/*consumed*/false); + } + + syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s", + PoolGUIDString().c_str(), VdevGUIDString().c_str(), + devPath.c_str()); + return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false)); +} + +bool +CaseFile::ReEvaluate(const ZfsEvent &event) +{ + bool consumed(false); + + if (event.Value("type") == "misc.fs.zfs.vdev_remove") { + /* + * The Vdev we represent has been removed from the + * configuration. This case is no longer of value. + */ + Close(); + + return (/*consumed*/true); + } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") { + /* This Pool has been destroyed. Discard the case */ + Close(); + + return (/*consumed*/true); + } else if (event.Value("type") == "misc.fs.zfs.config_sync") { + RefreshVdevState(); + if (VdevState() < VDEV_STATE_HEALTHY) + consumed = ActivateSpare(); + } + + + if (event.Value("class") == "resource.fs.zfs.removed") { + bool spare_activated; + + if (!RefreshVdevState()) { + /* + * The pool or vdev for this case file is no longer + * part of the configuration. This can happen + * if we process a device arrival notification + * before seeing the ZFS configuration change + * event. + */ + syslog(LOG_INFO, + "CaseFile::ReEvaluate(%s,%s) Pool/Vdev " + "unconfigured. Closing\n", + PoolGUIDString().c_str(), + VdevGUIDString().c_str()); + /* + * Close the case now so we won't waste cycles in the + * system rescan + */ + Close(); + + /* + * Since this event was not used to close this + * case, do not report it as consumed. + */ + return (/*consumed*/false); + } + + /* + * Discard any tentative I/O error events for + * this case. They were most likely caused by the + * hot-unplug of this device. + */ + PurgeTentativeEvents(); + + /* Try to activate spares if they are available */ + spare_activated = ActivateSpare(); + + /* + * Rescan the drives in the system to see if a recent + * drive arrival can be used to solve this case. + */ + ZfsDaemon::RequestSystemRescan(); + + /* + * Consume the event if we successfully activated a spare. + * Otherwise, leave it in the unconsumed events list so that the + * future addition of a spare to this pool might be able to + * close the case + */ + consumed = spare_activated; + } else if (event.Value("class") == "resource.fs.zfs.statechange") { + RefreshVdevState(); + /* + * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to + * activate a hotspare. Otherwise, ignore the event + */ + if (VdevState() == VDEV_STATE_FAULTED || + VdevState() == VDEV_STATE_DEGRADED || + VdevState() == VDEV_STATE_CANT_OPEN) + (void) ActivateSpare(); + consumed = true; + } + else if (event.Value("class") == "ereport.fs.zfs.io" || + event.Value("class") == "ereport.fs.zfs.checksum") { + + m_tentativeEvents.push_front(event.DeepCopy()); + RegisterCallout(event); + consumed = true; + } + + bool closed(CloseIfSolved()); + + return (consumed || closed); +} + + +bool +CaseFile::ActivateSpare() { + nvlist_t *config, *nvroot; + nvlist_t **spares; + char *devPath, *vdev_type; + const char *poolname; + u_int nspares, i; + int error; + + ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID); + zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front()); + if (zhp == NULL) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " + "for pool_guid %"PRIu64".", (uint64_t)m_poolGUID); + return (false); + } + poolname = zpool_get_name(zhp); + config = zpool_get_config(zhp, NULL); + if (config == NULL) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool " + "config for pool %s", poolname); + return (false); + } + error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot); + if (error != 0){ + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev " + "tree for pool %s", poolname); + return (false); + } + nspares = 0; + nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares, + &nspares); + if (nspares == 0) { + /* The pool has no spares configured */ + syslog(LOG_INFO, "CaseFile::ActivateSpare: " + "No spares available for pool %s", poolname); + return (false); + } + for (i = 0; i < nspares; i++) { + uint64_t *nvlist_array; + vdev_stat_t *vs; + uint_t nstats; + + if (nvlist_lookup_uint64_array(spares[i], + ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) { + syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not " + "find vdev stats for pool %s, spare %d", + poolname, i); + return (false); + } + vs = reinterpret_cast<vdev_stat_t *>(nvlist_array); + + if ((vs->vs_aux != VDEV_AUX_SPARED) + && (vs->vs_state == VDEV_STATE_HEALTHY)) { + /* We found a usable spare */ + break; + } + } + + if (i == nspares) { + /* No available spares were found */ + return (false); *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201605281743.u4SHhfNo019820>