From owner-svn-src-user@FreeBSD.ORG Fri Nov 18 17:39:20 2011 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 8C613106566B; Fri, 18 Nov 2011 17:39:20 +0000 (UTC) (envelope-from sbruno@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 7B6678FC23; Fri, 18 Nov 2011 17:39:20 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id pAIHdKNR088484; Fri, 18 Nov 2011 17:39:20 GMT (envelope-from sbruno@svn.freebsd.org) Received: (from sbruno@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id pAIHdKjn088479; Fri, 18 Nov 2011 17:39:20 GMT (envelope-from sbruno@svn.freebsd.org) Message-Id: <201111181739.pAIHdKjn088479@svn.freebsd.org> From: Sean Bruno Date: Fri, 18 Nov 2011 17:39:20 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r227669 - user/sbruno/mptd X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 18 Nov 2011 17:39:20 -0000 Author: sbruno Date: Fri Nov 18 17:39:20 2011 New Revision: 227669 URL: http://svn.freebsd.org/changeset/base/227669 Log: Add the Yahoo! mpt(4) monitoring utility for review. Compiles against amd64 freebsd-current at this time. Installs a /usr/sbin/mptd and can be started/stopped via the included rc script. Obtained from: Yahoo! Inc. 
and jhb@ in a former life Added: user/sbruno/mptd/ user/sbruno/mptd/Makefile user/sbruno/mptd/mpt_cam.c user/sbruno/mptd/mpt_cmd.c user/sbruno/mptd/mptd.c user/sbruno/mptd/mptd.h user/sbruno/mptd/mptd.rc (contents, props changed) Added: user/sbruno/mptd/Makefile ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/sbruno/mptd/Makefile Fri Nov 18 17:39:20 2011 (r227669) @@ -0,0 +1,12 @@ +PROG= mptd +BINDIR= /usr/sbin + +SRCS= mptd.c mpt_cam.c mpt_cmd.c + +CFLAGS+= -g -Wall -Wunused + +MAN= + +LDADD+= -lcam + +.include Added: user/sbruno/mptd/mpt_cam.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/sbruno/mptd/mpt_cam.c Fri Nov 18 17:39:20 2011 (r227669) @@ -0,0 +1,139 @@ +/*- + * Copyright (c) 2011 Yahoo! Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mptd.h" + +static int xptfd; + +static int +xpt_open(void) +{ + + if (xptfd == 0) + xptfd = open(XPT_DEVICE, O_RDWR); + return (xptfd); +} + +int +mpt_query_disk(int fd, int unit, U8 VolumeBus, U8 VolumeID, + struct mpt_query_disk *qd) +{ + struct bus_match_pattern *b; + struct periph_match_pattern *p; + struct periph_match_result *r; + union ccb ccb; + size_t bufsize; + int i; + + /* mpt(4) only handles devices on bus 0. */ + if (VolumeBus != 0) + return (ENXIO); + + if (xpt_open() < 0) + return (ENXIO); + + bzero(&ccb, sizeof(ccb)); + + ccb.ccb_h.func_code = XPT_DEV_MATCH; + + bufsize = sizeof(struct dev_match_result) * 5; + ccb.cdm.num_matches = 0; + ccb.cdm.match_buf_len = bufsize; + ccb.cdm.matches = calloc(1, bufsize); + + bufsize = sizeof(struct dev_match_pattern) * 2; + ccb.cdm.num_patterns = 2; + ccb.cdm.pattern_buf_len = bufsize; + ccb.cdm.patterns = calloc(1, bufsize); + + /* Match mptX bus. */ + ccb.cdm.patterns[0].type = DEV_MATCH_BUS; + b = &ccb.cdm.patterns[0].pattern.bus_pattern; + snprintf(b->dev_name, sizeof(b->dev_name), "mpt"); + b->unit_number = unit; + b->flags = BUS_MATCH_NAME | BUS_MATCH_UNIT; + + /* Look for a "da" device at the specified target and lun. 
*/ + ccb.cdm.patterns[1].type = DEV_MATCH_PERIPH; + p = &ccb.cdm.patterns[1].pattern.periph_pattern; + snprintf(p->periph_name, sizeof(p->periph_name), "da"); + p->target_id = VolumeID; + p->flags = PERIPH_MATCH_NAME | PERIPH_MATCH_TARGET; + + if (ioctl(xptfd, CAMIOCOMMAND, &ccb) < 0) { + i = errno; + free(ccb.cdm.matches); + free(ccb.cdm.patterns); + return (i); + } + free(ccb.cdm.patterns); + + if (((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) || + (ccb.cdm.status != CAM_DEV_MATCH_LAST)) { + warnx("mpt_query_disk got CAM error %#x, CDM error %d\n", + ccb.ccb_h.status, ccb.cdm.status); + free(ccb.cdm.matches); + return (EIO); + } + + /* + * We should have exactly 2 matches, 1 for the bus and 1 for + * the peripheral. + */ + if (ccb.cdm.num_matches != 2) { + warnx("mpt_query_disk got %d matches, expected 2", + ccb.cdm.num_matches); + free(ccb.cdm.matches); + return (EIO); + } + if (ccb.cdm.matches[0].type != DEV_MATCH_BUS || + ccb.cdm.matches[1].type != DEV_MATCH_PERIPH) { + warnx("mpt_query_disk got wrong CAM matches"); + free(ccb.cdm.matches); + return (EIO); + } + + /* Copy out the data. */ + r = &ccb.cdm.matches[1].result.periph_result; + snprintf(qd->devname, sizeof(qd->devname), "%s%d", r->periph_name, + r->unit_number); + free(ccb.cdm.matches); + + return (0); +} Added: user/sbruno/mptd/mpt_cmd.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/sbruno/mptd/mpt_cmd.c Fri Nov 18 17:39:20 2011 (r227669) @@ -0,0 +1,409 @@ +/*- + * Copyright (c) 2011 Yahoo! Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "mptd.h" + +static const char *mpt_ioc_status_codes[] = { + "Success", /* 0x0000 */ + "Invalid function", + "Busy", + "Invalid scatter-gather list", + "Internal error", + "Reserved", + "Insufficient resources", + "Invalid field", + "Invalid state", /* 0x0008 */ + "Operation state not supported", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0010 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0018 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Invalid configuration action", /* 0x0020 */ + "Invalid configuration type", + "Invalid configuration page", + "Invalid configuration data", + "No configuration defaults", + "Unable to commit configuration change", + NULL, + NULL, + NULL, /* 0x0028 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0030 */ + NULL, + NULL, + NULL, + 
NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0038 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Recovered SCSI error", /* 0x0040 */ + "Invalid SCSI bus", + "Invalid SCSI target ID", + "SCSI device not there", + "SCSI data overrun", + "SCSI data underrun", + "SCSI I/O error", + "SCSI protocol error", + "SCSI task terminated", /* 0x0048 */ + "SCSI residual mismatch", + "SCSI task management failed", + "SCSI I/O controller terminated", + "SCSI external controller terminated", + "EEDP guard error", + "EEDP reference tag error", + "EEDP application tag error", + NULL, /* 0x0050 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0058 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "SCSI target priority I/O", /* 0x0060 */ + "Invalid SCSI target port", + "Invalid SCSI target I/O index", + "SCSI target aborted", + "No connection retryable", + "No connection", + "FC aborted", + "Invalid FC receive ID", + "FC did invalid", /* 0x0068 */ + "FC node logged out", + "Transfer count mismatch", + "STS data not set", + "FC exchange canceled", + "Data offset error", + "Too much write data", + "IU too short", + "ACK NAK timeout", /* 0x0070 */ + "NAK received", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, /* 0x0078 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "LAN device not found", /* 0x0080 */ + "LAN device failure", + "LAN transmit error", + "LAN transmit aborted", + "LAN receive error", + "LAN receive aborted", + "LAN partial packet", + "LAN canceled", + NULL, /* 0x0088 */ + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "SAS SMP request failed", /* 0x0090 */ + "SAS SMP data overrun", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Inband aborted", /* 0x0098 */ + "No inband connection", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + "Diagnostic released", /* 0x00A0 */ +}; + +#if 0 +static const char *mpt_raid_action_status_codes[] = { + "Success", + "Invalid action", + "Failure", + "Operation 
in progress", +}; +#endif + +const char * +mpt_ioc_status(U16 IOCStatus) +{ + static char buffer[16]; + + IOCStatus &= MPI_IOCSTATUS_MASK; + if (IOCStatus < sizeof(mpt_ioc_status_codes) / sizeof(char *) && + mpt_ioc_status_codes[IOCStatus] != NULL) + return (mpt_ioc_status_codes[IOCStatus]); + snprintf(buffer, sizeof(buffer), "Status: 0x%04x", IOCStatus); + return (buffer); +} + +#if 0 +const char * +mpt_raid_status(U16 ActionStatus) +{ + static char buffer[16]; + + if (ActionStatus < sizeof(mpt_raid_action_status_codes) / + sizeof(char *)) + return (mpt_raid_action_status_codes[ActionStatus]); + snprintf(buffer, sizeof(buffer), "Status: 0x%04x", ActionStatus); + return (buffer); +} + +const char * +mpt_raid_level(U8 VolumeType) +{ + static char buf[16]; + + switch (VolumeType) { + case MPI_RAID_VOL_TYPE_IS: + return ("RAID-0"); + case MPI_RAID_VOL_TYPE_IM: + return ("RAID-1"); + case MPI_RAID_VOL_TYPE_IME: + return ("RAID-1E"); + case MPI_RAID_VOL_TYPE_RAID_5: + return ("RAID-5"); + case MPI_RAID_VOL_TYPE_RAID_6: + return ("RAID-6"); + case MPI_RAID_VOL_TYPE_RAID_10: + return ("RAID-10"); + case MPI_RAID_VOL_TYPE_RAID_50: + return ("RAID-50"); + default: + sprintf(buf, "LVL 0x%02x", VolumeType); + return (buf); + } +} +#endif + +const char * +mpt_volume_name(int fd, int unit, U8 VolumeBus, U8 VolumeID) +{ + static struct mpt_query_disk info; + static char buf[16]; + + if (mpt_query_disk(fd, unit, VolumeBus, VolumeID, &info) != 0) { + /* + * We only print out the bus number if it is non-zero + * since mpt(4) only supports devices on bus zero + * anyway. 
+ */ + if (VolumeBus == 0) + snprintf(buf, sizeof(buf), "%d", VolumeID); + else + snprintf(buf, sizeof(buf), "%d:%d", VolumeBus, + VolumeID); + return (buf); + } + return (info.devname); +} + +void * +mpt_read_config_page(int fd, U8 PageType, U8 PageNumber, U32 PageAddress) +{ + struct mpt_cfg_page_req req; + void *buf; + int save_errno; + + bzero(&req, sizeof(req)); + req.header.PageType = PageType; + req.header.PageNumber = PageNumber; + req.page_address = PageAddress; + if (ioctl(fd, MPTIO_READ_CFG_HEADER, &req) < 0) + return (NULL); + if (!IOC_STATUS_SUCCESS(req.ioc_status)) { + warnx("Reading config page header failed: %s", + mpt_ioc_status(req.ioc_status)); + errno = EIO; + return (NULL); + } + req.len = req.header.PageLength * 4; + buf = malloc(req.len); + req.buf = buf; + bcopy(&req.header, buf, sizeof(req.header)); + if (ioctl(fd, MPTIO_READ_CFG_PAGE, &req) < 0) { + save_errno = errno; + free(buf); + errno = save_errno; + return (NULL); + } + if (!IOC_STATUS_SUCCESS(req.ioc_status)) { + warnx("Reading config page failed: %s", + mpt_ioc_status(req.ioc_status)); + free(buf); + errno = EIO; + return (NULL); + } + return (buf); +} + +int +mpt_write_config_page(int fd, void *buf) +{ + CONFIG_PAGE_HEADER *hdr; + struct mpt_cfg_page_req req; + + bzero(&req, sizeof(req)); + req.buf = buf; + hdr = buf; + req.len = hdr->PageLength * 4; + if (ioctl(fd, MPTIO_WRITE_CFG_PAGE, &req) < 0) + return (-1); + if (!IOC_STATUS_SUCCESS(req.ioc_status)) { + warnx("Writing config page failed: %s", + mpt_ioc_status(req.ioc_status)); + errno = EIO; + return (-1); + } + return (0); +} + +#if 0 +int +mpt_raid_action(int fd, U8 Action, U8 VolumeBus, U8 VolumeID, U8 PhysDiskNum, + U32 ActionDataWord, void *buf, int len, RAID_VOL0_STATUS *VolumeStatus, + U32 *ActionData, int datalen, U16 *IOCStatus, U16 *ActionStatus, int write) +{ + struct mpt_raid_action raid_act; + + if (IOCStatus != NULL) + *IOCStatus = MPI_IOCSTATUS_SUCCESS; + if (datalen > sizeof(raid_act.action_data)) { + errno = 
EINVAL; + return (-1); + } + bzero(&raid_act, sizeof(raid_act)); + raid_act.action = Action; + raid_act.volume_bus = VolumeBus; + raid_act.volume_id = VolumeID; + raid_act.phys_disk_num = PhysDiskNum; + raid_act.action_data_word = ActionDataWord; + if (buf != NULL && len != 0) { + raid_act.buf = buf; + raid_act.len = len; + raid_act.write = write; + } + + if (ioctl(fd, MPTIO_RAID_ACTION, &raid_act) < 0) + return (-1); + + if (!IOC_STATUS_SUCCESS(raid_act.ioc_status)) { + if (IOCStatus != NULL) { + *IOCStatus = raid_act.ioc_status; + return (0); + } + warnx("RAID action failed: %s", + mpt_ioc_status(raid_act.ioc_status)); + errno = EIO; + return (-1); + } + + if (ActionStatus != NULL) + *ActionStatus = raid_act.action_status; + if (raid_act.action_status != MPI_RAID_ACTION_ASTATUS_SUCCESS) { + if (ActionStatus != NULL) + return (0); + warnx("RAID action failed: %s", + mpt_raid_status(raid_act.action_status)); + errno = EIO; + return (-1); + } + + if (VolumeStatus != NULL) + *((U32 *)VolumeStatus) = raid_act.volume_status; + if (ActionData != NULL) + bcopy(raid_act.action_data, ActionData, datalen); + return (0); +} +#endif Added: user/sbruno/mptd/mptd.c ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ user/sbruno/mptd/mptd.c Fri Nov 18 17:39:20 2011 (r227669) @@ -0,0 +1,871 @@ +/*- + * Copyright (c) 2011 Yahoo! Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mptd.h" + +#define MAX_UNIT 16 + +#define DRIVE_FAILED(Status) \ + ((Status).State != MPI_PHYSDISK0_STATUS_ONLINE && \ + (Status).State != MPI_PHYSDISK0_STATUS_MISSING && \ + (Status).State != MPI_PHYSDISK0_STATUS_NOT_COMPATIBLE && \ + (Status).State != MPI_PHYSDISK0_STATUS_INITIALIZING) + +#define DRIVE_MISSING(Status) \ + ((Status).State == MPI_PHYSDISK0_STATUS_MISSING) + +#define DRIVE_REBUILDING(Status) \ + ((Status).State == MPI_PHYSDISK0_STATUS_ONLINE && \ + ((Status).Flags & MPI_PHYSDISK0_STATUS_FLAG_OUT_OF_SYNC)) + +#define VOLUME_DEGRADED(Status) \ + ((Status.State) != MPI_RAIDVOL0_STATUS_STATE_OPTIMAL) + +static char hostname[MAXHOSTNAMELEN]; +static char *mailto = "root@localhost"; +static int notifyminutes = 720; /* send mail every 12 hours by default */ +static int dostdout; + +/* Maximum target_id and device_id of volumes and drives, respectively. 
*/ +#define MPT_MAX_VOL_ID 65536 +#define MPT_MAX_PD_ID 256 + +struct mpt_physdisk { + uint32_t generation; + U8 PhysDiskBus; + U8 PhysDiskID; + U8 PhysDiskNum; + RAID_PHYS_DISK0_STATUS Status; + uint8_t spare; +}; + +struct mpt_volume { + CONFIG_PAGE_RAID_VOL_0 *config; + RAID_VOL0_STATUS prev_status; + int sentcnt; + uint32_t generation; + U8 VolumeBus; + U8 VolumeID; + int missing_drives; + int prev_missing_drives; +}; + +struct mpt_controller { + int fd; + int unit; + uint32_t generation; + int missing_drives; + int prev_missing_drives; + int bad_drives; + int prev_bad_drives; + int sentcnt; + struct mpt_volume *volumes[MPT_MAX_VOL_ID]; + struct mpt_physdisk *physdisks[MPT_MAX_PD_ID]; +}; + +static struct mpt_controller controllers[MAX_UNIT]; +static int ncontrollers; + +static int +mpt_drive_location(char *p, struct mpt_physdisk *pd) +{ + + return (sprintf(p, "bus %d id %d", pd->PhysDiskBus, pd->PhysDiskID)); +} + +static void +mpt_scan_drive(struct mpt_controller *c, U8 PhysDiskNum, struct mpt_volume *v) +{ + CONFIG_PAGE_RAID_PHYS_DISK_0 *info; + struct mpt_physdisk *pd; + + info = mpt_pd_info(c->fd, PhysDiskNum); + if (info == NULL) { + warn("mpt%d:disk%d: failed to fetch disk info", c->unit, + PhysDiskNum); + return; + } + + /* See if we have seen this drive before. */ + pd = c->physdisks[PhysDiskNum]; + if (pd == NULL) { + pd = calloc(1, sizeof(struct mpt_physdisk)); + pd->PhysDiskNum = PhysDiskNum; + c->physdisks[PhysDiskNum] = pd; + pd->PhysDiskBus = info->PhysDiskBus; + pd->PhysDiskID = info->PhysDiskID; + } + + /* Update generation count and other state. 
*/ + pd->generation = c->generation; + pd->Status = info->PhysDiskStatus; + pd->spare = (info->PhysDiskSettings.HotSparePool != 0); + if (DRIVE_MISSING(info->PhysDiskStatus)) { + if (v != NULL) + v->missing_drives++; + else + c->missing_drives++; + } + free(info); +} + +static void +mpt_scan_volume(struct mpt_controller *c, CONFIG_PAGE_IOC_2_RAID_VOL *vol) +{ + CONFIG_PAGE_RAID_VOL_0 *info; + RAID_VOL0_PHYS_DISK *disk; + struct mpt_volume *v; + int i; + + info = mpt_vol_info(c->fd, vol->VolumeBus, vol->VolumeID); + if (info == NULL) { + warn("mpt%d:%d:%d: failed to fetch volume info", c->unit, + vol->VolumeBus, vol->VolumeID); + return; + } + + /* See if we have seen this drive before. */ + v = c->volumes[vol->VolumeBus * 256 + vol->VolumeID]; + if (v == NULL) { + v = calloc(1, sizeof(struct mpt_volume)); + v->VolumeBus = vol->VolumeBus; + v->VolumeID = vol->VolumeID; + c->volumes[v->VolumeBus * 256 + vol->VolumeID] = v; + + v->prev_status = info->VolumeStatus; + } else { + v->prev_status = v->config->VolumeStatus; + free(v->config); + } + + /* Update generation count and other state. */ + v->generation = c->generation; + v->config = info; + + /* Scan all the drives this volume spans. */ + v->prev_missing_drives = v->missing_drives; + v->missing_drives = 0; + disk = info->PhysDisk; + for (i = 0; i < info->NumPhysDisks; disk++, i++) + mpt_scan_drive(c, disk->PhysDiskNum, v); +} + +static void +mpt_scan_volumes(struct mpt_controller *c) +{ + CONFIG_PAGE_IOC_2 *ioc2; + CONFIG_PAGE_IOC_2_RAID_VOL *vol; + struct mpt_volume *mv; + int i; + + /* Get the volume list from the controller. */ + ioc2 = mpt_read_ioc_page(c->fd, 2); + if (ioc2 == NULL) { + warn("mpt%d: Failed to get volume list", c->unit); + return; + } + + /* Scan all the volumes. */ + vol = ioc2->RaidVolume; + for (i = 0; i < ioc2->NumActiveVolumes; vol++, i++) { + mpt_scan_volume(c, vol); + } + + /* Throw away all the volumes that disappeared. 
*/ + for (i = 0; i < MPT_MAX_VOL_ID; i++) { + mv = c->volumes[i]; + if (mv == NULL) + continue; + if (mv->generation != c->generation) { + c->volumes[i] = NULL; + free(mv); + } + } + free(ioc2); +} + +static void +mpt_scan_drives(struct mpt_controller *c) +{ + CONFIG_PAGE_IOC_5 *ioc5; + IOC_5_HOT_SPARE *spare; + struct mpt_physdisk *pd; + int i; + + /* + * Drives from active volumes are scanned when the volumes are + * scanned. The only thing left for us to look at are the + * spare drives. + */ + ioc5 = mpt_read_ioc_page(c->fd, 5); + if (ioc5 == NULL) { + warn("mpt%d: Failed to get spare drive list", c->unit); + return; + } + + /* Scan all the spares. */ + c->prev_missing_drives = c->missing_drives; + c->missing_drives = 0; + spare = ioc5->HotSpare; + for (i = 0; i < ioc5->NumHotSpares; spare++, i++) + mpt_scan_drive(c, spare->PhysDiskNum, NULL); + free(ioc5); + + /* + * If a drive fails when there is a hot spare, the failing + * drive swaps places with the hot spare. In this case, the + * failed drive won't be associated with a volume, so we track + * them via a controller-wide bad drives count. + */ + c->prev_bad_drives = c->bad_drives; + c->bad_drives = 0; + for (i = 0; i < MPT_MAX_PD_ID; i++) { + if (c->physdisks[i] == NULL) + continue; + if (!DRIVE_FAILED(c->physdisks[i]->Status)) + continue; + if (!c->physdisks[i]->spare) + continue; + c->bad_drives++; + } + + /* Throw away all the drives that disappeared. */ + for (i = 0; i < MPT_MAX_PD_ID; i++) { + pd = c->physdisks[i]; + if (pd == NULL) + continue; + if (pd->generation != c->generation) { + c->physdisks[i] = NULL; + free(pd); + } + } +} + +static void +mpt_scan_controller(struct mpt_controller *c) +{ + + /* Bump the overall generation count. 
*/ + c->generation++; + + mpt_scan_volumes(c); + mpt_scan_drives(c); +} + +static void +mpt_scan_all(void) +{ + int i; + + for (i = 0; i < ncontrollers; i++) + mpt_scan_controller(&controllers[i]); +} + +static int +mpt_open(void) +{ + CONFIG_PAGE_IOC_2 *ioc2; + char path[MAXPATHLEN]; + int fd, unit; + + ncontrollers = 0; + for (unit = 0; unit < MAX_UNIT; unit++) { + snprintf(path, sizeof(path), "/dev/mpt%d", unit); + fd = open(path, O_RDWR); + if (fd < 0) + continue; + + /* + * Don't bother monitoring controllers that don't + * support RAID volumes. The emulated mpt(4) + * controllers in VMWare crash the VM when queried for + * a list of hot spare drives via IOC page 5, so this + * test lets us avoid them altogether. + */ + ioc2 = mpt_read_ioc_page(fd, 2); + if (ioc2 == NULL || ioc2->MaxPhysDisks == 0) { + if (ioc2) + free(ioc2); + close(fd); + continue; + } + free(ioc2); + controllers[ncontrollers].fd = fd; + controllers[ncontrollers].unit = unit; + ncontrollers++; + } + if (ncontrollers == 0) + return (ncontrollers); + + mpt_scan_all(); + + return (ncontrollers); +} + +static FILE * +mailer_open(void) +{ + FILE *fp; + + if (dostdout) + fp = stdout; + else + fp = popen("/usr/sbin/sendmail -t", "w"); + fprintf(fp, "To: %s\n", mailto); + return fp; +} + +static void +mailer_close(FILE *fp) +{ + + if (dostdout == 0) + pclose(fp); + else + fflush(fp); +} + +static void +mailer_write(FILE *fp, const char *fmt, ...) +{ + va_list ap; + char *mfmt, *pfmt = NULL; + + pfmt = mfmt = strdup(fmt); + + va_start (ap, fmt); + vfprintf (fp, fmt, ap); + va_end (ap); + + /* XXX: Hack for Subject: */ + if (strncmp(fmt, "Subject: ", 9) == 0) { + char *p; + pfmt += strlen("Subject: "); + if ((p = strchr(pfmt, '\n')) != NULL) + *p = '\0'; + } + + if (dostdout == 0) { + va_start (ap, fmt); + vsyslog(LOG_CRIT, pfmt, ap); + va_end (ap); + } + + if (mfmt) + free(mfmt); +} + +/* Look for any failed disks in this volume. 
*/ +char * +mpt_show_failed(struct mpt_controller *c, struct mpt_volume *v) +{ + RAID_VOL0_PHYS_DISK *disk; + struct mpt_physdisk *pd; + int i, comma = 0, instate; + char *str, *p; + + instate = 0; + disk = v->config->PhysDisk; + for (i = 0; i < v->config->NumPhysDisks; disk++, i++) { + pd = c->physdisks[disk->PhysDiskNum]; + if (pd == NULL) + continue; + if (DRIVE_FAILED(pd->Status)) + instate++; + } + + if (instate == 0) + return (NULL); + + str = calloc(instate * 64, sizeof(char)); + if (str == NULL) + return (NULL); + + p = str; + *p++ = '('; + + disk = v->config->PhysDisk; + for (i = 0; i < v->config->NumPhysDisks; disk++, i++) { *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***