From owner-freebsd-stable@FreeBSD.ORG Wed Oct 17 18:46:12 2012 Return-Path: Delivered-To: freebsd-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 45156C4 for ; Wed, 17 Oct 2012 18:46:12 +0000 (UTC) (envelope-from jhb@freebsd.org) Received: from bigwig.baldwin.cx (bigknife-pt.tunnel.tserv9.chi1.ipv6.he.net [IPv6:2001:470:1f10:75::2]) by mx1.freebsd.org (Postfix) with ESMTP id F40788FC17 for ; Wed, 17 Oct 2012 18:46:11 +0000 (UTC) Received: from jhbbsd.localnet (unknown [209.249.190.124]) by bigwig.baldwin.cx (Postfix) with ESMTPSA id 5857BB98E; Wed, 17 Oct 2012 14:46:11 -0400 (EDT) From: John Baldwin To: freebsd-stable@freebsd.org Subject: Re: mpt irq timeout problem after reboot - only if non-verbose booting !?! Date: Wed, 17 Oct 2012 13:19:32 -0400 User-Agent: KMail/1.13.5 (FreeBSD/8.2-CBSD-20110714-p20; KDE/4.5.5; amd64; ; ) References: <507D27DC.5030104@omnilan.de> In-Reply-To: <507D27DC.5030104@omnilan.de> MIME-Version: 1.0 Content-Type: Text/Plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <201210171319.32815.jhb@freebsd.org> X-Greylist: Sender succeeded SMTP AUTH, not delayed by milter-greylist-4.2.7 (bigwig.baldwin.cx); Wed, 17 Oct 2012 14:46:11 -0400 (EDT) Cc: Harald Schmalzbauer X-BeenThere: freebsd-stable@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: Production branch of FreeBSD source code List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 17 Oct 2012 18:46:12 -0000 On Tuesday, October 16, 2012 5:24:44 am Harald Schmalzbauer wrote: > Hello, > > I have 9.1-RC2 running in an ESXi 5.1 guest. > I use 'lsisas' as virtual SCSI-Controller and mpt attaches and finds 1068E. > > Everything is working fine until the first 'shutdown -r now': > The second boot pauses for ~2 minutes after probing disks and continues > with this error: > mpt0: Timedout requests already complete. 
Interrupts may not be functioning. To be clear, you only see this at the end of reboot, and the hardware is fine once the machine is back up? > This problem was also observed with real 1068 hardware: > http://lists.freebsd.org/pipermail/freebsd-stable/2011-September/063937.html > > When I power off the virtual machine instead of rebooting, the problem > doesn't occur. > > Accidentally I found a workaround ;-) : > If I set 'verbose_boot' in loader.conf, the problem vanishes!?!?!? > > Any idea how „verbose_boot“ affects the operation of the mpt driver? Extra printfs affect the timing most likely. Are you using any RAID volumes? The only shutdown handler in mpt that looks like it might want interrupts to work is mpt_raid_shutdown(). It needs to use polled I/O instead of disabling interrupts I think. Try this: Index: mpt_raid.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D =2D-- mpt_raid.c (revision 241641) +++ mpt_raid.c (working copy) @@ -115,7 +115,7 @@ static timeout_t mpt_raid_timer; static void mpt_enable_vol(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol, int enable); #endif =2Dstatic void mpt_verify_mwce(struct mpt_softc *, struct mpt_raid_volume *= ); +static void mpt_verify_mwce(struct mpt_softc *, struct mpt_raid_volume *, = int); static void mpt_adjust_queue_depth(struct mpt_softc *, struct mpt_raid_vol= ume *, struct cam_path *); #if __FreeBSD_version < 500000 @@ -135,7 +135,7 @@ static void mpt_disk_prt(struct mpt_softc *mpt, st static int mpt_issue_raid_req(struct mpt_softc *mpt, struct mpt_raid_volume *vol, struct mpt_raid_disk *disk, request_t *re= q, u_int Action, uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len, =2D int write, int wait); + int write, int wait, int sleep_ok); =20 static int mpt_refresh_raid_data(struct mpt_softc *mpt); static 
void mpt_schedule_raid_refresh(struct mpt_softc *mpt); @@ -517,7 +517,7 @@ mpt_raid_shutdown(struct mpt_softc *mpt) =20 mpt->raid_mwce_setting =3D MPT_RAID_MWCE_OFF; RAID_VOL_FOREACH(mpt, mpt_vol) { =2D mpt_verify_mwce(mpt, mpt_vol); + mpt_verify_mwce(mpt, mpt_vol, FALSE); } } =20 @@ -592,7 +592,7 @@ static int mpt_issue_raid_req(struct mpt_softc *mpt, struct mpt_raid_volume *vol, struct mpt_raid_disk *disk, request_t *req, u_int Action, uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len, =2D int write, int wait) + int write, int wait, int sleep_ok) { MSG_RAID_ACTION_REQUEST *rap; SGE_SIMPLE32 *se; @@ -623,7 +623,7 @@ mpt_issue_raid_req(struct mpt_softc *mpt, struct m =20 if (wait) { return (mpt_wait_req(mpt, req, REQ_STATE_DONE, REQ_STATE_DONE, =2D /*sleep_ok*/FALSE, /*time_ms*/2000)); + sleep_ok, /*time_ms*/2000)); } else { return (0); } @@ -763,7 +763,7 @@ mpt_raid_quiesce_disk(struct mpt_softc *mpt, struc MPI_RAID_ACTION_QUIESCE_PHYS_IO, /*ActionData*/0, /*addr*/0, /*len*/0, /*write*/FALSE, =2D /*wait*/FALSE); + /*wait*/FALSE, /*sleep_ok*/FALSE); if (rv !=3D 0) return (CAM_REQ_CMP_ERR); =20 @@ -882,7 +882,7 @@ mpt_enable_vol(struct mpt_softc *mpt, struct mpt_r enable ? 
MPI_RAID_ACTION_ENABLE_VOLUME : MPI_RAID_ACTION_DISABLE_VOLUME, /*data*/0, /*addr*/0, /*len*/0, =2D /*write*/FALSE, /*wait*/TRUE); + /*write*/FALSE, /*wait*/TRUE, /*sleep_ok*/TRUE); if (rv =3D=3D ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_enable_vol: " "%s Volume Timed-out\n", @@ -903,7 +903,8 @@ mpt_enable_vol(struct mpt_softc *mpt, struct mpt_r #endif =20 static void =2Dmpt_verify_mwce(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol) +mpt_verify_mwce(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol, + int sleep_ok) { request_t *req; struct mpt_raid_action_result *ar; @@ -950,7 +951,7 @@ static void return; } =20 =2D req =3D mpt_get_request(mpt, /*sleep_ok*/TRUE); + req =3D mpt_get_request(mpt, sleep_ok); if (req =3D=3D NULL) { mpt_vol_prt(mpt, mpt_vol, "mpt_verify_mwce: Get request failed!\n"); @@ -965,7 +966,7 @@ static void rv =3D mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req, MPI_RAID_ACTION_CHANGE_VOLUME_SETTINGS, data, /*addr*/0, /*len*/0, =2D /*write*/FALSE, /*wait*/TRUE); + /*write*/FALSE, /*wait*/TRUE, sleep_ok); if (rv =3D=3D ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_verify_mwce: " "Write Cache Enable Timed-out\n"); @@ -1018,7 +1019,8 @@ mpt_verify_resync_rate(struct mpt_softc *mpt, stru rv =3D mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req, MPI_RAID_ACTION_SET_RESYNC_RATE, mpt->raid_resync_rate, /*addr*/0, =2D /*len*/0, /*write*/FALSE, /*wait*/TRUE); + /*len*/0, /*write*/FALSE, /*wait*/TRUE, + /*sleep_ok*/TRUE); if (rv =3D=3D ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_data: " "Resync Rate Setting Timed-out\n"); @@ -1054,7 +1056,8 @@ mpt_verify_resync_rate(struct mpt_softc *mpt, stru rv =3D mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req, MPI_RAID_ACTION_CHANGE_VOLUME_SETTINGS, data, /*addr*/0, /*len*/0, =2D /*write*/FALSE, /*wait*/TRUE); + /*write*/FALSE, /*wait*/TRUE, + /*sleep_ok*/TRUE); if (rv =3D=3D ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_data: " "Resync Rate Setting Timed-out\n"); @@ 
-1314,7 +1317,7 @@ mpt_refresh_raid_vol(struct mpt_softc *mpt, struct return; } rv =3D mpt_issue_raid_req(mpt, mpt_vol, NULL, req, =2D MPI_RAID_ACTION_INDICATOR_STRUCT, 0, 0, 0, FALSE, TRUE); + MPI_RAID_ACTION_INDICATOR_STRUCT, 0, 0, 0, FALSE, TRUE, TRUE); if (rv =3D=3D ETIMEDOUT) { mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_vol: Progress Indicator fetch timeout\n"); @@ -1474,7 +1477,7 @@ mpt_refresh_raid_data(struct mpt_softc *mpt) mpt_vol->flags |=3D MPT_RVF_UP2DATE; mpt_vol_prt(mpt, mpt_vol, "%s - %s\n", mpt_vol_type(mpt_vol), mpt_vol_state(mpt_vol)); =2D mpt_verify_mwce(mpt, mpt_vol); + mpt_verify_mwce(mpt, mpt_vol, TRUE); =20 if (vol_pg->VolumeStatus.Flags =3D=3D 0) { continue; @@ -1752,7 +1755,7 @@ mpt_raid_set_vol_mwce(struct mpt_softc *mpt, mpt_r mpt_vol_prt(mpt, mpt_vol, "WARNING - Unsafe shutdown " "detected. Suggest full resync.\n"); } =2D mpt_verify_mwce(mpt, mpt_vol); + mpt_verify_mwce(mpt, mpt_vol, TRUE); } mpt->raid_mwce_set =3D 1; MPT_UNLOCK(mpt); =2D-=20 John Baldwin