Date: Thu, 18 Oct 2012 08:31:31 +0200 From: Harald Schmalzbauer <h.schmalzbauer@omnilan.de> To: John Baldwin <jhb@freebsd.org> Cc: freebsd-stable@freebsd.org, freebsdlists@bsdunix.ch Subject: Re: mpt irq timeout problem after reboot - only if non-verbose booting !?! Message-ID: <507FA243.6020207@omnilan.de> In-Reply-To: <201210171319.32815.jhb@freebsd.org> References: <507D27DC.5030104@omnilan.de> <201210171319.32815.jhb@freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
schrieb John Baldwin am 17.10.2012 19:19 (localtime):
> Are you using any RAID volumes? The only shutdown handler in mpt that looks
> like it might want interrupts to work is mpt_raid_shutdown(). It needs to use
> polled I/O instead of disabling interrupts I think. Try this:
>
> Index: mpt_raid.c
> ===================================================================
> --- mpt_raid.c (revision 241641)
> +++ mpt_raid.c (working copy)
> @@ -115,7 +115,7 @@ static timeout_t mpt_raid_timer;
> static void mpt_enable_vol(struct mpt_softc *mpt,
> struct mpt_raid_volume *mpt_vol, int enable);
> #endif
> -static void mpt_verify_mwce(struct mpt_softc *, struct mpt_raid_volume *);
> +static void mpt_verify_mwce(struct mpt_softc *, struct mpt_raid_volume *, int);
> static void mpt_adjust_queue_depth(struct mpt_softc *, struct mpt_raid_volume *,
> struct cam_path *);
> #if __FreeBSD_version < 500000
> @@ -135,7 +135,7 @@ static void mpt_disk_prt(struct mpt_softc *mpt, st
> static int mpt_issue_raid_req(struct mpt_softc *mpt,
> struct mpt_raid_volume *vol, struct mpt_raid_disk *disk, request_t *req,
> u_int Action, uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len,
> - int write, int wait);
> + int write, int wait, int sleep_ok);
>
> static int mpt_refresh_raid_data(struct mpt_softc *mpt);
> static void mpt_schedule_raid_refresh(struct mpt_softc *mpt);
> @@ -517,7 +517,7 @@ mpt_raid_shutdown(struct mpt_softc *mpt)
>
> mpt->raid_mwce_setting = MPT_RAID_MWCE_OFF;
> RAID_VOL_FOREACH(mpt, mpt_vol) {
> - mpt_verify_mwce(mpt, mpt_vol);
> + mpt_verify_mwce(mpt, mpt_vol, FALSE);
> }
> }
>
> @@ -592,7 +592,7 @@ static int
> mpt_issue_raid_req(struct mpt_softc *mpt, struct mpt_raid_volume *vol,
> struct mpt_raid_disk *disk, request_t *req, u_int Action,
> uint32_t ActionDataWord, bus_addr_t addr, bus_size_t len,
> - int write, int wait)
> + int write, int wait, int sleep_ok)
> {
> MSG_RAID_ACTION_REQUEST *rap;
> SGE_SIMPLE32 *se;
> @@ -623,7 +623,7 @@ mpt_issue_raid_req(struct mpt_softc *mpt, struct m
>
> if (wait) {
> return (mpt_wait_req(mpt, req, REQ_STATE_DONE, REQ_STATE_DONE,
> - /*sleep_ok*/FALSE, /*time_ms*/2000));
> + sleep_ok, /*time_ms*/2000));
> } else {
> return (0);
> }
> @@ -763,7 +763,7 @@ mpt_raid_quiesce_disk(struct mpt_softc *mpt, struc
> MPI_RAID_ACTION_QUIESCE_PHYS_IO,
> /*ActionData*/0, /*addr*/0,
> /*len*/0, /*write*/FALSE,
> - /*wait*/FALSE);
> + /*wait*/FALSE, /*sleep_ok*/FALSE);
> if (rv != 0)
> return (CAM_REQ_CMP_ERR);
>
> @@ -882,7 +882,7 @@ mpt_enable_vol(struct mpt_softc *mpt, struct mpt_r
> enable ? MPI_RAID_ACTION_ENABLE_VOLUME
> : MPI_RAID_ACTION_DISABLE_VOLUME,
> /*data*/0, /*addr*/0, /*len*/0,
> - /*write*/FALSE, /*wait*/TRUE);
> + /*write*/FALSE, /*wait*/TRUE, /*sleep_ok*/TRUE);
> if (rv == ETIMEDOUT) {
> mpt_vol_prt(mpt, mpt_vol, "mpt_enable_vol: "
> "%s Volume Timed-out\n",
> @@ -903,7 +903,8 @@ mpt_enable_vol(struct mpt_softc *mpt, struct mpt_r
> #endif
>
> static void
> -mpt_verify_mwce(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol)
> +mpt_verify_mwce(struct mpt_softc *mpt, struct mpt_raid_volume *mpt_vol,
> + int sleep_ok)
> {
> request_t *req;
> struct mpt_raid_action_result *ar;
> @@ -950,7 +951,7 @@ static void
> return;
> }
>
> - req = mpt_get_request(mpt, /*sleep_ok*/TRUE);
> + req = mpt_get_request(mpt, sleep_ok);
> if (req == NULL) {
> mpt_vol_prt(mpt, mpt_vol,
> "mpt_verify_mwce: Get request failed!\n");
> @@ -965,7 +966,7 @@ static void
> rv = mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req,
> MPI_RAID_ACTION_CHANGE_VOLUME_SETTINGS,
> data, /*addr*/0, /*len*/0,
> - /*write*/FALSE, /*wait*/TRUE);
> + /*write*/FALSE, /*wait*/TRUE, sleep_ok);
> if (rv == ETIMEDOUT) {
> mpt_vol_prt(mpt, mpt_vol, "mpt_verify_mwce: "
> "Write Cache Enable Timed-out\n");
> @@ -1018,7 +1019,8 @@ mpt_verify_resync_rate(struct mpt_softc *mpt, stru
> rv = mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req,
> MPI_RAID_ACTION_SET_RESYNC_RATE,
> mpt->raid_resync_rate, /*addr*/0,
> - /*len*/0, /*write*/FALSE, /*wait*/TRUE);
> + /*len*/0, /*write*/FALSE, /*wait*/TRUE,
> + /*sleep_ok*/TRUE);
> if (rv == ETIMEDOUT) {
> mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_data: "
> "Resync Rate Setting Timed-out\n");
> @@ -1054,7 +1056,8 @@ mpt_verify_resync_rate(struct mpt_softc *mpt, stru
> rv = mpt_issue_raid_req(mpt, mpt_vol, /*disk*/NULL, req,
> MPI_RAID_ACTION_CHANGE_VOLUME_SETTINGS,
> data, /*addr*/0, /*len*/0,
> - /*write*/FALSE, /*wait*/TRUE);
> + /*write*/FALSE, /*wait*/TRUE,
> + /*sleep_ok*/TRUE);
> if (rv == ETIMEDOUT) {
> mpt_vol_prt(mpt, mpt_vol, "mpt_refresh_raid_data: "
> "Resync Rate Setting Timed-out\n");
> @@ -1314,7 +1317,7 @@ mpt_refresh_raid_vol(struct mpt_softc *mpt, struct
> return;
> }
> rv = mpt_issue_raid_req(mpt, mpt_vol, NULL, req,
> - MPI_RAID_ACTION_INDICATOR_STRUCT, 0, 0, 0, FALSE, TRUE);
> + MPI_RAID_ACTION_INDICATOR_STRUCT, 0, 0, 0, FALSE, TRUE, TRUE);
> if (rv == ETIMEDOUT) {
> mpt_vol_prt(mpt, mpt_vol,
> "mpt_refresh_raid_vol: Progress Indicator fetch timeout\n");
> @@ -1474,7 +1477,7 @@ mpt_refresh_raid_data(struct mpt_softc *mpt)
> mpt_vol->flags |= MPT_RVF_UP2DATE;
> mpt_vol_prt(mpt, mpt_vol, "%s - %s\n",
> mpt_vol_type(mpt_vol), mpt_vol_state(mpt_vol));
> - mpt_verify_mwce(mpt, mpt_vol);
> + mpt_verify_mwce(mpt, mpt_vol, TRUE);
>
> if (vol_pg->VolumeStatus.Flags == 0) {
> continue;
> @@ -1752,7 +1755,7 @@ mpt_raid_set_vol_mwce(struct mpt_softc *mpt, mpt_r
> mpt_vol_prt(mpt, mpt_vol, "WARNING - Unsafe shutdown "
> "detected. Suggest full resync.\n");
> }
> - mpt_verify_mwce(mpt, mpt_vol);
> + mpt_verify_mwce(mpt, mpt_vol, TRUE);
> }
> mpt->raid_mwce_set = 1;
> MPT_UNLOCK(mpt);
>
Hmm, unfortunately I can't reproduce it anymore. After applying your
patch, rebooting worked without verbose_boot, but to be sure I wanted to
cross-check, so I booted the old kernel and couldn't reproduce it there either.
In the meantime I modified some PCI-bridge settings in the virtual
hardware, because more devices were added and my modification to assign
every device its own (virtual hard wired irq) slot wasn't applicable
anymore. But I removed the additional hardware and tried to restore the
config like it was when I hit the problem; no luck though :-(
Sorry for wasting your time. If I ever see it again, I'll test the patch
and let you know.
Maybe Thomas Vogt still has his hardware setup handy where he saw the
same timeout message a year ago, CCing him.
Thanks,
-Harry
[-- Attachment #2 --]
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.18 (FreeBSD)
iEYEARECAAYFAlB/okMACgkQLDqVQ9VXb8jfDgCgx9MgYLM/NtgXI13/ISvxaphH
+SwAn0XjA0hFQfzLVcA0szZdV1BqBmKt
=Qu73
-----END PGP SIGNATURE-----
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?507FA243.6020207>
