From owner-freebsd-bugs@FreeBSD.ORG Mon Jan 5 19:00:12 2009 Return-Path: Delivered-To: freebsd-bugs@hub.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 07D58106564A for ; Mon, 5 Jan 2009 19:00:12 +0000 (UTC) (envelope-from gnats@FreeBSD.org) Received: from freefall.freebsd.org (freefall.freebsd.org [IPv6:2001:4f8:fff6::28]) by mx1.freebsd.org (Postfix) with ESMTP id E447B8FC1C for ; Mon, 5 Jan 2009 19:00:11 +0000 (UTC) (envelope-from gnats@FreeBSD.org) Received: from freefall.freebsd.org (gnats@localhost [127.0.0.1]) by freefall.freebsd.org (8.14.3/8.14.3) with ESMTP id n05J0B2v060903 for ; Mon, 5 Jan 2009 19:00:11 GMT (envelope-from gnats@freefall.freebsd.org) Received: (from gnats@localhost) by freefall.freebsd.org (8.14.3/8.14.3/Submit) id n05J0BDl060894; Mon, 5 Jan 2009 19:00:11 GMT (envelope-from gnats) Date: Mon, 5 Jan 2009 19:00:11 GMT Message-Id: <200901051900.n05J0BDl060894@freefall.freebsd.org> To: freebsd-bugs@FreeBSD.org From: Sean Bruno Cc: Subject: Re: kern/118093: [firewire] firewire bus reset hogs CPU, causing data to be lost X-BeenThere: freebsd-bugs@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list Reply-To: Sean Bruno List-Id: Bug reports List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 05 Jan 2009 19:00:12 -0000 The following reply was made to PR kern/118093; it has been noted by GNATS. From: Sean Bruno To: bug-followup@FreeBSD.org, freebsd@sopwith.solgatos.com, freebsd-firewire@freebsd.org Cc: scottl@freebsd.org Subject: Re: kern/118093: [firewire] firewire bus reset hogs CPU, causing data to be lost Date: Mon, 05 Jan 2009 10:56:37 -0800 --=-bdk1tSakYWl4EHnChn2C Content-Type: text/plain Content-Transfer-Encoding: 7bit Well ... this has really sent me down the rabbit hole the last couple of days. There is a need to audit all locking in the firewire stack right now and I have started that task. Essentially, threads, callouts, interrupts and task queues are all jumping around causing context to be switched from one thread to another. It's kind of bad in there and I need to sort it out. I'm working with a stable/7 tree, so I've started a patch for it. This patch has quite a few printf -> device_printf changes in it, so try to ignore thost for now. The meat of the patch is a judicious implementation of FW_GLOCK() in certain code areas. Note that sometimes the code is just trying to get the lock and then drops it immediately. This is not very optimal, but it does the trick. I'm still seeing a high level of broken log messages in /var/log/messages but this may help the issue you were seeing. Give it a spin. Sean --=-bdk1tSakYWl4EHnChn2C Content-Disposition: attachment; filename="firewire.diff" Content-Type: text/x-patch; name="firewire.diff"; charset="UTF-8" Content-Transfer-Encoding: 7bit Index: fwohci_pci.c =================================================================== --- fwohci_pci.c (revision 186781) +++ fwohci_pci.c (working copy) @@ -337,11 +337,7 @@ err = bus_setup_intr(self, sc->irq_res, INTR_TYPE_NET | INTR_MPSAFE, -#if FWOHCI_INTFILT - fwohci_filt, NULL, sc, &sc->ih); -#else NULL, (driver_intr_t *) fwohci_intr, sc, &sc->ih); -#endif #if defined(__DragonFly__) || __FreeBSD_version < 500000 /* XXX splcam() should mask this irq for sbp.c*/ err = bus_setup_intr(self, sc->irq_res, INTR_TYPE_CAM, Index: firewire.c =================================================================== --- firewire.c (revision 186781) +++ firewire.c (working copy) @@ -1240,8 +1240,8 @@ fp->mode.common.tcode |= FWTCODE_PHY; if (firewire_debug) - printf("send phy_config root_node=%d gap_count=%d\n", - root_node, gap_count); + device_printf(fc->dev, "%s: phy_config root_node=%d gap_count=%d\n", + __func__, root_node, gap_count); fw_asyreq(fc, -1, xfer); } @@ -1285,7 +1285,7 @@ self_id = &fc->topology_map->self_id[0]; for(i = 0; i < fc->sid_cnt; i ++){ if (sid[1] != ~sid[0]) { - printf("fw_sidrcv: invalid self-id packet\n"); + device_printf(fc->bdev, "%s: invalid self-id packet\n", __func__); sid += 2; continue; } @@ -1331,7 +1331,6 @@ self_id++; fc->topology_map->self_id_count ++; } - device_printf(fc->bdev, "%d nodes", fc->max_node + 1); /* CRC */ fc->topology_map->crc = fw_crc16( (uint32_t *)&fc->topology_map->generation, @@ -1350,16 +1349,16 @@ bcopy(p, &CSRARC(fc, SPED_MAP + 8), (fc->speed_map->crc_len - 1)*4); fc->max_hop = fc->max_node - i_branch; - printf(", maxhop <= %d", fc->max_hop); + device_printf(fc->bdev, "%s: %d nodes, maxhop <= %d\n", + __func__, fc->max_node + 1, fc->max_hop); if(fc->irm == -1 ){ - printf(", Not found IRM capable node"); + device_printf(fc->bdev, "%s: No IRM capable node", __func__); }else{ - printf(", cable IRM = %d", fc->irm); - if (fc->irm == fc->nodeid) - printf(" (me)"); + device_printf(fc->bdev, "%s: IRM capable node = %d, %s\n", + __func__, fc->irm, + (fc->irm == fc->nodeid) ? " (me)" : ""); } - printf("\n"); if (try_bmr && (fc->irm != -1) && (CSRARC(fc, BUS_MGR_ID) == 0x3f)) { if (fc->irm == fc->nodeid) { @@ -1388,6 +1387,7 @@ struct fw_device *fwdev; s = splfw(); + FW_GLOCK(fc); fc->status = FWBUSEXPLORE; /* Invalidate all devices, just after bus reset. */ @@ -1396,6 +1396,7 @@ fwdev->status = FWDEVINVAL; fwdev->rcnt = 0; } + FW_GUNLOCK(fc); splx(s); wakeup((void *)fc); @@ -1647,20 +1648,27 @@ fc = (struct firewire_comm *)arg; - mtx_lock(&fc->wait_lock); + /*mtx_lock(&fc->wait_lock);*/ + FW_GLOCK(fc); while (fc->status != FWBUSDETACH) { + FW_GUNLOCK(fc); if (fc->status == FWBUSEXPLORE) { - mtx_unlock(&fc->wait_lock); + /*mtx_unlock(&fc->wait_lock);*/ fw_explore(fc); fc->status = FWBUSEXPDONE; - if (firewire_debug) - printf("bus_explore done\n"); + if (firewire_debug) { + device_printf(fc->dev,"%s: bus_explore done\n", + __func__); + } fw_attach_dev(fc); - mtx_lock(&fc->wait_lock); + /*mtx_lock(&fc->wait_lock);*/ } - msleep((void *)fc, &fc->wait_lock, PWAIT|PCATCH, "-", 0); + FW_GLOCK(fc); + /*msleep((void *)fc, &fc->wait_lock, PWAIT|PCATCH, "-", 0);*/ + msleep((void *)fc, FW_GMTX(fc), PWAIT|PCATCH, "-", 0); } - mtx_unlock(&fc->wait_lock); + /*mtx_unlock(&fc->wait_lock);*/ + FW_GUNLOCK(fc); kthread_exit(0); } Index: fwohci.c =================================================================== --- fwohci.c (revision 186781) +++ fwohci.c (working copy) @@ -1003,7 +1003,8 @@ if (maxdesc < db_tr->dbcnt) { maxdesc = db_tr->dbcnt; if (firewire_debug) - device_printf(sc->fc.dev, "maxdesc: %d\n", maxdesc); + device_printf(sc->fc.dev, "%s: maxdesc: %d\n", + __func__, maxdesc); } /* last db */ LAST_DB(db_tr, db); @@ -1024,7 +1025,8 @@ if(db_tr != dbch->bottom){ goto txloop; } else { - device_printf(sc->fc.dev, "fwohci_start: lack of db_trq\n"); + device_printf(sc->fc.dev, "%s: lack of db_trq\n", + __func__); dbch->flags |= FWOHCI_DBCH_FULL; } kick: @@ -1036,8 +1038,8 @@ OWRITE(sc, OHCI_DMACTL(off), OHCI_CNTL_DMA_WAKE); } else { if (firewire_debug) - device_printf(sc->fc.dev, "start AT DMA status=%x\n", - OREAD(sc, OHCI_DMACTL(off))); + device_printf(sc->fc.dev, "%s: start AT DMA status=%x\n", + __func__, OREAD(sc, OHCI_DMACTL(off))); OWRITE(sc, OHCI_DMACMD(off), dbch->top->bus_addr | fsegment); OWRITE(sc, OHCI_DMACTL(off), OHCI_CNTL_DMA_RUN); dbch->xferq.flag |= FWXFERQ_RUNNING; @@ -1848,7 +1850,7 @@ /* Disable bus reset interrupt until sid recv. */ OWRITE(sc, FWOHCI_INTMASKCLR, OHCI_INT_PHY_BUS_R); - device_printf(fc->dev, "BUS reset\n"); + device_printf(fc->dev, "%s: BUS reset\n", __func__); OWRITE(sc, FWOHCI_INTMASKCLR, OHCI_INT_CYC_LOST); OWRITE(sc, OHCI_LNKCTLCLR, OHCI_CNTL_CYCSRC); @@ -1885,10 +1887,10 @@ plen = OREAD(sc, OHCI_SID_CNT); fc->nodeid = node_id & 0x3f; - device_printf(fc->dev, "node_id=0x%08x, gen=%d, ", - node_id, (plen >> 16) & 0xff); + device_printf(fc->dev, "%s: node_id=0x%08x, gen=%d\n", + __func__, node_id, (plen >> 16) & 0xff); if (!(node_id & OHCI_NODE_VALID)) { - printf("Bus reset failure\n"); + device_printf(fc->dev, "%s: Bus reset failure\n", __func__); goto sidout; } @@ -1896,11 +1898,11 @@ sc->cycle_lost = 0; OWRITE(sc, FWOHCI_INTMASK, OHCI_INT_CYC_LOST); if ((node_id & OHCI_NODE_ROOT) && !nocyclemaster) { - printf("CYCLEMASTER mode\n"); + device_printf(fc->dev, "%s: CYCLEMASTER mode\n", __func__); OWRITE(sc, OHCI_LNKCTL, OHCI_CNTL_CYCMTR | OHCI_CNTL_CYCTIMER); } else { - printf("non CYCLEMASTER mode\n"); + device_printf(fc->dev, "%s: non CYCLEMASTER mode\n", __func__); OWRITE(sc, OHCI_LNKCTLCLR, OHCI_CNTL_CYCMTR); OWRITE(sc, OHCI_LNKCTL, OHCI_CNTL_CYCTIMER); } @@ -1922,6 +1924,8 @@ u_int i; struct firewire_comm *fc = (struct firewire_comm *)sc; + FW_GLOCK(fc); + FW_GUNLOCK(fc); if (stat & OHCI_INT_DMA_IR) { irstat = atomic_readandclear_int(&sc->irstat); for(i = 0; i < fc->nisodma ; i++){ @@ -1969,8 +1973,8 @@ OWRITE(sc, OHCI_LNKCTLCLR, OHCI_CNTL_CYCTIMER); #endif OWRITE(sc, FWOHCI_INTMASKCLR, OHCI_INT_CYC_LOST); - device_printf(fc->dev, "too many cycle lost, " - "no cycle master presents?\n"); + device_printf(fc->dev, "%s: too many cycle lost, " + "no cycle master presents?\n", __func__); } } if (stat & OHCI_INT_DMA_ATRQ) { @@ -1986,7 +1990,7 @@ device_printf(fc->dev, "unrecoverable error\n"); } if (stat & OHCI_INT_PHY_INT) { - device_printf(fc->dev, "phy int\n"); + device_printf(fc->dev, "%s: phy int\n", __func__); } return; @@ -1997,6 +2001,8 @@ { struct fwohci_softc *sc = (struct fwohci_softc *)arg; + FW_GLOCK(&sc->fc); + FW_GUNLOCK(&sc->fc); fw_busreset(&sc->fc, FWBUSRESET); OWRITE(sc, OHCI_CROMHDR, ntohl(sc->fc.config_rom[0])); OWRITE(sc, OHCI_BUS_OPT, ntohl(sc->fc.config_rom[2])); @@ -2011,6 +2017,8 @@ int i, plen; + FW_GLOCK(fc); + FW_GUNLOCK(fc); plen = OREAD(sc, OHCI_SID_CNT); if (plen & OHCI_SID_ERR) { @@ -2062,7 +2070,16 @@ { uint32_t stat, irstat, itstat; + /* + * Try and grab the global lock + * for a moment to keep us from + * firing while we are still + * setting the interrupt condition + * on the bus + */ + FW_GLOCK(&sc->fc); stat = OREAD(sc, FWOHCI_INTSTAT); + FW_GUNLOCK(&sc->fc); if (stat == 0xffffffff) { device_printf(sc->fc.dev, "device physically ejected?\n"); @@ -2091,25 +2108,15 @@ return (FILTER_HANDLED); } -int -fwohci_filt(void *arg) +void +fwohci_intr(void *arg) { struct fwohci_softc *sc = (struct fwohci_softc *)arg; - if (!(sc->intmask & OHCI_INT_EN)) { - /* polling mode */ - return (FILTER_STRAY); - } - return (fwohci_check_stat(sc)); + fwohci_check_stat(sc); } void -fwohci_intr(void *arg) -{ - fwohci_filt(arg); -} - -void fwohci_poll(struct firewire_comm *fc, int quick, int count) { struct fwohci_softc *sc = (struct fwohci_softc *)fc; @@ -2464,6 +2471,7 @@ struct fwohci_softc *sc; uint32_t fun; + FW_GLOCK(fc); device_printf(fc->dev, "Initiate bus reset\n"); sc = (struct fwohci_softc *)fc; @@ -2487,6 +2495,7 @@ fun |= FW_PHY_ISBR | FW_PHY_RHB; fun = fwphy_wrdata(sc, FW_PHY_ISBR_REG, fun); #endif + FW_GUNLOCK(fc); } void Index: fwohcivar.h =================================================================== --- fwohcivar.h (revision 186781) +++ fwohcivar.h (working copy) @@ -37,12 +37,6 @@ #include -#if defined(__DragonFly__) || __FreeBSD_version < 700043 -#define FWOHCI_INTFILT 0 -#else -#define FWOHCI_INTFILT 1 -#endif - typedef struct fwohci_softc { struct firewire_comm fc; bus_space_tag_t bst; @@ -84,7 +78,6 @@ } fwohci_softc_t; void fwohci_intr (void *arg); -int fwohci_filt (void *arg); int fwohci_init (struct fwohci_softc *, device_t); void fwohci_poll (struct firewire_comm *, int, int); void fwohci_reset (struct fwohci_softc *, device_t); --=-bdk1tSakYWl4EHnChn2C--