From owner-freebsd-current@freebsd.org Sat Oct 10 00:54:21 2020 Return-Path: Delivered-To: freebsd-current@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id C34A7432563 for ; Sat, 10 Oct 2020 00:54:21 +0000 (UTC) (envelope-from eric@vangyzen.net) Received: from smtp.vangyzen.net (hotblack.vangyzen.net [IPv6:2607:fc50:1000:7400:216:3eff:fe72:314f]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 4C7RKS39nrz3f2g for ; Sat, 10 Oct 2020 00:54:20 +0000 (UTC) (envelope-from eric@vangyzen.net) Received: from disco.vangyzen.net (unknown [70.97.188.230]) by smtp.vangyzen.net (Postfix) with ESMTPSA id 39A525646C for ; Fri, 9 Oct 2020 19:54:18 -0500 (CDT) Subject: Re: ZFS crash -- zvol_geom_bio_getattr called when volmode=dev To: freebsd-current@freebsd.org References: <1dfaf797-f23b-af97-d2d3-efa0f5bb88d9@vangyzen.net> <95636bbf-2367-f226-f462-d5581581c18f@vangyzen.net> From: Eric van Gyzen Message-ID: <74ba295a-1f67-5109-0a21-9662eac0334b@vangyzen.net> Date: Fri, 9 Oct 2020 19:54:17 -0500 User-Agent: Mozilla/5.0 (X11; FreeBSD amd64; rv:78.0) Gecko/20100101 Thunderbird/78.3.1 MIME-Version: 1.0 In-Reply-To: Content-Type: text/plain; charset=utf-8; format=flowed Content-Language: en-US Content-Transfer-Encoding: 8bit X-Rspamd-Queue-Id: 4C7RKS39nrz3f2g X-Spamd-Bar: -- X-Spamd-Result: default: False [-2.34 / 15.00]; RCVD_TLS_ALL(0.00)[]; RCVD_VIA_SMTP_AUTH(0.00)[]; FREEFALL_USER(0.00)[eric]; FROM_HAS_DN(0.00)[]; TO_MATCH_ENVRCPT_ALL(0.00)[]; MID_RHS_MATCH_FROM(0.00)[]; MIME_GOOD(-0.10)[text/plain]; PREVIOUSLY_DELIVERED(0.00)[freebsd-current@freebsd.org]; TO_DN_NONE(0.00)[]; RCPT_COUNT_ONE(0.00)[1]; NEURAL_HAM_LONG(-0.86)[-0.860]; ARC_NA(0.00)[]; R_SPF_ALLOW(-0.20)[+a]; NEURAL_HAM_SHORT(-0.35)[-0.347]; DMARC_NA(0.00)[vangyzen.net]; NEURAL_HAM_MEDIUM(-0.83)[-0.831]; FROM_EQ_ENVFROM(0.00)[]; R_DKIM_NA(0.00)[]; MIME_TRACE(0.00)[0:+]; ASN(0.00)[asn:36236, ipnet:2607:fc50:1000::/36, country:US]; RCVD_COUNT_TWO(0.00)[2]; MAILMAN_DEST(0.00)[freebsd-current] X-BeenThere: freebsd-current@freebsd.org X-Mailman-Version: 2.1.33 Precedence: list List-Id: Discussions about the use of FreeBSD-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 10 Oct 2020 00:54:21 -0000 On 10/9/20 6:27 PM, Ryan Moeller wrote: > > On 10/9/20 6:22 PM, Alan Somers wrote: >> This sounds like it might be a regression introduced by the OpenZFS >> merge. >> Have you compared vdev_geom.c in OpenZFS vs the old version? >> -Alan > > > I don't think vdev_geom.c is involved, we're taking a wrong path in > zvol_os.c because > it seems the volume is created using the default volmode and later > changed to volmode=dev. Yes, you're on the right track. I tried this several times on a VM and it eventually hit the window: # zfs create -s -V 20G -o primarycache=none -o volmode=dev head_root/testvol zvol_create_minor_impl:1250[1]: Creating ZVOL head_root/testvol... zvol_create_minor_impl:1371[1]: ZVOL head_root/testvol created. Fatal trap 12: page fault while in kernel mode cpuid = 7; apic id = 07 fault virtual address = 0x110 fault code = supervisor read data, page not present instruction pointer = 0x20:0xffffffff82167fca stack pointer = 0x28:0xfffffe000edcdb30 frame pointer = 0x28:0xfffffe000edcdb70 code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, long 1, def32 0, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 13 (g_down) trap number = 12 db> acttrace Tracing command zfskern pid 21 tid 100478 td 0xfffffe00610c9800 (CPU 6) cpustop_handler() at cpustop_handler+0x28/frame 0xfffffe0011880e00 ipi_nmi_handler() at ipi_nmi_handler+0x39/frame 0xfffffe0011880e10 trap() at trap+0x56/frame 0xfffffe0011880f20 nmi_calltrap() at nmi_calltrap+0x8/frame 0xfffffe0011880f20 --- trap 0x13, rip = 0xffffffff80c25fb2, rsp = 0xfffffe006168c820, rbp = 0xfffffe006168c830 --- lock_delay() at lock_delay+0x42/frame 0xfffffe006168c830 _mtx_lock_spin_cookie() at _mtx_lock_spin_cookie+0xc1/frame 0xfffffe006168c8a0 __mtx_lock_spin_flags() at __mtx_lock_spin_flags+0xd5/frame 0xfffffe006168c8e0 cnputs() at cnputs+0x58/frame 0xfffffe006168c910 vprintf() at vprintf+0xcd/frame 0xfffffe006168c9e0 printf() at printf+0x43/frame 0xfffffe006168ca40 zvol_free() at zvol_free+0x53/frame 0xfffffe006168ca80 zvol_task_cb() at zvol_task_cb+0x271/frame 0xfffffe006168cae0 taskq_run() at taskq_run+0x1f/frame 0xfffffe006168cb00 taskqueue_run_locked() at taskqueue_run_locked+0xaa/frame 0xfffffe006168cb80 taskqueue_thread_loop() at taskqueue_thread_loop+0x94/frame 0xfffffe006168cbb0 fork_exit() at fork_exit+0x80/frame 0xfffffe006168cbf0 fork_trampoline() at fork_trampoline+0xe/frame 0xfffffe006168cbf0 --- trap 0, rip = 0, rsp = 0, rbp = 0 --- Tracing command geom pid 13 tid 100049 td 0xfffffe0011862700 (CPU 7) kdb_enter() at kdb_enter+0x37/frame 0xfffffe000edcd7e0 vpanic() at vpanic+0x19e/frame 0xfffffe000edcd830 panic() at panic+0x43/frame 0xfffffe000edcd890 trap_fatal() at trap_fatal+0x387/frame 0xfffffe000edcd8f0 trap_pfault() at trap_pfault+0x97/frame 0xfffffe000edcd950 trap() at trap+0x2ab/frame 0xfffffe000edcda60 calltrap() at calltrap+0x8/frame 0xfffffe000edcda60 --- trap 0xc, rip = 0xffffffff82167fca, rsp = 0xfffffe000edcdb30, rbp = 0xfffffe000edcdb70 --- zvol_geom_bio_start() at zvol_geom_bio_start+0x2a/frame 0xfffffe000edcdb70 g_io_schedule_down() at g_io_schedule_down+0x134/frame 0xfffffe000edcdba0 g_down_procbody() at g_down_procbody+0x5c/frame 0xfffffe000edcdbb0 fork_exit() at fork_exit+0x80/frame 0xfffffe000edcdbf0 fork_trampoline() at fork_trampoline+0xe/frame 0xfffffe000edcdbf0 --- trap 0, rip = 0, rsp = 0, rbp = 0 --- (The other CPUs were idle.) >> On Fri, Oct 9, 2020 at 3:48 PM Eric van Gyzen wrote: >> >>> On 10/9/20 4:39 PM, Eric van Gyzen wrote: >>>> Does this look familiar?  I'm creating a zvol with volmode=dev, but >>>> some >>>> geom code paths were taken.  If this looks new, I'll provide more >>> details. >>> >>> primarycache=none also seems to be a factor.  I can easily repro with: >>> >>> zfs create -s -V 10G -o primarycache=none -o volmode=dev .../testvol > > > I don't think primarycache is a factor, I can easily repro with > primarycache left at the default. > The volmode property is being set asynchronously and losing the race > with the initial creation of > the minors. When volmode is changed the minor is supposed to be > destroyed and then recreated > in the correct mode, but that does not seem to be working correctly. > Setting vfs.zfs.debug=1 you > can see in dmesg the zvol is created once in volmode=geom and then a > second attempt to create > the zvol fails, because zvol_free did not occur. > > zvol_create_minor_impl:1250[1]: Creating ZVOL p0/testvol... > name=p0/testvol error=0 volmode=1 > zvol_create_minor_impl:1372[1]: ZVOL p0/testvol created. > zvol_create_minor_impl:1250[1]: Creating ZVOL p0/testvol... > > So something is preventing zv_free from being called by > zvol_remove_minors_impl. > > -Ryan > > >>> >>>> 13.0-CURRENT r366500+84ccaf49083c-c272054 GENERIC >>>> >>>> #8  >>>> #9  zvol_geom_bio_getattr (bp=0xfffff80376132900) >>>>       at >>>> /usr/src/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:545 >>>> #10 zvol_geom_bio_start (bp=0xfffff80376132900) >>>>       at >>>> /usr/src/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:519 >>>> #11 0xffffffff80b1c684 in g_io_schedule_down (tp=) >>>>       at /usr/src/sys/geom/geom_io.c:848 >>>> #12 0xffffffff80b1cfcc in g_down_procbody (arg=) >>>>       at /usr/src/sys/geom/geom_kern.c:111 >>>> >>>> (kgdb) f 9 >>>> #9  zvol_geom_bio_getattr (bp=0xfffff80376132900) >>>>       at >>>> /usr/src/sys/contrib/openzfs/module/os/freebsd/zfs/zvol_os.c:545 >>>> 545        spa_t *spa = dmu_objset_spa(zv->zv_objset); >>>> >>>> (kgdb) l >>>> 540        zvol_state_t *zv; >>>> 541 >>>> 542        zv = bp->bio_to->private; >>>> 543        ASSERT(zv != NULL); >>>> 544 >>>> 545        spa_t *spa = dmu_objset_spa(zv->zv_objset); >>>> 546        uint64_t refd, avail, usedobjs, availobjs; >>>> 547 >>>> 548        if (g_handleattr_int(bp, "GEOM::candelete", 1)) >>>> 549            return (0); >>>> >>>> (kgdb) p zv >>>> $1 = (zvol_state_t *) 0x0 >>>> >>>> (kgdb) p *bp >>>> $3 = { >>>>     bio_cmd = 4, >>>>     bio_flags = 0, >>>>     bio_cflags = 0, >>>>     bio_pflags = 0, >>>>     bio_dev = 0x0, >>>>     bio_disk = 0x0, >>>>     bio_offset = 0, >>>>     bio_bcount = 0, >>>>     bio_data = 0xfffff801fa687c00 "", >>>>     bio_ma = 0x0, >>>>     bio_ma_offset = 0, >>>>     bio_ma_n = 0, >>>>     bio_error = 0, >>>>     bio_resid = 0, >>>>     bio_done = 0x0, >>>>     bio_driver1 = 0x0, >>>>     bio_driver2 = 0x0, >>>>     bio_caller1 = 0x0, >>>>     bio_caller2 = 0x0, >>>>     bio_queue = { >>>>       tqe_next = 0xffffffffffffffff, >>>>       tqe_prev = 0xffffffffffffffff >>>>     }, >>>>     bio_attribute = 0xffffffff81223c03 "GEOM::physpath", >>>>     bio_zone = { >>>>       zone_cmd = 0 '\000', >>>>       zone_params = { >>>>         disk_params = { >>>>           zone_mode = 0, >>>>           flags = 0, >>>>           optimal_seq_zones = 0, >>>>           optimal_nonseq_zones = 0, >>>>           max_seq_zones = 0 >>>>         }, >>>>         rwp = { >>>>           id = 0, >>>>           flags = 0 '\000' >>>>         }, >>>>         report = { >>>>           starting_id = 0, >>>>           rep_options = 0 '\000', >>>>           header = { >>>>             same = 0 '\000', >>>>             maximum_lba = 0, >>>>             reserved = '\000' >>>>           }, >>>>           entries_allocated = 0, >>>>           entries_filled = 0, >>>>           entries_available = 0, >>>>           entries = 0x0 >>>>         } >>>>       } >>>>     }, >>>>     bio_from = 0xfffff80006b92880, >>>>     bio_to = 0xfffff80006972500, >>>>     bio_length = 1024, >>>>     bio_completed = 0, >>>>     bio_children = 0, >>>>     bio_inbed = 0, >>>>     bio_parent = 0x0, >>>>     bio_t0 = { >>>>       sec = 50, >>>>       frac = 10248368299661698441 >>>>     }, >>>>     bio_task = 0x0, >>>>     bio_task_arg = 0x0, >>>>     bio_spare1 = 0x0, >>>>     bio_spare2 = 0x0, >>>>     bio_track_bp = 0x0, >>>>     bio_pblkno = 0 >>>> } >>>> >>>> (kgdb) p *bp->bio_to >>>> $4 = { >>>>     name = 0xfffff80006972598 "zvol/disco_fast/vm/onefs1-1/disk7", >>>>     provider = { >>>>       le_next = 0x0, >>>>       le_prev = 0xfffff80006972428 >>>>     }, >>>>     geom = 0xfffff80006972400, >>>>     consumers = { >>>>       lh_first = 0xfffff80006b92880 >>>>     }, >>>>     acr = 1, >>>>     acw = 0, >>>>     ace = 0, >>>>     error = 0, >>>>     orphan = { >>>>       tqe_next = 0x0, >>>>       tqe_prev = 0x0 >>>>     }, >>>>     mediasize = 5368709120, >>>>     sectorsize = 512, >>>>     stripesize = 8192, >>>>     stripeoffset = 0, >>>>     stat = 0xfffff80006d3d120, >>>>     spare1 = 0, >>>>     spare2 = 0, >>>>     flags = 48, >>>>     aliases = { >>>>       lh_first = 0x0 >>>>     }, >>>>     private = 0x0, >>>>     index = 0 >>>> }