Date: Wed, 13 Apr 2005 02:59:06 GMT From: Eric Anholt <anholt@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 75038 for review Message-ID: <200504130259.j3D2x6X1003441@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=75038 Change 75038 by anholt@anholt_leguin on 2005/04/13 02:58:53 IF DRM CVS Affected files ... .. //depot/projects/drm-merge-vendor/sys/dev/drm/i915_drv.h#2 edit .. //depot/projects/drm-merge-vendor/sys/dev/drm/mach64_dma.c#2 edit .. //depot/projects/drm-merge-vendor/sys/dev/drm/radeon_drm.h#3 edit .. //depot/projects/drm-merge-vendor/sys/dev/drm/radeon_drv.h#3 edit .. //depot/projects/drm-merge-vendor/sys/dev/drm/radeon_state.c#3 edit .. //depot/projects/drm-merge-vendor/sys/dev/drm/savage_bci.c#2 edit .. //depot/projects/drm-merge-vendor/sys/dev/drm/savage_drv.h#2 edit .. //depot/projects/drm-merge-vendor/sys/dev/drm/savage_state.c#2 edit .. //depot/projects/drm-merge-vendor/sys/dev/drm/sis_mm.c#3 edit Differences ... ==== //depot/projects/drm-merge-vendor/sys/dev/drm/i915_drv.h#2 (text+ko) ==== @@ -72,6 +72,7 @@ int tex_lru_log_granularity; int allow_batchbuffer; struct mem_block *agp_heap; + unsigned int sr01, adpa, ppcr, dvob, dvoc, lvds; } drm_i915_private_t; /* i915_dma.c */ @@ -96,10 +97,10 @@ extern void i915_mem_release(drm_device_t * dev, DRMFILE filp, struct mem_block *heap); -#define I915_READ(reg) DRM_READ32(dev_priv->mmio_map, reg) -#define I915_WRITE(reg,val) DRM_WRITE32(dev_priv->mmio_map, reg, val) -#define I915_READ16(reg) DRM_READ16(dev_priv->mmio_map, reg) -#define I915_WRITE16(reg,val) DRM_WRITE16(dev_priv->mmio_map, reg, val) +#define I915_READ(reg) DRM_READ32(dev_priv->mmio_map, (reg)) +#define I915_WRITE(reg,val) DRM_WRITE32(dev_priv->mmio_map, (reg), (val)) +#define I915_READ16(reg) DRM_READ16(dev_priv->mmio_map, (reg)) +#define I915_WRITE16(reg,val) DRM_WRITE16(dev_priv->mmio_map, (reg), (val)) #define I915_VERBOSE 0 @@ -163,6 +164,13 @@ #define PPCR 0x61204 #define PPCR_ON (1<<0) +#define DVOB 0x61140 +#define DVOB_ON (1<<31) +#define DVOC 0x61160 +#define DVOC_ON (1<<31) +#define LVDS 0x61180 +#define LVDS_ON (1<<31) + #define ADPA 0x61100 #define ADPA_DPMS_MASK (~(3<<10)) #define ADPA_DPMS_ON (0<<10) ==== //depot/projects/drm-merge-vendor/sys/dev/drm/mach64_dma.c#2 (text+ko) ==== @@ -497,7 +497,7 @@ (i >= ring->head - MACH64_DUMP_CONTEXT * 4 && i <= ring->head + MACH64_DUMP_CONTEXT * 4)) { DRM_INFO(" 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x%s%s\n", - ring->start_addr + i * sizeof(u32), + (u32)(ring->start_addr + i * sizeof(u32)), le32_to_cpu(((u32 *) ring->start)[i + 0]), le32_to_cpu(((u32 *) ring->start)[i + 1]), le32_to_cpu(((u32 *) ring->start)[i + 2]), @@ -913,8 +913,8 @@ } memset(dev_priv->ring.start, 0, dev_priv->ring.size); - DRM_INFO("descriptor ring: cpu addr 0x%08x, bus addr: 0x%08x\n", - (u32) dev_priv->ring.start, dev_priv->ring.start_addr); + DRM_INFO("descriptor ring: cpu addr %p, bus addr: 0x%08x\n", + dev_priv->ring.start, dev_priv->ring.start_addr); ret = 0; if (dev_priv->driver_mode != MACH64_MODE_MMIO) { ==== //depot/projects/drm-merge-vendor/sys/dev/drm/radeon_drm.h#3 (text+ko) ==== @@ -151,7 +151,8 @@ #define RADEON_EMIT_PP_CUBIC_OFFSETS_T1 81 #define RADEON_EMIT_PP_CUBIC_FACES_2 82 #define RADEON_EMIT_PP_CUBIC_OFFSETS_T2 83 -#define RADEON_MAX_STATE_PACKETS 84 +#define R200_EMIT_PP_TRI_PERF_CNTL 84 +#define RADEON_MAX_STATE_PACKETS 85 /* Commands understood by cmd_buffer ioctl. More can be added but * obviously these can't be removed or changed: ==== //depot/projects/drm-merge-vendor/sys/dev/drm/radeon_drv.h#3 (text+ko) ==== @@ -42,7 +42,7 @@ #define DRIVER_NAME "radeon" #define DRIVER_DESC "ATI Radeon" -#define DRIVER_DATE "20050208" +#define DRIVER_DATE "20050311" /* Interface history: * @@ -84,10 +84,12 @@ * - Add R100/R200 surface allocation/free support * 1.15- Add support for texture micro tiling * - Add support for r100 cube maps + * 1.16- Add R200_EMIT_PP_TRI_PERF_CNTL packet to support brilinear + * texture filtering on r200 */ #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 15 +#define DRIVER_MINOR 16 #define DRIVER_PATCHLEVEL 0 enum radeon_family { @@ -657,6 +659,8 @@ # define RADEON_3D_DRAW_IMMD 0x00002900 # define RADEON_3D_DRAW_INDX 0x00002A00 # define RADEON_3D_LOAD_VBPNTR 0x00002F00 +# define RADEON_MPEG_IDCT_MACROBLOCK 0x00003000 +# define RADEON_MPEG_IDCT_MACROBLOCK_REV 0x00003100 # define RADEON_3D_CLEAR_ZMASK 0x00003200 # define RADEON_3D_CLEAR_HIZ 0x00003700 # define RADEON_CNTL_HOSTDATA_BLT 0x00009400 @@ -825,6 +829,8 @@ #define R200_SE_TCL_POINT_SPRITE_CNTL 0x22c4 +#define R200_PP_TRI_PERF 0x2cf8 + /* Constants */ #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ ==== //depot/projects/drm-merge-vendor/sys/dev/drm/radeon_state.c#3 (text+ko) ==== @@ -209,6 +209,7 @@ case RADEON_EMIT_PP_CUBIC_FACES_0: case RADEON_EMIT_PP_CUBIC_FACES_1: case RADEON_EMIT_PP_CUBIC_FACES_2: + case R200_EMIT_PP_TRI_PERF_CNTL: /* These packets don't contain memory offsets */ break; @@ -581,7 +582,8 @@ RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"}, { RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"}, { RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"}, { - RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"}, + RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"}, { + R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"}, }; /* ================================================================ ==== //depot/projects/drm-merge-vendor/sys/dev/drm/savage_bci.c#2 (text+ko) ==== @@ -47,6 +47,7 @@ #endif for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) { + DRM_MEMORYBARRIER(); status = dev_priv->status_ptr[0]; if ((status & mask) < threshold) return 0; @@ -120,6 +121,7 @@ int i; for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) { + DRM_MEMORYBARRIER(); status = dev_priv->status_ptr[1]; if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff || (status & 0xffff) == 0) @@ -246,6 +248,8 @@ else event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; wrap = dev_priv->event_wrap; + if (event > dev_priv->event_counter) + wrap--; /* hardware hasn't passed the last wrap yet */ DRM_DEBUG(" tail=0x%04x %d\n", tail->age.event, tail->age.wrap); DRM_DEBUG(" head=0x%04x %d\n", event, wrap); @@ -284,6 +288,254 @@ } /* + * Command DMA + */ +static int savage_dma_init(drm_savage_private_t *dev_priv) +{ + unsigned int i; + + dev_priv->nr_dma_pages = dev_priv->cmd_dma->size / + (SAVAGE_DMA_PAGE_SIZE*4); + dev_priv->dma_pages = drm_alloc(sizeof(drm_savage_dma_page_t) * + dev_priv->nr_dma_pages, + DRM_MEM_DRIVER); + if (dev_priv->dma_pages == NULL) + return DRM_ERR(ENOMEM); + + for (i = 0; i < dev_priv->nr_dma_pages; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, 0, 0); + dev_priv->dma_pages[i].used = 0; + dev_priv->dma_pages[i].flushed = 0; + } + SET_AGE(&dev_priv->last_dma_age, 0, 0); + + dev_priv->first_dma_page = 0; + dev_priv->current_dma_page = 0; + + return 0; +} + +void savage_dma_reset(drm_savage_private_t *dev_priv) +{ + uint16_t event; + unsigned int wrap, i; + event = savage_bci_emit_event(dev_priv, 0); + wrap = dev_priv->event_wrap; + for (i = 0; i < dev_priv->nr_dma_pages; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, event, wrap); + dev_priv->dma_pages[i].used = 0; + dev_priv->dma_pages[i].flushed = 0; + } + SET_AGE(&dev_priv->last_dma_age, event, wrap); + dev_priv->first_dma_page = dev_priv->current_dma_page = 0; +} + +void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page) +{ + uint16_t event; + unsigned int wrap; + + /* Faked DMA buffer pages don't age. */ + if (dev_priv->cmd_dma == &dev_priv->fake_dma) + return; + + UPDATE_EVENT_COUNTER(); + if (dev_priv->status_ptr) + event = dev_priv->status_ptr[1] & 0xffff; + else + event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; + wrap = dev_priv->event_wrap; + if (event > dev_priv->event_counter) + wrap--; /* hardware hasn't passed the last wrap yet */ + + if (dev_priv->dma_pages[page].age.wrap > wrap || + (dev_priv->dma_pages[page].age.wrap == wrap && + dev_priv->dma_pages[page].age.event > event)) { + if (dev_priv->wait_evnt(dev_priv, + dev_priv->dma_pages[page].age.event) + < 0) + DRM_ERROR("wait_evnt failed!\n"); + } +} + +uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n) +{ + unsigned int cur = dev_priv->current_dma_page; + unsigned int rest = SAVAGE_DMA_PAGE_SIZE - + dev_priv->dma_pages[cur].used; + unsigned int nr_pages = (n - rest + SAVAGE_DMA_PAGE_SIZE-1) / + SAVAGE_DMA_PAGE_SIZE; + uint32_t *dma_ptr; + unsigned int i; + + DRM_DEBUG("cur=%u, cur->used=%u, n=%u, rest=%u, nr_pages=%u\n", + cur, dev_priv->dma_pages[cur].used, n, rest, nr_pages); + + if (cur + nr_pages < dev_priv->nr_dma_pages) { + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + cur*SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + if (n < rest) + rest = n; + dev_priv->dma_pages[cur].used += rest; + n -= rest; + cur++; + } else { + dev_priv->dma_flush(dev_priv); + nr_pages = (n + SAVAGE_DMA_PAGE_SIZE-1) / SAVAGE_DMA_PAGE_SIZE; + for (i = cur; i < dev_priv->nr_dma_pages; ++i) { + dev_priv->dma_pages[i].age = dev_priv->last_dma_age; + dev_priv->dma_pages[i].used = 0; + dev_priv->dma_pages[i].flushed = 0; + } + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle; + dev_priv->first_dma_page = cur = 0; + } + for (i = cur; nr_pages > 0; ++i, --nr_pages) { +#if SAVAGE_DMA_DEBUG + if (dev_priv->dma_pages[i].used) { + DRM_ERROR("unflushed page %u: used=%u\n", + i, dev_priv->dma_pages[i].used); + } +#endif + if (n > SAVAGE_DMA_PAGE_SIZE) + dev_priv->dma_pages[i].used = SAVAGE_DMA_PAGE_SIZE; + else + dev_priv->dma_pages[i].used = n; + n -= SAVAGE_DMA_PAGE_SIZE; + } + dev_priv->current_dma_page = --i; + + DRM_DEBUG("cur=%u, cur->used=%u, n=%u\n", + i, dev_priv->dma_pages[i].used, n); + + savage_dma_wait(dev_priv, dev_priv->current_dma_page); + + return dma_ptr; +} + +static void savage_dma_flush(drm_savage_private_t *dev_priv) +{ + BCI_LOCALS; + unsigned int first = dev_priv->first_dma_page; + unsigned int cur = dev_priv->current_dma_page; + uint16_t event; + unsigned int wrap, pad, align, len, i; + unsigned long phys_addr; + + if (first == cur && + dev_priv->dma_pages[cur].used == dev_priv->dma_pages[cur].flushed) + return; + + /* pad length to multiples of 2 entries + * align start of next DMA block to multiles of 8 entries */ + pad = -dev_priv->dma_pages[cur].used & 1; + align = -(dev_priv->dma_pages[cur].used + pad) & 7; + + DRM_DEBUG("first=%u, cur=%u, first->flushed=%u, cur->used=%u, " + "pad=%u, align=%u\n", + first, cur, dev_priv->dma_pages[first].flushed, + dev_priv->dma_pages[cur].used, pad, align); + + /* pad with noops */ + if (pad) { + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + cur * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + dev_priv->dma_pages[cur].used += pad; + while(pad != 0) { + *dma_ptr++ = BCI_CMD_WAIT; + pad--; + } + } + + DRM_MEMORYBARRIER(); + + /* do flush ... */ + phys_addr = dev_priv->cmd_dma->offset + + (first * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[first].flushed) * 4; + len = (cur - first) * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used - + dev_priv->dma_pages[first].flushed; + + DRM_DEBUG("phys_addr=%lx, len=%u\n", + phys_addr | dev_priv->dma_type, len); + + BEGIN_BCI(3); + BCI_SET_REGISTERS(SAVAGE_DMABUFADDR, 1); + BCI_WRITE(phys_addr | dev_priv->dma_type); + BCI_DMA(len); + + /* fix alignment of the start of the next block */ + dev_priv->dma_pages[cur].used += align; + + /* age DMA pages */ + event = savage_bci_emit_event(dev_priv, 0); + wrap = dev_priv->event_wrap; + for (i = first; i < cur; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, event, wrap); + dev_priv->dma_pages[i].used = 0; + dev_priv->dma_pages[i].flushed = 0; + } + /* age the current page only when it's full */ + if (dev_priv->dma_pages[cur].used == SAVAGE_DMA_PAGE_SIZE) { + SET_AGE(&dev_priv->dma_pages[cur].age, event, wrap); + dev_priv->dma_pages[cur].used = 0; + dev_priv->dma_pages[cur].flushed = 0; + /* advance to next page */ + cur++; + if (cur == dev_priv->nr_dma_pages) + cur = 0; + dev_priv->first_dma_page = dev_priv->current_dma_page = cur; + } else { + dev_priv->first_dma_page = cur; + dev_priv->dma_pages[cur].flushed = dev_priv->dma_pages[i].used; + } + SET_AGE(&dev_priv->last_dma_age, event, wrap); + + DRM_DEBUG("first=cur=%u, cur->used=%u, cur->flushed=%u\n", cur, + dev_priv->dma_pages[cur].used, + dev_priv->dma_pages[cur].flushed); +} + +static void savage_fake_dma_flush(drm_savage_private_t *dev_priv) +{ + BCI_LOCALS; + unsigned int i, j; + if (dev_priv->first_dma_page == dev_priv->current_dma_page && + dev_priv->dma_pages[dev_priv->current_dma_page].used == 0) + return; + + DRM_DEBUG("first=%u, cur=%u, cur->used=%u\n", + dev_priv->first_dma_page, dev_priv->current_dma_page, + dev_priv->dma_pages[dev_priv->current_dma_page].used); + + for (i = dev_priv->first_dma_page; + i <= dev_priv->current_dma_page && dev_priv->dma_pages[i].used; + ++i) { + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + i * SAVAGE_DMA_PAGE_SIZE; +#if SAVAGE_DMA_DEBUG + /* Sanity check: all pages except the last one must be full. */ + if (i < dev_priv->current_dma_page && + dev_priv->dma_pages[i].used != SAVAGE_DMA_PAGE_SIZE) { + DRM_ERROR("partial DMA page %u: used=%u", + i, dev_priv->dma_pages[i].used); + } +#endif + BEGIN_BCI(dev_priv->dma_pages[i].used); + for (j = 0; j < dev_priv->dma_pages[i].used; ++j) { + BCI_WRITE(dma_ptr[j]); + } + dev_priv->dma_pages[i].used = 0; + } + + /* reset to first page */ + dev_priv->first_dma_page = dev_priv->current_dma_page = 0; +} + +/* * Initalize permanent mappings. On Savage4 and SavageIX the alignment * and size of the aperture is not suitable for automatic MTRR setup * in drm_initmap. Therefore we do it manually before the maps are @@ -462,14 +714,20 @@ } else { dev_priv->status = NULL; } - if (dev_priv->dma_type == SAVAGE_DMA_AGP) { + if (dev_priv->dma_type == SAVAGE_DMA_AGP && init->buffers_offset) { dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); if (!dev->agp_buffer_map) { - DRM_ERROR("could not find dma buffer region!\n"); + DRM_ERROR("could not find DMA buffer region!\n"); savage_do_cleanup_bci(dev); return DRM_ERR(EINVAL); } + drm_core_ioremap(dev->agp_buffer_map, dev); + if (!dev->agp_buffer_map) { + DRM_ERROR("failed to ioremap DMA buffer region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } } if (init->agp_textures_offset) { dev_priv->agp_textures = @@ -482,25 +740,65 @@ } else { dev_priv->agp_textures = NULL; } - if (0 && !S3_SAVAGE3D_SERIES(dev_priv->chipset)) { - /* command DMA not implemented yet */ + + if (init->cmd_dma_offset) { + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + DRM_ERROR("command DMA not supported on " + "Savage3D/MX/IX.\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + if (dev->dma && dev->dma->buflist) { + DRM_ERROR("command and vertex DMA not supported " + "at the same time.\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } dev_priv->cmd_dma = drm_core_findmap(dev, init->cmd_dma_offset); if (!dev_priv->cmd_dma) { DRM_ERROR("could not find command DMA region!\n"); savage_do_cleanup_bci(dev); return DRM_ERR(EINVAL); } + if (dev_priv->dma_type == SAVAGE_DMA_AGP) { + if (dev_priv->cmd_dma->type != _DRM_AGP) { + DRM_ERROR("AGP command DMA region is not a " + "_DRM_AGP map!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + drm_core_ioremap(dev_priv->cmd_dma, dev); + if (!dev_priv->cmd_dma->handle) { + DRM_ERROR("failed to ioremap command " + "DMA region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + } else if (dev_priv->cmd_dma->type != _DRM_CONSISTENT) { + DRM_ERROR("PCI command DMA region is not a " + "_DRM_CONSISTENT map!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } } else { dev_priv->cmd_dma = NULL; } - if (dev_priv->cmd_dma && dev_priv->dma_type == SAVAGE_DMA_AGP) { - drm_core_ioremap(dev_priv->cmd_dma, dev); - if (!dev_priv->cmd_dma->handle) { - DRM_ERROR("failed to ioremap command DMA region!\n"); + dev_priv->dma_flush = savage_dma_flush; + if (!dev_priv->cmd_dma) { + DRM_DEBUG("falling back to faked command DMA.\n"); + dev_priv->fake_dma.offset = 0; + dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE; + dev_priv->fake_dma.type = _DRM_SHM; + dev_priv->fake_dma.handle = drm_alloc(SAVAGE_FAKE_DMA_SIZE, + DRM_MEM_DRIVER); + if (!dev_priv->fake_dma.handle) { + DRM_ERROR("could not allocate faked DMA buffer!\n"); savage_do_cleanup_bci(dev); return DRM_ERR(ENOMEM); } + dev_priv->cmd_dma = &dev_priv->fake_dma; + dev_priv->dma_flush = savage_fake_dma_flush; } dev_priv->sarea_priv = @@ -576,6 +874,12 @@ return DRM_ERR(ENOMEM); } + if (savage_dma_init(dev_priv) < 0) { + DRM_ERROR("could not initialize command DMA\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + return 0; } @@ -583,9 +887,29 @@ { drm_savage_private_t *dev_priv = dev->dev_private; - if (dev_priv->cmd_dma && dev_priv->dma_type == SAVAGE_DMA_AGP) + if (dev_priv->cmd_dma == &dev_priv->fake_dma) { + if (dev_priv->fake_dma.handle) + drm_free(dev_priv->fake_dma.handle, + SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER); + } else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle && + dev_priv->cmd_dma->type == _DRM_AGP && + dev_priv->dma_type == SAVAGE_DMA_AGP) drm_core_ioremapfree(dev_priv->cmd_dma, dev); + if (dev_priv->dma_type == SAVAGE_DMA_AGP && + dev->agp_buffer_map && dev->agp_buffer_map->handle) { + drm_core_ioremapfree(dev->agp_buffer_map, dev); + /* make sure the next instance (which may be running + * in PCI mode) doesn't try to use an old + * agp_buffer_map. */ + dev->agp_buffer_map = NULL; + } + + if (dev_priv->dma_pages) + drm_free(dev_priv->dma_pages, + sizeof(drm_savage_dma_page_t)*dev_priv->nr_dma_pages, + DRM_MEM_DRIVER); + return 0; } @@ -623,6 +947,7 @@ sizeof(event)); event.count = savage_bci_emit_event(dev_priv, event.flags); + event.count |= dev_priv->event_wrap << 16; DRM_COPY_TO_USER_IOCTL(&((drm_savage_event_emit_t __user *)data)->count, event.count, sizeof(event.count)); return 0; @@ -633,16 +958,34 @@ DRM_DEVICE; drm_savage_private_t *dev_priv = dev->dev_private; drm_savage_event_wait_t event; + unsigned int event_e, hw_e; + unsigned int event_w, hw_w; DRM_DEBUG("\n"); DRM_COPY_FROM_USER_IOCTL(event, (drm_savage_event_wait_t __user *)data, sizeof(event)); - if (event.count > 0xffff) - return DRM_ERR(EINVAL); + UPDATE_EVENT_COUNTER(); + if (dev_priv->status_ptr) + hw_e = dev_priv->status_ptr[1] & 0xffff; + else + hw_e = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; + hw_w = dev_priv->event_wrap; + if (hw_e > dev_priv->event_counter) + hw_w--; /* hardware hasn't passed the last wrap yet */ + + event_e = event.count & 0xffff; + event_w = event.count >> 16; - return dev_priv->wait_evnt(dev_priv, event.count); + /* Don't need to wait if + * - event counter wrapped since the event was emitted or + * - the hardware has advanced up to or over the event to wait for. + */ + if (event_w < hw_w || (event_w == hw_w && event_e <= hw_e) ) + return 0; + else + return dev_priv->wait_evnt(dev_priv, event_e); } /* ==== //depot/projects/drm-merge-vendor/sys/dev/drm/savage_drv.h#2 (text+ko) ==== @@ -30,11 +30,11 @@ #define DRIVER_NAME "savage" #define DRIVER_DESC "Savage3D/MX/IX, Savage4, SuperSavage, Twister, ProSavage[DDR]" -#define DRIVER_DATE "20050120" +#define DRIVER_DATE "20050313" #define DRIVER_MAJOR 2 -#define DRIVER_MINOR 2 -#define DRIVER_PATCHLEVEL 0 +#define DRIVER_MINOR 4 +#define DRIVER_PATCHLEVEL 1 /* Interface history: * * 1.x The DRM driver from the VIA/S3 code drop, basically a dummy @@ -42,6 +42,11 @@ * 2.1 Scissors registers managed by the DRM, 3D operations clipped by * cliprects of the cmdbuf ioctl * 2.2 Implemented SAVAGE_CMD_DMA_IDX and SAVAGE_CMD_VB_IDX + * 2.3 Event counters used by BCI_EVENT_EMIT/WAIT ioctls are now 32 bits + * wide and thus very long lived (unlikely to ever wrap). The size + * in the struct was 32 bits before, but only 16 bits were used + * 2.4 Implemented command DMA. Now drm_savage_init_t.cmd_dma_offset is + * actually used */ typedef struct drm_savage_age { @@ -56,6 +61,16 @@ drm_buf_t *buf; } drm_savage_buf_priv_t; +typedef struct drm_savage_dma_page { + drm_savage_age_t age; + unsigned int used, flushed; +} drm_savage_dma_page_t; +#define SAVAGE_DMA_PAGE_SIZE 1024 /* in dwords */ +/* Fake DMA buffer size in bytes. 4 pages. Allows a maximum command + * size of 16kbytes or 4k entries. Minimum requirement would be + * 10kbytes for 255 40-byte vertices in one drawing command. */ +#define SAVAGE_FAKE_DMA_SIZE (SAVAGE_DMA_PAGE_SIZE*4*4) + /* interesting bits of hardware state that are saved in dev_priv */ typedef union { struct drm_savage_common_state { @@ -140,6 +155,7 @@ drm_local_map_t *status; drm_local_map_t *agp_textures; drm_local_map_t *cmd_dma; + drm_local_map_t fake_dma; struct { int handle; @@ -152,6 +168,11 @@ uint16_t event_counter; unsigned int event_wrap; + /* Savage4 command DMA */ + drm_savage_dma_page_t *dma_pages; + unsigned int nr_dma_pages, first_dma_page, current_dma_page; + drm_savage_age_t last_dma_age; + /* saved hw state for global/local check on S3D */ uint32_t hw_draw_ctrl, hw_zbuf_ctrl; /* and for scissors (global, so don't emit if not changed) */ @@ -169,6 +190,7 @@ * Avoid unwanted macro expansion. */ void (*emit_clip_rect)(struct drm_savage_private *dev_priv, drm_clip_rect_t *pbox); + void (*dma_flush)(struct drm_savage_private *dev_priv); } drm_savage_private_t; /* ioctls */ @@ -182,6 +204,10 @@ extern uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv, unsigned int flags); extern void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf); +extern void savage_dma_reset(drm_savage_private_t *dev_priv); +extern void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page); +extern uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, + unsigned int n); extern int savage_preinit(drm_device_t *dev, unsigned long chipset); extern int savage_postcleanup(drm_device_t *dev); extern int savage_do_cleanup_bci(drm_device_t *dev); @@ -287,6 +313,7 @@ /* common stuff */ #define SAVAGE_VERTBUFADDR 0x3e #define SAVAGE_BITPLANEWTMASK 0xd7 +#define SAVAGE_DMABUFADDR 0x51 /* texture enable bits (needed for tex addr checking) */ #define SAVAGE_TEXCTRL_TEXEN_MASK 0x00010000 /* S3D */ @@ -405,6 +432,8 @@ #define BCI_CMD_DRAW_NO_V1 0x00000080 #define BCI_CMD_DRAW_NO_UV1 0x000000c0 +#define BCI_CMD_DMA 0xa8000000 + #define BCI_W_H(w, h) ((((h) << 16) | (w)) & 0x0FFF0FFF) #define BCI_X_Y(x, y) ((((y) << 16) | (x)) & 0x0FFF0FFF) #define BCI_X_W(x, y) ((((w) << 16) | (x)) & 0x0FFF0FFF) @@ -428,10 +457,17 @@ BCI_WRITE(BCI_CMD_SET_REGISTER | \ ((uint32_t)(n) & 0xff) << 16 | \ ((uint32_t)(first) & 0xffff)) +#define DMA_SET_REGISTERS( first, n ) \ + DMA_WRITE(BCI_CMD_SET_REGISTER | \ + ((uint32_t)(n) & 0xff) << 16 | \ + ((uint32_t)(first) & 0xffff)) #define BCI_DRAW_PRIMITIVE(n, type, skip) \ BCI_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \ ((n) << 16)) +#define DMA_DRAW_PRIMITIVE(n, type, skip) \ + DMA_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \ + ((n) << 16)) #define BCI_DRAW_INDICES_S3D(n, type, i0) \ BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \ @@ -441,6 +477,9 @@ BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \ (skip) | ((n) << 16)) +#define BCI_DMA(n) \ + BCI_WRITE(BCI_CMD_DMA | (((n) >> 1) - 1)) + /* * access to MMIO */ @@ -470,6 +509,54 @@ } \ } while(0) +/* + * command DMA support + */ +#define SAVAGE_DMA_DEBUG 1 + +#define DMA_LOCALS uint32_t *dma_ptr; + +#define BEGIN_DMA( n ) do { \ + unsigned int cur = dev_priv->current_dma_page; \ + unsigned int rest = SAVAGE_DMA_PAGE_SIZE - \ + dev_priv->dma_pages[cur].used; \ + if ((n) > rest) { \ + dma_ptr = savage_dma_alloc(dev_priv, (n)); \ + } else { /* fast path for small allocations */ \ + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + \ + cur * SAVAGE_DMA_PAGE_SIZE + \ + dev_priv->dma_pages[cur].used; \ + if (dev_priv->dma_pages[cur].used == 0) \ + savage_dma_wait(dev_priv, cur); \ + dev_priv->dma_pages[cur].used += (n); \ + } \ +} while(0) + +#define DMA_WRITE( val ) *dma_ptr++ = (uint32_t)(val) + +#define DMA_COPY_FROM_USER(src,n) do { \ + DRM_COPY_FROM_USER_UNCHECKED(dma_ptr, (src), (n)*4); \ + dma_ptr += n; \ +} while(0) + +#if SAVAGE_DMA_DEBUG +#define DMA_COMMIT() do { \ + unsigned int cur = dev_priv->current_dma_page; \ + uint32_t *expected = (uint32_t *)dev_priv->cmd_dma->handle + \ + cur * SAVAGE_DMA_PAGE_SIZE + \ + dev_priv->dma_pages[cur].used; \ + if (dma_ptr != expected) { \ + DRM_ERROR("DMA allocation and use don't match: " \ + "%p != %p\n", expected, dma_ptr); \ + savage_dma_reset(dev_priv); \ + } \ +} while(0) +#else +#define DMA_COMMIT() do {/* nothing */} while(0) +#endif + +#define DMA_FLUSH() dev_priv->dma_flush(dev_priv) + /* Buffer aging via event tag */ @@ -489,9 +576,7 @@ (age)->wrap = w; \ } while(0) -#define TEST_AGE( age, e, w ) \ - ( (age)->wrap+1 < (w) || \ - ( (age)->wrap+1 == (w) && (e) <= dev_priv->event_counter ) || \ - (age)->event <= (e) ) +#define TEST_AGE( age, e, w ) \ + ( (age)->wrap < (w) || ( (age)->wrap == (w) && (age)->event <= (e) ) ) #endif /* __SAVAGE_DRV_H__ */ ==== //depot/projects/drm-merge-vendor/sys/dev/drm/savage_state.c#2 (text+ko) ==== @@ -39,15 +39,16 @@ ((((uint32_t)pbox->y2-1) << 16) & 0x07ff0000); if (scstart != dev_priv->state.s3d.scstart || scend != dev_priv->state.s3d.scend) { - BCI_LOCALS; - BEGIN_BCI(4); - BCI_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); - BCI_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2); - BCI_WRITE(scstart); - BCI_WRITE(scend); + DMA_LOCALS; + BEGIN_DMA(4); + DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); + DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2); + DMA_WRITE(scstart); + DMA_WRITE(scend); dev_priv->state.s3d.scstart = scstart; dev_priv->state.s3d.scend = scend; dev_priv->waiting = 1; + DMA_COMMIT(); } } @@ -64,15 +65,16 @@ ((((uint32_t)pbox->y2-1) << 12) & 0x00fff000); if (drawctrl0 != dev_priv->state.s4.drawctrl0 || drawctrl1 != dev_priv->state.s4.drawctrl1) { - BCI_LOCALS; - BEGIN_BCI(4); - BCI_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); - BCI_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2); - BCI_WRITE(drawctrl0); - BCI_WRITE(drawctrl1); + DMA_LOCALS; + BEGIN_DMA(4); + DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); + DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2); + DMA_WRITE(drawctrl0); + DMA_WRITE(drawctrl1); dev_priv->state.s4.drawctrl0 = drawctrl0; dev_priv->state.s4.drawctrl1 = drawctrl1; dev_priv->waiting = 1; + DMA_COMMIT(); } } @@ -192,7 +194,7 @@ const drm_savage_cmd_header_t *cmd_header, const uint32_t __user *regs) { - BCI_LOCALS; + DMA_LOCALS; unsigned int count = cmd_header->state.count; unsigned int start = cmd_header->state.start; unsigned int count2 = 0; @@ -244,18 +246,18 @@ bci_size = count + (count+254)/255 + count2 + (count2+254)/255; if (cmd_header->state.global) { - BEGIN_BCI(bci_size+1); - BCI_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); + BEGIN_DMA(bci_size+1); + DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); dev_priv->waiting = 1; } else { - BEGIN_BCI(bci_size); + BEGIN_DMA(bci_size); } do { while (count > 0) { unsigned int n = count < 255 ? count : 255; - BCI_SET_REGISTERS(start, n); - BCI_COPY_FROM_USER(regs, n); + DMA_SET_REGISTERS(start, n); + DMA_COPY_FROM_USER(regs, n); count -= n; start += n; regs += n; @@ -266,6 +268,8 @@ count2 = 0; } while (count); + DMA_COMMIT(); + return 0; } @@ -281,6 +285,11 @@ unsigned int start = cmd_header->prim.start; unsigned int i; + if (!dmabuf) { + DRM_ERROR("called without dma buffers!\n"); + return DRM_ERR(EINVAL); + } + if (!n) return 0; @@ -335,6 +344,11 @@ return DRM_ERR(EINVAL); } + /* Vertex DMA doesn't work with command DMA at the same time, + * so we use BCI_... to submit commands here. Flush buffered + * faked DMA first. */ + DMA_FLUSH(); + if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { BEGIN_BCI(2); BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); @@ -405,7 +419,7 @@ unsigned int vb_size, unsigned int vb_stride) { - BCI_LOCALS; + DMA_LOCALS; unsigned char reorder = 0; unsigned int prim = cmd_header->prim.prim; unsigned int skip = cmd_header->prim.skip; @@ -482,28 +496,32 @@ int reorder[3] = {-1, -1, -1}; reorder[start%3] = 2; - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); for (i = start; i < start+count; ++i) { unsigned int j = i + reorder[i % 3]; - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], vtx_size); } + + DMA_COMMIT(); } else { - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); if (vb_stride == vtx_size) { - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*start], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*start], vtx_size*count); } else { for (i = start; i < start+count; ++i) { - BCI_COPY_FROM_USER( + DMA_COPY_FROM_USER( &vtxbuf[vb_stride*i], vtx_size); } } + + DMA_COMMIT(); } start += count; @@ -527,6 +545,11 @@ unsigned int n = cmd_header->idx.count; unsigned int i; + if (!dmabuf) { + DRM_ERROR("called without dma buffers!\n"); + return DRM_ERR(EINVAL); + } + if (!n) return 0; >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200504130259.j3D2x6X1003441>