diff --git a/src/video/vid_s3_virge.c b/src/video/vid_s3_virge.c index ea3a899f4..aecd309e3 100644 --- a/src/video/vid_s3_virge.c +++ b/src/video/vid_s3_virge.c @@ -40,31 +40,25 @@ #include <86box/vid_svga.h> #include <86box/vid_svga_render.h> +#ifdef MIN + #undef MIN +#endif +#ifdef MAX + #undef MAX +#endif +#ifdef CLAMP + #undef CLAMP +#endif + +static uint64_t virge_time = 0; + static int dither[4][4] = { - {0, 4, 1, 5}, - { 6, 2, 7, 3}, - { 1, 5, 0, 4}, - { 7, 3, 6, 2}, + {0, 4, 1, 5}, + {6, 2, 7, 3}, + {1, 5, 0, 4}, + {7, 3, 6, 2} }; -#define RB_SIZE 256 -#define RB_MASK (RB_SIZE - 1) - -#define RB_ENTRIES (virge->s3d_write_idx - virge->s3d_read_idx) -#define RB_FULL (RB_ENTRIES == RB_SIZE) -#define RB_EMPTY (!RB_ENTRIES) - -#define FIFO_SIZE 65536 -#define FIFO_MASK (FIFO_SIZE - 1) -#define FIFO_ENTRY_SIZE (1 << 31) - -#define FIFO_ENTRIES (virge->fifo_write_idx - virge->fifo_read_idx) -#define FIFO_FULL ((virge->fifo_write_idx - virge->fifo_read_idx) >= FIFO_SIZE) -#define FIFO_EMPTY (virge->fifo_read_idx == virge->fifo_write_idx) - -#define FIFO_TYPE 0xff000000 -#define FIFO_ADDR 0x00ffffff - #define ROM_VIRGE_325 "roms/video/s3virge/86c325.bin" #define ROM_DIAMOND_STEALTH3D_2000 "roms/video/s3virge/s3virge.bin" #define ROM_DIAMOND_STEALTH3D_3000 "roms/video/s3virge/diamondstealth3000.vbi" @@ -76,6 +70,24 @@ static int dither[4][4] = { #define ROM_DIAMOND_STEALTH3D_4000 "roms/video/s3virge/86c357.bin" #define ROM_TRIO3D2X "roms/video/s3virge/TRIO3D2X_8mbsdr.VBI" +#define RB_SIZE 256 +#define RB_MASK (RB_SIZE - 1) + +#define RB_ENTRIES (virge->s3d_write_idx - virge->s3d_read_idx) +#define RB_FULL (RB_ENTRIES == RB_SIZE) +#define RB_EMPTY (!RB_ENTRIES) + +#define FIFO_SIZE 65536 +#define FIFO_MASK (FIFO_SIZE - 1) +#define FIFO_ENTRY_SIZE (1 << 31) + +#define FIFO_ENTRIES (virge->fifo_write_idx - virge->fifo_read_idx) +#define FIFO_FULL ((virge->fifo_write_idx - virge->fifo_read_idx) >= FIFO_SIZE) +#define FIFO_EMPTY (virge->fifo_read_idx == virge->fifo_write_idx) + +#define FIFO_TYPE 0xff000000 +#define FIFO_ADDR 0x00ffffff + enum { S3_VIRGE_325, S3_DIAMOND_STEALTH3D_2000, @@ -98,58 +110,80 @@ enum { }; enum { - FIFO_INVALID = (0x00 << 24), - FIFO_WRITE_BYTE = (0x01 << 24), - FIFO_WRITE_WORD = (0x02 << 24), + FIFO_INVALID = (0x00 << 24), + FIFO_WRITE_BYTE = (0x01 << 24), + FIFO_WRITE_WORD = (0x02 << 24), FIFO_WRITE_DWORD = (0x03 << 24) }; -typedef struct -{ - uint32_t addr_type; - uint32_t val; +typedef struct { + uint32_t addr_type; + uint32_t val; } fifo_entry_t; typedef struct s3d_t { - uint32_t cmd_set; - int clip_l, clip_r, clip_t, clip_b; + uint32_t cmd_set; + int clip_l; + int clip_r; + int clip_t; + int clip_b; - uint32_t dest_base; - uint32_t dest_str; + uint32_t dest_base; + uint32_t dest_str; - uint32_t z_base; - uint32_t z_str; + uint32_t z_base; + uint32_t z_str; - uint32_t tex_base; - uint32_t tex_bdr_clr; - uint32_t tbv, tbu; - int32_t TdVdX, TdUdX; - int32_t TdVdY, TdUdY; - uint32_t tus, tvs; + uint32_t tex_base; + uint32_t tex_bdr_clr; + uint32_t tbv; + uint32_t tbu; + int32_t TdVdX; + int32_t TdUdX; + int32_t TdVdY; + int32_t TdUdY; + uint32_t tus; + uint32_t tvs; - int32_t TdZdX, TdZdY; - uint32_t tzs; + int32_t TdZdX; + int32_t TdZdY; + uint32_t tzs; - int32_t TdWdX, TdWdY; - uint32_t tws; + int32_t TdWdX; + int32_t TdWdY; + uint32_t tws; - int32_t TdDdX, TdDdY; - uint32_t tds; + int32_t TdDdX; + int32_t TdDdY; + uint32_t tds; - int16_t TdGdX, TdBdX, TdRdX, TdAdX; - int16_t TdGdY, TdBdY, TdRdY, TdAdY; - uint32_t tgs, tbs, trs, tas; + int16_t TdGdX; + int16_t TdBdX; + int16_t TdRdX; + int16_t TdAdX; + int16_t TdGdY; + int16_t TdBdY; + int16_t TdRdY; + int16_t TdAdY; + uint32_t tgs; + uint32_t tbs; + uint32_t trs; + uint32_t tas; - uint32_t TdXdY12; - uint32_t txend12; - uint32_t TdXdY01; - uint32_t txend01; - uint32_t TdXdY02; - uint32_t txs; - uint32_t tys; - int ty01, ty12, tlr; + uint32_t TdXdY12; + uint32_t txend12; + uint32_t TdXdY01; + uint32_t txend01; + uint32_t TdXdY02; + uint32_t txs; + uint32_t tys; + int ty01; + int ty12; + int tlr; - uint8_t fog_r, fog_g, fog_b; + uint8_t fog_r; + uint8_t fog_g; + uint8_t fog_b; } s3d_t; typedef struct virge_t { @@ -157,43 +191,51 @@ typedef struct virge_t { mem_mapping_t mmio_mapping; mem_mapping_t new_mmio_mapping; - rom_t bios_rom; + rom_t bios_rom; - svga_t svga; + svga_t svga; - uint8_t bank; - uint8_t ma_ext; - uint8_t reg6b, lfb_bios; + uint8_t bank; + uint8_t ma_ext; - uint8_t virge_id, virge_id_high, virge_id_low, virge_rev; + uint8_t virge_id; + uint8_t virge_id_high; + uint8_t virge_id_low; + uint8_t virge_rev; - uint8_t int_line; + uint32_t linear_base; + uint32_t linear_size; - uint32_t linear_base, linear_size; + uint8_t pci_regs[256]; + uint8_t pci_slot; - uint8_t pci_regs[256]; + int chip; - uint8_t pci_slot; - uint8_t irq_state; + int bilinear_enabled; + int dithering_enabled; + int memory_size; - int pci; - int chip; - int is_agp; + int pixel_count; + int tri_count; - int bilinear_enabled; - int dithering_enabled; - uint32_t memory_size; - uint32_t vram_mask; + thread_t * render_thread; + event_t * wake_render_thread; + event_t * wake_main_thread; + event_t * not_full_event; - uint32_t hwc_fg_col, hwc_bg_col; - int hwc_col_stack_pos; + uint32_t hwc_fg_col; + uint32_t hwc_bg_col; + int hwc_col_stack_pos; - struct - { + struct { uint32_t src_base; uint32_t dest_base; - int clip_l, clip_r, clip_t, clip_b; - int dest_str, src_str; + int clip_l; + int clip_r; + int clip_t; + int clip_b; + int dest_str; + int src_str; uint32_t mono_pat_0; uint32_t mono_pat_1; uint32_t pat_bg_clr; @@ -201,20 +243,28 @@ typedef struct virge_t { uint32_t src_bg_clr; uint32_t src_fg_clr; uint32_t cmd_set; - int r_width, r_height; - int rsrc_x, rsrc_y; - int rdest_x, rdest_y; + int r_width; + int r_height; + int rsrc_x; + int rsrc_y; + int rdest_x; + int rdest_y; - int lxend0, lxend1; + int lxend0; + int lxend1; int32_t ldx; - uint32_t lxstart, lystart; + uint32_t lxstart; + uint32_t lystart; int lycnt; int line_dir; - int src_x, src_y; - int dest_x, dest_y; - int w, h; - uint8_t rop; + int src_x; + int src_y; + int dest_x; + int dest_y; + int w; + int h; + uint8_t rop; int data_left_count; uint32_t data_left; @@ -233,14 +283,14 @@ typedef struct virge_t { uint32_t dest_l, dest_r; } s3d; - s3d_t s3d_tri; + s3d_t s3d_tri; - s3d_t s3d_buffer[RB_SIZE]; - atomic_int s3d_read_idx, s3d_write_idx; - atomic_int s3d_busy; + s3d_t s3d_buffer[RB_SIZE]; + int s3d_read_idx; + int s3d_write_idx; + int s3d_busy; - struct - { + struct { uint32_t pri_ctrl; uint32_t chroma_ctrl; uint32_t sec_ctrl; @@ -265,45 +315,64 @@ typedef struct virge_t { uint32_t sec_start; uint32_t sec_size; - int sdif; + int sdif; - int pri_x, pri_y, pri_w, pri_h; - int sec_x, sec_y, sec_w, sec_h; + int pri_x; + int pri_y; + int pri_w; + int pri_h; + int sec_x; + int sec_y; + int sec_w; + int sec_h; } streams; - uint8_t cmd_dma; - uint32_t cmd_dma_base; - uint32_t dma_ptr; - uint64_t blitter_time; + fifo_entry_t fifo[FIFO_SIZE]; + volatile int fifo_read_idx, fifo_write_idx; + + volatile int fifo_thread_run, render_thread_run; + + thread_t * fifo_thread; + event_t *wake_fifo_thread; + event_t * fifo_not_full_event; + + int virge_busy; + + uint8_t subsys_stat; + uint8_t subsys_cntl; + + int local; + + uint8_t serialport; + + uint8_t irq_state; + uint8_t advfunc_cntl; + + void *i2c, *ddc; + + int onboard; int fifo_slots_num; - fifo_entry_t fifo[FIFO_SIZE]; + uint32_t vram_mask; - atomic_int fifo_write_idx, fifo_read_idx; - atomic_int virge_busy; + uint8_t reg6b; + uint8_t lfb_bios; + uint8_t int_line; + uint8_t cmd_dma; - atomic_int fifo_thread_run, render_thread_run; + uint32_t cmd_dma_base; + uint32_t dma_ptr; - thread_t *render_thread; - event_t *wake_render_thread; - event_t *wake_main_thread; - event_t *not_full_event; - - thread_t *fifo_thread; - event_t *wake_fifo_thread; - event_t *fifo_not_full_event; - - int local; - - uint8_t subsys_stat, subsys_cntl, advfunc_cntl; - - uint8_t serialport; - - void *i2c, *ddc; - - int onboard; + int pci; + int is_agp; } virge_t; +static __inline void +wake_fifo_thread(virge_t *virge) { + /* Wake up FIFO thread if moving from idle */ + thread_set_event(virge->wake_fifo_thread); +} + static virge_t *reset_state = NULL; static video_timings_t timing_diamond_stealth3d_2000_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; @@ -311,48 +380,45 @@ static video_timings_t timing_diamond_stealth3d_3000_pci = { .type = VIDEO_PCI, static video_timings_t timing_virge_dx_pci = { .type = VIDEO_PCI, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; static video_timings_t timing_virge_agp = { .type = VIDEO_AGP, .write_b = 2, .write_w = 2, .write_l = 3, .read_b = 28, .read_w = 28, .read_l = 45 }; -static void s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri); +static void queue_triangle(virge_t *virge); -static void s3_virge_recalctimings(svga_t *svga); -static void s3_virge_updatemapping(virge_t *virge); +static void s3_virge_recalctimings(svga_t *svga); +static void s3_virge_updatemapping(virge_t *virge); -static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat); +static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat); static uint8_t s3_virge_mmio_read(uint32_t addr, void *priv); -static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *rivp); +static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *priv); static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *priv); static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv); static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv); static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv); -static void queue_triangle(virge_t *virge); -static void s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type); - enum { - CMD_SET_AE = 1, - CMD_SET_HC = (1 << 1), + CMD_SET_AE = 1, + CMD_SET_HC = (1 << 1), - CMD_SET_FORMAT_MASK = (7 << 2), - CMD_SET_FORMAT_8 = (0 << 2), - CMD_SET_FORMAT_16 = (1 << 2), - CMD_SET_FORMAT_24 = (2 << 2), + CMD_SET_FORMAT_MASK = (7 << 2), + CMD_SET_FORMAT_8 = (0 << 2), + CMD_SET_FORMAT_16 = (1 << 2), + CMD_SET_FORMAT_24 = (2 << 2), - CMD_SET_MS = (1 << 6), - CMD_SET_IDS = (1 << 7), - CMD_SET_MP = (1 << 8), - CMD_SET_TP = (1 << 9), + CMD_SET_MS = (1 << 6), + CMD_SET_IDS = (1 << 7), + CMD_SET_MP = (1 << 8), + CMD_SET_TP = (1 << 9), - CMD_SET_ITA_MASK = (3 << 10), - CMD_SET_ITA_BYTE = (0 << 10), - CMD_SET_ITA_WORD = (1 << 10), - CMD_SET_ITA_DWORD = (2 << 10), + CMD_SET_ITA_MASK = (3 << 10), + CMD_SET_ITA_BYTE = (0 << 10), + CMD_SET_ITA_WORD = (1 << 10), + CMD_SET_ITA_DWORD = (2 << 10), - CMD_SET_ZUP = (1 << 23), + CMD_SET_ZUP = (1 << 23), - CMD_SET_ZB_MODE = (3 << 24), + CMD_SET_ZB_MODE = (3 << 24), - CMD_SET_XP = (1 << 25), - CMD_SET_YP = (1 << 26), + CMD_SET_XP = (1 << 25), + CMD_SET_YP = (1 << 26), CMD_SET_COMMAND_MASK = (15 << 27) }; @@ -363,11 +429,11 @@ enum { #define CMD_SET_TWE (1 << 26) enum { - CMD_SET_COMMAND_BITBLT = (0 << 27), - CMD_SET_COMMAND_RECTFILL = (2 << 27), - CMD_SET_COMMAND_LINE = (3 << 27), - CMD_SET_COMMAND_POLY = (5 << 27), - CMD_SET_COMMAND_NOP = (15 << 27) + CMD_SET_COMMAND_BITBLT = (0 << 27), + CMD_SET_COMMAND_RECTFILL = (2 << 27), + CMD_SET_COMMAND_LINE = (3 << 27), + CMD_SET_COMMAND_POLY = (5 << 27), + CMD_SET_COMMAND_NOP = (15 << 27) }; #define INT_VSY (1 << 0) @@ -382,44 +448,18 @@ enum { #define SERIAL_PORT_SCR (1 << 2) #define SERIAL_PORT_SDR (1 << 3) -#ifdef ENABLE_S3_VIRGE_LOG -int s3_virge_do_log = ENABLE_S3_VIRGE_LOG; - static void -s3_virge_log(const char *fmt, ...) -{ - va_list ap; - - if (s3_virge_do_log) { - va_start(ap, fmt); - pclog_ex(fmt, ap); - va_end(ap); - } -} -#else -# define s3_virge_log(fmt, ...) -#endif - -static __inline void -s3_virge_wake_fifo_thread(virge_t *virge) -{ - thread_set_event(virge->wake_fifo_thread); /*Wake up FIFO thread if moving from idle*/ -} - -static void -s3_virge_update_irqs(virge_t *virge) -{ - if ((virge->svga.crtc[0x32] & 0x10) && (virge->subsys_stat & (virge->subsys_cntl & INT_MASK))) +s3_virge_update_irqs(virge_t *virge) { + if ((virge->svga.crtc[0x32] & 0x10) && (virge->subsys_stat & virge->subsys_cntl & INT_MASK)) pci_set_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); else pci_clear_irq(virge->pci_slot, PCI_INTA, &virge->irq_state); } static void -s3_virge_out(uint16_t addr, uint8_t val, void *priv) -{ +s3_virge_out(uint16_t addr, uint8_t val, void *priv) { virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; + svga_t * svga = &virge->svga; uint8_t old; uint32_t cursoraddr; @@ -433,8 +473,7 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) svga_recalctimings(svga); return; } - if (svga->seqaddr == 4) /*Chain-4 - update banking*/ - { + if (svga->seqaddr == 4) { /*Chain-4 - update banking*/ if (val & 8) svga->write_bank = svga->read_bank = virge->bank << 16; else @@ -458,7 +497,9 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) return; if ((svga->crtcreg == 7) && (svga->crtc[0x11] & 0x80)) val = (svga->crtc[7] & ~0x10) | (val & 0x10); - if ((svga->crtcreg >= 0x20) && (svga->crtcreg < 0x40) && (svga->crtcreg != 0x36) && (svga->crtcreg != 0x38) && (svga->crtcreg != 0x39) && ((svga->crtc[0x38] & 0xcc) != 0x48)) + if ((svga->crtcreg >= 0x20) && (svga->crtcreg < 0x40) && + (svga->crtcreg != 0x36) && (svga->crtcreg != 0x38) && + (svga->crtcreg != 0x39) && ((svga->crtc[0x38] & 0xcc) != 0x48)) return; if ((svga->crtcreg >= 0x40) && ((svga->crtc[0x39] & 0xe0) != 0xa0)) return; @@ -466,11 +507,9 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) return; if (svga->crtcreg >= 0x80) return; - old = svga->crtc[svga->crtcreg]; - svga->crtc[svga->crtcreg] = val; - if (svga->crtcreg > 0x18) - s3_virge_log("OUTB VGA reg = %02x, val = %02x\n", svga->crtcreg, val); + old = svga->crtc[svga->crtcreg]; + svga->crtc[svga->crtcreg] = val; switch (svga->crtcreg) { case 0x31: @@ -528,7 +567,8 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) svga->hwcursor.xoff = svga->crtc[0x4e] & 0x3f; svga->hwcursor.yoff = svga->crtc[0x4f] & 0x3f; cursoraddr = (virge->memory_size == 8) ? 0x1fff : 0x0fff; - svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | svga->crtc[0x4d]) & cursoraddr) * 1024) + (svga->hwcursor.yoff * 16); + svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | + svga->crtc[0x4d]) & cursoraddr) * 1024) + (svga->hwcursor.yoff * 16); break; case 0x4a: @@ -552,7 +592,7 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) switch (virge->hwc_col_stack_pos) { case 0: virge->hwc_bg_col = (virge->hwc_bg_col & 0xffff00) | val; - break; + break; case 1: virge->hwc_bg_col = (virge->hwc_bg_col & 0xff00ff) | (val << 8); break; @@ -600,8 +640,8 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) svga->bpp = (virge->chip == S3_VIRGEVX) ? 24 : 32; break; default: - svga->bpp = 8; - break; + svga->bpp = 8; + break; } break; @@ -616,7 +656,8 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) if (svga->crtcreg < 0xe || svga->crtcreg > 0x10) { if ((svga->crtcreg == 0xc) || (svga->crtcreg == 0xd)) { svga->fullchange = 3; - svga->ma_latch = ((svga->crtc[0xc] << 8) | svga->crtc[0xd]) + ((svga->crtc[8] & 0x60) >> 5); + svga->ma_latch = ((svga->crtc[0xc] << 8) | svga->crtc[0xd]) + + ((svga->crtc[8] & 0x60) >> 5); if ((svga->crtc[0x67] & 0xc) != 0xc) svga->ma_latch |= (virge->ma_ext << 16); } else { @@ -634,10 +675,9 @@ s3_virge_out(uint16_t addr, uint8_t val, void *priv) } static uint8_t -s3_virge_in(uint16_t addr, void *priv) -{ +s3_virge_in(uint16_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; + svga_t * svga = &virge->svga; uint8_t ret; if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1)) @@ -660,10 +700,10 @@ s3_virge_in(uint16_t addr, void *priv) ret = 0xff; break; - case 0x3D4: + case 0x3d4: ret = svga->crtcreg; break; - case 0x3D5: + case 0x3d5: switch (svga->crtcreg) { case 0x2d: ret = virge->virge_id_high; @@ -680,15 +720,15 @@ s3_virge_in(uint16_t addr, void *priv) case 0x31: ret = (svga->crtc[0x31] & 0xcf) | ((virge->ma_ext & 3) << 4); break; - case 0x33: - ret = (svga->crtc[0x33] | 0x04); - break; case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break; + case 0x36: + ret = (svga->crtc[0x36] & 0xfc) | 2; + break; /*PCI bus*/ case 0x45: virge->hwc_col_stack_pos = 0; - ret = svga->crtc[0x45]; + ret = svga->crtc[0x45]; break; case 0x51: ret = (svga->crtc[0x51] & 0xf0) | ((virge->bank >> 2) & 0xc) | ((virge->ma_ext >> 2) & 3); @@ -735,13 +775,13 @@ s3_virge_in(uint16_t addr, void *priv) ret = svga_in(addr, svga); break; } + return ret; } static void -s3_virge_recalctimings(svga_t *svga) -{ - const virge_t *virge = (virge_t *) svga->priv; +s3_virge_recalctimings(svga_t *svga) { + virge_t *virge = (virge_t *) svga->priv; svga->hdisp = svga->hdisp_old; @@ -778,7 +818,7 @@ s3_virge_recalctimings(svga_t *svga) int n = svga->seqregs[0x12] & 0x1f; int r = (svga->seqregs[0x12] >> 5); - if (virge->chip == S3_VIRGEVX || virge->chip == S3_VIRGEDX) + if ((virge->chip == S3_VIRGEVX) || (virge->chip == S3_VIRGEDX)) r &= 7; else if (virge->chip >= S3_VIRGEGX2) r &= 10; @@ -812,9 +852,7 @@ s3_virge_recalctimings(svga_t *svga) video_force_resize_set_monitor(1, svga->monitor_index); } - /* ViRGE/GX2 and later does not use primary stream registers. */ - if ((svga->crtc[0x67] & 0xc) != 0xc || virge->chip >= S3_VIRGEGX2) /*VGA mode*/ - { + if ((svga->crtc[0x67] & 0xc) != 0xc) { /*VGA mode*/ svga->ma_latch |= (virge->ma_ext << 16); if (svga->crtc[0x51] & 0x30) svga->rowoffset |= (svga->crtc[0x51] & 0x30) << 4; @@ -824,47 +862,42 @@ s3_virge_recalctimings(svga_t *svga) svga->rowoffset = 256; svga->lowres = !((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10)); - if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10)) { - switch (svga->bpp) { - case 8: - svga->render = svga_render_8bpp_highres; - break; - case 15: - svga->render = svga_render_15bpp_highres; - if (virge->chip != S3_VIRGEVX && virge->chip < S3_VIRGEGX2) { - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - svga->hdisp >>= 1; - svga->dots_per_clock >>= 1; - } - break; - case 16: - svga->render = svga_render_16bpp_highres; - if (virge->chip != S3_VIRGEVX && virge->chip < S3_VIRGEGX2) { - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - svga->hdisp >>= 1; - svga->dots_per_clock >>= 1; - } - break; - case 24: - svga->render = svga_render_24bpp_highres; - if (virge->chip != S3_VIRGEVX && virge->chip < S3_VIRGEGX2) - svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/ - break; - case 32: - svga->render = svga_render_32bpp_highres; - break; + if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10)) switch (svga->bpp) { + case 8: + svga->render = svga_render_8bpp_highres; + break; + case 15: + svga->render = svga_render_15bpp_highres; + if ((virge->chip != S3_VIRGEVX) && (virge->chip < S3_VIRGEGX2)) { + svga->hdisp >>= 1; + svga->dots_per_clock >>= 1; + } + break; + case 16: + svga->render = svga_render_16bpp_highres; + if ((virge->chip != S3_VIRGEVX) && (virge->chip < S3_VIRGEGX2)) { + svga->hdisp >>= 1; + svga->dots_per_clock >>= 1; + } + break; + case 24: + svga->render = svga_render_24bpp_highres; + if ((virge->chip != S3_VIRGEVX) && (virge->chip < S3_VIRGEGX2)) + svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/ + break; + case 32: + svga->render = svga_render_32bpp_highres; + break; default: break; - } } - svga->vram_display_mask = (!(svga->crtc[0x31] & 0x08) && (svga->crtc[0x32] & 0x40)) ? 0x3ffff : virge->vram_mask; + + svga->vram_display_mask = (!(svga->crtc[0x31] & 0x08) && + (svga->crtc[0x32] & 0x40)) ? 0x3ffff : virge->vram_mask; + svga->overlay.ena = 0; - s3_virge_log("VGA mode\n"); + if (virge->chip >= S3_VIRGEGX2 && (svga->crtc[0x67] & 0xc) == 0xc) { /* ViRGE/GX2 and later does not use primary stream registers. */ svga->overlay.x = virge->streams.sec_x; @@ -881,8 +914,7 @@ s3_virge_recalctimings(svga_t *svga) svga->rowoffset = virge->streams.pri_stride >> 3; svga->vram_display_mask = virge->vram_mask; } - } else /*Streams mode*/ - { + } else { /*Streams mode*/ if (virge->streams.buffer_ctrl & 1) svga->ma_latch = virge->streams.pri_fb1 >> 2; else @@ -892,8 +924,8 @@ s3_virge_recalctimings(svga_t *svga) if (virge->streams.pri_h < svga->dispend) svga->dispend = virge->streams.pri_h; - svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x; - svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y; + svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x; + svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y; svga->overlay.cur_ysize = virge->streams.sec_h; if (virge->streams.buffer_ctrl & 2) @@ -901,12 +933,14 @@ s3_virge_recalctimings(svga_t *svga) else svga->overlay.addr = virge->streams.sec_fb0; - svga->overlay.ena = (svga->overlay.x >= 0); + svga->overlay.ena = (svga->overlay.x >= 0); svga->overlay.v_acc = virge->streams.dda_vert_accumulator; - svga->rowoffset = virge->streams.pri_stride >> 3; + + svga->rowoffset = virge->streams.pri_stride >> 3; if (virge->chip <= S3_VIRGEDX && svga->overlay.ena) { - svga->overlay.ena = (((virge->streams.blend_ctrl >> 24) & 7) == 0b000) || (((virge->streams.blend_ctrl >> 24) & 7) == 0b101); + svga->overlay.ena = (((virge->streams.blend_ctrl >> 24) & 7) == 0b000) || + (((virge->streams.blend_ctrl >> 24) & 7) == 0b101); } else if (virge->chip == S3_VIRGEGX2 && svga->overlay.ena) { /* 0x20 = Secondary Stream enabled */ /* 0x2000 = Primary Stream enabled */ @@ -918,17 +952,9 @@ s3_virge_recalctimings(svga_t *svga) svga->render = svga_render_8bpp_highres; break; case 3: /*KRGB-16 (1.5.5.5)*/ - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - // svga->dots_per_clock >>= 1; svga->render = svga_render_15bpp_highres; break; case 5: /*RGB-16 (5.6.5)*/ - // svga->htotal >>= 1; - // if ((svga->crtc[0x33] & 0x20) || ((svga->crtc[0x67] & 0xc) == 0xc)) - // svga->hblank_end_val = svga->htotal - 1; - // svga->dots_per_clock >>= 1; svga->render = svga_render_16bpp_highres; break; case 6: /*RGB-24 (8.8.8)*/ @@ -937,9 +963,6 @@ s3_virge_recalctimings(svga_t *svga) case 7: /*XRGB-32 (X.8.8.8)*/ svga->render = svga_render_32bpp_highres; break; - - default: - break; } svga->vram_display_mask = virge->vram_mask; } @@ -976,8 +999,7 @@ s3_virge_update_buffer(virge_t *virge) } static void -s3_virge_updatemapping(virge_t *virge) -{ +s3_virge_updatemapping(virge_t *virge) { svga_t *svga = &virge->svga; if (!(virge->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM)) { @@ -988,10 +1010,8 @@ s3_virge_updatemapping(virge_t *virge) return; } - s3_virge_log("Update mapping - bank %02X ", svga->gdcreg[6] & 0xc); - /*Banked framebuffer*/ - switch (svga->gdcreg[6] & 0xc) { /*VGA mapping*/ - case 0x0: /*128k at A0000*/ + switch (svga->gdcreg[6] & 0xc) { /*Banked framebuffer*/ + case 0x0: /*128k at A0000*/ mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x20000); svga->banked_mask = 0xffff; break; @@ -1007,16 +1027,12 @@ s3_virge_updatemapping(virge_t *virge) mem_mapping_set_addr(&svga->mapping, 0xb8000, 0x08000); svga->banked_mask = 0x7fff; break; - - default: - break; } virge->linear_base = (svga->crtc[0x5a] << 16) | (svga->crtc[0x59] << 24); - s3_virge_log("Linear framebuffer %02X, linear base = %08x, display mask = %08x\n", svga->crtc[0x58] & 0x17, virge->linear_base, svga->vram_display_mask); if ((svga->crtc[0x58] & 0x10) || (virge->advfunc_cntl & 0x10)) { /*Linear framebuffer*/ - switch (svga->crtc[0x58] & 7) { + switch (svga->crtc[0x58] & 3) { case 0: /*64k*/ virge->linear_size = 0x10000; break; @@ -1027,7 +1043,7 @@ s3_virge_updatemapping(virge_t *virge) virge->linear_size = 0x200000; break; case 3: /*4mb on other than ViRGE/VX, 8mb on ViRGE/VX*/ - if (virge->chip == S3_VIRGEVX || virge->chip == S3_TRIO3D2X) + if ((virge->chip == S3_VIRGEVX) || (virge->chip == S3_TRIO3D2X)) virge->linear_size = 0x800000; else virge->linear_size = 0x400000; @@ -1035,17 +1051,13 @@ s3_virge_updatemapping(virge_t *virge) case 7: virge->linear_size = 0x800000; break; - - default: - break; } virge->linear_base &= ~(virge->linear_size - 1); - s3_virge_log("Linear framebuffer at %08X size %08X, mask = %08x, CRTC58 sel = %02x\n", virge->linear_base, virge->linear_size, virge->vram_mask, svga->crtc[0x58] & 7); if (virge->linear_base == 0xa0000) { mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000); mem_mapping_disable(&virge->linear_mapping); } else { - if (virge->chip == S3_VIRGEVX || virge->chip == S3_TRIO3D2X) + if ((virge->chip == S3_VIRGEVX) || (virge->chip == S3_TRIO3D2X)) virge->linear_base &= 0xfe000000; else virge->linear_base &= 0xfc000000; @@ -1058,11 +1070,7 @@ s3_virge_updatemapping(virge_t *virge) svga->fb_only = 0; } - s3_virge_log("Memory mapped IO %02X\n", svga->crtc[0x53] & 0x38); - - /* Memory mapped I/O. */ - /* Old MMIO. */ - if ((svga->crtc[0x53] & 0x10) || (virge->advfunc_cntl & 0x20)) { + if ((svga->crtc[0x53] & 0x10) || (virge->advfunc_cntl & 0x20)) { /*Old MMIO*/ if (svga->crtc[0x53] & 0x20) mem_mapping_set_addr(&virge->mmio_mapping, 0xb8000, 0x8000); else @@ -1070,41 +1078,37 @@ s3_virge_updatemapping(virge_t *virge) } else mem_mapping_disable(&virge->mmio_mapping); - /* New MMIO. */ - if (svga->crtc[0x53] & 0x08) + if (svga->crtc[0x53] & 0x08) /*New MMIO*/ mem_mapping_set_addr(&virge->new_mmio_mapping, virge->linear_base + 0x1000000, 0x10000); else mem_mapping_disable(&virge->new_mmio_mapping); } static void -s3_virge_vblank_start(svga_t *svga) -{ +s3_virge_vblank_start(svga_t *svga) { virge_t *virge = (virge_t *) svga->priv; virge->subsys_stat |= INT_VSY; s3_virge_update_irqs(virge); } -static void s3_virge_wait_fifo_idle(virge_t *virge) { +static void +s3_virge_wait_fifo_idle(virge_t *virge) { while (!FIFO_EMPTY) { - s3_virge_wake_fifo_thread(virge); + wake_fifo_thread(virge); thread_wait_event(virge->fifo_not_full_event, 1); } } static uint8_t -s3_virge_mmio_read(uint32_t addr, void *priv) -{ +s3_virge_mmio_read(uint32_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - uint8_t ret = 0xff; - - s3_virge_log("[%04X:%08X]: MMIO ReadB addr = %04x\n", CS, cpu_state.pc, addr & 0xffff); + uint8_t ret; switch (addr & 0xffff) { case 0x8504: if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); + wake_fifo_thread(virge); ret = virge->subsys_stat; return ret; @@ -1114,11 +1118,8 @@ s3_virge_mmio_read(uint32_t addr, void *priv) ret |= 0x10; else ret |= 0x30; - if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); - - s3_virge_log("Subsys status BYTE1, busy=%d, fiford=%d, fifowr=%d, ret=%x.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx, ret); + wake_fifo_thread(virge); return ret; case 0x850c: @@ -1130,55 +1131,8 @@ s3_virge_mmio_read(uint32_t addr, void *priv) ret = virge->fifo_slots_num >> 2; return ret; - case 0x83b0: - case 0x83b1: - case 0x83b2: - case 0x83b3: - case 0x83b4: - case 0x83b5: - case 0x83b6: - case 0x83b7: - case 0x83b8: - case 0x83b9: - case 0x83ba: - case 0x83bb: - case 0x83bc: - case 0x83bd: - case 0x83be: - case 0x83bf: - case 0x83c0: - case 0x83c1: - case 0x83c2: - case 0x83c3: - case 0x83c4: - case 0x83c5: - case 0x83c6: - case 0x83c7: - case 0x83c8: - case 0x83c9: - case 0x83ca: - case 0x83cb: - case 0x83cc: - case 0x83cd: - case 0x83ce: - case 0x83cf: - case 0x83d0: - case 0x83d1: - case 0x83d2: - case 0x83d3: - case 0x83d4: - case 0x83d5: - case 0x83d6: - case 0x83d7: - case 0x83d8: - case 0x83d9: - case 0x83da: - case 0x83db: - case 0x83dc: - case 0x83dd: - case 0x83de: - case 0x83df: - return s3_virge_in(addr & 0x3ff, virge); + case 0x83b0 ... 0x83df: + return s3_virge_in(addr & 0x3ff, priv); case 0x859c: return virge->cmd_dma; @@ -1191,20 +1145,14 @@ s3_virge_mmio_read(uint32_t addr, void *priv) if ((virge->serialport & SERIAL_PORT_SDW) && i2c_gpio_get_sda(virge->i2c)) ret |= SERIAL_PORT_SDR; return ret; - - default: - break; } return 0xff; } static uint16_t -s3_virge_mmio_read_w(uint32_t addr, void *priv) -{ +s3_virge_mmio_read_w(uint32_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - uint16_t ret = 0xffff; - - s3_virge_log("[%04X:%08X]: MMIO ReadW addr = %04x\n", CS, cpu_state.pc, addr & 0xfffe); + uint16_t ret; switch (addr & 0xfffe) { case 0x8504: @@ -1216,7 +1164,7 @@ s3_virge_mmio_read_w(uint32_t addr, void *priv) ret |= virge->subsys_stat; if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); + wake_fifo_thread(virge); return ret; case 0x850c: @@ -1228,17 +1176,16 @@ s3_virge_mmio_read_w(uint32_t addr, void *priv) return virge->cmd_dma; default: - return s3_virge_mmio_read(addr, virge) | (s3_virge_mmio_read(addr + 1, virge) << 8); + return s3_virge_mmio_read(addr, priv) | (s3_virge_mmio_read(addr + 1, priv) << 8); } - return 0xffff; } static uint32_t -s3_virge_mmio_read_l(uint32_t addr, void *priv) -{ +s3_virge_mmio_read_l(uint32_t addr, void *priv) { virge_t *virge = (virge_t *) priv; - uint32_t ret = 0xffffffff; + svga_t *svga = &virge->svga; + uint32_t ret = 0xffffffff; switch (addr & 0xfffc) { case 0x8180: @@ -1258,30 +1205,46 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) break; case 0x81a0: ret = virge->streams.blend_ctrl; + svga_recalctimings(svga); break; case 0x81c0: ret = virge->streams.pri_fb0; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81c4: ret = virge->streams.pri_fb1; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81c8: ret = virge->streams.pri_stride; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81cc: ret = virge->streams.buffer_ctrl; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81d0: ret = virge->streams.sec_fb0; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81d4: ret = virge->streams.sec_fb1; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81d8: ret = virge->streams.sec_stride; + s3_virge_update_buffer(virge); + svga->fullchange = changeframecount; break; case 0x81dc: ret = virge->streams.overlay_ctrl; + svga->fullchange = changeframecount; break; case 0x81e0: ret = virge->streams.k1_vert_scale; @@ -1314,12 +1277,9 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) ret |= 0x00001000; else ret |= 0x00003000; - ret |= virge->subsys_stat; if (!virge->virge_busy) - s3_virge_wake_fifo_thread(virge); - - s3_virge_log("Subsys status DWORD, busy=%d, fiford=%d, fifowr=%d.\n", virge->virge_busy, virge->fifo_read_idx, virge->fifo_write_idx); + wake_fifo_thread(virge); break; case 0x850c: @@ -1463,569 +1423,387 @@ s3_virge_mmio_read_l(uint32_t addr, void *priv) default: ret = s3_virge_mmio_read_w(addr, priv) | (s3_virge_mmio_read_w(addr + 2, priv) << 16); - break; } - s3_virge_log("MMIO ReadL addr = %04x, val = %08x.\n", addr & 0xfffc, ret); return ret; } static void -s3_virge_mmio_fifo_write(uint32_t addr, uint8_t val, virge_t *virge) -{ - if ((addr & 0xffff) < 0x8000) - s3_virge_bitblt(virge, 8, val); - else { - switch (addr & 0xffff) { - case 0x859c: - virge->cmd_dma = val; - break; - - default: - break; - } - } -} - -static void -s3_virge_mmio_fifo_write_w(uint32_t addr, uint16_t val, virge_t *virge) -{ - if ((addr & 0xfffe) < 0x8000) { - if (virge->s3d.cmd_set & CMD_SET_MS) - s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16); - else - s3_virge_bitblt(virge, 16, val); - } else { - if ((addr & 0xfffe) == 0x859c) - virge->cmd_dma = val; - } -} - -static void -s3_virge_mmio_fifo_write_l(uint32_t addr, uint32_t val, virge_t *virge) -{ - if ((addr & 0xfffc) < 0x8000) { - if (virge->s3d.cmd_set & CMD_SET_MS) - s3_virge_bitblt(virge, 32, ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24)); - else - s3_virge_bitblt(virge, 32, val); - } else { - switch (addr & 0xfffc) { - case 0x8590: - virge->cmd_dma_base = val; - break; - - case 0x8594: - virge->dma_ptr = val; - break; - - case 0x8598: - break; - - case 0x859c: - virge->cmd_dma = val; - break; - - case 0xa000: - case 0xa004: - case 0xa008: - case 0xa00c: - case 0xa010: - case 0xa014: - case 0xa018: - case 0xa01c: - case 0xa020: - case 0xa024: - case 0xa028: - case 0xa02c: - case 0xa030: - case 0xa034: - case 0xa038: - case 0xa03c: - case 0xa040: - case 0xa044: - case 0xa048: - case 0xa04c: - case 0xa050: - case 0xa054: - case 0xa058: - case 0xa05c: - case 0xa060: - case 0xa064: - case 0xa068: - case 0xa06c: - case 0xa070: - case 0xa074: - case 0xa078: - case 0xa07c: - case 0xa080: - case 0xa084: - case 0xa088: - case 0xa08c: - case 0xa090: - case 0xa094: - case 0xa098: - case 0xa09c: - case 0xa0a0: - case 0xa0a4: - case 0xa0a8: - case 0xa0ac: - case 0xa0b0: - case 0xa0b4: - case 0xa0b8: - case 0xa0bc: - case 0xa0c0: - case 0xa0c4: - case 0xa0c8: - case 0xa0cc: - case 0xa0d0: - case 0xa0d4: - case 0xa0d8: - case 0xa0dc: - case 0xa0e0: - case 0xa0e4: - case 0xa0e8: - case 0xa0ec: - case 0xa0f0: - case 0xa0f4: - case 0xa0f8: - case 0xa0fc: - case 0xa100: - case 0xa104: - case 0xa108: - case 0xa10c: - case 0xa110: - case 0xa114: - case 0xa118: - case 0xa11c: - case 0xa120: - case 0xa124: - case 0xa128: - case 0xa12c: - case 0xa130: - case 0xa134: - case 0xa138: - case 0xa13c: - case 0xa140: - case 0xa144: - case 0xa148: - case 0xa14c: - case 0xa150: - case 0xa154: - case 0xa158: - case 0xa15c: - case 0xa160: - case 0xa164: - case 0xa168: - case 0xa16c: - case 0xa170: - case 0xa174: - case 0xa178: - case 0xa17c: - case 0xa180: - case 0xa184: - case 0xa188: - case 0xa18c: - case 0xa190: - case 0xa194: - case 0xa198: - case 0xa19c: - case 0xa1a0: - case 0xa1a4: - case 0xa1a8: - case 0xa1ac: - case 0xa1b0: - case 0xa1b4: - case 0xa1b8: - case 0xa1bc: - case 0xa1c0: - case 0xa1c4: - case 0xa1c8: - case 0xa1cc: - case 0xa1d0: - case 0xa1d4: - case 0xa1d8: - case 0xa1dc: - case 0xa1e0: - case 0xa1e4: - case 0xa1e8: - case 0xa1ec: - case 0xa1f0: - case 0xa1f4: - case 0xa1f8: - case 0xa1fc: - { - int x = addr & 4; - int y = (addr >> 3) & 7; - int color; - int byte; - virge->s3d.pattern_8[y * 8 + x] = val & 0xff; - virge->s3d.pattern_8[y * 8 + x + 1] = val >> 8; - virge->s3d.pattern_8[y * 8 + x + 2] = val >> 16; - virge->s3d.pattern_8[y * 8 + x + 3] = val >> 24; - - x = (addr >> 1) & 6; - y = (addr >> 4) & 7; - virge->s3d.pattern_16[y * 8 + x] = val & 0xffff; - virge->s3d.pattern_16[y * 8 + x + 1] = val >> 16; - - addr &= 0x00ff; - for (uint8_t xx = 0; xx < 4; xx++) { - x = ((addr + xx) / 3) % 8; - y = ((addr + xx) / 24) % 8; - color = ((addr + xx) % 3) << 3; - byte = (xx << 3); - virge->s3d.pattern_24[y * 8 + x] &= ~(0xff << color); - virge->s3d.pattern_24[y * 8 + x] |= ((val >> byte) & 0xff) << color; - } - - x = (addr >> 2) & 7; - y = (addr >> 5) & 7; - virge->s3d.pattern_32[y * 8 + x] = val & 0xffffff; - } - break; - - case 0xa4d4: /*2D BitBLT SRC Base*/ - case 0xa8d4: /*2D Line SRC Base*/ - case 0xacd4: /*2D Polygon SRC Base*/ - virge->s3d.src_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - s3_virge_log("PortWrite = %04x, SRC Base = %08x, memsize = %i\n", addr & 0xfffc, val, virge->memory_size); - break; - case 0xa4d8: /*2D BitBLT DEST Base*/ - case 0xa8d8: /*2D Line DEST Base*/ - case 0xacd8: /*2D Polygon DEST Base*/ - virge->s3d.dest_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - s3_virge_log("PortWrite = %04x, DST Base = %08x, memsize = %i\n", addr & 0xfffc, val, virge->memory_size); - break; - case 0xa4dc: /*2D BitBLT Left/Right Clipping*/ - case 0xa8dc: /*2D Line Left/Right Clipping*/ - case 0xacdc: /*2D Polygon Left/Right Clipping*/ - virge->s3d.clip_l = (val >> 16) & 0x7ff; - virge->s3d.clip_r = val & 0x7ff; - break; - case 0xa4e0: /*2D BitBLT Top/Bottom Clipping*/ - case 0xa8e0: /*2D Line Top/Bottom Clipping*/ - case 0xace0: /*2D Polygon Top/Bottom Clipping*/ - virge->s3d.clip_t = (val >> 16) & 0x7ff; - virge->s3d.clip_b = val & 0x7ff; - break; - case 0xa4e4: /*2D BitBLT DEST/SRC Stride*/ - case 0xa8e4: /*2D Line DEST/SRC Stride*/ - case 0xace4: /*2D Polygon DEST/SRC Stride*/ - virge->s3d.dest_str = (val >> 16) & 0xff8; - virge->s3d.src_str = val & 0xff8; - break; - case 0xa4e8: /*2D BitBLT Mono Pattern 0*/ - case 0xace8: /*2D Polygon Mono Pattern 0*/ - virge->s3d.mono_pat_0 = val; - break; - case 0xa4ec: /*2D BitBLT Mono Pattern 1*/ - case 0xacec: /*2D Polygon Mono Pattern 1*/ - virge->s3d.mono_pat_1 = val; - break; - case 0xa4f0: /*2D BitBLT Mono Pattern Background*/ - case 0xacf0: /*2D Polygon Mono Pattern Background*/ - virge->s3d.pat_bg_clr = val; - break; - case 0xa4f4: /*2D BitBLT Mono Pattern Foreground*/ - case 0xa8f4: /*2D Line Mono Pattern Foreground*/ - case 0xacf4: /*2D Polygon Mono Pattern Foreground*/ - virge->s3d.pat_fg_clr = val; - break; - case 0xa4f8: /*2D BitBLT SRC Background*/ - virge->s3d.src_bg_clr = val; - break; - case 0xa4fc: /*2D BitBLT SRC Foreground*/ - virge->s3d.src_fg_clr = val; - break; - case 0xa500: /*2D BitBLT Command Set*/ - case 0xa900: /*2D Line Command Set*/ - case 0xad00: /*2D Polygon Command Set*/ - virge->s3d.cmd_set = val; - if (!(val & CMD_SET_AE)) { - s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - case 0xa504: /*2D BitBLT Rectangle Width/Height*/ - virge->s3d.r_width = (val >> 16) & 0x7ff; - virge->s3d.r_height = val & 0x7ff; - break; - case 0xa508: /*2D BitBLT Rectangle SRC X/Y*/ - virge->s3d.rsrc_x = (val >> 16) & 0x7ff; - virge->s3d.rsrc_y = val & 0x7ff; - break; - case 0xa50c: /*2D BitBLT Rectangle DEST X/Y*/ - virge->s3d.rdest_x = (val >> 16) & 0x7ff; - virge->s3d.rdest_y = val & 0x7ff; - if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - case 0xa96c: /*2D Line Draw Endpoints*/ - virge->s3d.lxend0 = (val >> 16) & 0x7ff; - virge->s3d.lxend1 = val & 0x7ff; - break; - case 0xa970: /*2D Line X Delta*/ - virge->s3d.ldx = (int32_t) val; - break; - case 0xa974: /*2D Line X Start*/ - virge->s3d.lxstart = val; - break; - case 0xa978: /*2D Line Y Start*/ - virge->s3d.lystart = val & 0x7ff; - break; - case 0xa97c: /*2D Line Y Count*/ - virge->s3d.lycnt = val & 0x7ff; - virge->s3d.line_dir = val >> 31; - if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - - case 0xad68: /*2D Polygon Right X Delta*/ - virge->s3d.prdx = val; - break; - case 0xad6c: /*2D Polygon Right X Start*/ - virge->s3d.prxstart = val; - break; - case 0xad70: /*2D Polygon Left X Delta*/ - virge->s3d.pldx = val; - break; - case 0xad74: /*2D Polygon Left X Start*/ - virge->s3d.plxstart = val; - break; - case 0xad78: /*2D Polygon Y Start*/ - virge->s3d.pystart = val & 0x7ff; - break; - case 0xad7c: /*2D Polygon Y Count*/ - virge->s3d.pycnt = val & 0x300007ff; - if (virge->s3d.cmd_set & CMD_SET_AE) { - s3_virge_log("Start blit: WriteL addr = %04x, val = %08x.\n", addr & 0xfffc, val); - s3_virge_bitblt(virge, -1, 0); - } - break; - - case 0xb0d4: - case 0xb4d4: - virge->s3d_tri.z_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - break; - case 0xb0d8: - case 0xb4d8: - virge->s3d_tri.dest_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - break; - case 0xb0dc: - case 0xb4dc: - virge->s3d_tri.clip_l = (val >> 16) & 0x7ff; - virge->s3d_tri.clip_r = val & 0x7ff; - break; - case 0xb0e0: - case 0xb4e0: - virge->s3d_tri.clip_t = (val >> 16) & 0x7ff; - virge->s3d_tri.clip_b = val & 0x7ff; - break; - case 0xb0e4: - case 0xb4e4: - virge->s3d_tri.dest_str = (val >> 16) & 0xff8; - virge->s3d.src_str = val & 0xff8; - break; - case 0xb0e8: - case 0xb4e8: - virge->s3d_tri.z_str = val & 0xff8; - break; - case 0xb4ec: - virge->s3d_tri.tex_base = (virge->memory_size == 8) ? (val & 0x7ffff8) : (val & 0x3ffff8); - break; - case 0xb4f0: - virge->s3d_tri.tex_bdr_clr = val & 0xffffff; - break; - case 0xb0f4: - case 0xb4f4: - virge->s3d_tri.fog_b = val & 0xff; - virge->s3d_tri.fog_g = (val >> 8) & 0xff; - virge->s3d_tri.fog_r = (val >> 16) & 0xff; - break; - case 0xb100: - case 0xb500: - virge->s3d_tri.cmd_set = val; - if (!(val & CMD_SET_AE)) - queue_triangle(virge); - break; - case 0xb504: - virge->s3d_tri.tbv = val & 0xfffff; - break; - case 0xb508: - virge->s3d_tri.tbu = val & 0xfffff; - break; - case 0xb50c: - virge->s3d_tri.TdWdX = val; - break; - case 0xb510: - virge->s3d_tri.TdWdY = val; - break; - case 0xb514: - virge->s3d_tri.tws = val; - break; - case 0xb518: - virge->s3d_tri.TdDdX = val; - break; - case 0xb51c: - virge->s3d_tri.TdVdX = val; - break; - case 0xb520: - virge->s3d_tri.TdUdX = val; - break; - case 0xb524: - virge->s3d_tri.TdDdY = val; - break; - case 0xb528: - virge->s3d_tri.TdVdY = val; - break; - case 0xb52c: - virge->s3d_tri.TdUdY = val; - break; - case 0xb530: - virge->s3d_tri.tds = val; - break; - case 0xb534: - virge->s3d_tri.tvs = val; - break; - case 0xb538: - virge->s3d_tri.tus = val; - break; - case 0xb53c: - virge->s3d_tri.TdGdX = val >> 16; - virge->s3d_tri.TdBdX = val & 0xffff; - break; - case 0xb540: - virge->s3d_tri.TdAdX = val >> 16; - virge->s3d_tri.TdRdX = val & 0xffff; - break; - case 0xb544: - virge->s3d_tri.TdGdY = val >> 16; - virge->s3d_tri.TdBdY = val & 0xffff; - break; - case 0xb548: - virge->s3d_tri.TdAdY = val >> 16; - virge->s3d_tri.TdRdY = val & 0xffff; - break; - case 0xb54c: - virge->s3d_tri.tgs = (val >> 16) & 0xffff; - virge->s3d_tri.tbs = val & 0xffff; - break; - case 0xb550: - virge->s3d_tri.tas = (val >> 16) & 0xffff; - virge->s3d_tri.trs = val & 0xffff; - break; - - case 0xb554: - virge->s3d_tri.TdZdX = val; - break; - case 0xb558: - virge->s3d_tri.TdZdY = val; - break; - case 0xb55c: - virge->s3d_tri.tzs = val; - break; - case 0xb560: - virge->s3d_tri.TdXdY12 = val; - break; - case 0xb564: - virge->s3d_tri.txend12 = val; - break; - case 0xb568: - virge->s3d_tri.TdXdY01 = val; - break; - case 0xb56c: - virge->s3d_tri.txend01 = val; - break; - case 0xb570: - virge->s3d_tri.TdXdY02 = val; - break; - case 0xb574: - virge->s3d_tri.txs = val; - break; - case 0xb578: - virge->s3d_tri.tys = val; - break; - case 0xb57c: - virge->s3d_tri.ty01 = (val >> 16) & 0x7ff; - virge->s3d_tri.ty12 = val & 0x7ff; - virge->s3d_tri.tlr = val >> 31; - if (virge->s3d_tri.cmd_set & CMD_SET_AE) - queue_triangle(virge); - break; - - default: - break; - } - } -} - -static void -s3_virge_fifo_thread(void *priv) -{ - virge_t *virge = (virge_t *)priv; +fifo_thread(void *param) { + virge_t *virge = (virge_t *)param; while (virge->fifo_thread_run) { - s3_virge_log("Thread, entries=%d.\n", FIFO_ENTRIES); thread_set_event(virge->fifo_not_full_event); - //thread_wait_event(virge->wake_fifo_thread, -1); - //thread_reset_event(virge->wake_fifo_thread); + thread_wait_event(virge->wake_fifo_thread, -1); + thread_reset_event(virge->wake_fifo_thread); virge->virge_busy = 1; - s3_virge_log("Busy.\n"); while (!FIFO_EMPTY) { - s3_virge_log("COMMAND: RD=%d, WR=%d, addr=%04x, val=%08x.\n", virge->fifo_read_idx, virge->fifo_write_idx, virge->fifo[virge->fifo_read_idx].addr_type & FIFO_ADDR, virge->fifo[virge->fifo_read_idx].val); uint64_t start_time = plat_timer_read(); uint64_t end_time; fifo_entry_t *fifo = &virge->fifo[virge->fifo_read_idx & FIFO_MASK]; uint32_t val = fifo->val; - int op_type = (((fifo->addr_type & FIFO_ADDR) & 0x1c00) >> 10) - 1; switch (fifo->addr_type & FIFO_TYPE) { case FIFO_WRITE_BYTE: - s3_virge_mmio_fifo_write(fifo->addr_type & FIFO_ADDR, - val, virge); + if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) + s3_virge_bitblt(virge, 8, val); + else if (((fifo->addr_type & FIFO_ADDR) & 0xffff) == 0x859c) + virge->cmd_dma = val; break; case FIFO_WRITE_WORD: - s3_virge_mmio_fifo_write_w(fifo->addr_type & FIFO_ADDR, - val, virge); + if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) { + if (virge->s3d.cmd_set & CMD_SET_MS) + s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16); + else + s3_virge_bitblt(virge, 16, val); + } else if (((fifo->addr_type & FIFO_ADDR) & 0xfffe) == 0x859c) + virge->cmd_dma = val; break; case FIFO_WRITE_DWORD: - s3_virge_mmio_fifo_write_l(fifo->addr_type & FIFO_ADDR, - val, virge); - break; - default: + if (((fifo->addr_type & FIFO_ADDR) & 0xfffc) < 0x8000) { + if (virge->s3d.cmd_set & CMD_SET_MS) + s3_virge_bitblt(virge, 32, + ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | + ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24)); + else + s3_virge_bitblt(virge, 32, val); + } else { + switch ((fifo->addr_type & FIFO_ADDR) & 0xfffc) { + case 0x8590: + virge->cmd_dma_base = val; + break; + + case 0x8594: + virge->dma_ptr = val; + break; + + case 0x8598: + break; + + case 0x859c: + virge->cmd_dma = val; + break; + + case 0xa000 ... 0xa1fc: + { + int x = (fifo->addr_type & FIFO_ADDR) & 4; + int y = ((fifo->addr_type & FIFO_ADDR) >> 3) & 7; + int color; + int byte; + uint32_t addr = (fifo->addr_type & FIFO_ADDR); + virge->s3d.pattern_8[y * 8 + x] = val & 0xff; + virge->s3d.pattern_8[y * 8 + x + 1] = val >> 8; + virge->s3d.pattern_8[y * 8 + x + 2] = val >> 16; + virge->s3d.pattern_8[y * 8 + x + 3] = val >> 24; + + x = ((fifo->addr_type & FIFO_ADDR) >> 1) & 6; + y = ((fifo->addr_type & FIFO_ADDR) >> 4) & 7; + virge->s3d.pattern_16[y * 8 + x] = val & 0xffff; + virge->s3d.pattern_16[y * 8 + x + 1] = val >> 16; + + addr &= 0x00ff; + for (uint8_t xx = 0; xx < 4; xx++) { + x = ((addr + xx) / 3) % 8; + y = ((addr + xx) / 24) % 8; + color = ((addr + xx) % 3) << 3; + byte = (xx << 3); + virge->s3d.pattern_24[y * 8 + x] &= ~(0xff << color); + virge->s3d.pattern_24[y * 8 + x] |= ((val >> byte) & 0xff) << color; + } + + x = ((fifo->addr_type & FIFO_ADDR) >> 2) & 7; + y = ((fifo->addr_type & FIFO_ADDR) >> 5) & 7; + virge->s3d.pattern_32[y * 8 + x] = val & 0xffffff; + } break; + + case 0xa4d4: + case 0xa8d4: + case 0xacd4: + virge->s3d.src_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xa4d8: + case 0xa8d8: + case 0xacd8: + virge->s3d.dest_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xa4dc: + case 0xa8dc: + case 0xacdc: + virge->s3d.clip_l = (val >> 16) & 0x7ff; + virge->s3d.clip_r = val & 0x7ff; + break; + case 0xa4e0: + case 0xa8e0: + case 0xace0: + virge->s3d.clip_t = (val >> 16) & 0x7ff; + virge->s3d.clip_b = val & 0x7ff; + break; + case 0xa4e4: + case 0xa8e4: + case 0xace4: + virge->s3d.dest_str = (val >> 16) & 0xff8; + virge->s3d.src_str = val & 0xff8; + break; + case 0xa4e8: + case 0xace8: + virge->s3d.mono_pat_0 = val; + break; + case 0xa4ec: + case 0xacec: + virge->s3d.mono_pat_1 = val; + break; + case 0xa4f0: + case 0xacf0: + virge->s3d.pat_bg_clr = val; + break; + case 0xa4f4: + case 0xa8f4: + case 0xacf4: + virge->s3d.pat_fg_clr = val; + break; + case 0xa4f8: + virge->s3d.src_bg_clr = val; + break; + case 0xa4fc: + virge->s3d.src_fg_clr = val; + break; + case 0xa500: + case 0xa900: + case 0xad00: + virge->s3d.cmd_set = val; + if (!(val & CMD_SET_AE)) + s3_virge_bitblt(virge, -1, 0); + break; + case 0xa504: + virge->s3d.r_width = (val >> 16) & 0x7ff; + virge->s3d.r_height = val & 0x7ff; + break; + case 0xa508: + virge->s3d.rsrc_x = (val >> 16) & 0x7ff; + virge->s3d.rsrc_y = val & 0x7ff; + break; + case 0xa50c: + virge->s3d.rdest_x = (val >> 16) & 0x7ff; + virge->s3d.rdest_y = val & 0x7ff; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + case 0xa96c: + virge->s3d.lxend0 = (val >> 16) & 0x7ff; + virge->s3d.lxend1 = val & 0x7ff; + break; + case 0xa970: + virge->s3d.ldx = (int32_t)val; + break; + case 0xa974: + virge->s3d.lxstart = val; + break; + case 0xa978: + virge->s3d.lystart = val & 0x7ff; + break; + case 0xa97c: + virge->s3d.lycnt = val & 0x7ff; + virge->s3d.line_dir = val >> 31; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + + case 0xad68: + virge->s3d.prdx = val; + break; + case 0xad6c: + virge->s3d.prxstart = val; + break; + case 0xad70: + virge->s3d.pldx = val; + break; + case 0xad74: + virge->s3d.plxstart = val; + break; + case 0xad78: + virge->s3d.pystart = val & 0x7ff; + break; + case 0xad7c: + virge->s3d.pycnt = val & 0x300007ff; + if (virge->s3d.cmd_set & CMD_SET_AE) + s3_virge_bitblt(virge, -1, 0); + break; + + case 0xb0d4: + case 0xb4d4: + virge->s3d_tri.z_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb0d8: + case 0xb4d8: + virge->s3d_tri.dest_base = val & ((virge->memory_size == 8) ? + (val & 0x7ffff8) : (val & 0x3ffff8)); + break; + case 0xb0dc: + case 0xb4dc: + virge->s3d_tri.clip_l = (val >> 16) & 0x7ff; + virge->s3d_tri.clip_r = val & 0x7ff; + break; + case 0xb0e0: + case 0xb4e0: + virge->s3d_tri.clip_t = (val >> 16) & 0x7ff; + virge->s3d_tri.clip_b = val & 0x7ff; + break; + case 0xb0e4: + case 0xb4e4: + virge->s3d_tri.dest_str = (val >> 16) & 0xff8; + virge->s3d.src_str = val & 0xff8; + break; + case 0xb0e8: + case 0xb4e8: + virge->s3d_tri.z_str = val & 0xff8; + break; + case 0xb4ec: + virge->s3d_tri.tex_base = val & 0x3ffff8; + break; + case 0xb4f0: + virge->s3d_tri.tex_bdr_clr = val & 0xffffff; + break; + case 0xb0f4: + case 0xb4f4: + virge->s3d_tri.fog_b = val & 0xff; + virge->s3d_tri.fog_g = (val >> 8) & 0xff; + virge->s3d_tri.fog_r = (val >> 16) & 0xff; + break; + case 0xb100: + case 0xb500: + virge->s3d_tri.cmd_set = val; + if (!(val & CMD_SET_AE)) + queue_triangle(virge); + break; + case 0xb504: + virge->s3d_tri.tbv = val & 0xfffff; + break; + case 0xb508: + virge->s3d_tri.tbu = val & 0xfffff; + break; + case 0xb50c: + virge->s3d_tri.TdWdX = val; + break; + case 0xb510: + virge->s3d_tri.TdWdY = val; + break; + case 0xb514: + virge->s3d_tri.tws = val; + break; + case 0xb518: + virge->s3d_tri.TdDdX = val; + break; + case 0xb51c: + virge->s3d_tri.TdVdX = val; + break; + case 0xb520: + virge->s3d_tri.TdUdX = val; + break; + case 0xb524: + virge->s3d_tri.TdDdY = val; + break; + case 0xb528: + virge->s3d_tri.TdVdY = val; + break; + case 0xb52c: + virge->s3d_tri.TdUdY = val; + break; + case 0xb530: + virge->s3d_tri.tds = val; + break; + case 0xb534: + virge->s3d_tri.tvs = val; + break; + case 0xb538: + virge->s3d_tri.tus = val; + break; + case 0xb53c: + virge->s3d_tri.TdGdX = val >> 16; + virge->s3d_tri.TdBdX = val & 0xffff; + break; + case 0xb540: + virge->s3d_tri.TdAdX = val >> 16; + virge->s3d_tri.TdRdX = val & 0xffff; + break; + case 0xb544: + virge->s3d_tri.TdGdY = val >> 16; + virge->s3d_tri.TdBdY = val & 0xffff; + break; + case 0xb548: + virge->s3d_tri.TdAdY = val >> 16; + virge->s3d_tri.TdRdY = val & 0xffff; + break; + case 0xb54c: + virge->s3d_tri.tgs = (val >> 16) & 0xffff; + virge->s3d_tri.tbs = val & 0xffff; + break; + case 0xb550: + virge->s3d_tri.tas = (val >> 16) & 0xffff; + virge->s3d_tri.trs = val & 0xffff; + break; + + case 0xb554: + virge->s3d_tri.TdZdX = val; + break; + case 0xb558: + virge->s3d_tri.TdZdY = val; + break; + case 0xb55c: + virge->s3d_tri.tzs = val; + break; + case 0xb560: + virge->s3d_tri.TdXdY12 = val; + break; + case 0xb564: + virge->s3d_tri.txend12 = val; + break; + case 0xb568: + virge->s3d_tri.TdXdY01 = val; + break; + case 0xb56c: + virge->s3d_tri.txend01 = val; + break; + case 0xb570: + virge->s3d_tri.TdXdY02 = val; + break; + case 0xb574: + virge->s3d_tri.txs = val; + break; + case 0xb578: + virge->s3d_tri.tys = val; + break; + case 0xb57c: + virge->s3d_tri.ty01 = (val >> 16) & 0x7ff; + virge->s3d_tri.ty12 = val & 0x7ff; + virge->s3d_tri.tlr = val >> 31; + if (virge->s3d_tri.cmd_set & CMD_SET_AE) + queue_triangle(virge); + break; + } + } break; } virge->fifo_read_idx++; fifo->addr_type = FIFO_INVALID; - if (op_type) { - if (FIFO_ENTRIES > 0xe000) - thread_set_event(virge->fifo_not_full_event); - } else { - thread_set_event(virge->fifo_not_full_event); - if (((fifo->addr_type & FIFO_ADDR) & 0xffff) >= 0x8000) - virge->fifo_read_idx &= FIFO_MASK; - } + if (FIFO_ENTRIES > 0xe000) + thread_set_event(virge->fifo_not_full_event); - end_time = plat_timer_read(); - virge->blitter_time += end_time - start_time; - } - virge->virge_busy = 0; - virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; - s3_virge_update_irqs(virge); + end_time = plat_timer_read(); + virge_time += end_time - start_time; + } + virge->virge_busy = 0; + virge->subsys_stat |= INT_FIFO_EMP | INT_3DF_EMP; + s3_virge_update_irqs(virge); } } static void -s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) -{ - int op_type = ((addr & 0x1c00) >> 10) - 1; +s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) { fifo_entry_t *fifo = &virge->fifo[virge->fifo_write_idx & FIFO_MASK]; if (FIFO_FULL) { @@ -2039,373 +1817,357 @@ s3_virge_queue(virge_t *virge, uint32_t addr, uint32_t val, uint32_t type) virge->fifo_write_idx++; - if (op_type) { - if (FIFO_ENTRIES > 0xe000) - s3_virge_wake_fifo_thread(virge); - if ((FIFO_ENTRIES > 0xe000) || (FIFO_ENTRIES < 8)) - s3_virge_wake_fifo_thread(virge); - } else { - if (FIFO_ENTRIES < 16) { - s3_virge_wake_fifo_thread(virge); - s3_virge_log("FIFO ENTRIES=%d.\n", FIFO_ENTRIES); - } else { - s3_virge_log("FIFO FULL.\n"); - thread_reset_event(virge->fifo_not_full_event); - thread_wait_event(virge->fifo_not_full_event, -1); /*Wait for room in ringbuffer*/ - } - if (((addr & 0xffff) >= 0x8000)) { - if (virge->fifo_write_idx == FIFO_SIZE) - virge->fifo_write_idx = 1; - } - } + if (FIFO_ENTRIES > 0xe000) + wake_fifo_thread(virge); + if (FIFO_ENTRIES > 0xe000 || FIFO_ENTRIES < 8) + wake_fifo_thread(virge); } static void -s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv) -{ +s3_virge_mmio_write(uint32_t addr, uint8_t val, void *priv) { virge_t *virge = (virge_t *) priv; - s3_virge_log("MMIO WriteB addr = %04x, val = %02x\n", addr & 0xffff, val); - if ((addr & 0xffff) < 0x8000) + + if ((addr & 0xfffc) < 0x8000) s3_virge_queue(virge, addr, val, FIFO_WRITE_BYTE); - else { - switch (addr & 0xffff) { - case 0x83b0: - case 0x83b1: - case 0x83b2: - case 0x83b3: - case 0x83b4: - case 0x83b5: - case 0x83b6: - case 0x83b7: - case 0x83b8: - case 0x83b9: - case 0x83ba: - case 0x83bb: - case 0x83bc: - case 0x83bd: - case 0x83be: - case 0x83bf: - case 0x83c0: - case 0x83c1: - case 0x83c2: - case 0x83c3: - case 0x83c4: - case 0x83c5: - case 0x83c6: - case 0x83c7: - case 0x83c8: - case 0x83c9: - case 0x83ca: - case 0x83cb: - case 0x83cc: - case 0x83cd: - case 0x83ce: - case 0x83cf: - case 0x83d0: - case 0x83d1: - case 0x83d2: - case 0x83d3: - case 0x83d4: - case 0x83d5: - case 0x83d6: - case 0x83d7: - case 0x83d8: - case 0x83d9: - case 0x83da: - case 0x83db: - case 0x83dc: - case 0x83dd: - case 0x83de: - case 0x83df: - s3_virge_out(addr & 0x3ff, val, virge); - break; + else switch (addr & 0xffff) { + case 0x83b0 ... 0x83df: + s3_virge_out(addr & 0x3ff, val, priv); + break; - case 0xff20: - virge->serialport = val; - i2c_gpio_set(virge->i2c, !!(val & SERIAL_PORT_SCW), !!(val & SERIAL_PORT_SDW)); - break; - - default: - break; - } + case 0xff20: + virge->serialport = val; + i2c_gpio_set(virge->i2c, !!(val & SERIAL_PORT_SCW), !!(val & SERIAL_PORT_SDW)); + break; } } static void -s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv) -{ +s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *priv) { virge_t *virge = (virge_t *) priv; - s3_virge_log("[%04X:%08X]: MMIO WriteW addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffe, val); - if ((addr & 0xfffe) < 0x8000) + + if ((addr & 0xfffc) < 0x8000) s3_virge_queue(virge, addr, val, FIFO_WRITE_WORD); - else { - if ((addr & 0xfffe) == 0x83d4) { - s3_virge_mmio_write(addr, val, virge); - s3_virge_mmio_write(addr + 1, val >> 8, virge); - } else if ((addr & 0xfffe) == 0xff20) - s3_virge_mmio_write(addr, val, virge); + else switch (addr & 0xfffe) { + case 0x83d4: + s3_virge_mmio_write(addr, val, priv); + s3_virge_mmio_write(addr + 1, val >> 8, priv); + break; + + case 0xff20: + s3_virge_mmio_write(addr, val, priv); + break; } } static void -s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) -{ +s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *priv) { virge_t *virge = (virge_t *) priv; - svga_t *svga = &virge->svga; + svga_t * svga = &virge->svga; - s3_virge_log("[%04X:%08X]: MMIO WriteL addr = %04x, val = %04x\n", CS, cpu_state.pc, addr & 0xfffc, val); - if (((addr & 0xfffc) < 0x8000) || ((addr & 0xe000) == 0xa000)) + if ((addr & 0xfffc) < 0x8000) s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); - else { - switch (addr & 0xfffc) { - case 0x8180: - virge->streams.pri_ctrl = val; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x8184: - virge->streams.chroma_ctrl = val; - break; - case 0x8190: - virge->streams.sec_ctrl = val; - virge->streams.dda_horiz_accumulator = val & 0xfff; - if (val & (1 << 11)) - virge->streams.dda_horiz_accumulator |= 0xfffff800; - virge->streams.sdif = (val >> 24) & 7; - break; - case 0x8194: - virge->streams.chroma_upper_bound = val; - break; - case 0x8198: - virge->streams.sec_filter = val; - virge->streams.k1_horiz_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k1_horiz_scale |= 0xfffff800; - virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff; - if ((val >> 16) & (1 << 10)) - virge->streams.k2_horiz_scale |= 0xfffff800; - break; - case 0x81a0: - virge->streams.blend_ctrl = val; - svga_recalctimings(svga); - break; - case 0x81c0: - virge->streams.pri_fb0 = val & 0x7fffff; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81c4: - virge->streams.pri_fb1 = val & 0x7fffff; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81c8: - virge->streams.pri_stride = val & 0xfff; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81cc: - virge->streams.buffer_ctrl = val; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81d0: - virge->streams.sec_fb0 = val; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81d4: - virge->streams.sec_fb1 = val; - s3_virge_update_buffer(virge); - svga->fullchange = changeframecount; - break; - case 0x81d8: - virge->streams.sec_stride = val; - svga->fullchange = changeframecount; - break; - case 0x81dc: - virge->streams.overlay_ctrl = val; - break; - case 0x81e0: - virge->streams.k1_vert_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k1_vert_scale |= 0xfffff800; - break; - case 0x81e4: - virge->streams.k2_vert_scale = val & 0x7ff; - if (val & (1 << 10)) - virge->streams.k2_vert_scale |= 0xfffff800; - break; - case 0x81e8: - virge->streams.dda_vert_accumulator = val & 0xfff; - if (val & (1 << 11)) - virge->streams.dda_vert_accumulator |= 0xfffff800; - break; - case 0x81ec: - virge->streams.fifo_ctrl = val; - break; - case 0x81f0: - virge->streams.pri_start = val; - virge->streams.pri_x = (val >> 16) & 0x7ff; - virge->streams.pri_y = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81f4: - virge->streams.pri_size = val; - virge->streams.pri_w = (val >> 16) & 0x7ff; - virge->streams.pri_h = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81f8: - virge->streams.sec_start = val; - virge->streams.sec_x = (val >> 16) & 0x7ff; - virge->streams.sec_y = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; - case 0x81fc: - virge->streams.sec_size = val; - virge->streams.sec_w = (val >> 16) & 0x7ff; - virge->streams.sec_h = val & 0x7ff; - svga_recalctimings(svga); - svga->fullchange = changeframecount; - break; + else if ((addr & 0xe000) == 0xa000) + s3_virge_queue(virge, addr, val, FIFO_WRITE_DWORD); + else switch (addr & 0xfffc) { + case 0x8180: + virge->streams.pri_ctrl = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x8184: + virge->streams.chroma_ctrl = val; + break; + case 0x8190: + virge->streams.sec_ctrl = val; + virge->streams.dda_horiz_accumulator = val & 0xfff; + if (val & (1 << 11)) + virge->streams.dda_horiz_accumulator |= 0xfffff800; + virge->streams.sdif = (val >> 24) & 7; + break; + case 0x8194: + virge->streams.chroma_upper_bound = val; + break; + case 0x8198: + virge->streams.sec_filter = val; + virge->streams.k1_horiz_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k1_horiz_scale |= 0xfffff800; + virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff; + if ((val >> 16) & (1 << 10)) + virge->streams.k2_horiz_scale |= 0xfffff800; + break; + case 0x81a0: + virge->streams.blend_ctrl = val; + break; + case 0x81c0: + virge->streams.pri_fb0 = val & 0x3fffff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81c4: + virge->streams.pri_fb1 = val & 0x3fffff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81c8: + virge->streams.pri_stride = val & 0xfff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81cc: + virge->streams.buffer_ctrl = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81d0: + virge->streams.sec_fb0 = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81d4: + virge->streams.sec_fb1 = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81d8: + virge->streams.sec_stride = val; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81dc: + virge->streams.overlay_ctrl = val; + break; + case 0x81e0: + virge->streams.k1_vert_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k1_vert_scale |= 0xfffff800; + break; + case 0x81e4: + virge->streams.k2_vert_scale = val & 0x7ff; + if (val & (1 << 10)) + virge->streams.k2_vert_scale |= 0xfffff800; + break; + case 0x81e8: + virge->streams.dda_vert_accumulator = val & 0xfff; + if (val & (1 << 11)) + virge->streams.dda_vert_accumulator |= 0xfffff800; + break; + case 0x81ec: + virge->streams.fifo_ctrl = val; + break; + case 0x81f0: + virge->streams.pri_start = val; + virge->streams.pri_x = (val >> 16) & 0x7ff; + virge->streams.pri_y = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81f4: + virge->streams.pri_size = val; + virge->streams.pri_w = (val >> 16) & 0x7ff; + virge->streams.pri_h = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81f8: + virge->streams.sec_start = val; + virge->streams.sec_x = (val >> 16) & 0x7ff; + virge->streams.sec_y = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; + case 0x81fc: + virge->streams.sec_size = val; + virge->streams.sec_w = (val >> 16) & 0x7ff; + virge->streams.sec_h = val & 0x7ff; + svga_recalctimings(svga); + svga->fullchange = changeframecount; + break; - case 0x8504: - virge->subsys_stat &= ~(val & 0xff); - virge->subsys_cntl = (val >> 8); - s3_virge_update_irqs(virge); - break; + case 0x8504: + virge->subsys_stat &= ~(val & 0xff); + virge->subsys_cntl = (val >> 8); + s3_virge_update_irqs(virge); + break; - case 0x850c: - virge->advfunc_cntl = val & 0xff; - s3_virge_updatemapping(virge); - break; + case 0x850c: + virge->advfunc_cntl = val & 0xff; + s3_virge_updatemapping(virge); + break; - case 0xff20: - s3_virge_mmio_write(addr, val, virge); - break; - - default: - s3_virge_log("Actual WriteL=%04x.\n", addr & 0xfffc); - break; - } + case 0xff20: + s3_virge_mmio_write(addr, val, priv); + break; } } -#define READ(addr, val) \ - { \ - switch (bpp) { \ - case 0: /*8 bpp*/ \ - val = vram[addr & virge->vram_mask]; \ - break; \ - case 1: /*16 bpp*/ \ - val = *(uint16_t *) &vram[addr & virge->vram_mask]; \ - break; \ - case 2: /*24 bpp*/ \ - val = (*(uint32_t *) &vram[addr & virge->vram_mask]) & 0xffffff; \ - break; \ - } \ - } +#define READ(addr, val) \ + do { \ + switch (bpp) { \ + case 0: /*8 bpp*/ \ + val = vram[addr & svga->vram_mask]; \ + break; \ + case 1: /*16 bpp*/ \ + val = *(uint16_t *)&vram[addr & svga->vram_mask]; \ + break; \ + case 2: /*24 bpp*/ \ + val = (*(uint32_t *)&vram[addr & svga->vram_mask]) & 0xffffff; \ + break; \ + } \ + } while (0) -#define CLIP(x, y) \ - { \ - if ((virge->s3d.cmd_set & CMD_SET_HC) && (x < virge->s3d.clip_l || x > virge->s3d.clip_r || y < virge->s3d.clip_t || y > virge->s3d.clip_b)) \ - update = 0; \ - } +#define Z_READ(addr) *(uint16_t *)&vram[addr & svga->vram_mask] -#define CLIP_3D(x, y) \ - { \ - if ((s3d_tri->cmd_set & CMD_SET_HC) && (x < s3d_tri->clip_l || x > s3d_tri->clip_r || y < s3d_tri->clip_t || y > s3d_tri->clip_b)) \ - update = 0; \ - } +#define Z_WRITE(addr, val) \ + if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE)) \ + *(uint16_t *)&vram[addr & svga->vram_mask] = val -#define MIX() \ - { \ - int c; \ - for (c = 0; c < 24; c++) { \ - int d = (dest & (1 << c)) ? 1 : 0; \ - if (source & (1 << c)) \ - d |= 2; \ - if (pattern & (1 << c)) \ - d |= 4; \ - if (virge->s3d.rop & (1 << d)) \ - out |= (1 << c); \ - } \ - } +#define CLIP(x, y) \ + do { \ + if ((virge->s3d.cmd_set & CMD_SET_HC) && \ + (x < virge->s3d.clip_l || x > virge->s3d.clip_r || \ + y < virge->s3d.clip_t || y > virge->s3d.clip_b)) \ + update = 0; \ + } while (0) -#define WRITE(addr, val) \ - { \ - switch (bpp) { \ - case 0: /*8 bpp*/ \ - vram[addr & virge->vram_mask] = val; \ - svga->changedvram[(addr & virge->vram_mask) >> 12] = changeframecount; \ - break; \ - case 1: /*16 bpp*/ \ - *(uint16_t *) &vram[addr & virge->vram_mask] = val; \ - svga->changedvram[(addr & virge->vram_mask) >> 12] = changeframecount; \ - break; \ - case 2: /*24 bpp*/ \ - *(uint32_t *) &vram[addr & virge->vram_mask] = (val & 0xffffff) | (vram[(addr + 3) & virge->vram_mask] << 24); \ - svga->changedvram[(addr & virge->vram_mask) >> 12] = changeframecount; \ - break; \ - } \ - } +#define CLIP_3D(x, y) \ + do { \ + if ((s3d_tri->cmd_set & CMD_SET_HC) && (x < s3d_tri->clip_l || \ + x > s3d_tri->clip_r || y < s3d_tri->clip_t || \ + y > s3d_tri->clip_b)) \ + update = 0; \ + } while (0) + +#define Z_CLIP(Zzb, Zs) \ + do { \ + if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE)) \ + switch ((s3d_tri->cmd_set >> 20) & 7) { \ + case 0: \ + update = 0; \ + break; \ + case 1: \ + if (Zs <= Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 2: \ + if (Zs != Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 3: \ + if (Zs < Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 4: \ + if (Zs >= Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 5: \ + if (Zs == Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 6: \ + if (Zs > Zzb) \ + update = 0; \ + else \ + Zzb = Zs; \ + break; \ + case 7: \ + update = 1; \ + Zzb = Zs; \ + break; \ + } \ + } while (0) + +#define MIX() \ + do { \ + int c; \ + for (c = 0; c < 24; c++) { \ + int d = (dest & (1 << c)) ? 1 : 0; \ + if (source & (1 << c)) \ + d |= 2; \ + if (pattern & (1 << c)) \ + d |= 4; \ + if (virge->s3d.rop & (1 << d)) \ + out |= (1 << c); \ + } \ + } while (0) + +#define WRITE(addr, val) \ + do { \ + switch (bpp) { \ + case 0: /*8 bpp*/ \ + vram[addr & svga->vram_mask] = val; \ + virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + changeframecount; \ + break; \ + case 1: /*16 bpp*/ \ + *(uint16_t *)&vram[addr & svga->vram_mask] = val; \ + virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + changeframecount; \ + break; \ + case 2: /*24 bpp*/ \ + *(uint32_t *)&vram[addr & svga->vram_mask] = (val & 0xffffff) |\ + (vram[(addr + 3) & svga->vram_mask] << 24); \ + virge->svga.changedvram[(addr & svga->vram_mask) >> 12] = \ + changeframecount; \ + break; \ + } \ + } while (0) static void -s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) -{ - svga_t *svga = &virge->svga; - uint8_t *vram = svga->vram; - uint32_t mono_pattern[64]; - int count_mask; - int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1; - int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1; - int bpp; - int x_mul; - int cpu_dat_shift; - const uint32_t *pattern_data; - uint32_t src_fg_clr; - uint32_t src_bg_clr; - uint32_t src_addr; - uint32_t dest_addr; - uint32_t source = 0; - uint32_t dest = 0; - uint32_t pattern; - uint32_t out = 0; - int update; +s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) { + svga_t *svga = &virge->svga; + uint8_t *vram = virge->svga.vram; + uint32_t mono_pattern[64]; + int count_mask; + int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1; + int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1; + int bpp; + int x_mul; + int cpu_dat_shift; + uint32_t *pattern_data; + uint32_t src_fg_clr; + uint32_t src_bg_clr; switch (virge->s3d.cmd_set & CMD_SET_FORMAT_MASK) { case CMD_SET_FORMAT_8: - bpp = 0; - x_mul = 1; + bpp = 0; + x_mul = 1; cpu_dat_shift = 8; - pattern_data = virge->s3d.pattern_8; - src_fg_clr = virge->s3d.src_fg_clr & 0xff; - src_bg_clr = virge->s3d.src_bg_clr & 0xff; + pattern_data = virge->s3d.pattern_8; + src_fg_clr = virge->s3d.src_fg_clr & 0xff; + src_bg_clr = virge->s3d.src_bg_clr & 0xff; break; case CMD_SET_FORMAT_16: - bpp = 1; - x_mul = 2; + bpp = 1; + x_mul = 2; cpu_dat_shift = 16; - pattern_data = virge->s3d.pattern_16; - src_fg_clr = virge->s3d.src_fg_clr & 0xffff; - src_bg_clr = virge->s3d.src_bg_clr & 0xffff; + pattern_data = virge->s3d.pattern_16; + src_fg_clr = virge->s3d.src_fg_clr & 0xffff; + src_bg_clr = virge->s3d.src_bg_clr & 0xffff; break; case CMD_SET_FORMAT_24: default: - bpp = 2; - x_mul = 3; + bpp = 2; + x_mul = 3; cpu_dat_shift = 24; - pattern_data = virge->s3d.pattern_24; - src_fg_clr = virge->s3d.src_fg_clr; - src_bg_clr = virge->s3d.src_bg_clr; + pattern_data = virge->s3d.pattern_24; + src_fg_clr = virge->s3d.src_fg_clr; + src_bg_clr = virge->s3d.src_bg_clr; break; - } - if (virge->s3d.cmd_set & CMD_SET_MP) - pattern_data = mono_pattern; + } + if (virge->s3d.cmd_set & CMD_SET_MP) + pattern_data = mono_pattern; switch (virge->s3d.cmd_set & CMD_SET_ITA_MASK) { case CMD_SET_ITA_BYTE: @@ -2420,55 +2182,51 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) break; } if (virge->s3d.cmd_set & CMD_SET_MP) { - for (uint8_t y = 0; y < 4; y++) { - for (uint8_t x = 0; x < 8; x++) { - if (virge->s3d.mono_pat_0 & (1 << (x + y * 8))) - mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_fg_clr; - else - mono_pattern[y * 8 + (7 - x)] = virge->s3d.pat_bg_clr; - if (virge->s3d.mono_pat_1 & (1 << (x + y * 8))) - mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_fg_clr; - else - mono_pattern[(y + 4) * 8 + (7 - x)] = virge->s3d.pat_bg_clr; - } + int x; + int y; + for (y = 0; y < 4; y++) { + for (x = 0; x < 8; x++) { + if (virge->s3d.mono_pat_0 & (1 << (x + y * 8))) + mono_pattern[y * 8 + x] = virge->s3d.pat_fg_clr; + else + mono_pattern[y * 8 + x] = virge->s3d.pat_bg_clr; + if (virge->s3d.mono_pat_1 & (1 << (x + y * 8))) + mono_pattern[(y + 4) * 8 + x] = virge->s3d.pat_fg_clr; + else + mono_pattern[(y + 4) * 8 + x] = virge->s3d.pat_bg_clr; + } } } - switch (virge->s3d.cmd_set & CMD_SET_COMMAND_MASK) { + case CMD_SET_COMMAND_NOP: + break; + case CMD_SET_COMMAND_BITBLT: if (count == -1) { - virge->s3d.src_x = virge->s3d.rsrc_x; - virge->s3d.src_y = virge->s3d.rsrc_y; - virge->s3d.dest_x = virge->s3d.rdest_x; - virge->s3d.dest_y = virge->s3d.rdest_y; - virge->s3d.w = virge->s3d.r_width; - virge->s3d.h = virge->s3d.r_height; - virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; + virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_y = virge->s3d.rsrc_y; + virge->s3d.dest_x = virge->s3d.rdest_x; + virge->s3d.dest_y = virge->s3d.rdest_y; + virge->s3d.w = virge->s3d.r_width; + virge->s3d.h = virge->s3d.r_height; + virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; virge->s3d.data_left_count = 0; - s3_virge_log("BitBlt start src_x=%i,src_y=%i,dest_x=%i,dest_y=%i,w=%i,h=%i,rop=%02X,src_base=%x,dest_base=%x\n", - virge->s3d.src_x, - virge->s3d.src_y, - virge->s3d.dest_x, - virge->s3d.dest_y, - virge->s3d.w, - virge->s3d.h, - virge->s3d.rop, - virge->s3d.src_base, - virge->s3d.dest_base); - if (virge->s3d.cmd_set & CMD_SET_IDS) return; } - if (!virge->s3d.h) return; - while (count) { - src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + (virge->s3d.src_y * virge->s3d.src_str); - dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); - out = 0; - update = 1; + uint32_t src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + + (virge->s3d.src_y * virge->s3d.src_str); + uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + + (virge->s3d.dest_y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; + uint32_t pattern; + uint32_t out = 0; + int update = 1; switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS)) { case 0: @@ -2486,18 +2244,18 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) count -= (cpu_dat_shift - virge->s3d.data_left_count); virge->s3d.data_left_count = 0; if (count < cpu_dat_shift) { - virge->s3d.data_left = cpu_dat; + virge->s3d.data_left = cpu_dat; virge->s3d.data_left_count = count; - count = 0; + count = 0; } } else { source = cpu_dat; cpu_dat >>= cpu_dat_shift; count -= cpu_dat_shift; if (count < cpu_dat_shift) { - virge->s3d.data_left = cpu_dat; + virge->s3d.data_left = cpu_dat; virge->s3d.data_left_count = count; - count = 0; + count = 0; } } if ((virge->s3d.cmd_set & CMD_SET_TP) && source == src_fg_clr) @@ -2510,9 +2268,6 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) cpu_dat <<= 1; count--; break; - - default: - break; } CLIP(virge->s3d.dest_x, virge->s3d.dest_y); @@ -2530,9 +2285,9 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) virge->s3d.dest_x += x_inc; virge->s3d.dest_x &= 0x7ff; if (!virge->s3d.w) { - virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_x = virge->s3d.rsrc_x; virge->s3d.dest_x = virge->s3d.rdest_x; - virge->s3d.w = virge->s3d.r_width; + virge->s3d.w = virge->s3d.r_width; virge->s3d.src_y += y_inc; virge->s3d.dest_y += y_inc; @@ -2549,9 +2304,6 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) cpu_dat <<= (count - (count & count_mask)); count &= count_mask; break; - - default: - break; } if (!virge->s3d.h) return; @@ -2563,27 +2315,23 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) case CMD_SET_COMMAND_RECTFILL: /*No source, pattern = pat_fg_clr*/ if (count == -1) { - virge->s3d.src_x = virge->s3d.rsrc_x; - virge->s3d.src_y = virge->s3d.rsrc_y; + virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_y = virge->s3d.rsrc_y; virge->s3d.dest_x = virge->s3d.rdest_x; virge->s3d.dest_y = virge->s3d.rdest_y; - virge->s3d.w = virge->s3d.r_width; - virge->s3d.h = virge->s3d.r_height; - virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; - - s3_virge_log("RctFll start %i,%i %i,%i %02X %08x\n", virge->s3d.dest_x, - virge->s3d.dest_y, - virge->s3d.w, - virge->s3d.h, - virge->s3d.rop, virge->s3d.dest_base); + virge->s3d.w = virge->s3d.r_width; + virge->s3d.h = virge->s3d.r_height; + virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; } while (count && virge->s3d.h) { - source = virge->s3d.pat_fg_clr; - dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); - pattern = virge->s3d.pat_fg_clr; - out = 0; - update = 1; + uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + + (virge->s3d.dest_y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; + uint32_t pattern = virge->s3d.pat_fg_clr; + uint32_t out = 0; + int update = 1; CLIP(virge->s3d.dest_x, virge->s3d.dest_y); @@ -2600,9 +2348,9 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) virge->s3d.dest_x += x_inc; virge->s3d.dest_x &= 0x7ff; if (!virge->s3d.w) { - virge->s3d.src_x = virge->s3d.rsrc_x; + virge->s3d.src_x = virge->s3d.rsrc_x; virge->s3d.dest_x = virge->s3d.rdest_x; - virge->s3d.w = virge->s3d.r_width; + virge->s3d.w = virge->s3d.r_width; virge->s3d.src_y += y_inc; virge->s3d.dest_y += y_inc; @@ -2610,8 +2358,7 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) if (!virge->s3d.h) return; } else - virge->s3d.w--; - + virge->s3d.w--; count--; } break; @@ -2620,8 +2367,8 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) if (count == -1) { virge->s3d.dest_x = virge->s3d.lxstart; virge->s3d.dest_y = virge->s3d.lystart; - virge->s3d.h = virge->s3d.lycnt; - virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; + virge->s3d.h = virge->s3d.lycnt; + virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; } while (virge->s3d.h) { int x; @@ -2630,7 +2377,8 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) x = virge->s3d.dest_x >> 20; - if (virge->s3d.h == virge->s3d.lycnt && ((virge->s3d.line_dir && x > virge->s3d.lxend0) || (!virge->s3d.line_dir && x < virge->s3d.lxend0))) + if (virge->s3d.h == virge->s3d.lycnt && ((virge->s3d.line_dir && x > virge->s3d.lxend0) || + (!virge->s3d.line_dir && x < virge->s3d.lxend0))) x = virge->s3d.lxend0; if (virge->s3d.h == 1) @@ -2642,17 +2390,22 @@ s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat) goto skip_line; do { - uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str); - uint32_t source = 0; - uint32_t dest = 0; + uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + + (virge->s3d.dest_y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; uint32_t pattern; - uint32_t out = 0; - int update = 1; + uint32_t out = 0; + int update = 1; - if ((virge->s3d.h == virge->s3d.lycnt || !first_pixel) && ((virge->s3d.line_dir && x < virge->s3d.lxend0) || (!virge->s3d.line_dir && x > virge->s3d.lxend0))) + if ((virge->s3d.h == virge->s3d.lycnt || !first_pixel) && + ((virge->s3d.line_dir && x < virge->s3d.lxend0) || + (!virge->s3d.line_dir && x > virge->s3d.lxend0))) update = 0; - if ((virge->s3d.h == 1 || !first_pixel) && ((virge->s3d.line_dir && x > virge->s3d.lxend1) || (!virge->s3d.line_dir && x < virge->s3d.lxend1))) + if ((virge->s3d.h == 1 || !first_pixel) && + ((virge->s3d.line_dir && x > virge->s3d.lxend1) || + (!virge->s3d.line_dir && x < virge->s3d.lxend1))) update = 0; CLIP(x, virge->s3d.dest_y); @@ -2686,68 +2439,71 @@ skip_line: virge->s3d.dest_r = virge->s3d.prxstart; if (virge->s3d.pycnt & (1 << 29)) virge->s3d.dest_l = virge->s3d.plxstart; - virge->s3d.h = virge->s3d.pycnt & 0x7ff; + virge->s3d.h = virge->s3d.pycnt & 0x7ff; virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff; while (virge->s3d.h) { - int x = virge->s3d.dest_l >> 20; - int xend = virge->s3d.dest_r >> 20; - int y = virge->s3d.pystart & 0x7ff; - int xdir = (x < xend) ? 1 : -1; - do { - uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (y * virge->s3d.dest_str); - uint32_t source = 0; - uint32_t dest = 0; - uint32_t pattern; - uint32_t out = 0; - int update = 1; + int x = virge->s3d.dest_l >> 20; + int xend = virge->s3d.dest_r >> 20; + int y = virge->s3d.pystart & 0x7ff; + int xdir = (x < xend) ? 1 : -1; + do { + uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (y * virge->s3d.dest_str); + uint32_t source = 0; + uint32_t dest; + uint32_t pattern; + uint32_t out = 0; + int update = 1; - CLIP(x, y); + CLIP(x, y); - if (update) { - READ(dest_addr, dest); - pattern = pattern_data[(y & 7) * 8 + (x & 7)]; - MIX(); + if (update) { + READ(dest_addr, dest); + pattern = pattern_data[(y & 7) * 8 + (x & 7)]; - WRITE(dest_addr, out); - } + MIX(); - x = (x + xdir) & 0x7ff; - } while (x != (xend + xdir)); + WRITE(dest_addr, out); + } - virge->s3d.dest_l += virge->s3d.pldx; - virge->s3d.dest_r += virge->s3d.prdx; - virge->s3d.h--; - virge->s3d.pystart = (virge->s3d.pystart - 1) & 0x7ff; + x = (x + xdir) & 0x7ff; + } while (x != (xend + xdir)); + + virge->s3d.dest_l += virge->s3d.pldx; + virge->s3d.dest_r += virge->s3d.prdx; + virge->s3d.h--; + virge->s3d.pystart = (virge->s3d.pystart - 1) & 0x7ff; } break; - case CMD_SET_COMMAND_NOP: - break; - default: - break; + fatal("s3_virge_bitblt : blit command %i %08x\n", + (virge->s3d.cmd_set >> 27) & 0xf, virge->s3d.cmd_set); } } -#define RGB15_TO_24(val, r, g, b) \ - b = ((val & 0x001f) << 3) | ((val & 0x001f) >> 2); \ - g = ((val & 0x03e0) >> 2) | ((val & 0x03e0) >> 7); \ - r = ((val & 0x7c00) >> 7) | ((val & 0x7c00) >> 12); +#define RGB15_TO_24(val, r, g, b) \ + b = ((val & 0x001f) << 3) | ((val & 0x001f) >> 2); \ + g = ((val & 0x03e0) >> 2) | ((val & 0x03e0) >> 7); \ + r = ((val & 0x7c00) >> 7) | ((val & 0x7c00) >> 12); -#define RGB24_TO_24(val, r, g, b) \ - b = val & 0xff; \ - g = (val & 0xff00) >> 8; \ - r = (val & 0xff0000) >> 16 +#define RGB24_TO_24(val, r, g, b) \ + b = val & 0xff; \ + g = (val & 0xff00) >> 8; \ + r = (val & 0xff0000) >> 16 -#define RGB15(r, g, b, dest) \ - if (virge->dithering_enabled) { \ - int add = dither[_y & 3][_x & 3]; \ - int _r = (r > 248) ? 248 : r + add; \ - int _g = (g > 248) ? 248 : g + add; \ - int _b = (b > 248) ? 248 : b + add; \ - dest = ((_b >> 3) & 0x1f) | (((_g >> 3) & 0x1f) << 5) | (((_r >> 3) & 0x1f) << 10); \ - } else \ - dest = ((b >> 3) & 0x1f) | (((g >> 3) & 0x1f) << 5) | (((r >> 3) & 0x1f) << 10) +#define RGB15(r, g, b, dest) \ + if (virge->dithering_enabled) { \ + int add = dither[_y & 3][_x & 3]; \ + int _r = (r > 248) ? 248 : r + add; \ + int _g = (g > 248) ? 248 : g + add; \ + int _b = (b > 248) ? 248 : b + add; \ + dest = ((_b >> 3) & 0x1f) | \ + (((_g >> 3) & 0x1f) << 5) | \ + (((_r >> 3) & 0x1f) << 10); \ + } else \ + dest = ((b >> 3) & 0x1f) | \ + (((g >> 3) & 0x1f) << 5) | \ + (((r >> 3) & 0x1f) << 10) #define RGB24(r, g, b) ((b) | ((g) << 8) | ((r) << 16)) @@ -2756,32 +2512,50 @@ typedef struct rgba_t { } rgba_t; typedef struct s3d_state_t { - int32_t r, g, b, a, u, v, d, w; + int32_t r; + int32_t g; + int32_t b; + int32_t a; + int32_t u; + int32_t v; + int32_t d; + int32_t w; - int32_t base_r, base_g, base_b, base_a, base_u, base_v, base_d, base_w; + int32_t base_r; + int32_t base_g; + int32_t base_b; + int32_t base_a; + int32_t base_u; + int32_t base_v; + int32_t base_d; + int32_t base_w; - uint32_t base_z; + uint32_t base_z; - uint32_t tbu, tbv; + uint32_t tbu; + uint32_t tbv; - uint32_t cmd_set; - int max_d; + uint32_t cmd_set; + int max_d; uint16_t *texture[10]; - uint32_t tex_bdr_clr; + uint32_t tex_bdr_clr; - int32_t x1, x2; - int y; + int32_t x1; + int32_t x2; - rgba_t dest_rgba; + int y; + + rgba_t dest_rgba; } s3d_state_t; typedef struct s3d_texture_state_t { - int level; - int texture_shift; + int level; + int texture_shift; - int32_t u, v; + int32_t u; + int32_t v; } s3d_texture_state_t; static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out); @@ -2795,10 +2569,10 @@ static int _x; static int _y; static void -tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12); out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7); @@ -2807,10 +2581,10 @@ tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out } static void -tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) val = state->tex_bdr_clr; @@ -2822,10 +2596,10 @@ tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba } static void -tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8); out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4); @@ -2834,10 +2608,10 @@ tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out } static void -tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint16_t val = state->texture[texture_state->level][offset]; +tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint16_t val = state->texture[texture_state->level][offset]; if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) val = state->tex_bdr_clr; @@ -2849,21 +2623,22 @@ tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba } static void -tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint32_t val = ((uint32_t *) state->texture[texture_state->level])[offset]; +tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset]; out->r = (val >> 16) & 0xff; out->g = (val >> 8) & 0xff; out->b = val & 0xff; out->a = (val >> 24) & 0xff; } + static void -tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) -{ - int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); - uint32_t val = ((uint32_t *) state->texture[texture_state->level])[offset]; +tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) { + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); + uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset]; if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) val = state->tex_bdr_clr; @@ -2875,21 +2650,19 @@ tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba } static void -tex_sample_normal(s3d_state_t *state) -{ +tex_sample_normal(s3d_state_t *state) { s3d_texture_state_t texture_state; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = state->u + state->tbu; - texture_state.v = state->v + state->tbv; + texture_state.u = state->u + state->tbu; + texture_state.v = state->v + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_normal_filter(s3d_state_t *state) -{ +tex_sample_normal_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; int tex_offset; rgba_t tex_samples[4]; @@ -2897,9 +2670,9 @@ tex_sample_normal_filter(s3d_state_t *state) int dv; int d[4]; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = state->u + state->tbu; texture_state.v = state->v + state->tbv; @@ -2924,30 +2697,32 @@ tex_sample_normal_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_mipmap(s3d_state_t *state) -{ +tex_sample_mipmap(s3d_state_t *state) { s3d_texture_state_t texture_state; texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = state->u + state->tbu; - texture_state.v = state->v + state->tbv; + texture_state.u = state->u + state->tbu; + texture_state.v = state->v + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_mipmap_filter(s3d_state_t *state) -{ +tex_sample_mipmap_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; int tex_offset; rgba_t tex_samples[4]; @@ -2959,7 +2734,7 @@ tex_sample_mipmap_filter(s3d_state_t *state) if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = state->u + state->tbu; texture_state.v = state->v + state->tbv; @@ -2984,34 +2759,36 @@ tex_sample_mipmap_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_normal(s3d_state_t *state) -{ +tex_sample_persp_normal(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_normal_filter(s3d_state_t *state) -{ +tex_sample_persp_normal_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3021,14 +2798,14 @@ tex_sample_persp_normal_filter(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3053,34 +2830,36 @@ tex_sample_persp_normal_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_normal_375(s3d_state_t *state) -{ +tex_sample_persp_normal_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_normal_filter_375(s3d_state_t *state) -{ +tex_sample_persp_normal_filter_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3090,14 +2869,14 @@ tex_sample_persp_normal_filter_375(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; - texture_state.level = state->max_d; + texture_state.level = state->max_d; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3122,36 +2901,38 @@ tex_sample_persp_normal_filter_375(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_mipmap(s3d_state_t *state) -{ +tex_sample_persp_mipmap(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_mipmap_filter(s3d_state_t *state) -{ +tex_sample_persp_mipmap_filter(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3161,16 +2942,16 @@ tex_sample_persp_mipmap_filter(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (12 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (12 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3195,36 +2976,38 @@ tex_sample_persp_mipmap_filter(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } static void -tex_sample_persp_mipmap_375(s3d_state_t *state) -{ +tex_sample_persp_mipmap_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - texture_state.u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - texture_state.v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; tex_read(state, &texture_state, &state->dest_rgba); } static void -tex_sample_persp_mipmap_filter_375(s3d_state_t *state) -{ +tex_sample_persp_mipmap_filter_375(s3d_state_t *state) { s3d_texture_state_t texture_state; - int32_t w = 0; + int32_t w = 0; int32_t u; int32_t v; int tex_offset; @@ -3234,16 +3017,16 @@ tex_sample_persp_mipmap_filter_375(s3d_state_t *state) int d[4]; if (state->w) - w = (int32_t) (((1ULL << 27) << 19) / (int64_t) state->w); + w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w); - u = (int32_t) (((int64_t) state->u * (int64_t) w) >> (8 + state->max_d)) + state->tbu; - v = (int32_t) (((int64_t) state->v * (int64_t) w) >> (8 + state->max_d)) + state->tbv; + u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; + v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf); if (texture_state.level < 0) texture_state.level = 0; texture_state.texture_shift = 18 + (9 - texture_state.level); - tex_offset = 1 << texture_state.texture_shift; + tex_offset = 1 << texture_state.texture_shift; texture_state.u = u; texture_state.v = v; @@ -3268,47 +3051,50 @@ tex_sample_persp_mipmap_filter_375(s3d_state_t *state) d[2] = (256 - du) * dv; d[3] = du * dv; - state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; - state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; - state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; - state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; } -#define CLAMP(x) \ - do { \ - if ((x) & ~0xff) \ - x = ((x) < 0) ? 0 : 0xff; \ +#define CLAMP(x) \ + do { \ + if ((x) & ~0xff) \ + x = ((x) < 0) ? 0 : 0xff; \ } while (0) -#define CLAMP_RGBA(r, g, b, a) \ - if ((r) & ~0xff) \ - r = ((r) < 0) ? 0 : 0xff; \ - if ((g) & ~0xff) \ - g = ((g) < 0) ? 0 : 0xff; \ - if ((b) & ~0xff) \ - b = ((b) < 0) ? 0 : 0xff; \ - if ((a) & ~0xff) \ - a = ((a) < 0) ? 0 : 0xff; +#define CLAMP_RGBA(r, g, b, a) \ + if ((r) & ~0xff) \ + r = ((r) < 0) ? 0 : 0xff; \ + if ((g) & ~0xff) \ + g = ((g) < 0) ? 0 : 0xff; \ + if ((b) & ~0xff) \ + b = ((b) < 0) ? 0 : 0xff; \ + if ((a) & ~0xff) \ + a = ((a) < 0) ? 0 : 0xff; -#define CLAMP_RGB(r, g, b) \ - do { \ - if ((r) < 0) \ - r = 0; \ - if ((r) > 0xff) \ - r = 0xff; \ - if ((g) < 0) \ - g = 0; \ - if ((g) > 0xff) \ - g = 0xff; \ - if ((b) < 0) \ - b = 0; \ - if ((b) > 0xff) \ - b = 0xff; \ +#define CLAMP_RGB(r, g, b) \ + do { \ + if ((r) < 0) \ + r = 0; \ + if ((r) > 0xff) \ + r = 0xff; \ + if ((g) < 0) \ + g = 0; \ + if ((g) > 0xff) \ + g = 0xff; \ + if ((b) < 0) \ + b = 0; \ + if ((b) > 0xff) \ + b = 0xff; \ } while (0) static void -dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) -{ +dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) { state->dest_rgba.r = state->r >> 7; CLAMP(state->dest_rgba.r); @@ -3323,8 +3109,7 @@ dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) } static void -dest_pixel_unlit_texture_triangle(s3d_state_t *state) -{ +dest_pixel_unlit_texture_triangle(s3d_state_t *state) { tex_sample(state); if (state->cmd_set & CMD_SET_ABC_SRC) @@ -3332,8 +3117,7 @@ dest_pixel_unlit_texture_triangle(s3d_state_t *state) } static void -dest_pixel_lit_texture_decal(s3d_state_t *state) -{ +dest_pixel_lit_texture_decal(s3d_state_t *state) { tex_sample(state); if (state->cmd_set & CMD_SET_ABC_SRC) @@ -3341,8 +3125,7 @@ dest_pixel_lit_texture_decal(s3d_state_t *state) } static void -dest_pixel_lit_texture_reflection(s3d_state_t *state) -{ +dest_pixel_lit_texture_reflection(s3d_state_t *state) { tex_sample(state); state->dest_rgba.r += (state->r >> 7); @@ -3355,8 +3138,7 @@ dest_pixel_lit_texture_reflection(s3d_state_t *state) } static void -dest_pixel_lit_texture_modulate(s3d_state_t *state) -{ +dest_pixel_lit_texture_modulate(s3d_state_t *state) { int r = state->r >> 7; int g = state->g >> 7; int b = state->b >> 7; @@ -3375,39 +3157,18 @@ dest_pixel_lit_texture_modulate(s3d_state_t *state) } static void -tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2) -{ - svga_t *svga = &virge->svga; - uint8_t *vram = svga->vram; +tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2) { + svga_t *svga = &virge->svga; + uint8_t *vram = virge->svga.vram; + int x_dir = s3d_tri->tlr ? 1 : -1; + int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE); + int y_count = yc; + int bpp = (s3d_tri->cmd_set >> 2) & 7; + uint32_t dest_offset; + uint32_t z_offset; - int x_dir = s3d_tri->tlr ? 1 : -1; - - int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE); - - int y_count = yc; - - int bpp = (s3d_tri->cmd_set >> 2) & 7; - - uint32_t dest_offset = 0; - uint32_t z_offset = 0; - - uint32_t src_col; - int src_r = 0; - int src_g = 0; - int src_b = 0; - - int x; - int xe; - uint32_t z; - - uint32_t dest_addr; - uint32_t z_addr; - int dx; - int x_offset; - int xz_offset; - - int update; - uint16_t src_z = 0; + dest_offset = s3d_tri->dest_base + (state->y * s3d_tri->dest_str); + z_offset = s3d_tri->z_base + (state->y * s3d_tri->z_str); if (s3d_tri->cmd_set & CMD_SET_HC) { if (state->y < s3d_tri->clip_t) @@ -3427,240 +3188,194 @@ tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int state->base_a += (s3d_tri->TdAdY * diff_y); state->base_d += (s3d_tri->TdDdY * diff_y); state->base_w += (s3d_tri->TdWdY * diff_y); - state->x1 += (dx1 * diff_y); - state->x2 += (dx2 * diff_y); - state->y -= diff_y; - dest_offset -= s3d_tri->dest_str * diff_y; - z_offset -= s3d_tri->z_str; - y_count -= diff_y; + state->x1 += (dx1 * diff_y); + state->x2 += (dx2 * diff_y); + state->y -= diff_y; + dest_offset -= s3d_tri->dest_str; + z_offset -= s3d_tri->z_str; + y_count -= diff_y; } if ((state->y - y_count) < s3d_tri->clip_t) y_count = (state->y - s3d_tri->clip_t) + 1; } - dest_offset = s3d_tri->dest_base + (state->y * s3d_tri->dest_str); - z_offset = s3d_tri->z_base + (state->y * s3d_tri->z_str); + for (; y_count > 0; y_count--) { + int x = (state->x1 + ((1 << 20) - 1)) >> 20; + int xe = (state->x2 + ((1 << 20) - 1)) >> 20; + uint32_t z = (state->base_z > 0) ? (state->base_z << 1) : 0; - while (y_count > 0) { - x = (state->x1 + ((1 << 20) - 1)) >> 20; - xe = (state->x2 + ((1 << 20) - 1)) >> 20; - z = (state->base_z > 0) ? (state->base_z << 1) : 0; - if (x_dir < 0) { - x--; - xe--; + if (x_dir < 0) { + x--; + xe--; + } + + if (x != xe && ((x_dir > 0 && x < xe) || (x_dir < 0 && x > xe))) { + uint32_t dest_addr; + uint32_t z_addr; + int dx = (x_dir > 0) ? ((31 - ((state->x1 - 1) >> 15)) & 0x1f) : + (((state->x1 - 1) >> 15) & 0x1f); + int x_offset = x_dir * (bpp + 1); + int xz_offset = x_dir << 1; + + if (x_dir > 0) + dx += 1; + + state->r = state->base_r + ((s3d_tri->TdRdX * dx) >> 5); + state->g = state->base_g + ((s3d_tri->TdGdX * dx) >> 5); + state->b = state->base_b + ((s3d_tri->TdBdX * dx) >> 5); + state->a = state->base_a + ((s3d_tri->TdAdX * dx) >> 5); + state->u = state->base_u + ((s3d_tri->TdUdX * dx) >> 5); + state->v = state->base_v + ((s3d_tri->TdVdX * dx) >> 5); + state->w = state->base_w + ((s3d_tri->TdWdX * dx) >> 5); + state->d = state->base_d + ((s3d_tri->TdDdX * dx) >> 5); + z += ((s3d_tri->TdZdX * dx) >> 5); + + if (s3d_tri->cmd_set & CMD_SET_HC) { + if (x_dir > 0) { + if (x > s3d_tri->clip_r) + goto tri_skip_line; + if (xe < s3d_tri->clip_l) + goto tri_skip_line; + if (xe > s3d_tri->clip_r) + xe = s3d_tri->clip_r + 1; + if (x < s3d_tri->clip_l) { + int diff_x = s3d_tri->clip_l - x; + + z += (s3d_tri->TdZdX * diff_x); + state->u += (s3d_tri->TdUdX * diff_x); + state->v += (s3d_tri->TdVdX * diff_x); + state->r += (s3d_tri->TdRdX * diff_x); + state->g += (s3d_tri->TdGdX * diff_x); + state->b += (s3d_tri->TdBdX * diff_x); + state->a += (s3d_tri->TdAdX * diff_x); + state->d += (s3d_tri->TdDdX * diff_x); + state->w += (s3d_tri->TdWdX * diff_x); + + x = s3d_tri->clip_l; + } + } else { + if (x < s3d_tri->clip_l) + goto tri_skip_line; + if (xe > s3d_tri->clip_r) + goto tri_skip_line; + if (xe < s3d_tri->clip_l) + xe = s3d_tri->clip_l - 1; + if (x > s3d_tri->clip_r) { + int diff_x = x - s3d_tri->clip_r; + + z += (s3d_tri->TdZdX * diff_x); + state->u += (s3d_tri->TdUdX * diff_x); + state->v += (s3d_tri->TdVdX * diff_x); + state->r += (s3d_tri->TdRdX * diff_x); + state->g += (s3d_tri->TdGdX * diff_x); + state->b += (s3d_tri->TdBdX * diff_x); + state->a += (s3d_tri->TdAdX * diff_x); + state->d += (s3d_tri->TdDdX * diff_x); + state->w += (s3d_tri->TdWdX * diff_x); + + x = s3d_tri->clip_r; + } + } + } + + virge->svga.changedvram[(dest_offset & svga->vram_mask) >> 12] = changeframecount; + + dest_addr = dest_offset + (x * (bpp + 1)); + z_addr = z_offset + (x << 1); + + x &= 0xfff; + xe &= 0xfff; + + for (; x != xe; x = (x + x_dir) & 0xfff) { + int update = 1; + uint16_t src_z = 0; + + _x = x; + _y = state->y; + + if (use_z) { + src_z = Z_READ(z_addr); + Z_CLIP(src_z, z >> 16); + } + + if (update) { + uint32_t dest_col; + + dest_pixel(state); + + if (s3d_tri->cmd_set & CMD_SET_FE) { + int a = state->a >> 7; + state->dest_rgba.r = ((state->dest_rgba.r * a) + (s3d_tri->fog_r * (255 - a))) / 255; + state->dest_rgba.g = ((state->dest_rgba.g * a) + (s3d_tri->fog_g * (255 - a))) / 255; + state->dest_rgba.b = ((state->dest_rgba.b * a) + (s3d_tri->fog_b * (255 - a))) / 255; + } + + if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE) { + uint32_t src_col; + int src_r = 0; + uint32_t src_g = 0; + uint32_t src_b = 0; + + switch (bpp) { + case 0: /*8 bpp*/ + /*Not implemented yet*/ + break; + case 1: /*16 bpp*/ + src_col = *(uint16_t *)&vram[dest_addr & svga->vram_mask]; + RGB15_TO_24(src_col, src_r, src_g, src_b); + break; + case 2: /*24 bpp*/ + src_col = (*(uint32_t *)&vram[dest_addr & svga->vram_mask]) & 0xffffff; + RGB24_TO_24(src_col, src_r, src_g, src_b); + break; + } + + state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + + (src_r * (255 - state->dest_rgba.a))) / 255; + state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + + (src_g * (255 - state->dest_rgba.a))) / 255; + state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + + (src_b * (255 - state->dest_rgba.a))) / 255; + } + + switch (bpp) { + case 0: /*8 bpp*/ + /*Not implemented yet*/ + break; + case 1: /*16 bpp*/ + RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col); + *(uint16_t *)&vram[dest_addr] = dest_col; + break; + case 2: /*24 bpp*/ + dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b); + *(uint8_t *)&vram[dest_addr] = dest_col & 0xff; + *(uint8_t *)&vram[dest_addr + 1] = (dest_col >> 8) & 0xff; + *(uint8_t *)&vram[dest_addr + 2] = (dest_col >> 16) & 0xff; + break; + } + + if (use_z && (s3d_tri->cmd_set & CMD_SET_ZUP)) + Z_WRITE(z_addr, src_z); + } + + z += s3d_tri->TdZdX; + state->u += s3d_tri->TdUdX; + state->v += s3d_tri->TdVdX; + state->r += s3d_tri->TdRdX; + state->g += s3d_tri->TdGdX; + state->b += s3d_tri->TdBdX; + state->a += s3d_tri->TdAdX; + state->d += s3d_tri->TdDdX; + state->w += s3d_tri->TdWdX; + dest_addr += x_offset; + z_addr += xz_offset; + virge->pixel_count++; + } } - - if (((x != xe) && ((x_dir > 0) && (x < xe))) || ((x_dir < 0) && (x > xe))) { - dx = (x_dir > 0) ? ((31 - ((state->x1 - 1) >> 15)) & 0x1f) : (((state->x1 - 1) >> 15) & 0x1f); - x_offset = x_dir * (bpp + 1); - xz_offset = x_dir << 1; - if (x_dir > 0) - dx += 1; - state->r = state->base_r + ((s3d_tri->TdRdX * dx) >> 5); - state->g = state->base_g + ((s3d_tri->TdGdX * dx) >> 5); - state->b = state->base_b + ((s3d_tri->TdBdX * dx) >> 5); - state->a = state->base_a + ((s3d_tri->TdAdX * dx) >> 5); - state->u = state->base_u + ((s3d_tri->TdUdX * dx) >> 5); - state->v = state->base_v + ((s3d_tri->TdVdX * dx) >> 5); - state->w = state->base_w + ((s3d_tri->TdWdX * dx) >> 5); - state->d = state->base_d + ((s3d_tri->TdDdX * dx) >> 5); - z += ((s3d_tri->TdZdX * dx) >> 5); - - if (s3d_tri->cmd_set & CMD_SET_HC) { - if (x_dir > 0) { - if (x > s3d_tri->clip_r) - goto tri_skip_line; - if (xe < s3d_tri->clip_l) - goto tri_skip_line; - if (xe > s3d_tri->clip_r) - xe = s3d_tri->clip_r + 1; - if (x < s3d_tri->clip_l) { - int diff_x = s3d_tri->clip_l - x; - - z += (s3d_tri->TdZdX * diff_x); - state->u += (s3d_tri->TdUdX * diff_x); - state->v += (s3d_tri->TdVdX * diff_x); - state->r += (s3d_tri->TdRdX * diff_x); - state->g += (s3d_tri->TdGdX * diff_x); - state->b += (s3d_tri->TdBdX * diff_x); - state->a += (s3d_tri->TdAdX * diff_x); - state->d += (s3d_tri->TdDdX * diff_x); - state->w += (s3d_tri->TdWdX * diff_x); - - x = s3d_tri->clip_l; - } - } else { - if (x < s3d_tri->clip_l) - goto tri_skip_line; - if (xe > s3d_tri->clip_r) - goto tri_skip_line; - if (xe < s3d_tri->clip_l) - xe = s3d_tri->clip_l - 1; - if (x > s3d_tri->clip_r) { - int diff_x = x - s3d_tri->clip_r; - - z += (s3d_tri->TdZdX * diff_x); - state->u += (s3d_tri->TdUdX * diff_x); - state->v += (s3d_tri->TdVdX * diff_x); - state->r += (s3d_tri->TdRdX * diff_x); - state->g += (s3d_tri->TdGdX * diff_x); - state->b += (s3d_tri->TdBdX * diff_x); - state->a += (s3d_tri->TdAdX * diff_x); - state->d += (s3d_tri->TdDdX * diff_x); - state->w += (s3d_tri->TdWdX * diff_x); - - x = s3d_tri->clip_r; - } - } - } - - svga->changedvram[(dest_offset & virge->vram_mask) >> 12] = changeframecount; - - dest_addr = dest_offset + (x * (bpp + 1)); - z_addr = z_offset + (x << 1); - - x &= 0xfff; - xe &= 0xfff; - - while (x != xe) { - update = 1; - _x = x; - _y = state->y; - - if (use_z) { - src_z = *(uint16_t *) &vram[z_addr & virge->vram_mask]; - switch ((s3d_tri->cmd_set >> 20) & 7) { - case 0: - update = 0; - break; - case 1: - if ((z >> 16) > src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 2: - if ((z >> 16) == src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 3: - if ((z >> 16) >= src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 4: - if ((z >> 16) < src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 5: - if ((z >> 16) != src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 6: - if ((z >> 16) <= src_z) { - src_z = (z >> 16); - } else - update = 0; - break; - case 7: - src_z = (z >> 16); - break; - - default: - break; - } - } - - if (update) { - uint32_t dest_col; - - dest_pixel(state); - - if (s3d_tri->cmd_set & CMD_SET_FE) { - int a = state->a >> 7; - state->dest_rgba.r = ((state->dest_rgba.r * a) + (s3d_tri->fog_r * (255 - a))) / 255; - state->dest_rgba.g = ((state->dest_rgba.g * a) + (s3d_tri->fog_g * (255 - a))) / 255; - state->dest_rgba.b = ((state->dest_rgba.b * a) + (s3d_tri->fog_b * (255 - a))) / 255; - } - - if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE) { - switch (bpp) { - case 0: /*8 bpp*/ - /*TODO: Not implemented yet*/ - break; - case 1: /*16 bpp*/ - src_col = *(uint16_t *) &vram[dest_addr & virge->vram_mask]; - RGB15_TO_24(src_col, src_r, src_g, src_b); - break; - case 2: /*24 bpp*/ - src_col = (*(uint32_t *) &vram[dest_addr & virge->vram_mask]) & 0xffffff; - RGB24_TO_24(src_col, src_r, src_g, src_b); - break; - - default: - break; - } - - state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + (src_r * (255 - state->dest_rgba.a))) / 255; - state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + (src_g * (255 - state->dest_rgba.a))) / 255; - state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + (src_b * (255 - state->dest_rgba.a))) / 255; - } - - switch (bpp) { - case 0: /*8 bpp*/ - /*TODO: Not implemented yet*/ - break; - case 1: /*16 bpp*/ - RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col); - *(uint16_t *) &vram[dest_addr & virge->vram_mask] = dest_col; - svga->changedvram[(dest_addr & virge->vram_mask) >> 12] = changeframecount; - break; - case 2: /*24 bpp*/ - dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b); - *(uint8_t *) &vram[dest_addr & virge->vram_mask] = dest_col & 0xff; - *(uint8_t *) &vram[(dest_addr + 1) & virge->vram_mask] = (dest_col >> 8) & 0xff; - *(uint8_t *) &vram[(dest_addr + 2) & virge->vram_mask] = (dest_col >> 16) & 0xff; - svga->changedvram[(dest_addr & virge->vram_mask) >> 12] = changeframecount; - break; - - default: - break; - } - } - - if (use_z && (s3d_tri->cmd_set & CMD_SET_ZUP)) { - *(uint16_t *) &vram[z_addr & virge->vram_mask] = src_z; - svga->changedvram[(z_addr & virge->vram_mask) >> 12] = changeframecount; - } - - z += s3d_tri->TdZdX; - state->u += s3d_tri->TdUdX; - state->v += s3d_tri->TdVdX; - state->r += s3d_tri->TdRdX; - state->g += s3d_tri->TdGdX; - state->b += s3d_tri->TdBdX; - state->a += s3d_tri->TdAdX; - state->d += s3d_tri->TdDdX; - state->w += s3d_tri->TdWdX; - dest_addr += x_offset; - z_addr += xz_offset; - - x = (x + x_dir) & 0xfff; - } - } - - y_count--; - + tri_skip_line: - state->x1 += dx1; - state->x2 += dx2; + state->x1 += dx1; + state->x2 += dx2; state->base_u += s3d_tri->TdUdY; state->base_v += s3d_tri->TdVdY; state->base_z += s3d_tri->TdZdY; @@ -3671,31 +3386,22 @@ tri_skip_line: state->base_d += s3d_tri->TdDdY; state->base_w += s3d_tri->TdWdY; state->y--; - dest_offset -= s3d_tri->dest_str; - z_offset -= s3d_tri->z_str; + dest_offset -= s3d_tri->dest_str; + z_offset -= s3d_tri->z_str; } } -static int tex_size[8] = { - 4 * 2, - 2 * 2, - 2 * 2, - 1 * 2, - 2 / 1, - 2 / 1, - 1 * 2, - 1 * 2 -}; +static int tex_size[8] = {4 * 2, 2 * 2, 2 * 2, 1 * 2, 2 / 1, 2 / 1, 1 * 2, 1 * 2}; static void -s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) -{ +s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) { s3d_state_t state; - uint32_t tex_base; + uint32_t tex_base; + int c; - uint64_t start_time = plat_timer_read(); - uint64_t end_time; + uint64_t start_time = plat_timer_read(); + uint64_t end_time; state.tbu = s3d_tri->tbu << 11; state.tbv = s3d_tri->tbv << 11; @@ -3709,18 +3415,18 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) state.base_u = s3d_tri->tus; state.base_v = s3d_tri->tvs; state.base_z = s3d_tri->tzs; - state.base_r = (int32_t) s3d_tri->trs; - state.base_g = (int32_t) s3d_tri->tgs; - state.base_b = (int32_t) s3d_tri->tbs; - state.base_a = (int32_t) s3d_tri->tas; + state.base_r = (int32_t)s3d_tri->trs; + state.base_g = (int32_t)s3d_tri->tgs; + state.base_b = (int32_t)s3d_tri->tbs; + state.base_a = (int32_t)s3d_tri->tas; state.base_d = s3d_tri->tds; state.base_w = s3d_tri->tws; tex_base = s3d_tri->tex_base; - for (int c = 9; c >= 0; c--) { - state.texture[c] = (uint16_t *) &virge->svga.vram[tex_base]; - if (c <= state.max_d) - tex_base += ((1 << (c * 2)) * tex_size[(s3d_tri->cmd_set >> 5) & 7]) / 2; + for (c = 9; c >= 0; c--) { + state.texture[c] = (uint16_t *)&virge->svga.vram[tex_base]; + if (c <= state.max_d) + tex_base += ((1 << (c * 2)) * tex_size[(s3d_tri->cmd_set >> 5) & 7]) / 2; } switch ((s3d_tri->cmd_set >> 27) & 0xf) { @@ -3740,7 +3446,6 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) dest_pixel = dest_pixel_lit_texture_decal; break; default: - s3_virge_log("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf); return; } break; @@ -3749,7 +3454,6 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) dest_pixel = dest_pixel_unlit_texture_triangle; break; default: - s3_virge_log("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf); return; } @@ -3772,34 +3476,35 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) break; case (0 | 8): case (1 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) tex_sample = tex_sample_persp_mipmap_375; else tex_sample = tex_sample_persp_mipmap; break; case (2 | 8): case (3 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) - tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : tex_sample_persp_mipmap_375; + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) + tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : + tex_sample_persp_mipmap_375; else - tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : tex_sample_persp_mipmap; + tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : + tex_sample_persp_mipmap; break; case (4 | 8): case (5 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) tex_sample = tex_sample_persp_normal_375; else tex_sample = tex_sample_persp_normal; break; case (6 | 8): case (7 | 8): - if (virge->chip == S3_VIRGEDX || virge->chip >= S3_VIRGEGX2) - tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : tex_sample_persp_normal_375; + if ((virge->chip == S3_VIRGEDX) || (virge->chip >= S3_VIRGEGX2)) + tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : + tex_sample_persp_normal_375; else - tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : tex_sample_persp_normal; - break; - - default: + tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : + tex_sample_persp_normal; break; } @@ -3814,7 +3519,6 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap; break; default: - s3_virge_log("bad texture type %i\n", (s3d_tri->cmd_set >> 5) & 7); tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap; break; } @@ -3826,15 +3530,16 @@ s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri) state.x2 = s3d_tri->txend12; tri(virge, s3d_tri, &state, s3d_tri->ty12, s3d_tri->TdXdY02, s3d_tri->TdXdY12); + virge->tri_count++; + end_time = plat_timer_read(); - virge->blitter_time += end_time - start_time; + virge_time += end_time - start_time; } static void -s3_virge_render_thread(void *priv) -{ - virge_t *virge = (virge_t *) priv; +render_thread(void *param) { + virge_t *virge = (virge_t *)param; while (virge->render_thread_run) { thread_wait_event(virge->wake_render_thread, -1); @@ -3844,7 +3549,7 @@ s3_virge_render_thread(void *priv) s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx & RB_MASK]); virge->s3d_read_idx++; - if (RB_ENTRIES == RB_MASK) + if (RB_ENTRIES == (RB_SIZE - 1)) thread_set_event(virge->not_full_event); } virge->s3d_busy = 0; @@ -3854,30 +3559,27 @@ s3_virge_render_thread(void *priv) } static void -queue_triangle(virge_t *virge) -{ +queue_triangle(virge_t *virge) { if (RB_FULL) { thread_reset_event(virge->not_full_event); if (RB_FULL) thread_wait_event(virge->not_full_event, -1); /*Wait for room in ringbuffer*/ } - virge->s3d_buffer[virge->s3d_write_idx & RB_MASK] = virge->s3d_tri; virge->s3d_write_idx++; if (!virge->s3d_busy) thread_set_event(virge->wake_render_thread); /*Wake up render thread if moving from idle*/ } -static void -s3_virge_hwcursor_draw(svga_t *svga, int displine) -{ - const virge_t *virge = (virge_t *) svga->priv; - uint16_t dat[2]; - int xx; - int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff; - uint32_t fg; - uint32_t bg; - uint32_t vram_mask = virge->vram_mask; +static void s3_virge_hwcursor_draw(svga_t *svga, int displine) { + virge_t *virge = (virge_t *) svga->priv; + int x; + uint16_t dat[2] = { 0, 0 }; + int xx; + int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff; + uint32_t fg; + uint32_t bg; + uint32_t vram_mask = virge->vram_mask; if (svga->interlace && svga->hwcursor_oddeven) svga->hwcursor_latch.addr += 16; @@ -3914,13 +3616,16 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) break; } - for (uint8_t x = 0; x < 64; x += 16) { - dat[0] = (svga->vram[svga->hwcursor_latch.addr & vram_mask] << 8) | svga->vram[(svga->hwcursor_latch.addr + 1) & vram_mask]; - dat[1] = (svga->vram[(svga->hwcursor_latch.addr + 2) & vram_mask] << 8) | svga->vram[(svga->hwcursor_latch.addr + 3) & vram_mask]; + for (x = 0; x < 64; x += 16) { + dat[0] = (svga->vram[svga->hwcursor_latch.addr & vram_mask] << 8) | + svga->vram[(svga->hwcursor_latch.addr + 1) & vram_mask]; + dat[1] = (svga->vram[(svga->hwcursor_latch.addr + 2) & vram_mask] << 8) | + svga->vram[(svga->hwcursor_latch.addr + 3) & vram_mask]; + if (svga->crtc[0x55] & 0x10) { /*X11*/ for (xx = 0; xx < 16; xx++) { - if (offset >= 0) { + if (offset >= svga->hwcursor_latch.x) { if (virge->chip == S3_VIRGEGX2) dat[0] ^= 0x8000; @@ -3929,13 +3634,14 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) } offset++; + dat[0] <<= 1; dat[1] <<= 1; } } else { /*Windows*/ for (xx = 0; xx < 16; xx++) { - if (offset >= 0) { + if (offset >= svga->hwcursor_latch.x) { if (!(dat[0] & 0x8000)) buffer32->line[displine][offset + svga->x_add] = (dat[1] & 0x8000) ? fg : bg; else if (dat[1] & 0x8000) @@ -3943,101 +3649,109 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) } offset++; + dat[0] <<= 1; dat[1] <<= 1; } } + svga->hwcursor_latch.addr += 4; } + if (svga->interlace && !svga->hwcursor_oddeven) svga->hwcursor_latch.addr += 16; } -#define DECODE_YCbCr() \ - do { \ - int c; \ - \ - for (c = 0; c < 2; c++) { \ - uint8_t y1, y2; \ - int8_t Cr, Cb; \ - int dR, dG, dB; \ - \ - y1 = src[0]; \ - Cr = src[1] - 0x80; \ - y2 = src[2]; \ - Cb = src[3] - 0x80; \ - src += 4; \ - \ - dR = (359 * Cr) >> 8; \ - dG = (88 * Cb + 183 * Cr) >> 8; \ - dB = (453 * Cb) >> 8; \ - \ - r[x_write] = y1 + dR; \ - CLAMP(r[x_write]); \ - g[x_write] = y1 - dG; \ - CLAMP(g[x_write]); \ - b[x_write] = y1 + dB; \ - CLAMP(b[x_write]); \ - \ - r[x_write + 1] = y2 + dR; \ - CLAMP(r[x_write + 1]); \ - g[x_write + 1] = y2 - dG; \ - CLAMP(g[x_write + 1]); \ - b[x_write + 1] = y2 + dB; \ - CLAMP(b[x_write + 1]); \ - \ - x_write = (x_write + 2) & 7; \ - } \ +#define DECODE_YCbCr() \ + do { \ + int c; \ + \ + for (c = 0; c < 2; c++) { \ + uint8_t y1, y2; \ + int8_t Cr; \ + int8_t Cb; \ + int dR; \ + int dG; \ + int dB; \ + \ + y1 = src[0]; \ + Cr = src[1] - 0x80; \ + y2 = src[2]; \ + Cb = src[3] - 0x80; \ + src += 4; \ + \ + dR = (359 * Cr) >> 8; \ + dG = (88 * Cb + 183 * Cr) >> 8; \ + dB = (453 * Cb) >> 8; \ + \ + r[x_write] = y1 + dR; \ + CLAMP(r[x_write]); \ + g[x_write] = y1 - dG; \ + CLAMP(g[x_write]); \ + b[x_write] = y1 + dB; \ + CLAMP(b[x_write]); \ + \ + r[x_write + 1] = y2 + dR; \ + CLAMP(r[x_write + 1]); \ + g[x_write + 1] = y2 - dG; \ + CLAMP(g[x_write + 1]); \ + b[x_write + 1] = y2 + dB; \ + CLAMP(b[x_write + 1]); \ + \ + x_write = (x_write + 2) & 7; \ + } \ } while (0) /*Both YUV formats are untested*/ -#define DECODE_YUV211() \ - do { \ - uint8_t y1, y2, y3, y4; \ - int8_t U, V; \ - int dR, dG, dB; \ - \ - U = src[0] - 0x80; \ - y1 = (298 * (src[1] - 16)) >> 8; \ - y2 = (298 * (src[2] - 16)) >> 8; \ - V = src[3] - 0x80; \ - y3 = (298 * (src[4] - 16)) >> 8; \ - y4 = (298 * (src[5] - 16)) >> 8; \ - src += 6; \ - \ - dR = (309 * V) >> 8; \ - dG = (100 * U + 208 * V) >> 8; \ - dB = (516 * U) >> 8; \ - \ - r[x_write] = y1 + dR; \ - CLAMP(r[x_write]); \ - g[x_write] = y1 - dG; \ - CLAMP(g[x_write]); \ - b[x_write] = y1 + dB; \ - CLAMP(b[x_write]); \ - \ - r[x_write + 1] = y2 + dR; \ - CLAMP(r[x_write + 1]); \ - g[x_write + 1] = y2 - dG; \ - CLAMP(g[x_write + 1]); \ - b[x_write + 1] = y2 + dB; \ - CLAMP(b[x_write + 1]); \ - \ - r[x_write + 2] = y3 + dR; \ - CLAMP(r[x_write + 2]); \ - g[x_write + 2] = y3 - dG; \ - CLAMP(g[x_write + 2]); \ - b[x_write + 2] = y3 + dB; \ - CLAMP(b[x_write + 2]); \ - \ - r[x_write + 3] = y4 + dR; \ - CLAMP(r[x_write + 3]); \ - g[x_write + 3] = y4 - dG; \ - CLAMP(g[x_write + 3]); \ - b[x_write + 3] = y4 + dB; \ - CLAMP(b[x_write + 3]); \ - \ - x_write = (x_write + 4) & 7; \ +#define DECODE_YUV211() \ + do { \ + uint8_t y1, y2, y3, y4; \ + int8_t U, V; \ + int dR; \ + int dG; \ + int dB; \ + \ + U = src[0] - 0x80; \ + y1 = (298 * (src[1] - 16)) >> 8; \ + y2 = (298 * (src[2] - 16)) >> 8; \ + V = src[3] - 0x80; \ + y3 = (298 * (src[4] - 16)) >> 8; \ + y4 = (298 * (src[5] - 16)) >> 8; \ + src += 6; \ + \ + dR = (309 * V) >> 8; \ + dG = (100 * U + 208 * V) >> 8; \ + dB = (516 * U) >> 8; \ + \ + r[x_write] = y1 + dR; \ + CLAMP(r[x_write]); \ + g[x_write] = y1 - dG; \ + CLAMP(g[x_write]); \ + b[x_write] = y1 + dB; \ + CLAMP(b[x_write]); \ + \ + r[x_write + 1] = y2 + dR; \ + CLAMP(r[x_write + 1]); \ + g[x_write + 1] = y2 - dG; \ + CLAMP(g[x_write + 1]); \ + b[x_write + 1] = y2 + dB; \ + CLAMP(b[x_write + 1]); \ + \ + r[x_write + 2] = y3 + dR; \ + CLAMP(r[x_write + 2]); \ + g[x_write + 2] = y3 - dG; \ + CLAMP(g[x_write + 2]); \ + b[x_write + 2] = y3 + dB; \ + CLAMP(b[x_write + 2]); \ + \ + r[x_write + 3] = y4 + dR; \ + CLAMP(r[x_write + 3]); \ + g[x_write + 3] = y4 - dG; \ + CLAMP(g[x_write + 3]); \ + b[x_write + 3] = y4 + dB; \ + CLAMP(b[x_write + 3]); \ + \ + x_write = (x_write + 4) & 7; \ } while (0) #define DECODE_YUV422() \ @@ -4045,13 +3759,17 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) int c; \ \ for (c = 0; c < 2; c++) { \ - uint8_t y1, y2; \ - int8_t U, V; \ - int dR, dG, dB; \ + uint8_t y1; \ + uint8_t y2; \ + int8_t U; \ + int8_t V; \ + int dR; \ + int dG; \ + int dB; \ \ - U = src[0] - 0x80; \ + U = src[0] - 0x80; \ y1 = (298 * (src[1] - 16)) >> 8; \ - V = src[2] - 0x80; \ + V = src[2] - 0x80; \ y2 = (298 * (src[3] - 16)) >> 8; \ src += 4; \ \ @@ -4077,110 +3795,122 @@ s3_virge_hwcursor_draw(svga_t *svga, int displine) } \ } while (0) -#define DECODE_RGB555() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - uint16_t dat; \ - \ - dat = *(uint16_t *) src; \ - src += 2; \ - \ - r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); \ - g[x_write + c] = ((dat & 0x03e0) >> 2) | ((dat & 0x03e0) >> 7); \ - b[x_write + c] = ((dat & 0x7c00) >> 7) | ((dat & 0x7c00) >> 12); \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_RGB555() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + uint16_t dat; \ + \ + dat = *(uint16_t *)src; \ + src += 2; \ + \ + r[x_write + c] = \ + ((dat & 0x001f) << 3) | \ + ((dat & 0x001f) >> 2); \ + g[x_write + c] = \ + ((dat & 0x03e0) >> 2) | \ + ((dat & 0x03e0) >> 7); \ + b[x_write + c] = \ + ((dat & 0x7c00) >> 7) | \ + ((dat & 0x7c00) >> 12); \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define DECODE_RGB565() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - uint16_t dat; \ - \ - dat = *(uint16_t *) src; \ - src += 2; \ - \ - r[x_write + c] = ((dat & 0x001f) << 3) | ((dat & 0x001f) >> 2); \ - g[x_write + c] = ((dat & 0x07e0) >> 3) | ((dat & 0x07e0) >> 9); \ - b[x_write + c] = ((dat & 0xf800) >> 8) | ((dat & 0xf800) >> 13); \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_RGB565() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + uint16_t dat; \ + \ + dat = *(uint16_t *)src; \ + src += 2; \ + \ + r[x_write + c] = \ + ((dat & 0x001f) << 3) | \ + ((dat & 0x001f) >> 2); \ + g[x_write + c] = \ + ((dat & 0x07e0) >> 3) | \ + ((dat & 0x07e0) >> 9); \ + b[x_write + c] = \ + ((dat & 0xf800) >> 8) | \ + ((dat & 0xf800) >> 13); \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define DECODE_RGB888() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - r[x_write + c] = src[0]; \ - g[x_write + c] = src[1]; \ - b[x_write + c] = src[2]; \ - src += 3; \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_RGB888() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + r[x_write + c] = src[0]; \ + g[x_write + c] = src[1]; \ + b[x_write + c] = src[2]; \ + src += 3; \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define DECODE_XRGB8888() \ - do { \ - int c; \ - \ - for (c = 0; c < 4; c++) { \ - r[x_write + c] = src[0]; \ - g[x_write + c] = src[1]; \ - b[x_write + c] = src[2]; \ - src += 4; \ - } \ - x_write = (x_write + 4) & 7; \ +#define DECODE_XRGB8888() \ + do { \ + int c; \ + \ + for (c = 0; c < 4; c++) { \ + r[x_write + c] = src[0]; \ + g[x_write + c] = src[1]; \ + b[x_write + c] = src[2]; \ + src += 4; \ + } \ + x_write = (x_write + 4) & 7; \ } while (0) -#define OVERLAY_SAMPLE() \ - do { \ - switch (virge->streams.sdif) { \ - case 1: \ - DECODE_YCbCr(); \ - break; \ - case 2: \ - DECODE_YUV422(); \ - break; \ - case 3: \ - DECODE_RGB555(); \ - break; \ - case 4: \ - DECODE_YUV211(); \ - break; \ - case 5: \ - DECODE_RGB565(); \ - break; \ - case 6: \ - DECODE_RGB888(); \ - break; \ - case 7: \ - default: \ - DECODE_XRGB8888(); \ - break; \ - } \ +#define OVERLAY_SAMPLE() \ + do { \ + switch (virge->streams.sdif) { \ + case 1: \ + DECODE_YCbCr(); \ + break; \ + case 2: \ + DECODE_YUV422(); \ + break; \ + case 3: \ + DECODE_RGB555(); \ + break; \ + case 4: \ + DECODE_YUV211(); \ + break; \ + case 5: \ + DECODE_RGB565(); \ + break; \ + case 6: \ + DECODE_RGB888(); \ + break; \ + case 7: \ + default: \ + DECODE_XRGB8888(); \ + break; \ + } \ } while (0) static void -s3_virge_overlay_draw(svga_t *svga, int displine) -{ - const virge_t *virge = (virge_t *) svga->priv; - int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1; - int h_acc = virge->streams.dda_horiz_accumulator; - int r[8]; - int g[8]; - int b[8]; - int x_size; - int x_read = 4; - int x_write = 4; - uint32_t *p; - uint8_t *src = &svga->vram[svga->overlay_latch.addr]; +s3_virge_overlay_draw(svga_t *svga, int displine) { + virge_t *virge = (virge_t *) svga->priv; + int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1; + int h_acc = virge->streams.dda_horiz_accumulator; + int r[8]; + int g[8]; + int b[8]; + int x_size; + int x_read = 4; + int x_write = 4; + int x; + uint32_t *p; + uint8_t *src = &svga->vram[svga->overlay_latch.addr]; - p = &(buffer32->line[displine][offset + svga->x_add]); + p = &((uint32_t *)buffer32->line[displine])[offset + svga->x_add]; if ((offset + virge->streams.sec_w) > virge->streams.pri_w) x_size = (virge->streams.pri_w - virge->streams.sec_x) + 1; @@ -4189,7 +3919,7 @@ s3_virge_overlay_draw(svga_t *svga, int displine) OVERLAY_SAMPLE(); - for (int x = 0; x < x_size; x++) { + for (x = 0; x < x_size; x++) { *p++ = r[x_read] | (g[x_read] << 8) | (b[x_read] << 16); h_acc += virge->streams.k1_horiz_scale; @@ -4507,7 +4237,7 @@ static void * s3_virge_init(const device_t *info) { const char *bios_fn; - virge_t *virge = calloc(1, sizeof(virge_t)); + virge_t *virge = (virge_t *) calloc(1, sizeof(virge_t)); reset_state = calloc(1, sizeof(virge_t)); virge->bilinear_enabled = device_get_config_int("bilinear"); @@ -4742,12 +4472,12 @@ s3_virge_init(const device_t *info) virge->wake_render_thread = thread_create_event(); virge->wake_main_thread = thread_create_event(); virge->not_full_event = thread_create_event(); - virge->render_thread = thread_create(s3_virge_render_thread, virge); + virge->render_thread = thread_create(render_thread, virge); virge->fifo_thread_run = 1; virge->wake_fifo_thread = thread_create_event(); virge->fifo_not_full_event = thread_create_event(); - virge->fifo_thread = thread_create(s3_virge_fifo_thread, virge); + virge->fifo_thread = thread_create(fifo_thread, virge); virge->local = info->local;