diff --git a/Makefile b/Makefile index 55765d3..cbbec4e 100644 --- a/Makefile +++ b/Makefile @@ -33,13 +33,27 @@ SRCS = hello_world.c \ #~ SRCS = hello_poly.c \ #~ ../common/crt0/crt0.s \ -## Hello poly +#~ ## Hello inline GTE #~ TARGET = hello_poly_inline #~ TYPE = ps-exe #~ SRCS = hello_poly_inline.c \ #~ ../common/crt0/crt0.s \ +## Hello inline GTE +#~ TARGET = hello_gte_opti +#~ TYPE = ps-exe + +#~ SRCS = hello_gte_opti.c \ +#~ ../common/crt0/crt0.s \ + +## Hello poly subdiv +#~ TARGET = hello_poly_subdiv +#~ TYPE = ps-exe + +#~ SRCS = hello_poly_subdiv.c \ +#~ ../common/crt0/crt0.s \ + #~ ## Hello textured #~ TARGET = hello_poly_ft #~ TYPE = ps-exe diff --git a/hello_gte_opti.c b/hello_gte_opti.c new file mode 100644 index 0000000..7dcc0fb --- /dev/null +++ b/hello_gte_opti.c @@ -0,0 +1,240 @@ +// Hello free cycles ! +// Let's win some CPU cycles back +// +// Ref : /psyq/DOCS/Devrefs/Inlinref.pdf, p.18 +// /psyq/psx/sample/scea/GTE +// https://psx-spx.consoledev.net/geometrytransformationenginegte/ +// PSX / Z+ +// screen / +//coordinate +-----X+ +//system / | +// eye | Y+ +// +// Credits, thanks : Nicolas Noble, Sickle, Lameguy64 @ psxdev discord : https://discord.com/invite/N2mmwp +// https://discord.com/channels/642647820683444236/663664210525290507/834831466100949002 +#include +#include +#include +#include +#include +// OldWorld PsyQ has a inline_c.h file for inline GTE functions. We have to use the one at https://github.com/grumpycoders/pcsx-redux/blob/07f9b02d1dbb68f57a9f5b9773041813c55a4913/src/mips/psyq/include/inline_n.h +// because the real GTE commands are needed in nugget : https://psx-spx.consoledev.net/geometrytransformationenginegte/#gte-coordinate-calculation-commands +#include +// RAM -> CPU and CPU -> GTE macros : +#include "CPUMAC.H" + +#define VMODE 0 // Video Mode : 0 : NTSC, 1: PAL +#define SCREENXRES 320 // Screen width +#define SCREENYRES 240 + (VMODE << 4) // Screen height : If VMODE is 0 = 240, if VMODE is 1 = 256 +#define CENTERX ( SCREENXRES >> 1 ) // Center of screen on x +#define CENTERY ( SCREENYRES >> 1 ) // Center of screen on y +#define MARGINX 0 // margins for text display +#define MARGINY 32 +#define FONTSIZE 8 * 7 // Text Field Height +#define OTLEN 10 // Ordering Table Length + +DISPENV disp[2]; // Double buffered DISPENV and DRAWENV +DRAWENV draw[2]; +u_long ot[2][OTLEN]; // double ordering table of length 8 * 32 = 256 bits / 32 bytes +char primbuff[2][32768] = {1}; // double primitive buffer of length 32768 * 8 = 262.144 bits / 32,768 Kbytes +char *nextpri = primbuff[0]; // pointer to the next primitive in primbuff. Initially, points to the first bit of primbuff[0] +short db = 0; // index of which buffer is used, values 0, 1 + +// DCache setup +#define dc_camdirp ((sshort*) getScratchAddr(0)) +#define dc_ip ((uchar*) getScratchAddr(1)) +#define dc_opzp ((slong*) getScratchAddr(2)) +#define dc_wmatp ((MATRIX*) getScratchAddr(3)) +#define dc_cmatp ((MATRIX*) getScratchAddr(9)) +#define dc_sxytbl ((DVECTOR*) getScratchAddr(15)) + +void init(void) +{ + ResetGraph(0); + // Initialize and setup the GTE + InitGeom(); + //~ SetGeomOffset(CENTERX,CENTERY); + gte_SetGeomOffset(CENTERX,CENTERY); + gte_SetGeomScreen(CENTERX); + // Set display environment + SetDefDispEnv(&disp[0], 0, 0, SCREENXRES, SCREENYRES); + SetDefDispEnv(&disp[1], 0, SCREENYRES, SCREENXRES, SCREENYRES); + // Set draw environment + SetDefDrawEnv(&draw[0], 0, SCREENYRES, SCREENXRES, SCREENYRES); + SetDefDrawEnv(&draw[1], 0, 0, SCREENXRES, SCREENYRES); + // If PAL, use 320x256, hence 256 - 240 = 16 / 2 = 8 px vertical offset + if (VMODE) + { + SetVideoMode(MODE_PAL); + disp[0].screen.y += 8; + disp[1].screen.y += 8; + } + SetDispMask(1); + + // Set background color + setRGB0(&draw[0], 50, 50, 50); + setRGB0(&draw[1], 50, 50, 50); + draw[0].isbg = 1; + draw[1].isbg = 1; + PutDispEnv(&disp[db]); + PutDrawEnv(&draw[db]); + FntLoad(960, 0); + FntOpen(MARGINX, SCREENYRES - MARGINY - FONTSIZE, SCREENXRES - MARGINX * 2, FONTSIZE, 0, 280 ); +} + +void display(void) +{ + // Wait for drawing + DrawSync(0); + // Wait for vsync + VSync(0); + // Flip DISP and DRAW env + PutDispEnv(&disp[db]); + PutDrawEnv(&draw[db]); + //~ SetDispMask(1); + DrawOTag(ot[db] + OTLEN - 1); + // Flip db index + db = !db; + // Get next primitive in buffer + nextpri = primbuff[db]; +} + +int main(void) +{ + long p, flag, OTz; + SVECTOR rotVector, rotVector4 = {0}; // Initialize rotation vector {x, y, z} + VECTOR transVector = {0, 0, CENTERX, 0}; // Initialize translation vector {x, y, z} + SVECTOR vertPos[4] = { + { 0, -32, 0, 0 }, // Vert 1 + { 32, 0, 0, 0 }, // Vert 2 + { -32, 0, 0, 0 }, + { 0, 32, 0, 0 } + }; // Vert 3 + MATRIX workMatrix = {0}; + POLY_F3 * poly = {0}; // pointer to a POLY_F4 + POLY_F4 * poly4 = {0}; // pointer to a POLY_F4 + init(); + + // Declare registers + register ulong ur0 asm("$16"); + register ulong ur1 asm("$17"); + register ulong ur2 asm("$18"); + register ulong ur3 asm("$19"); + register ulong ur4 asm("$20"); + register ulong ur5 asm("$21"); + + while (1) + { + // Set Ordering table + ClearOTagR(ot[db], OTLEN); + // Cast next primitives in buffer as a POLY_F3 and a POLY_F4 (see display() ) + poly = (POLY_F3 *)nextpri; + nextpri += sizeof(POLY_F3); + poly4 = (POLY_F4 *)nextpri; + // Set matrices - Move to left of screen + // Draw on the left part of the screen + transVector.vx = -CENTERX/2; + // Increment rotation angle on Y axis + rotVector.vy += 8; + //~ rotVector.vx -= 4 ; + // Find rotation matrix from vector, store in + RotMatrix_gte(&rotVector, &workMatrix); + // Ditto for translation + TransMatrix(&workMatrix, &transVector); + // Set the matrices we just found + gte_SetRotMatrix(&workMatrix); + gte_SetTransMatrix(&workMatrix); + // Draw a Tri and a Quad + // Copy Tri vertices from ram to cpu registers casting as ulong so that ur0 (len 32bits) contains vx and vy (2 * 8bits) + // Hence the use of vx, vz members + cpu_ldr(ur0,(ulong*)&vertPos[0].vx); // Put vx, vy value in ur0 + cpu_ldr(ur1,(ulong*)&vertPos[0].vz); // Put vz, pad value in ur1 + cpu_ldr(ur2,(ulong*)&vertPos[1].vx); + cpu_ldr(ur3,(ulong*)&vertPos[1].vz); + cpu_ldr(ur4,(ulong*)&vertPos[2].vx); + cpu_ldr(ur5,(ulong*)&vertPos[2].vz); + // Load the gte registers from the cpu registers (gte-cpu move 1 cycle) - mtc2 %0, $0; + cpu_gted0(ur0); + cpu_gted1(ur1); + cpu_gted2(ur2); + cpu_gted3(ur3); + cpu_gted4(ur4); + cpu_gted5(ur5); + // Tri RotTransPers3 + // The two last cpu->gte copy will happen during the 2 nops in gte_rtpt() + gte_rtpt(); + // Fill the cpu registers with the Quad vertices + cpu_ldr(ur0,(ulong*)&vertPos[0].vx); + cpu_ldr(ur1,(ulong*)&vertPos[0].vz); + cpu_ldr(ur2,(ulong*)&vertPos[1].vx); + cpu_ldr(ur3,(ulong*)&vertPos[1].vz); + cpu_ldr(ur4,(ulong*)&vertPos[2].vx); + cpu_ldr(ur5,(ulong*)&vertPos[2].vz); + // Get nclip value, and win two cycles + gte_nclip(); + // Copy Tri 's screen coordinates from gte registers to d-cache. + gte_stsxy3c(&dc_sxytbl[0]); + // Set matrices - Move to right of screen + transVector.vx = CENTERX/2; + // Increment rot on X/Y axis + rotVector4.vy -= 8 ; + rotVector4.vx -= 4 ; + // Set matrices + RotMatrix_gte(&rotVector4, &workMatrix); + TransMatrix(&workMatrix, &transVector); + gte_SetRotMatrix(&workMatrix); + gte_SetTransMatrix(&workMatrix); + // Load the gte registers from the cpu registers (gte-cpu move 1 cycle) - mtc2 %0, $0; + cpu_gted0(ur0); + cpu_gted1(ur1); + cpu_gted2(ur2); + cpu_gted3(ur3); + cpu_gted4(ur4); + cpu_gted5(ur5); + // Quad RotTransPers3 + // Getting 2 cycles back thanks to nops + gte_rtpt(); + // gte_nclip() has 2 nops, lets use them to load the remaining vertex data from ram->cpu register + cpu_ldr(ur0,(ulong*)&vertPos[3].vx); + cpu_ldr(ur1,(ulong*)&vertPos[3].vz); + // Calculate nclip (outer product) + gte_nclip(); + // Copy result to d-cache + 3 + gte_stsxy3c(&dc_sxytbl[3]); + // Copy from cpu-gte + cpu_gted0(ur0); + cpu_gted1(ur1); + // Quad last vertex RotTransPers + // These two last cpu->gte load are free :p + gte_rtps(); + gte_nclip(); + // Copy last vertex value to d-cache + gte_stsxy(&dc_sxytbl[6]); + // Get p, flag, OTz + gte_stdp(&p); + gte_stflg(&flag); + gte_stszotz(&OTz); + // That's 10 cycles we won back ? + // Copy vertices data from d-cache to ram + // Tri + *(unsigned long long*)&poly->x0 = *(unsigned long long*)&dc_sxytbl[0]; + *(ulong*)&poly->x2 = *(ulong*)&dc_sxytbl[2]; + // Quad + *(unsigned long long*)&poly4->x0 = *(unsigned long long*)&dc_sxytbl[3]; + *(unsigned long long*)&poly4->x2 = *(unsigned long long*)&dc_sxytbl[5]; + // Initialize polygons + setPolyF3(poly); + setRGB0(poly, 255, 0, 255); + setPolyF4(poly4); + setRGB0(poly4, 0, 255, 255); + // Add to OT + addPrim(ot[db], poly); + addPrim(ot[db], poly4); + // Display text + FntPrint("Hello Free cycles !\n"); + FntFlush(-1); + + display(); + } + return 0; + }