gpu-0.0.5/0000755000175000017500000000000010772612062011624 5ustar tmbinctmbincgpu-0.0.5/vs.hlsl0000644000175000017500000000166510772605221013147 0ustar tmbinctmbincfloat4x4 modelView: register (c0);
float4x3 modelWorld: register (c4);

/* Vertex shader input.
   Technically there are no restrictions on it, other than that every
   referenced semantic must be available in the vertex buffer format.
   If one is not, shader instantiation will fail.
   Neither the order nor the actual format matters; both are fixed up
   on shader load anyway. */
struct Input
{
	float4 vPos: POSITION;
	float4 vNormal: NORMAL;
	float4 vUV: TEXCOORD0;
};

/* Vertex shader output and pixel shader input.
   This *must* match the pixel shader input, because shaders are not
   patched to match. The pixel shader may, however, leave out oPos etc. */
struct Output
{
	float4 oPos: POSITION;
	float3 oNormal: NORMAL;
	float4 oUV: TEXCOORD0;
};

/* Vertex shader entry point.
   Multiplies the position by transpose(modelView) (constants from c0) and
   the xyz part of the normal by transpose(modelWorld) (constants from c4);
   the texture coordinate is passed through unchanged. */
Output main(Input input)
{
	Output output;
	output.oPos = mul(transpose(modelView), input.vPos);
	output.oNormal = mul(transpose(modelWorld), input.vNormal.xyz);
	output.oUV = input.vUV;
	return output;
}
gpu-0.0.5/engine.h0000644000175000017500000000563210760704664013257 0ustar tmbinctmbinc#ifndef __engine_h
#define __engine_h

#ifdef __cplusplus
extern "C" {
#endif

#include

/* first dumb try of an engine. just to test the GX functions.
*/ typedef float eMatrix43[3][4]; typedef float eMatrix44[4][4]; typedef float eMatrix33[3][3]; typedef float eMatrixProj[7]; typedef float eViewport[6]; typedef float eVector3[3]; void build_rot_matrix(eMatrix43 dst, float xrot, float yrot, float zrot); void multiply_matrix(eMatrix43 dst, eMatrix43 s1, eMatrix43 s2); void multiply_matrix_44(eMatrix44 dst, eMatrix44 s1, eMatrix44 s2); void build_proj_ortho(eMatrixProj proj, float t, float b, float l, float r, float n, float f); void build_proj_persp(eMatrixProj proj, float fovy, float aspect, float n, float f); void invert_matrix(eMatrix43 dst, eMatrix43 src); void multiply_vector(eVector3 dst, eMatrix43 mtx, eVector3 src); extern eMatrix43 matrix_stack[128]; extern int matrix_top; /* my stupid matrix stack */ void glLoadIdentity(); void glTranslate(float x, float y, float z); void glRotate(float angle, float x, float y, float z); void glScale(float x, float y, float z); void glMultMatrix(eMatrix43 matrix); void glLoadMatrix(eMatrix43 matrix); void glPushMatrix(); void glPopMatrix(); void gxLoad(); void gluLookAt(float eyex, float eyey, float eyez, float centerx, float centery, float centerz, float upx, float upy, float upz); /* now some stupid, unoptimized vector stuff */ extern inline void vec_zero(eVector3 vec) { vec[0] = 0; vec[1] = 0; vec[2] = 0; } extern inline void vec_copy(eVector3 dst, eVector3 src) { dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; } extern inline float vec_dot(eVector3 vec1, eVector3 vec2) { return vec1[0] * vec2[0] + vec1[1] * vec2[1] +vec1[2] * vec2[2]; } extern inline void vec_cross(eVector3 dst, eVector3 src1, eVector3 src2) { dst[0] = src1[1] * src2[2] - src1[2] * src2[1]; dst[1] = src1[2] * src2[0] - src1[0] * src2[2]; dst[2] = src1[0] * src2[1] - src1[1] * src2[0]; } extern inline float vec_abs(eVector3 vec) { return sqrtf(vec[0]*vec[0] + vec[1] * vec[1] + vec[2] * vec[2]); } extern inline void vec_normalize(eVector3 dst, eVector3 src) { float invlen = 1 / vec_abs(src); dst[0] = 
src[0] * invlen; dst[1] = src[1] * invlen; dst[2] = src[2] * invlen; } extern inline void vec_scale(eVector3 dst, eVector3 src, float scale) { dst[0] = src[0] * scale; dst[1] = src[1] * scale; dst[2] = src[2] * scale; } extern inline void vec_add(eVector3 dst, eVector3 src1, eVector3 src2) { dst[0] = src1[0] + src2[0]; dst[1] = src1[1] + src2[1]; dst[2] = src1[2] + src2[2]; } extern inline void vec_mac(eVector3 dst, eVector3 src1, eVector3 src2, float f) { dst[0] = src1[0] + src2[0] * f; dst[1] = src1[1] + src2[1] * f; dst[2] = src1[2] + src2[2] * f; } extern inline void vec_sub(eVector3 dst, eVector3 src1, eVector3 src2) { dst[0] = src1[0] - src2[0]; dst[1] = src1[1] - src2[1]; dst[2] = src1[2] - src2[2]; } #ifdef __cplusplus }; #endif #endif gpu-0.0.5/Makefile0000644000175000017500000000060610772605246013274 0ustar tmbinctmbincall: xexample screenshot XEXAMPLE_OBJS = ioremap.o xe.o xee.o engine.o xexample.o SCREENSHOT_OBJS = ioremap.o screenshot.o # xextex.o LDFLAGS = -lm CFLAGS = -Wall -g -O2 xexample: $(XEXAMPLE_OBJS) gcc $(XEXAMPLE_OBJS) -o $@ $(LDFLAGS) screenshot: $(SCREENSHOT_OBJS) gcc $(SCREENSHOT_OBJS) -o $@ $(LDFLAGS) -lpng clean: rm -f $(XEXAMPLE_OBJS) $(SCREENSHOT_OBJS) screenshot xexample gpu-0.0.5/engine.c0000644000175000017500000001533610760704633013250 0ustar tmbinctmbinc#include "engine.h" #include #include eMatrix43 matrix_stack[128]; int matrix_top; const eMatrix43 ident_matrix={{1,0,0,0},{0,1,0,0},{0,0,1,0}}; void build_rot_matrix(eMatrix43 d, float xrot, float yrot, float zrot) { xrot *= M_PI / 180.0; yrot *= M_PI / 180.0; zrot *= M_PI / 180.0; float sinp = sinf(xrot), sinh = sinf(yrot), sinb = sinf(zrot); float cosp = cosf(xrot), cosh = cosf(yrot), cosb = cosf(zrot); d[0][0]=(cosh*cosb+sinb*sinp*sinh); d[0][1]=(cosh*sinb-sinh*sinp*cosb); d[0][2]=(sinh*cosp); d[0][3]=0; d[1][0]=(-sinb*cosp); d[1][1]=(cosp*cosb); d[1][2]=(sinp); d[1][3]=0; d[2][0]=(-sinh*cosb+cosh*sinp*sinb); d[2][1]=(-cosh*sinp*cosb-sinh*sinb); d[2][2]=(cosh*cosp); d[2][3]=0; 
} #define FIX(x) if (fabs(x) < 1e-16) x = 0; void multiply_matrix(eMatrix43 d, eMatrix43 a, eMatrix43 b) { int i; for (i=0; i<3; ++i) { float ai0=a[i][0], ai1=a[i][1], ai2=a[i][2], ai3=a[i][3]; d[i][0]=ai0 * b[0][0] + ai1 * b[1][0] + ai2 * b[2][0]; d[i][1]=ai0 * b[0][1] + ai1 * b[1][1] + ai2 * b[2][1]; d[i][2]=ai0 * b[0][2] + ai1 * b[1][2] + ai2 * b[2][2]; d[i][3]=ai0 * b[0][3] + ai1 * b[1][3] + ai2 * b[2][3] + ai3; FIX(d[i][0]); FIX(d[i][1]); FIX(d[i][2]); FIX(d[i][3]); } } void multiply_matrix_notranslate(eMatrix43 d, eMatrix43 a, eMatrix43 b) { int i; for (i=0; i<3; ++i) { float ai0=a[i][0], ai1=a[i][1], ai2=a[i][2]; d[i][0]=ai0 * b[0][0] + ai1 * b[1][0] + ai2 * b[2][0]; d[i][1]=ai0 * b[0][1] + ai1 * b[1][1] + ai2 * b[2][1]; d[i][2]=ai0 * b[0][2] + ai1 * b[1][2] + ai2 * b[2][2]; d[i][3]=ai0 * b[0][3] + ai1 * b[1][3] + ai2 * b[2][3]; } } void multiply_matrix_44(eMatrix44 d, eMatrix44 a, eMatrix44 b) { int i; for (i=0; i<4; ++i) { float ai0=a[i][0], ai1=a[i][1], ai2=a[i][2], ai3=a[i][3]; d[i][0]=ai0 * b[0][0] + ai1 * b[1][0] + ai2 * b[2][0] + ai3 * b[3][0]; d[i][1]=ai0 * b[0][1] + ai1 * b[1][1] + ai2 * b[2][1] + ai3 * b[3][1]; d[i][2]=ai0 * b[0][2] + ai1 * b[1][2] + ai2 * b[2][2] + ai3 * b[3][2]; d[i][3]=ai0 * b[0][3] + ai1 * b[1][3] + ai2 * b[2][3] + ai3 * b[3][3]; } } void build_proj_ortho(eMatrixProj proj, float t, float b, float l, float r, float n, float f) { proj[0] = 2.0 / (r - l); proj[1] = -(r+l) / (r-l); proj[2] = 2.0 / (t-b); proj[3] = -(t+b)/(t-b); proj[4] = -1.0/(f-n); proj[5] = -(f)/(f-n); *(int*)(proj+6) = 1; // ortho } void build_proj_persp(eMatrixProj proj, float fovy, float aspect, float n, float f) { float cot = 1.0f / tanf(fovy * 0.5F); float tmp; proj[0] = cot / aspect; proj[1] = 0; proj[2] = cot; proj[3] = 0; tmp = 1.0f / (f-n); proj[4] = -n * tmp; proj[5] = -(f*n) * tmp; *(int*)(proj+6) = 0; // perspective } void invert_matrix(eMatrix43 dst, eMatrix43 src) { } void multiply_vector(eVector3 dst, eMatrix43 mtx, eVector3 src) { dst[0] = 
mtx[0][0] * src[0] + mtx[0][1] * src[1] + src[2] * mtx[0][2] + mtx[0][3];
	dst[1] = mtx[1][0] * src[0] + mtx[1][1] * src[1] + src[2] * mtx[1][2] + mtx[1][3];
	dst[2] = mtx[2][0] * src[0] + mtx[2][1] * src[1] + src[2] * mtx[2][2] + mtx[2][3];
}

/* Overwrites the current matrix (matrix_stack[matrix_top]) with ident_matrix. */
void glLoadIdentity()
{
	memcpy(matrix_stack[matrix_top], ident_matrix, sizeof(eMatrix43));
}

/*
 * Applies a translation by (x, y, z) to the current matrix.
 * Only the translation column (index 3) changes: each row adds the dot
 * product of its first three elements with (x, y, z).
 */
void glTranslate(float x, float y, float z)
{
	eMatrix43 *m = &matrix_stack[matrix_top];
	(*m)[0][3] += (*m)[0][0] * x + (*m)[0][1] * y + (*m)[0][2] * z;
	(*m)[1][3] += (*m)[1][0] * x + (*m)[1][1] * y + (*m)[1][2] * z;
	(*m)[2][3] += (*m)[2][0] * x + (*m)[2][1] * y + (*m)[2][2] * z;
}

/*
 * Multiplies the current matrix by a rotation of `angle` degrees about the
 * axis (x, y, z).
 * Axes with exactly one non-zero component take a shortcut that fills in
 * the sine/cosine entries directly; only the sign of that component is
 * used there. Every other axis is normalized and goes through the general
 * axis/angle formula. An axis shorter than 1.0e-4 (including the zero
 * vector) leaves the current matrix unchanged.
 */
void glRotate(float angle, float x, float y, float z)
{
	float xx, yy, zz, xy, yz, zx, xs, ys, zs, one_c, s, c;
	int optimized = 0;
	eMatrix43 m;

	s = sin(angle * M_PI / 180.0);
	c = cos(angle * M_PI / 180.0);

	/* start from identity; the branches below only overwrite the 3x3 part */
	memcpy(m, ident_matrix, sizeof(eMatrix43));

	if (x == 0.0F) {
		if (y == 0.0F) {
			if (z != 0.0F) {
				/* rotation about the Z axis only */
				optimized = 1;
				m[0][0] = c;
				m[1][1] = c;
				if (z < 0.0F) {
					m[0][1] = s;
					m[1][0] = -s;
				} else {
					m[0][1] = -s;
					m[1][0] = s;
				}
			}
		} else if (z == 0.0F) {
			/* rotation about the Y axis only */
			optimized = 1;
			m[0][0] = c;
			m[2][2] = c;
			if (y < 0.0F) {
				m[0][2] = -s;
				m[2][0] = s;
			} else {
				m[0][2] = s;
				m[2][0] = -s;
			}
		}
	} else if (y == 0.0F) {
		if (z == 0.0F) {
			/* rotation about the X axis only */
			optimized = 1;
			m[1][1] = c;
			m[2][2] = c;
			if (x < 0.0F) {
				m[1][2] = s;
				m[2][1] = -s;
			} else {
				m[1][2] = -s;
				m[2][1] = s;
			}
		}
	}

	if (!optimized) {
		/* general case: normalize the axis first */
		const float mag = sqrtf(x * x + y * y + z * z);

		if (mag <= 1.0e-4)
			return;

		x /= mag;
		y /= mag;
		z /= mag;

		xx = x * x;
		yy = y * y;
		zz = z * z;
		xy = x * y;
		yz = y * z;
		zx = z * x;
		xs = x * s;
		ys = y * s;
		zs = z * s;
		one_c = 1.0F - c;

		m[0][0] = one_c * xx + c;
		m[0][1] = one_c * xy - zs;
		m[0][2] = one_c * zx + ys;

		m[1][0] = one_c * xy + zs;
		m[1][1] = one_c * yy + c;
		m[1][2] = one_c * yz - xs;

		m[2][0] = one_c * zx - ys;
		m[2][1] = one_c * yz + xs;
		m[2][2] = one_c * zz + c;
	}

	/* current = current * m; multiply_matrix caches each row of its first
	   operand before writing, so using the same matrix as destination works */
	multiply_matrix(matrix_stack[matrix_top], matrix_stack[matrix_top], m);
}

/* Scales the first three columns of the current matrix by x, y and z
   respectively; the translation column is left alone. */
void glScale(float x, float y, float z)
{
	matrix_stack[matrix_top][0][0] *= x;
matrix_stack[matrix_top][1][0] *= x; matrix_stack[matrix_top][2][0] *= x; matrix_stack[matrix_top][0][1] *= y; matrix_stack[matrix_top][1][1] *= y; matrix_stack[matrix_top][2][1] *= y; matrix_stack[matrix_top][0][2] *= z; matrix_stack[matrix_top][1][2] *= z; matrix_stack[matrix_top][2][2] *= z; } void glMultMatrix(eMatrix43 matrix) { multiply_matrix(matrix_stack[matrix_top], matrix_stack[matrix_top], matrix); } void glLoadMatrix(eMatrix43 matrix) { memcpy(matrix_stack[matrix_top], matrix, sizeof(eMatrix43)); } void gluLookAt(float eyex, float eyey, float eyez, float centerx, float centery, float centerz, float upx, float upy, float upz) { eMatrix43 m; float x[3], y[3], z[3], mag; z[0] = eyex - centerx; z[1] = eyey - centery; z[2] = eyez - centerz; mag = sqrtf(z[0] * z[0] + z[1] * z[1] + z[2] * z[2]); if (mag) { z[0] /= mag; z[1] /= mag; z[2] /= mag; } y[0] = upx; y[1] = upy; y[2] = upz; x[0] = y[1] * z[2] - y[2] * z[1]; x[1] = -y[0] * z[2] + y[2] * z[0]; x[2] = y[0] * z[1] - y[1] * z[0]; y[0] = z[1] * x[2] - z[2] * x[1]; y[1] = -z[0] * x[2] + z[2] * x[0]; y[2] = z[0] * x[1] - z[1] * x[0]; mag = sqrtf(x[0] * x[0] + x[1] * x[1] + x[2] * x[2]); if (mag) { x[0] /= mag; x[1] /= mag; x[2] /= mag; } mag = sqrtf(y[0] * y[0] + y[1] * y[1] + y[2] * y[2]); if (mag) { y[0] /= mag; y[1] /= mag; y[2] /= mag; } m[0][0] = x[0]; m[0][1] = x[1]; m[0][2] = x[2]; m[0][3] = 0; m[1][0] = y[0]; m[1][1] = y[1]; m[1][2] = y[2]; m[1][3] = 0; m[2][0] = z[0]; m[2][1] = z[1]; m[2][2] = z[2]; m[2][3] = 0; glMultMatrix(m); glTranslate(-eyex, -eyey, -eyez); } void glPushMatrix() { ++matrix_top; memcpy(matrix_stack + matrix_top, matrix_stack + matrix_top - 1, sizeof(eMatrix43)); } void glPopMatrix() { --matrix_top; } gpu-0.0.5/readme.txt0000644000175000017500000003514710772612062013634 0ustar tmbinctmbincXenos 3D Library, Version 0.0.5 This code resembles a library to use the 3D functionality of the 'xenos' chip in the Xbox 360. It was written from scratch, and contains no microsoft code. 
All information was reverse-engineered from either code or ringbuffer dumps. No chip documentation was available. I believe that I do not violate any patents or laws with the creation and use of this work. However, your local laws might tell you something different. Be careful! HOW TO START: Please take a look at xexample.c. It's commented, and displays a simple spinning cube with texture. Also, you might want to look at xextex.c, which contains a 16bit texture, stencil buffer operations and optionally makes use of alpha blending. MICROCODES: Due to legal reasons, two required microcode files (ucode0.bin, ucode1.bin) could not be included in this package. However, a separately released tool called "romextract" extracts them from the kernel stored in the flashrom. You need those two files to actually run the demonstration program. LICENSE: This code is, for now, licensed for non-commercial use only. If you prefer another license, contact me. Whatever. Take care with the parts ripped from gl/glu. If in doubt, remove them first. This code currently runs on linux. It requires access to /dev/mem, so running it as root is usually required. It communicates directly with the graphics hardware, so no userspace library is required. PERFORMANCE NOTE: However, mmap'ing /dev/mem maps the memory as uncached. This is not changeable - O_SYNC only forces uncached mapping when mapping memory below the memory limit (which is 0x1e000000). So we are out of luck. Anyway, you can patch the kernel to remap memory cached, or even better, write combined. This library will use proper cache instructions, so modify the kernel in any way you like, all three kinds of memory should work (uncached, cached, write combined). Write-combined memory is definitely preferred, cached memory is also ok, uncached memory is SLOW (by a factor of about 100x. yes.) The only critical data are command buffer data, and while using uncached memory is damn slow, the command buffers are not *that* big.
But depending on the amount of data, you can get a nice performance gain by modifying the memory mapping to a proper type. For a start, the default mapping is fine. Just don't expect good performance. Do you need more space for textures etc.? Unless you want to write a physical memory allocator, just boot linux with less ram (for example, 128M). Then, adjust the "RINGBUFFER_START" in xe.c. So, how does it work? We have a standard 3D acceleration device. Nothing is really too special. If you know how GPUs work, then you know how the 360 GPU works. Fortunately, we have a GPU which doesn't provide much fixed functionality. Most stuff is done in vertex and pixel shaders. Some stuff, however, is still hardcoded, like blending, z/stencil-compare. The typical sequence to render a frame is: for each model: calculate and load shader constants ("matrices") for each renderstate: setup renderstate for each mesh using this renderstate: draw mesh resolve to framebuffer while clearing EDRAM The xenos GPU can only render to the embedded framebuffer (EDRAM). The EDRAM contains both the framebuffer and the z-buffer. After a frame has been rendered, it must be "resolved" into the main RAM to be displayed. While resolving, the EDRAM is also cleared with a constant color (and constant Z value), so it's ready for the next frame. All data accessed by the GPU must lie in physical memory. Currently, we don't have a defined kernel interface, so we just use the memory at 0x1e000000 upwards, which is currently reserved (it contains the framebuffer, but nothing more). The xenos works with a continuous stream of commands ("command buffer"). Commands are usually just register writes, but are processed automatically. A command buffer can call an "indirect buffer" (or sub command-buffer), which is used to automatically enqueue commands, even though the main command buffer ringsize is limited to 32kb. Then you need shaders.
As I don't dare to write a shader assembler or even compiler (beware!), we just use Microsoft's XNA to do the job. Tser wrote a wrapper for the provided native functionality, so you can compile your HLSL shaders using his tool (you need XNA installed - but you don't need XNA on the xbox, don't worry). MSAA doesn't work yet. Neither does alpha test. The API was made to resemble the hardware as closely as possible, while still maintaining a useful abstraction level. The hardware resembles Direct3D - so my API does as well. No, this was not done because the original Xbox software uses D3D. API DOC: (haha) struct XenosShader represents a shader. A vertexshader is bound to the vertex buffer format, that's why you might need more than one "instance" for a vertex shader (if you want to use different VBFs with the same shader). That's what the instances are for - you load the shader once, then instantiate it for every VBF, then you apply the vfetch patches. Look into the example. Pixel shaders don't need that. Just always use instance 0 here. struct XenosVBFElement and struct XenosVBFFormat describe a vertex buffer format, to be used when applying vfetch patches. struct XenosSurface describes a surface, be it a texture or a render target. pitch is the line distance in pixels. format is one of the XE_FMT_* values, possibly with an endianness modifier. Use "Xe_Create_Surface" to allocate a surface, or "Xe_GetFramebufferSurface" to get the framebuffer surface. A surface can be locked, if you want access to the data in it. See the "xextex.c" example. You can also lock a surface for reading after resolving into it. Make sure that you Xe_Sync before, otherwise not all content might be rendered/written to memory. struct XenosVertexBuffer describes a vertexbuffer, to be passed to DrawPrimitive / DrawIndexedPrimitive. Create with Xe_CreateVertexBuffer. struct XenosIndexBuffer describes an index buffer. Can be either 16 or 32bit indices.
struct XenosDevice describes an opaque structure you should never need to mess around with. Unless you have a very good reason, use the functions. void Xe_Init(struct XenosDevice *xe); Initializes the hardware. void Xe_SetRenderTarget(struct XenosDevice *xe, struct XenosSurface *rt); Set render target for the next resolve. Also sets viewport size. void Xe_Resolve(struct XenosDevice *xe); Resolve the edram color buffer into memory, and clear the framebuffer. void Xe_ResolveInto(struct XenosDevice *xe, struct XenosSurface *surface, int source, int clear); Resolve into a surface, with some more control. Source can be XE_SOURCE_COLOR (for copying the color buffer), or XE_SOURCE_DS (for copying the depth/stencil buffer). You can selectively clear the color- or depth/stencil-buffer with XE_CLEAR_COLOR and XE_CLEAR_DS. void Xe_Clear(struct XenosDevice *xe, int flags); Clears the edram, either color, depth/stencil or both. struct XenosSurface *Xe_GetFramebufferSurface(struct XenosDevice *xe); Gets a pointer to the surface describing the framebuffer. You should set this as a rendertarget, unless of course you don't want. void Xe_Execute(struct XenosDevice *xe); Kicks the GPU. void Xe_Sync(struct XenosDevice *xe); Kicks the GPU, and waits until it executed all stuff queued up before. void Xe_SetClearColor(struct XenosDevice *xe, u32 clearcolor); Set the color to clear on resolve / clear. void Xe_DirtyAluConstant(struct XenosDevice *xe, int base, int len); In case you directly overwrite a shader constant in the XenosDevice-struct, you need to flag them dirty. Or just use "SetShaderConstant" to make things simpler. void Xe_DirtyFetch(struct XenosDevice *xe, int base, int len); Same for fetch constants. You shouldn't mess around with them, though. struct XenosShader *Xe_LoadShader(struct XenosDevice *xe, const char *filename); Loads a shader from disk. struct XenosShader *Xe_LoadShaderFromMemory(struct XenosDevice *xe, void *shader); Loads a shader from memory. 
Please note that the memory belongs to the device afterwards - don't clear/free it! void Xe_InstantiateShader(struct XenosDevice *xe, struct XenosShader *sh, unsigned int index); "Instantiate" a shader, i.e. copy it to physical memory. int Xe_GetShaderLength(struct XenosDevice *xe, void *sh); If you need to, you can calculate a shader length with this call. void Xe_ShaderApplyVFetchPatches(struct XenosDevice *xe, struct XenosShader *sh, unsigned int index, const struct XenosVBFFormat *fmt); Important. You need to fix up the vertex shader to match the vertex buffer format you use. See the examples. int Xe_VBFCalcStride(struct XenosDevice *xe, const struct XenosVBFFormat *fmt); int Xe_VBFCalcSize(struct XenosDevice *xe, const struct XenosVBFElement *fmt); Some helper functions if you mess around with VBFs. The following renderstate functions will try to resemble d3d. Please see there for details. void Xe_SetZFunc(struct XenosDevice *xe, int z_func); Set the Z compare function. void Xe_SetZWrite(struct XenosDevice *xe, int zw); Enable or disable Z write. void Xe_SetZEnable(struct XenosDevice *xe, int zw); Enable or disable Z compare. void Xe_SetFillMode(struct XenosDevice *xe, int front, int back); Set the fill mode for front- and backfacing geometry (point, line, solid, ...). void Xe_SetBlendControl(struct XenosDevice *xe, int col_src, int col_op, int col_dst, int alpha_src, int alpha_op, int alpha_dst); Set all blend controls at once. void Xe_SetSrcBlend(struct XenosDevice *xe, unsigned int blend); Set the source blend factor. void Xe_SetDestBlend(struct XenosDevice *xe, unsigned int blend); Set the dest blend factor. void Xe_SetBlendOp(struct XenosDevice *xe, unsigned int blendop); Set the blend operation. void Xe_SetSrcBlendAlpha(struct XenosDevice *xe, unsigned int blend); void Xe_SetDestBlendAlpha(struct XenosDevice *xe, unsigned int blend); void Xe_SetBlendOpAlpha(struct XenosDevice *xe, unsigned int blendop); Same, just for alpha.
void Xe_SetCullMode(struct XenosDevice *xe, unsigned int cullmode); Set the cullmode. If you select CW, the definition of front and backfaces will be inverted. void Xe_SetAlphaTestEnable(struct XenosDevice *xe, int enable); void Xe_SetAlphaFunc(struct XenosDevice *xe, unsigned int func); void Xe_SetAlphaRef(struct XenosDevice *xe, float alpharef); This doesn't work yet. /* bfff is a bitfield {backface,frontface} */ void Xe_SetStencilEnable(struct XenosDevice *xe, unsigned int enable); Enable or disable stencil operation. you need this for both stencil compare and stencil modification. void Xe_SetStencilFunc(struct XenosDevice *xe, int bfff, unsigned int func); Set the stencil compare function. bfff is a bitfield: 1 for front facing polys, 2 for back facing polys, so you can set that individually. /* -1 to leave old value */ void Xe_SetStencilOp(struct XenosDevice *xe, int bfff, int fail, int zfail, int pass); Set the stencil operation, for stencil-failed polys, z-failed polys, and passed polys. void Xe_SetStencilRef(struct XenosDevice *xe, int bfff, int ref); Set the stencil compare reference value. void Xe_SetStencilMask(struct XenosDevice *xe, int bfff, int mask); Set the stencil compare mask. void Xe_SetStencilWriteMask(struct XenosDevice *xe, int bfff, int writemask); Set the stencil write/modify mask. void Xe_InvalidateState(struct XenosDevice *xe); Should be called at the beginning of each frame - sets all renderstates to reasonable defaults. void Xe_SetShader(struct XenosDevice *xe, int type, struct XenosShader *sh, int instance); Set a shader to use, either the pixel or the vertex shader, with the given instance (for vs). void Xe_SetTexture(struct XenosDevice *xe, int index, struct XenosSurface *tex); Set a texture to be used in a sampler. 
struct XenosVertexBuffer *Xe_VBPoolAlloc(struct XenosDevice *xe, int size); void Xe_VBPoolAdd(struct XenosDevice *xe, struct XenosVertexBuffer *vb); void Xe_VBReclaim(struct XenosDevice *xe); void Xe_VBBegin(struct XenosDevice *xe, int pitch); /* pitch, len is nr of vertices */ void Xe_VBPut(struct XenosDevice *xe, void *data, int len); struct XenosVertexBuffer *Xe_VBEnd(struct XenosDevice *xe); void Xe_Draw(struct XenosDevice *xe, struct XenosVertexBuffer *vb, struct XenosIndexBuffer *ib); You should not use these functions, unless you want dynamic vertexbuffer the ugly way. void Xe_SetIndices(struct XenosDevice *de, struct XenosIndexBuffer *ib); Set the indexbuffer to use for Xe_DrawIndexedPrimitive. void Xe_SetStreamSource(struct XenosDevice *xe, int index, struct XenosVertexBuffer *vb, int offset, int stride); Set the vertexbuffer to use for Xe_Draw[Indexed]Primitive. If you want, you can specify an offset. Stride is ignored so far. void Xe_DrawIndexedPrimitive(struct XenosDevice *xe, int type, int base_index, int min_index, int num_vertices, int start_index, int primitive_count); void Xe_DrawPrimitive(struct XenosDevice *xe, int type, int start, int primitive_count); Draw primitives, either with indexbuffer or without. struct XenosIndexBuffer *Xe_CreateIndexBuffer(struct XenosDevice *xe, int length, int format); Create an indexbuffer with the given length and format. struct XenosVertexBuffer *Xe_CreateVertexBuffer(struct XenosDevice *xe, int length); Create a vertexbuffer with the given length. void *Xe_VB_Lock(struct XenosDevice *xe, struct XenosVertexBuffer *vb, int offset, int size, int flags); void *Xe_IB_Lock(struct XenosDevice *xe, struct XenosIndexBuffer *ib, int offset, int size, int flags); void *Xe_Surface_LockRect(struct XenosDevice *xe, struct XenosSurface *surface, int x, int y, int w, int h, int flags); Lock a vertex/indexbuffer/surface. Use flags=XE_LOCK_WRITE if you want to write, or XE_LOCK_READ for reading. 
void Xe_VB_Unlock(struct XenosDevice *xe, struct XenosVertexBuffer *vb); void Xe_IB_Unlock(struct XenosDevice *xe, struct XenosIndexBuffer *ib); void Xe_Surface_Unlock(struct XenosDevice *xe, struct XenosSurface *surface); After locking, you need to unlock. This will automatically flush the CPU cache. void Xe_SetVertexShaderConstantF(struct XenosDevice *xe, int start, const float *data, int count); /* count = number of 4 floats */ void Xe_SetPixelShaderConstantF(struct XenosDevice *xe, int start, const float *data, int count); /* count = number of 4 floats */ Set a pixel/vertex shader constant. struct XenosSurface *Xe_CreateTexture(struct XenosDevice *xe, unsigned int width, unsigned int height, unsigned int levels, int format, int tiled); Create a surface in physical memory. CHANGELOG: 0.0.1: * initial pre-release 0.0.2: * new example, much cleanup 0.0.3: * load ucodes from disk 0.0.5: * major API overhaul * some additional features * many, many bugfixes All the stuff (except when noted otherwise) was written by me, Felix Domke gpu-0.0.5/screenshot.c0000644000175000017500000000343510772605153014155 0ustar tmbinctmbinc#include #include #include #include #include #define r32(o) regs[(o)/4] volatile void * ioremap(unsigned long physaddr, unsigned size, int sync); int main(int argc, char **argv) { if (argc != 2) { fprintf(stderr, "usage: %s \n", *argv); return 1; } volatile unsigned int *regs = ioremap(0xec800000ULL, 0x20000, 1); if (!regs) { fprintf(stderr, "ioremap failed - %m"); return 1; } int ptr = r32(0x6110); int pitch = r32(0x6120) * 4; int width = r32(0x6134); int height = r32(0x6138); volatile unsigned int *screen = (void*)ioremap(ptr, height * pitch, 0); unsigned int screen2[width * height]; png_bytep row_pointers[height]; int y, x; for (y=0; y> 8); } row_pointers[y] = screen2 + y * width; } FILE *outfp = fopen(argv[1], "wb"); if (!outfp) { perror(argv[1]); return 2; } png_structp png_ptr_w = png_create_write_struct(PNG_LIBPNG_VER_STRING, 0, 0, 0); 
png_infop info_ptr_w = png_create_info_struct(png_ptr_w); if (setjmp(png_jmpbuf(png_ptr_w))) { png_destroy_write_struct(&png_ptr_w, &info_ptr_w); fclose(outfp); return 1; } png_init_io(png_ptr_w, outfp); png_set_IHDR(png_ptr_w, info_ptr_w, width, height, 8, PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT); png_set_rows(png_ptr_w, info_ptr_w, row_pointers); png_write_png(png_ptr_w, info_ptr_w, PNG_TRANSFORM_IDENTITY, 0); png_write_end(png_ptr_w, info_ptr_w); png_destroy_write_struct(&png_ptr_w, &info_ptr_w); fclose(outfp); return 0; } gpu-0.0.5/xee.c0000644000175000017500000000271410772606322012557 0ustar tmbinctmbinc#include "xee.h" #include #include #include void M_Load44(struct XenosDevice *xe, int base, eMatrix44 *matrix) { Xe_SetVertexShaderConstantF(xe, base, (float*)matrix, 4); } void M_Load43(struct XenosDevice *xe, int base, eMatrix43 *matrix) { Xe_SetVertexShaderConstantF(xe, base, (float*)matrix, 3); } const eMatrix44 g_ident = {{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}}; void M_BuildPersp(eMatrix44 *m, float fovy, float aspect, float f, float n) { float cot = 1.0f / tanf(fovy * 0.5F); float tmp = 1.0f / (f-n); eMatrix44 _m = { {cot / aspect, 0, 0, 0}, {0, cot, 0, 0}, {0, 0, -n * tmp, 1}, {0, 0, -(f*n) * tmp, 0}}; memcpy(m, _m, sizeof(_m)); } void M_Dump(const char *name, eMatrix44 *m) { int i, j; printf("-- %s:\n", name); for (i=0; i<4; ++i) { for (j=0; j<4; ++j) printf("%3.3f ", (*m)[i][j]); printf("\n"); } } eMatrix44 g_proj; void M_LoadMV(struct XenosDevice *xe, int where) { eMatrix44 res, worldview; memcpy(worldview, g_ident, sizeof(eMatrix44)); memcpy(worldview, &matrix_stack[matrix_top], sizeof(eMatrix43)); multiply_matrix_44(res, g_proj, worldview); res[0][3] = -res[0][3]; res[1][3] = -res[1][3]; res[2][3] = -res[2][3]; res[3][3] = -res[3][3]; M_Load44(xe, where, &res); } void M_LoadMW(struct XenosDevice *xe, int where) { eMatrix44 world; memcpy(world, g_ident, sizeof(eMatrix44)); memcpy(world, 
&matrix_stack[matrix_top], sizeof(eMatrix43)); M_Load44(xe, where, world); } gpu-0.0.5/xee.h0000644000175000017500000000106510770070615012560 0ustar tmbinctmbinc#ifndef __xee_h #define __xee_h #ifdef __cplusplus extern "C" { #endif #include "xe.h" #include "engine.h" void M_Load44(struct XenosDevice *xe, int base, eMatrix44 *matrix); void M_Load43(struct XenosDevice *xe, int base, eMatrix43 *matrix); extern const eMatrix44 g_ident; void M_BuildPersp(eMatrix44 *m, float fovy, float aspect, float f, float n); void M_Dump(const char *name, eMatrix44 *m); extern eMatrix44 g_proj; void M_LoadMV(struct XenosDevice *xe, int where); void M_LoadMW(struct XenosDevice *xe, int where); #ifdef __cplusplus }; #endif #endif gpu-0.0.5/xe.h0000644000175000017500000002655610771541172012431 0ustar tmbinctmbinc#ifndef __xe_h #define __xe_h #ifdef __cplusplus extern "C" { #endif #define SHADER_TYPE_PIXEL 1 #define SHADER_TYPE_VERTEX 0 #define XE_PRIMTYPE_POINTLIST 1 #define XE_PRIMTYPE_LINELIST 2 #define XE_PRIMTYPE_LINESTRIP 3 #define XE_PRIMTYPE_TRIANGLELIST 4 #define XE_PRIMTYPE_TRIANGLESTRIP 5 #define XE_PRIMTYPE_TRIANGLEFAN 6 #define XE_PRIMTYPE_RECTLIST 8 #define XE_PRIMTYPE_QUADLIST 13 #define XE_CMP_NEVER 0 #define XE_CMP_LESS 1 #define XE_CMP_EQUAL 2 #define XE_CMP_LESSEQUAL 3 #define XE_CMP_GREATER 4 #define XE_CMP_NOTEQUAL 5 #define XE_CMP_GREATEREQUAL 6 #define XE_CMP_ALWAYS 7 #define XE_BLEND_ZERO 0 #define XE_BLEND_ONE 1 #define XE_BLEND_SRCCOLOR 4 #define XE_BLEND_INVSRCCOLOR 5 #define XE_BLEND_SRCALPHA 6 #define XE_BLEND_INVSRCALPHA 7 #define XE_BLEND_DESTCOLOR 8 #define XE_BLEND_INVDESTCOLOR 9 #define XE_BLEND_DESTALPHA 10 #define XE_BLEND_INVDESTALPHA 11 #define XE_BLEND_BLENDFACTOR 12 #define XE_BLEND_INVBLENDFACTOR 13 #define XE_BLEND_CONSTANTALPHA 14 #define XE_BLEND_INVCONSTANTALPHA 15 #define XE_BLEND_SRCALPHASAT 16 #define XE_CULL_NONE 0 #define XE_CULL_CW 2 #define XE_CULL_CCW 6 #define XE_BLENDOP_ADD 0 #define XE_BLENDOP_SUBTRACT 1 #define 
XE_BLENDOP_REVSUBTRACT 4 #define XE_BLENDOP_MIN 2 #define XE_BLENDOP_MAX 3 #define XE_STENCILOP_KEEP 0 #define XE_STENCILOP_ZERO 1 #define XE_STENCILOP_REPLACE 2 #define XE_STENCILOP_INCRSAT 3 #define XE_STENCILOP_DECRSAT 4 #define XE_STENCILOP_INVERT 5 #define XE_STENCILOP_INCR 6 #define XE_STENCILOP_DECR 7 typedef unsigned int u32; typedef unsigned short u16; struct XenosLock { void *start; u32 phys; int size; int flags; }; #define XE_SHADER_MAX_INSTANCES 16 struct XenosShader { void *shader; u32 size; /* we might need more than once instance if we want to use a shader with different VBFs */ u32 shader_phys[XE_SHADER_MAX_INSTANCES], shader_phys_size, program_control, context_misc; void *shader_instance[XE_SHADER_MAX_INSTANCES]; }; /* the shader file format */ struct XenosShaderHeader { u32 magic; u32 offset; u32 _[3]; u32 off_constants, off_shader; }; struct XenosShaderData { u32 sh_off, sh_size; u32 program_control, context_misc; u32 _[2]; }; struct XenosShaderVertex { u32 cnt0, cnt_vfetch, cnt2; }; #define SWIZZLE_XYZW 0x688 #define SWIZZLE_XYZ1 0xA88 // 101 010 001 000 #define SWIZZLE_XY01 0xA08 // 101 000 001 000 #define SWIZZLE_XY__ 0xFC8 // 111 111 001 000 #define SWIZZLE_XYZ_ 0xEC8 // 111 010 001 000 #define SWIZZLE_XYZ0 0x0C8 // 000 010 001 000 #define SWIZZLE_XY0_ 0xE08 // 111 000 001 000 /* each vertex buffer element fills FOUR FLOAT components. the 'usage' specifies which of them (position, color, texuv, ..) the 'fmt' specified in which form they lie in memory. if you specify float3, the remaining component will be filled up with the 0 or 1, according to the swizzling. 
*/ #define XE_TYPE_FLOAT2 37 #define XE_TYPE_FLOAT3 57 #define XE_TYPE_FLOAT4 38 #define XE_TYPE_UBYTE4 6 /* the usage must match the shader */ #define XE_USAGE_POSITION 0 #define XE_USAGE_BLENDWEIGHTS 1 #define XE_USAGE_BLENDINDICES 2 #define XE_USAGE_NORMAL 3 #define XE_USAGE_PSIZE 4 #define XE_USAGE_TEXCOORD 5 #define XE_USAGE_TANGENT 6 #define XE_USAGE_BINORMAL 7 #define XE_USAGE_TESSFACTOR 8 #define XE_USAGE_POSITIONT 9 #define XE_USAGE_COLOR 10 #define XE_USAGE_FOG 11 #define XE_USAGE_DEPTH 12 #define XE_USAGE_SAMPLE 13 /* texture formats */ #define XE_FMT_MASK 0x3F #define XE_FMT_8888 6 #define XE_FMT_16161616 26 #define XE_FMT_ARGB 0x80 #define XE_FMT_BGRA 0x00 #define XE_FMT_16BE 0x40 struct XenosVBFElement { int usage; /* XE_USAGE */ int index; int fmt; /* XE_TYPE */ }; struct XenosVBFFormat { int num; struct XenosVBFElement e[10]; }; struct XenosSurface { int width, height, pitch, tiled, format; u32 ptr, ptr_mip; int bypp; void *base; struct XenosLock lock; }; struct XenosVertexBuffer { u32 phys_base; int vertices; int size, space; /* in DWORDs */ void *base; struct XenosLock lock; struct XenosVertexBuffer *next; }; #define XE_FMT_INDEX16 0 #define XE_FMT_INDEX32 1 struct XenosIndexBuffer { u32 phys_base; int indices; /* actual size, in indices */ int size; /* in bytes */ void *base; int fmt; /* 0 for 16bit, 1 for 32bit */ struct XenosLock lock; }; struct XenosDevice { float alu_constants[256 * 4 * 2]; u32 fetch_constants[96 * 2]; u32 alu_dirty; /* 16 * 4 constants per bit */ u32 fetch_dirty; /* 3 * 2 per bit */ float clipplane[6*4]; u32 integer_constants[10*4]; u32 controlpacket[9], stencildata[2]; unsigned int alpharef; // should be moved into state struct XenosShader *vs, *ps; int vs_index; #define DIRTY_ALU 0x0001 #define DIRTY_FETCH 0x0002 #define DIRTY_CLIP 0x0004 #define DRITY_INTEGER 0x0008 #define DIRTY_CONTROL 0x0010 #define DIRTY_SHADER 0x0020 #define DIRTY_MISC 0x0040 int dirty; /* private */ u32 rb_secondary_base; volatile void *rb, 
*rb_primary, *rb_secondary; int rb_primary_wptr, rb_secondary_wptr; int rb_secondary_boundary; volatile unsigned int *regs; u32 ucode0[0x120], ucode1[0x900]; struct XenosSurface tex_fb; struct XenosSurface *rt; int alloc_ptr; int last_wptr; int vp_xres, vp_yres; int frameidx; u32 clearcolor; int msaa_samples; struct XenosVertexBuffer *vb_current, *vb_head; int vb_current_pitch; struct XenosVertexBuffer *vb_pool; struct XenosVertexBuffer *vb_pool_after_frame; int tris_drawn; struct XenosIndexBuffer *current_ib; struct XenosVertexBuffer *current_vb; int edram_colorformat, edram_depthbase, edram_color0base, edram_hizpitch, edram_pitch; }; void Xe_Init(struct XenosDevice *xe); void __attribute__((noreturn)) Xe_Fatal(struct XenosDevice *xe, const char *fmt, ...); void Xe_SetRenderTarget(struct XenosDevice *xe, struct XenosSurface *rt); void Xe_Resolve(struct XenosDevice *xe); #define XE_SOURCE_COLOR 0 #define XE_SOURCE_DS 4 #define XE_CLEAR_COLOR 1 #define XE_CLEAR_DS 2 void Xe_ResolveInto(struct XenosDevice *xe, struct XenosSurface *surface, int source, int clear); /* Xe_Clear always clears the complete rendertarget. No excuses. If you want arbitrary targets, use traditional draw. 
(reason: resolve cannot handle arbitrary shapes) */ void Xe_Clear(struct XenosDevice *xe, int flags); struct XenosSurface *Xe_GetFramebufferSurface(struct XenosDevice *xe); void Xe_Execute(struct XenosDevice *xe); void Xe_Sync(struct XenosDevice *xe); void Xe_SetClearColor(struct XenosDevice *xe, u32 clearcolor); void Xe_DirtyAluConstant(struct XenosDevice *xe, int base, int len); void Xe_DirtyFetch(struct XenosDevice *xe, int base, int len); struct XenosShader *Xe_LoadShader(struct XenosDevice *xe, const char *filename); struct XenosShader *Xe_LoadShaderFromMemory(struct XenosDevice *xe, void *shader); void Xe_InstantiateShader(struct XenosDevice *xe, struct XenosShader *sh, unsigned int index); int Xe_GetShaderLength(struct XenosDevice *xe, void *sh); void Xe_ShaderApplyVFetchPatches(struct XenosDevice *xe, struct XenosShader *sh, unsigned int index, const struct XenosVBFFormat *fmt); int Xe_VBFCalcStride(struct XenosDevice *xe, const struct XenosVBFFormat *fmt); int Xe_VBFCalcSize(struct XenosDevice *xe, const struct XenosVBFElement *fmt); void Xe_SetZFunc(struct XenosDevice *xe, int z_func); void Xe_SetZWrite(struct XenosDevice *xe, int zw); void Xe_SetZEnable(struct XenosDevice *xe, int zw); void Xe_SetFillMode(struct XenosDevice *xe, int front, int back); void Xe_SetBlendControl(struct XenosDevice *xe, int col_src, int col_op, int col_dst, int alpha_src, int alpha_op, int alpha_dst); void Xe_SetSrcBlend(struct XenosDevice *xe, unsigned int blend); void Xe_SetDestBlend(struct XenosDevice *xe, unsigned int blend); void Xe_SetBlendOp(struct XenosDevice *xe, unsigned int blendop); void Xe_SetSrcBlendAlpha(struct XenosDevice *xe, unsigned int blend); void Xe_SetDestBlendAlpha(struct XenosDevice *xe, unsigned int blend); void Xe_SetBlendOpAlpha(struct XenosDevice *xe, unsigned int blendop); void Xe_SetCullMode(struct XenosDevice *xe, unsigned int cullmode); void Xe_SetAlphaTestEnable(struct XenosDevice *xe, int enable); void Xe_SetAlphaFunc(struct XenosDevice *xe, 
unsigned int func); void Xe_SetAlphaRef(struct XenosDevice *xe, float alpharef); /* bfff is a bitfield {backface,frontface} */ void Xe_SetStencilEnable(struct XenosDevice *xe, unsigned int enable); void Xe_SetStencilFunc(struct XenosDevice *xe, int bfff, unsigned int func); /* -1 to leave old value */ void Xe_SetStencilOp(struct XenosDevice *xe, int bfff, int fail, int zfail, int pass); void Xe_SetStencilRef(struct XenosDevice *xe, int bfff, int ref); void Xe_SetStencilMask(struct XenosDevice *xe, int bfff, int mask); void Xe_SetStencilWriteMask(struct XenosDevice *xe, int bfff, int writemask); void Xe_InvalidateState(struct XenosDevice *xe); void Xe_SetShader(struct XenosDevice *xe, int type, struct XenosShader *sh, int instance); void Xe_SetTexture(struct XenosDevice *xe, int index, struct XenosSurface *tex); struct XenosVertexBuffer *Xe_VBPoolAlloc(struct XenosDevice *xe, int size); void Xe_VBPoolAdd(struct XenosDevice *xe, struct XenosVertexBuffer *vb); void Xe_VBReclaim(struct XenosDevice *xe); void Xe_VBBegin(struct XenosDevice *xe, int pitch); /* pitch, len is nr of vertices */ void Xe_VBPut(struct XenosDevice *xe, void *data, int len); struct XenosVertexBuffer *Xe_VBEnd(struct XenosDevice *xe); void Xe_Draw(struct XenosDevice *xe, struct XenosVertexBuffer *vb, struct XenosIndexBuffer *ib); void Xe_SetIndices(struct XenosDevice *de, struct XenosIndexBuffer *ib); void Xe_DrawIndexedPrimitive(struct XenosDevice *xe, int type, int base_index, int min_index, int num_vertices, int start_index, int primitive_count); void Xe_DrawPrimitive(struct XenosDevice *xe, int type, int start, int primitive_count); void Xe_SetStreamSource(struct XenosDevice *xe, int index, struct XenosVertexBuffer *vb, int offset, int stride); struct XenosIndexBuffer *Xe_CreateIndexBuffer(struct XenosDevice *xe, int length, int format); struct XenosVertexBuffer *Xe_CreateVertexBuffer(struct XenosDevice *xe, int length); #define XE_LOCK_READ 1 #define XE_LOCK_WRITE 2 void *Xe_VB_Lock(struct 
XenosDevice *xe, struct XenosVertexBuffer *vb, int offset, int size, int flags); void Xe_VB_Unlock(struct XenosDevice *xe, struct XenosVertexBuffer *vb); void *Xe_IB_Lock(struct XenosDevice *xe, struct XenosIndexBuffer *ib, int offset, int size, int flags); void Xe_IB_Unlock(struct XenosDevice *xe, struct XenosIndexBuffer *ib); void Xe_SetVertexShaderConstantF(struct XenosDevice *xe, int start, const float *data, int count); /* count = number of 4 floats */ void Xe_SetPixelShaderConstantF(struct XenosDevice *xe, int start, const float *data, int count); /* count = number of 4 floats */ struct XenosSurface *Xe_CreateTexture(struct XenosDevice *xe, unsigned int width, unsigned int height, unsigned int levels, int format, int tiled); void *Xe_Surface_LockRect(struct XenosDevice *xe, struct XenosSurface *surface, int x, int y, int w, int h, int flags); void Xe_Surface_Unlock(struct XenosDevice *xe, struct XenosSurface *surface); #ifdef __cplusplus }; #endif #endif gpu-0.0.5/xexample.c0000644000175000017500000001417710772605126013630 0ustar tmbinctmbinc#include #include #include #include "xe.h" #include "engine.h" #include "xee.h" struct XenosDevice _xe, *xe; int main(void) { xe = &_xe; /* initialize the GPU */ Xe_Init(xe); /* create a render target (the framebuffer) */ struct XenosSurface *fb = Xe_GetFramebufferSurface(xe); Xe_SetRenderTarget(xe, fb); /* let's define a vertex buffer format */ static const struct XenosVBFFormat vbf = { 5, { {XE_USAGE_POSITION, 0, XE_TYPE_FLOAT3}, {XE_USAGE_NORMAL, 0, XE_TYPE_FLOAT3}, {XE_USAGE_TANGENT, 0, XE_TYPE_FLOAT3}, {XE_USAGE_COLOR, 0, XE_TYPE_UBYTE4}, {XE_USAGE_TEXCOORD, 0, XE_TYPE_FLOAT2}, } }; /* a cube */ float cube[] = { // POSITION | NORMAL | TANGENT | COL | U V | -0.5000 , -0.5000 , -0.5000 , +0.0000 , +0.0000 , -1.0000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +1.0000, -0.5000 , +0.5000 , -0.5000 , +0.0000 , +0.0000 , -1.0000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +0.0000, +0.5000 , +0.5000 , -0.5000 
, +0.0000 , +0.0000 , -1.0000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +2.0000 , +0.0000, +0.5000 , -0.5000 , -0.5000 , +0.0000 , +0.0000 , -1.0000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +2.0000 , +1.0000, -0.5000 , -0.5000 , +0.5000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , -1.0000 , +0.0000 , +0.0000 , +1.0000, -0.5000 , +0.5000 , +0.5000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , -1.0000 , +0.0000 , +0.0000 , +0.0000, -0.5000 , +0.5000 , -0.5000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , -1.0000 , +0.0000 , +1.0000 , +0.0000, -0.5000 , -0.5000 , -0.5000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , -1.0000 , +0.0000 , +1.0000 , +1.0000, +0.5000 , -0.5000 , +0.5000 , +0.0000 , +0.0000 , +1.0000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +3.0000 , +1.0000, +0.5000 , +0.5000 , +0.5000 , +0.0000 , +0.0000 , +1.0000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +3.0000 , +0.0000, -0.5000 , +0.5000 , +0.5000 , +0.0000 , +0.0000 , +1.0000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +4.0000 , +0.0000, -0.5000 , -0.5000 , +0.5000 , +0.0000 , +0.0000 , +1.0000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +4.0000 , +1.0000, +0.5000 , -0.5000 , -0.5000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +0.0000 , +2.0000 , +1.0000, +0.5000 , +0.5000 , -0.5000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +0.0000 , +2.0000 , +0.0000, +0.5000 , +0.5000 , +0.5000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +0.0000 , +3.0000 , +0.0000, +0.5000 , -0.5000 , +0.5000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +0.0000 , +3.0000 , +1.0000, -0.5000 , +0.5000 , -0.5000 , +0.0000 , +1.0000 , +0.0000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000, -0.5000 , +0.5000 , +0.5000 , +0.0000 , +1.0000 , +0.0000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000, +0.5000 , +0.5000 , +0.5000 , +0.0000 , +1.0000 , +0.0000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +0.0000, 
+0.5000 , +0.5000 , -0.5000 , +0.0000 , +1.0000 , +0.0000 , +1.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +1.0000, +0.5000 , -0.5000 , -0.5000 , +0.0000 , -1.0000 , +0.0000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000, +0.5000 , -0.5000 , +0.5000 , +0.0000 , -1.0000 , +0.0000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000 , +0.0000, -0.5000 , -0.5000 , +0.5000 , +0.0000 , -1.0000 , +0.0000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +0.0000, -0.5000 , -0.5000 , -0.5000 , +0.0000 , -1.0000 , +0.0000 , -1.0000 , +0.0000 , +0.0000 , +0.0000 , +1.0000 , +1.0000, }; unsigned short cube_indices[] = { 0, 1, 2, 0, 2, 3, 4, 5, 6, 4, 6, 7, 8, 9, 10, 8, 10, 11, 12, 13, 14, 12, 14, 15, 16, 17, 18, 16, 18, 19, 20, 21, 22, 20, 22, 23}; /* load pixel shader */ struct XenosShader *sh_ps, *sh_vs; sh_ps = Xe_LoadShader(xe, "ps.psu"); Xe_InstantiateShader(xe, sh_ps, 0); /* load vertex shader */ sh_vs = Xe_LoadShader(xe, "vs.vsu"); Xe_InstantiateShader(xe, sh_vs, 0); Xe_ShaderApplyVFetchPatches(xe, sh_vs, 0, &vbf); M_BuildPersp(&g_proj, 45.0 / 180.0 * M_PI, 640.0/480.0, 1, 200.0); /* create and fill vertex buffer */ struct XenosVertexBuffer *vb = Xe_CreateVertexBuffer(xe, sizeof(cube)); void *v = Xe_VB_Lock(xe, vb, 0, sizeof(cube), XE_LOCK_WRITE); memcpy(v, cube, sizeof(cube)); Xe_VB_Unlock(xe, vb); /* create and fill index buffer */ struct XenosIndexBuffer *ib = Xe_CreateIndexBuffer(xe, sizeof(cube_indices), XE_FMT_INDEX16); unsigned short *i = Xe_IB_Lock(xe, ib, 0, sizeof(cube_indices), XE_LOCK_WRITE); memcpy(i, cube_indices, sizeof(cube_indices)); Xe_IB_Unlock(xe, ib); /* stats */ time_t start = time(0); int f = 0; int framecount = 0; while (1) { f++; framecount++; /* begin a new frame, i.e. 
reset all renderstates to the default */ Xe_InvalidateState(xe); /* load some model-view matrix */ glLoadIdentity(); glPushMatrix(); glTranslate(0, 0, -3); glRotate(f / 100.0, .5, .1, 1); M_LoadMV(xe, 0); // load model view matrix to VS constant 0 M_LoadMW(xe, 4); // load (fake) model world matrix to VS constant 4 /* set the light direction for the pixel shader */ float lightDirection[] = {0, 0, -1, 0}; Xe_SetPixelShaderConstantF(xe, 0, lightDirection, 1); int max_vertices = sizeof(cube)/(sizeof(*cube)*12); int nr_primitives = sizeof(cube_indices)/sizeof(*cube_indices) / 3; /* draw cube */ Xe_SetShader(xe, SHADER_TYPE_PIXEL, sh_ps, 0); Xe_SetShader(xe, SHADER_TYPE_VERTEX, sh_vs, 0); Xe_SetStreamSource(xe, 0, vb, 0, 12); /* using this vertex buffer */ Xe_SetIndices(xe, ib); /* ... this index buffer... */ Xe_SetTexture(xe, 0, fb); /* ... and this texture */ Xe_DrawIndexedPrimitive(xe, XE_PRIMTYPE_TRIANGLELIST, 0, 0, max_vertices, 0, nr_primitives); /* clear to white */ Xe_SetClearColor(xe, ~0); /* resolve (and clear) */ Xe_Resolve(xe); /* wait for render finish */ Xe_Sync(xe); glPopMatrix(); /* some stats */ if (time(0) != start) { time(&start); printf("%d fps\n", framecount); framecount = 0; } } return 0; } gpu-0.0.5/xe.c0000644000175000017500000014504710772606663012431 0ustar tmbinctmbinc#include "xe.h" #include #include #include #include #include #include #include #if 0 /* if you want more texture ram, boot with mem=128M and use this */ #define RINGBUFFER_BASE 0x08000000 #define RINGBUFFER_SIZE 0x17C00000 #else #define RINGBUFFER_BASE 0x1e000000 #define RINGBUFFER_SIZE 0x01C00000 #endif #define RPTR_WRITEBACK 0x10000 #define SCRATCH_WRITEBACK 0x10100 #define RINGBUFFER_PRIMARY_SIZE (0x8000/4) #define RINGBUFFER_SECONDARY_SIZE (0x100000/4) static inline int FLOAT(float f) { union { float f; u32 d; } u = {f}; return u.d; } #define rput32(d) *(volatile u32*)(xe->rb_secondary + xe->rb_secondary_wptr++ * 4) = (d); #define rput(base, len) 
memcpy(((void*)xe->rb_secondary) + xe->rb_secondary_wptr * 4, base, len * 4); xe->rb_secondary_wptr += len; #define rput32p(d) do { *(volatile u32*)(xe->rb_primary + xe->rb_primary_wptr++ * 4) = d; if (xe->rb_primary_wptr == RINGBUFFER_PRIMARY_SIZE) xe->rb_primary_wptr = 0; } while (0) #define rputf(d) rput32(FLOAT(d)); #define r32(o) xe->regs[(o)/4] #define w32(o, v) xe->regs[(o)/4] = (v) #define RADEON_CP_PACKET0 0x00000000 #define RADEON_ONE_REG_WR (1 << 15) #define CP_PACKET0( reg, n ) \ (RADEON_CP_PACKET0 | ((n) << 16) | ((reg) >> 2)) #define CP_PACKET0_TABLE( reg, n ) \ (RADEON_CP_PACKET0 | RADEON_ONE_REG_WR | ((n) << 16) | ((reg) >> 2)) static inline void Xe_pWriteReg(struct XenosDevice *xe, u32 reg, u32 val) { rput32(CP_PACKET0(reg, 0)); rput32(val); } static inline u32 SurfaceInfo(int surface_pitch, int msaa_samples, int hi_zpitch) { return surface_pitch | (msaa_samples << 16) | (hi_zpitch << 18); } static inline u32 xy32(int x, int y) { return x | (y << 16); } #define CACHELINE_SIZE 128 #define __dcbst(where) asm volatile ("dcbst 0,%0" : : "r"(where) : "memory") #define __dcbf(where) asm volatile ("dcbf 0,%0" : : "r"(where) : "memory") #define __sync() asm volatile ("sync" : : : "memory") void Xe_pSyncToDevice(struct XenosDevice *xe, volatile void *data, int len) { while (len > 0) { __dcbst(data); data += CACHELINE_SIZE; len -= CACHELINE_SIZE; } } void Xe_pSyncFromDevice(struct XenosDevice *xe, volatile void *data, int len) { while (len > 0) { __dcbf(data); data += CACHELINE_SIZE; len -= CACHELINE_SIZE; } } void *Xe_pAlloc(struct XenosDevice *xe, u32 *phys, int size, int align) { void *r; if (!align) align = size; xe->alloc_ptr += (-xe->alloc_ptr) & (align-1); xe->alloc_ptr += align; r = ((void*)xe->rb) + xe->alloc_ptr; if (phys) *phys = RINGBUFFER_BASE + xe->alloc_ptr; xe->alloc_ptr += size; printf("Xe_pAlloc: at %d kb\n", xe->alloc_ptr / 1024); if (xe->alloc_ptr > (RINGBUFFER_SIZE)) Xe_Fatal(xe, "FATAL: out of memory. 
(alloc_ptr: %d kb, RINGBUFFER_SIZE: %d kb)\n", xe->alloc_ptr / 1024, RINGBUFFER_SIZE / 1024); return r; } void Xe_pInvalidateGpuCache_Primary(struct XenosDevice *xe, int base, int size) { size += 0x1FFF; size &= ~0x1FFF; rput32p(0x00000a31); rput32p(0x01000000); rput32p(0x00010a2f); rput32p(size); rput32p(base); rput32p(0xc0043c00); rput32p(0x00000003); rput32p(0x00000a31); rput32p(0x00000000); rput32p(0x80000000); rput32p(0x00000008); } void Xe_pRBCommitPrimary(struct XenosDevice *xe) { int i; for (i=0; i<0x20; ++i) rput32p(0x80000000); Xe_pSyncToDevice(xe, xe->rb_primary, RINGBUFFER_PRIMARY_SIZE * 4); __asm__ ("sync"); w32(0x0714, xe->rb_primary_wptr); // printf("committed to %08x\n", rb_primary_wptr); } void Xe_pRBKickSegment(struct XenosDevice *xe, int base, int len) { // printf("kick_segment: %x, len=%x\n", base, len * 4); Xe_pSyncToDevice(xe, xe->rb_secondary + base * 4, len * 4); Xe_pInvalidateGpuCache_Primary(xe, xe->rb_secondary_base + base * 4, len * 4 + 0x1000); rput32p(0xc0013f00); rput32p(xe->rb_secondary_base + base * 4); rput32p(len); } #define RINGBUFFER_SECONDARY_GUARD 0x20000 void Xe_pRBKick(struct XenosDevice *xe) { // printf("kick: wptr = %x, last_wptr = %x\n", rb_secondary_wptr, last_wptr); Xe_pRBKickSegment(xe, xe->last_wptr, xe->rb_secondary_wptr - xe->last_wptr); xe->rb_secondary_wptr += (-xe->rb_secondary_wptr)&0x1F; /* 128byte align */ if (xe->rb_secondary_wptr >= RINGBUFFER_SECONDARY_SIZE) Xe_Fatal(xe, "increase guardband"); if (xe->rb_secondary_wptr > (RINGBUFFER_SECONDARY_SIZE - RINGBUFFER_SECONDARY_GUARD)) xe->rb_secondary_wptr = 0; xe->last_wptr = xe->rb_secondary_wptr; Xe_pRBCommitPrimary(xe); } #define SEGMENT_SIZE 1024 void Xe_pRBMayKick(struct XenosDevice *xe) { // printf("may kick: wptr = %x, last_wptr = %x\n", rb_secondary_wptr, last_wptr); int distance = xe->rb_secondary_wptr - xe->last_wptr; if (distance < 0) distance += RINGBUFFER_SECONDARY_SIZE; if (distance >= SEGMENT_SIZE) Xe_pRBKick(xe); } u32 Xe_pRBAlloc(struct 
XenosDevice *xe) { u32 rb_primary_phys; xe->rb_primary = Xe_pAlloc(xe, &rb_primary_phys, RINGBUFFER_PRIMARY_SIZE * 4, 0); xe->rb_secondary = Xe_pAlloc(xe, &xe->rb_secondary_base, RINGBUFFER_SECONDARY_SIZE * 4, 0x100); return rb_primary_phys; } void Xe_pSetSurfaceClip(struct XenosDevice *xe, int offset_x, int offset_y, int sc_left, int sc_top, int sc_right, int sc_bottom) { rput32(0x00022080); rput32(xy32(offset_x, offset_y)); rput32(xy32(sc_left, sc_top)); rput32(xy32(sc_right, sc_bottom)); } void Xe_pSetBin(struct XenosDevice *xe, u32 mask_low, u32 select_low, u32 mask_hi, u32 select_hi) { rput32(0xc0006000); rput32(mask_low); rput32(0xc0006200); rput32(select_low); rput32(0xc0006100); rput32(mask_hi); rput32(0xc0006300); rput32(select_hi); } void Xe_pWaitUntilIdle(struct XenosDevice *xe, u32 what) { rput32(0x000005c8); rput32(what); } void Xe_pDrawNonIndexed(struct XenosDevice *xe, int num_points, int primtype) { rput32(0xc0012201); rput32(0x00000000); rput32(0x00000080 | (num_points << 16) | primtype); } void Xe_pDrawIndexedPrimitive(struct XenosDevice *xe, int primtype, int num_points, u32 indexbuffer, u32 indexbuffer_size, int indextype) { assert(num_points < 65536); int type = 0; rput32(0xc0032201); rput32(0x00000000); rput32(0x00000000 | (type << 6) | primtype | (num_points << 16) | (indextype << 11)); rput32(indexbuffer); rput32(indexbuffer_size | 0x40000000); } void Xe_pSetIndexOffset(struct XenosDevice *xe, int offset) { rput32(0x00002102); rput32(offset); } void Xe_pResetRingbuffer(struct XenosDevice *xe) { w32(0x0704, r32(0x0704) | 0x80000000); w32(0x017c, 0); w32(0x0714, 0); w32(0x0704, r32(0x0704) &~0x80000000); } void Xe_pSetupRingbuffer(struct XenosDevice *xe, u32 buffer_base, u32 size_in_l2qw) { Xe_pResetRingbuffer(xe); w32(0x0704, size_in_l2qw | 0x8020000); w32(0x0700, buffer_base); w32(0x0718, 0x10); } void Xe_pLoadUcodes(struct XenosDevice *xe, const u32 *ucode0, const u32 *ucode1) { int i; w32(0x117c, 0); usleep(100); for (i = 0; i < 0x120; 
++i) w32(0x1180, ucode0[i]); w32(0x117c, 0); usleep(100); for (i = 0; i < 0x120; ++i) r32(0x1180); w32(0x07e0, 0); for (i = 0; i < 0x900; ++i) w32(0x07e8, ucode1[i]); w32(0x07e4, 0); for (i = 0; i < 0x900; ++i) if (r32(0x07e8) != ucode1[i]) break; if (i != 0x900) Xe_Fatal(xe, "ucode1 microcode verify error\n"); } void Xe_pWaitReady(struct XenosDevice *xe) { int timeout = 1<<24; while (r32(0x1740) & 0x80000000) { if (!timeout--) Xe_Fatal(xe, "timeout in init, likely the GPU was already hung before we started\n"); } } void Xe_pWaitReady2(struct XenosDevice *xe) { while (!(r32(0x1740) & 0x00040000)); } void Xe_pInit1(struct XenosDevice *xe) { w32(0x01a8, 0); w32(0x0e6c, 0xC0F0000); w32(0x3400, 0x40401); usleep(1000); w32(0x3400, 0x40400); w32(0x3300, 0x3A22); w32(0x340c, 0x1003F1F); w32(0x00f4, 0x1E); } void Xe_pReset(struct XenosDevice *xe) { Xe_pWaitReady2(xe); // Xe_pWaitReady(xe); #if 0 printf("waiting for reset.\n"); do { w32(0x00f0, 0x8064); r32(0x00f0); w32(0x00f0, 0); w32(0x00f0, 0x11800); r32(0x00f0); w32(0x00f0, 0); usleep(1000); } while (r32(0x1740) & 0x80000000); #endif Xe_pInit1(xe); } void Xe_pInit0(struct XenosDevice *xe, u32 buffer_base, u32 size_in_l2qw) { w32(0x07d8, 0x1000FFFF); usleep(2000); w32(0x00f0, 1); (void)r32(0x00f0); usleep(1000); w32(0x00f0, 0); usleep(1000); Xe_pSetupRingbuffer(xe, buffer_base, size_in_l2qw); Xe_pWaitReady(xe); if (!(r32(0x07d8) & 0x10000000)) Xe_Fatal(xe, "something wrong (1)\n"); w32(0x07d8, 0xFFFF); usleep(1000); w32(0x3214, 7); w32(0x3294, 1); w32(0x3408, 0x800); Xe_pWaitReady(xe); if (r32(0x0714)) Xe_Fatal(xe, "[WARN] something wrong (2)\n"); if (r32(0x0710)) Xe_Fatal(xe, "[WARN] something wrong (3)\n"); w32(0x07ec, 0x1A); } void Xe_pSetup(struct XenosDevice *xe, u32 buffer_base, u32 buffer_size, const u32 *ucode0, const u32 *ucode1) { Xe_pWaitReady(xe); w32(0x07d8, 0x1000FFFF); Xe_pSetupRingbuffer(xe, buffer_base, buffer_size); Xe_pLoadUcodes(xe, ucode0, ucode1); Xe_pWaitReady(xe); w32(0x07d8, 0xFFFF); w32(0x07d0, 
0xFFFF); w32(0x07f0, 0); w32(0x0774, 0); w32(0x0770, 0); w32(0x3214, 7); w32(0x3294, 1); w32(0x3408, 0x800); Xe_pInit0(xe, buffer_base, buffer_size); Xe_pWaitReady(xe); } void Xe_pMasterInit(struct XenosDevice *xe, u32 buffer_base) { if ((r32(0x0e6c) & 0xF00) != 0xF00) printf("something wrong (3)\n"); FILE *f = fopen("ucode0.bin", "rb"); if (!f) Xe_Fatal(xe, "ucode0.bin %m"); fread(xe->ucode0, 0x120*4, 1, f); fclose(f); f = fopen("ucode1.bin", "rb"); if (!f) Xe_Fatal(xe, "ucode1.bin: %m"); fread(xe->ucode1, 0x900*4, 1, f); fclose(f); Xe_pSetup(xe, buffer_base, 0xC, xe->ucode0, xe->ucode1); w32(0x07d4, 0); w32(0x07d4, 1); w32(0x2054, 0x1E); w32(0x2154, 0x1E); w32(0x3c10, 0xD); w32(0x3c40, 0x17); w32(0x3c48, 0); while (r32(0x3c4c) & 0x80000000); w32(0x3c40, 0x1017); w32(0x3c48, 0); while (r32(0x3c4c) & 0x80000000); w32(0x87e4, 0x17); } void Xe_pEnableWriteback(struct XenosDevice *xe, u32 addr, int blocksize) { u32 v = r32(0x0704); v &= ~0x8003F00; w32(0x0704, v); w32(0x070c, addr | 2); w32(0x0704, v | (blocksize << 8)); } void Xe_pGInit0(struct XenosDevice *xe) { rput32(0xc0003b00); rput32(0x00000300); rput32(0xc0192b00); rput32(0x00000000); rput32(0x00000018); rput32(0x00001003); rput32(0x00001200); rput32(0xc4000000); rput32(0x00001004); rput32(0x00001200); rput32(0xc2000000); rput32(0x00001005); rput32(0x10061200); rput32(0x22000000); rput32(0xc8000000); rput32(0x00000000); rput32(0x02000000); rput32(0xc800c000); rput32(0x00000000); rput32(0xc2000000); rput32(0xc888c03e); rput32(0x00000000); rput32(0xc2010100); rput32(0xc8000000); rput32(0x00000000); rput32(0x02000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0xc00a2b00); rput32(0x00000001); rput32(0x00000009); rput32(0x00000000); rput32(0x1001c400); rput32(0x22000000); rput32(0xc80f8000); rput32(0x00000000); rput32(0xc2000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00012180); rput32(0x1000000e); rput32(0x00000000); rput32(0x00022100); rput32(0x0000ffff); 
rput32(0x00000000); rput32(0x00000000); rput32(0x00022204); rput32(0x00010000); rput32(0x00010000); rput32(0x00000300); rput32(0x00002312); rput32(0x0000ffff); rput32(0x0000200d); rput32(0x00000000); rput32(0x00002200); rput32(0x00000000); rput32(0x00002203); rput32(0x00000000); rput32(0x00002208); rput32(0x00000004); rput32(0x00002104); rput32(0x00000000); rput32(0x00002280); rput32(0x00080008); rput32(0x00002302); rput32(0x00000004); Xe_pSetSurfaceClip(xe, 0, 0, 0, 0, 16, 16); } void Xe_pGInit1(struct XenosDevice *xe, int arg) { rput32(0x000005c8); rput32(0x00020000); rput32(0x00078d00); rput32(arg | 1); rput32(arg | 1); rput32(arg | 1); rput32(arg | 1); rput32(arg | 1); rput32(arg | 1); rput32(arg | 1); rput32(arg | 1); rput32(0x00000d00); rput32(arg); } void Xe_pGInit2(struct XenosDevice *xe) { int i; for (i=0; i<24; ++i) { rput32(0xc0003600); rput32(0x00010081); } } void Xe_pGInit3(struct XenosDevice *xe) { rput32(0x000005c8); rput32(0x00020000); rput32(0x00000d04); rput32(0x00000000); } void Xe_pGInit4(struct XenosDevice *xe) { rput32(0x00000d02); rput32(0x00010800); rput32(0x00030a02); rput32(0xc0100000); rput32(0x07f00000); rput32(0xc0000000); rput32(0x00100000); Xe_pGInit3(xe); } void Xe_pGInit5(struct XenosDevice *xe) { rput32(0x00000d01); rput32(0x04000000); rput32(0xc0022100); rput32(0x00000081); rput32(0xffffffff); rput32(0x80010000); rput32(0xc0022100); rput32(0x00000082); rput32(0xffffffff); rput32(0x00000000); rput32(0x00000e42); rput32(0x00001f60); rput32(0x00000c85); rput32(0x00000003); rput32(0x0000057c); rput32(0x0badf00d); rput32(0x0000057b); rput32(0x00000000); } void Xe_pGInit6(struct XenosDevice *xe) { Xe_pSetSurfaceClip(xe, 0, 0, 0, 0, 1024, 720); rput32(0x0002857e); rput32(0x00010017); rput32(0x00000000); rput32(0x03ff02cf); rput32(0x0002857e); rput32(0x00010017); rput32(0x00000004); rput32(0x03ff02cf); } void Xe_pGInit7(struct XenosDevice *xe) { rput32(0x000005c8); rput32(0x00020000); rput32(0x00000f01); rput32(0x0000200e); } void 
Xe_pGInit8(struct XenosDevice *xe) { Xe_pSetSurfaceClip(xe, 0, 0, 0, 0, 1024, 720); } void Xe_pGInit9(struct XenosDevice *xe) { int i; rput32(0x0000057e); rput32(0x00010019); Xe_pGInit0(xe); for (i = 0x10; i <= 0x70; ++i) Xe_pGInit1(xe, 0x00000000 | (i << 12) | ((0x80 - i) << 4)); Xe_pGInit2(xe); rput32(0x0000057e); rput32(0x0001001a); Xe_pGInit8(xe); } void Xe_pGInit10(struct XenosDevice *xe) { Xe_pSetSurfaceClip(xe, 0, 0, 0, 0, 1024, 720); rput32(0x0000057e); rput32(0x00010019); rput32(0xc0003b00); rput32(0x00000300); Xe_pGInit7(xe); Xe_pGInit9(xe); } void Xe_pGInit(struct XenosDevice *xe) { rput32(0xc0114800); rput32(0x000003ff); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000080); rput32(0x00000100); rput32(0x00000180); rput32(0x00000200); rput32(0x00000280); rput32(0x00000300); rput32(0x00000380); rput32(0x00010800); rput32(0x00000007); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); Xe_pGInit4(xe); Xe_pGInit5(xe); Xe_pGInit6(xe); Xe_pGInit10(xe); } volatile void * ioremap(unsigned long physaddr, unsigned size, int sync); void Xe_DirtyAluConstant(struct XenosDevice *xe, int base, int len) { len += base & 15; base >>= 4; while (len > 0) { xe->alu_dirty |= 1 << base; ++base; len -= 16; } xe->dirty |= DIRTY_ALU; } void Xe_DirtyFetch(struct XenosDevice *xe, int base, int len) { len += base % 3; base /= 3; while (len > 0) { xe->fetch_dirty |= 1 << base; ++base; len -= 3; } xe->dirty |= DIRTY_FETCH; } struct XenosShader *Xe_LoadShader(struct XenosDevice *xe, const char *filename) { FILE *f = fopen(filename, "rb"); if (!f) Xe_Fatal(xe, "FATAL: shader %s not found!\n", filename); fseek(f, 0, SEEK_END); int size = ftell(f); fseek(f, 0, SEEK_SET); void *m = malloc(size); fread(m, size, 1, f); fclose(f); return Xe_LoadShaderFromMemory(xe, m); } struct XenosShader *Xe_LoadShaderFromMemory(struct XenosDevice *xe, void *m) { struct XenosShaderHeader *hdr = m; if ((hdr->magic >> 16) != 0x102a) 
Xe_Fatal(xe, "shader version: %08x, expected: something with 102a.\n", hdr->magic); struct XenosShader *s = malloc(sizeof(struct XenosShader)); memset(s, 0, sizeof(*s)); s->shader = m; struct XenosShaderData *data = m + hdr->off_shader; s->program_control = data->program_control; s->context_misc = data->context_misc; return s; } void Xe_pUploadShaderConstants(struct XenosDevice *xe, struct XenosShader *s) { struct XenosShaderHeader *hdr = s->shader; if (hdr->off_constants) { /* upload shader constants */ // printf("off_constants: %d\n", hdr->off_constants); void *constants = s->shader + hdr->off_constants; constants += 16; int size = *(u32*)constants; constants += 4; size -= 0xC; // printf("uploading shader constants..\n"); while (size) { u16 start = *(u16*)constants; constants += 2; u16 count = *(u16*)constants; constants += 2; u32 offset = *(u32*)constants; constants += 4; float *c = s->shader + hdr->offset + offset; // printf("start: %d, count: %d, off: %d\n", start, count, hdr->offset + offset); // int i; // for (i=0; ialu_constants + start * 4, c, count * 4); Xe_DirtyAluConstant(xe, start, 4); size -= 8; } } } int Xe_VBFCalcSize(struct XenosDevice *xe, const struct XenosVBFElement *fmt) { switch (fmt->fmt) { case 6: // char4 return 4; case 37: // float2 return 8; case 38: // float4 return 16; case 57: // float3 return 12; default: Xe_Fatal(xe, "Unknown VBF %d!\n", fmt->fmt); } } int Xe_pVBFNrComponents(struct XenosDevice *xe, const struct XenosVBFElement *fmt) { switch (fmt->fmt) { case 6: // char4 return 4; case 37: // float2 return 2; case 38: // float4 return 4; case 57: // float3 return 3; default: Xe_Fatal(xe, "Unknown VBF %d!\n", fmt->fmt); } } int Xe_VBFCalcStride(struct XenosDevice *xe, const struct XenosVBFFormat *fmt) { int i; int total_size = 0; for (i=0; inum; ++i) total_size += Xe_VBFCalcSize(xe, &fmt->e[i]); return total_size; } void Xe_pInvalidateGpuCache(struct XenosDevice *xe, int base, int size) { rput32(0x00000a31); rput32(0x01000000); 
rput32(0x00010a2f); rput32(size); rput32(base); rput32(0xc0043c00); rput32(0x00000003); rput32(0x00000a31); rput32(0x00000000); rput32(0x80000000); rput32(0x00000008); } void Xe_pInvalidateGpuCacheAll(struct XenosDevice *xe, int base, int size) { rput32(0x00000a31); rput32(0x03000100); rput32(0x00010a2f); rput32(size); rput32(base); rput32(0xc0043c00); rput32(0x00000003); rput32(0x00000a31); rput32(0x00000000); rput32(0x80000000); rput32(0x00000008); } void Xe_pUnlock(struct XenosDevice *xe, struct XenosLock *lock) { if (!lock->start) Xe_Fatal(xe, "unlock without lock"); if (lock->flags & XE_LOCK_WRITE) { Xe_pSyncToDevice(xe, lock->start, lock->size); Xe_pInvalidateGpuCache(xe, lock->phys, lock->size); } lock->start = 0; } void Xe_pLock(struct XenosDevice *xe, struct XenosLock *lock, void *addr, u32 phys, int size, int flags) { if (!flags) Xe_Fatal(xe, "flags=0"); if (lock->start) Xe_Fatal(xe, "locked twice"); if (lock->flags & XE_LOCK_READ) { /* *you* must make sure that the GPU already flushed this content. (usually, it is, though) */ Xe_pSyncFromDevice(xe, addr, size); } lock->start = addr; lock->phys = phys; lock->size = size; lock->flags = flags; } /* shaders are not specific to a vertex input format. the vertex format specified in a vertex shader is just dummy. Thus we need to patch the vfetch instructions to match our defined structure. 
*/ void Xe_ShaderApplyVFetchPatches(struct XenosDevice *xe, struct XenosShader *sh, unsigned int index, const struct XenosVBFFormat *fmt) { assert(index < XE_SHADER_MAX_INSTANCES); assert(sh->shader_phys[index]); struct XenosLock lock; memset(&lock, 0, sizeof(lock)); Xe_pLock(xe, &lock, sh->shader_instance[index], sh->shader_phys[index], sh->shader_phys_size, XE_LOCK_READ|XE_LOCK_WRITE); int stride = Xe_VBFCalcStride(xe, fmt); if (stride & 3) Xe_Fatal(xe, "your vertex buffer format is not DWORD aligned.\n"); stride /= 4; struct XenosShaderHeader *hdr = sh->shader; struct XenosShaderData *data = sh->shader + hdr->off_shader; void *shader_code = sh->shader_instance[index]; u32 *c = (u32*)(data + 1); int skip = *c++; int num_vfetch = *c; ++c; c += skip * 2; int i; int fetched_to = 0; for (i=0; i> 12) & 0xF; int stream = (vfetch_patch >> 16) & 0xF; int insn = vfetch_patch & 0xFFF; // printf("raw: %08x\n", vfetch_patch); // printf("type=%d, stream=%d, insn=%d\n", type, stream, insn); u32 *vfetch = shader_code + insn * 12; // printf(" old vfetch: %08x %08x %08x\n", vfetch[0], vfetch[1], vfetch[2]); // printf(" old swizzle: %08x\n", vfetch[1] & 0xFFF); int Offset = (vfetch[2] & 0x7fffff00) >> 8; int DataFormat = (vfetch[1] & 0x003f0000) >> 16; int Stride= (vfetch[2] & 0x000000ff); int Signed= (vfetch[1] & 0x00001000) >> 12; int NumFormat = (vfetch[1] & 0x00002000) >> 13; int PrefetchCount= (vfetch[0] & 0x38000000) >> 27; // printf(" old Offset=%08x, DataFormat=%d, Stride=%d, Signed=%d, NumFormat=%d, PrefetchCount=%d\n", // Offset,DataFormat, Stride, Signed, NumFormat, PrefetchCount); /* let's find the element which applies for this. 
*/ int j; int offset = 0; for (j=0; j < fmt->num; ++j) { if ((fmt->e[j].usage == type) && (fmt->e[j].index == stream)) break; offset += Xe_VBFCalcSize(xe, &fmt->e[j]); } offset /= 4; if (j == fmt->num) Xe_Fatal(xe, "shader requires input type %d_%d, which wasn't found in vertex format.\n", type, stream); Offset = offset; DataFormat = fmt->e[j].fmt; Signed = 0; Stride = stride; NumFormat = 0; // fraction if (DataFormat != 6) NumFormat = 1; int to_fetch = 0; /* if we need fetching... */ if (fetched_to <= offset + ((Xe_VBFCalcSize(xe, &fmt->e[j])+3)/4)) to_fetch = stride - fetched_to; if (to_fetch > 8) to_fetch = 8; to_fetch = 1; /* FIXME: prefetching doesn't always work. */ int is_mini = 0; if (to_fetch == 0) { PrefetchCount = 0; is_mini = 1; } else PrefetchCount = to_fetch - 1; fetched_to += to_fetch; /* patch vfetch instruction */ vfetch[0] &= ~(0x00000000|0x00000000|0x00000000|0x00000000|0x00000000|0x38000000|0x00000000); vfetch[1] &= ~(0x00000000|0x003f0000|0x00000000|0x00001000|0x00002000|0x00000000|0x40000000); vfetch[2] &= ~(0x7fffff00|0x00000000|0x000000ff|0x00000000|0x00000000|0x00000000|0x00000000); vfetch[2] |= Offset << 8; vfetch[1] |= DataFormat << 16; vfetch[2] |= Stride; vfetch[1] |= Signed << 12; vfetch[1] |= NumFormat << 13; vfetch[0] |= PrefetchCount << 27; vfetch[1] |= is_mini << 30; // printf("specified swizzle: %08x\n", fmt->e[j].swizzle); int comp; int nrcomp = Xe_pVBFNrComponents(xe, &fmt->e[j]); for (comp = 0; comp < 4; comp++) { int shift = comp * 3; int sw = (vfetch[1] >> shift) & 7; /* see original swizzle, xyzw01_? */ // printf("comp%d sw=%c ", comp, "xyzw01?_"[sw]); if ((sw < 4) && (sw >= nrcomp)) /* refer to an unavailable position? 
*/ { if (sw == 3) // a/w sw = 5; // 1 else sw = 4; // 0 } // printf(" -> %c\n", "xyzw01?_"[sw]); vfetch[1] &= ~(7<> 8; DataFormat = (vfetch[1] & 0x003f0000) >> 16; Stride= (vfetch[2] & 0x000000ff); Signed= (vfetch[1] & 0x00001000) >> 12; NumFormat = (vfetch[1] & 0x00002000) >> 13; PrefetchCount= (vfetch[0] & 0x38000000) >> 27; // printf(" new Offset=%08x, DataFormat=%d, Stride=%d, Signed=%d, NumFormat=%d, PrefetchCount=%d\n", // Offset,DataFormat, Stride, Signed, NumFormat, PrefetchCount); // printf(" new vfetch: %08x %08x %08x\n", vfetch[0], vfetch[1], vfetch[2]); } Xe_pUnlock(xe, &lock); } void Xe_InstantiateShader(struct XenosDevice *xe, struct XenosShader *sh, unsigned int index) { assert(index < XE_SHADER_MAX_INSTANCES); struct XenosShaderHeader *hdr = sh->shader; struct XenosShaderData *data = sh->shader + hdr->off_shader; void *shader_code = sh->shader + data->sh_off + hdr->offset; sh->shader_phys_size = data->sh_size; printf("allocating %d bytes\n", data->sh_size); void *p = Xe_pAlloc(xe, &sh->shader_phys[index], data->sh_size, 0x100); memcpy(p, shader_code, data->sh_size); Xe_pSyncToDevice(xe, p, data->sh_size); sh->shader_instance[index] = p; } int Xe_GetShaderLength(struct XenosDevice *xe, void *sh) { struct XenosShaderHeader *hdr = sh; struct XenosShaderData *data = sh + hdr->off_shader; return data->sh_off + hdr->offset + data->sh_size; } void Xe_Init(struct XenosDevice *xe) { unsigned long kernel_mem[3] = {0,0,0}; int f = open("/proc/device-tree/memory/reg", O_RDONLY); if (f >= 0) { read(f, &kernel_mem, 12); close(f); } if (!kernel_mem[2]) Xe_Fatal(xe, "couldn't determine memory size\n"); printf("linux kernel uses %ld MB\n", kernel_mem[2] / 1024 / 1024); if (kernel_mem[2] > RINGBUFFER_BASE) Xe_Fatal(xe, "sorry, physical memory to use is blocked by kernel.\n"); xe->regs = ioremap(0xec800000ULL, 0x20000, 1); if (!xe->regs) Xe_Fatal(xe, "ioremap failed - %m"); xe->rb = xe->rb_primary = ioremap(RINGBUFFER_BASE, RINGBUFFER_SIZE, 0); // optimize framebuffer 
to the end, so we have a bit more space: w32(0x6110, 0x1fc00000); xe->tex_fb.ptr = r32(0x6110); xe->tex_fb.pitch = r32(0x6120) * 4; xe->tex_fb.width = r32(0x6134); xe->tex_fb.height = r32(0x6138); xe->tex_fb.bypp = 4; xe->tex_fb.base = (void*)ioremap(xe->tex_fb.ptr, xe->tex_fb.height * xe->tex_fb.pitch, 0); xe->tex_fb.format = XE_FMT_BGRA | XE_FMT_8888; xe->tex_fb.tiled = 1; #if 0 time_t t = time(0); while (t == time(0)); t = time(0) + 10; int nr = 0; while (t > time(0)) { memcpy(rb, rb + RINGBUFFER_SIZE / 2, RINGBUFFER_SIZE / 2); ++nr; } printf("%d kB/s (%d)\n", nr * (RINGBUFFER_SIZE/1024)/2 / 10, nr); return 0; #endif u32 rb_primary_phys = Xe_pRBAlloc(xe); // memset((void*)xe->rb, 0xCC, RINGBUFFER_SIZE); Xe_pMasterInit(xe, rb_primary_phys); Xe_pEnableWriteback(xe, RINGBUFFER_BASE + RPTR_WRITEBACK, 6); Xe_pSyncFromDevice(xe, xe->rb + RPTR_WRITEBACK, 4); Xe_pWriteReg(xe, 0x0774, RINGBUFFER_BASE + SCRATCH_WRITEBACK); Xe_pWriteReg(xe, 0x0770, 0x20033); Xe_pWriteReg(xe, 0x15e0, 0x1234567); Xe_pGInit(xe); Xe_pInvalidateGpuCache(xe, RINGBUFFER_BASE, RINGBUFFER_SIZE); } void Xe_SetRenderTarget(struct XenosDevice *xe, struct XenosSurface *rt) { xe->rt = rt; xe->vp_xres = rt->width; xe->vp_yres = rt->height; xe->msaa_samples = 0; xe->edram_colorformat = 0; int tile_size_x = (xe->msaa_samples < 2) ? 80 : 40, tile_size_y = (xe->msaa_samples > 0) ? 8 : 16; if ((xe->edram_colorformat == 15) || (xe->edram_colorformat == 7) || (xe->edram_colorformat == 5)) tile_size_x /= 2; int tiles_per_line = (xe->vp_xres + tile_size_x - 1) / tile_size_x; tiles_per_line += 1; tiles_per_line &= ~1; int tiles_height = (xe->vp_yres + tile_size_y - 1) / tile_size_y; // what about 64bit targets? 
xe->edram_pitch = tiles_per_line * tile_size_x; xe->edram_hizpitch = tiles_per_line * tile_size_x; xe->edram_color0base = 0; xe->edram_depthbase = tiles_per_line * tiles_height; } void Xe_pSetEDRAMLayout(struct XenosDevice *xe) { rput32(0x00022000); rput32(SurfaceInfo(xe->edram_pitch, xe->msaa_samples, xe->edram_hizpitch)); // SurfaceInfo rput32((xe->edram_colorformat << 16) | xe->edram_color0base); rput32(xe->edram_depthbase | (0<<16) ); // depth info, float Z } void Xe_ResolveInto(struct XenosDevice *xe, struct XenosSurface *surface, int source, int clear) { Xe_pSetSurfaceClip(xe, 0, 0, 0, 0, surface->width, surface->height); Xe_VBBegin(xe, 2); float vbdata[] = {-.5, -.5, /* never ever dare to mess with these values. NO, you can not resolve arbitrary areas or even shapes. */ surface->width - .5, 0, surface->width - .5, surface->height - .5 }; Xe_VBPut(xe, vbdata, sizeof(vbdata) / 4); struct XenosVertexBuffer *vb = Xe_VBEnd(xe); Xe_VBPoolAdd(xe, vb); Xe_pSetEDRAMLayout(xe); rput32(0x00002104); rput32(0x0000000f); // colormask rput32(0x0005210f); rput32(0x44000000); rput32(0x44000000); rput32(0xc3b40000); rput32(0x43b40000); rput32(0x3f800000); rput32(0x00000000); int msaavals[] = {0,4,6}; int pitch; switch (surface->format & XE_FMT_MASK) { case XE_FMT_8888: pitch = surface->pitch / 4; break; case XE_FMT_16161616: pitch = surface->pitch / 8; break; default: Xe_Fatal(xe, "unsupported resolve target format"); } rput32(0x00032318); rput32(0x00100000 | (msaavals[xe->msaa_samples]<<4) | (clear << 8) | source ); // 300 = color,depth clear enabled! 
rput32(surface->ptr); rput32(xy32(pitch, surface->height)); rput32(0x01000000 | ((surface->format&XE_FMT_MASK)<<7) | ((surface->format&~XE_FMT_MASK)>>6)); Xe_pWriteReg(xe, 0x8c74, 0xffffff00); // zbuffer / stencil clear: z to -1, stencil to 0 unsigned int clearv[2]; switch (xe->edram_colorformat) { case 0: case 1: clearv[0] = clearv[1] = xe->clearcolor; break; case 4: case 5: clearv[0] = (xe->clearcolor & 0xFF000000); clearv[0] |= (xe->clearcolor & 0x00FF0000)>>8; clearv[0] >>= 1; clearv[0] |= (clearv[0] >> 8) & 0x00FF00FF; clearv[1] = (xe->clearcolor & 0x0000FF00)<<16; clearv[1] |= (xe->clearcolor & 0x000000FF)<<8; clearv[1] >>= 1; clearv[1] |= (clearv[1] >> 8) & 0x00FF00FF; break; default: clearv[0] = clearv[1] = 0; } Xe_pWriteReg(xe, 0x8c78, clearv[0]); Xe_pWriteReg(xe, 0x8c7c, clearv[1]); rput32(0xc0003b00); rput32(0x00000100); rput32(0xc0102b00); rput32(0x00000000); rput32(0x0000000f); rput32(0x10011002); rput32(0x00001200); rput32(0xc4000000); rput32(0x00000000); rput32(0x1003c200); rput32(0x22000000); rput32(0x00080000); rput32(0x00253b48); rput32(0x00000002); rput32(0xc80f803e); rput32(0x00000000); rput32(0xc2000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00012180); rput32(0x00010002); rput32(0x00000000); if (surface->ptr) rput32(0x00002208); rput32(0x00000006); else rput32(0x00002208); rput32(0x00000005); rput32(0x00002200); rput32(0x8777); rput32(0x000005c8); rput32(0x00020000); rput32(0x00002203); rput32(0x00000000); rput32(0x00022100); rput32(0x0000ffff); rput32(0x00000000); rput32(0x00000000); rput32(0x00022204); rput32(0x00010000); rput32(0x00010000); rput32(0x00000300); rput32(0x00002312); rput32(0x0000ffff); rput32(0x0000200d); rput32(0x00000000); rput32(0x00054800); rput32((vb->phys_base) | 3); rput32(0x1000001a); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00025000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0xc0003600); rput32(0x00030088); 
rput32(0xc0004600); rput32(0x00000006); rput32(0x00002007); rput32(0x00000000); Xe_pInvalidateGpuCacheAll(xe, surface->ptr, surface->pitch * surface->height); rput32(0x0000057e); rput32(0x00010001); rput32(0x00002318); rput32(0x00000000); rput32(0x0000231b); rput32(0x00000000); #if 0 rput32(0x00001844); rput32(surface->ptr); rput32(0xc0022100); rput32(0x00001841); rput32(0xfffff8ff); rput32(0x00000000); rput32(0x00001930); rput32(0x00000000); rput32(0xc0003b00); rput32(0x00007fff); #endif #if 0 rput32(0xc0025800); rput32(0x00000003); // event zeugs rput32(0x1fc4e006); rput32(0xbfb75313); rput32(0xc0025800); rput32(0x00000003); rput32(0x1fc4e002); rput32(0x000286d1); #endif xe->dirty |= DIRTY_MISC; } void Xe_Clear(struct XenosDevice *xe, int flags) { struct XenosSurface surface = *xe->rt; surface.ptr = 0; Xe_ResolveInto(xe, &surface, 0, flags); } void Xe_Resolve(struct XenosDevice *xe) { struct XenosSurface *surface = xe->rt; Xe_ResolveInto(xe, surface, XE_SOURCE_COLOR, XE_CLEAR_COLOR|XE_CLEAR_DS); } void VERTEX_FETCH(u32 *dst, u32 base, int len) { dst[0] = base | 3; dst[1] = 0x10000002 | (len << 2); } void TEXTURE_FETCH(u32 *dst, u32 base, int width, int height, int pitch, int tiled, int format, u32 base_mip, int anisop) { switch (format & XE_FMT_MASK) { case XE_FMT_16161616: pitch /= 256; break; case XE_FMT_8888: pitch /= 128; break; default: abort(); } dst[0] = 0x00000002 | (pitch << 22) | (tiled << 31); dst[1] = 0x00000000 | base | format; /* BaseAddress */ dst[2] = (height << 13) | width; dst[3] = 0x00a80c14 | (anisop << 25); if (base_mip) dst[4] = 0x00000e03; else dst[4] = 0; dst[5] = 0x00000a00 | base_mip; /* MipAddress */ } void Xe_pLoadShader(struct XenosDevice *xe, int base, int type, int size) { rput32(0xc0012700); rput32(base | type); rput32(size); } void Xe_pAlign(struct XenosDevice *xe) { while ((xe->rb_secondary_wptr&3) != 3) rput32(0x80000000); } void Xe_pBlockUntilIdle(struct XenosDevice *xe) { Xe_pWriteReg(xe, 0x1720, 0x20000); } void 
Xe_pStep(struct XenosDevice *xe, int x) { Xe_pWriteReg(xe, 0x15e0, x); } void Xe_pStuff(struct XenosDevice *xe) { rput32(0x00072380); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); rput32(0x00000000); } void Xe_Fatal(struct XenosDevice *xe, const char *fmt, ...) { va_list arg; va_start(arg, fmt); vprintf(fmt, arg); va_end(arg); abort(); } struct XenosSurface *Xe_GetFramebufferSurface(struct XenosDevice *xe) { return &xe->tex_fb; } void Xe_Execute(struct XenosDevice *xe) { Xe_pBlockUntilIdle(xe); Xe_pRBKick(xe); } void Xe_pDebugSync(struct XenosDevice *xe) { Xe_pWriteReg(xe, 0x15e0, xe->frameidx); Xe_Execute(xe); // printf("waiting for frameidx %08x\n", xe->frameidx); int timeout = 1<<24; do { Xe_pSyncFromDevice(xe, xe->rb + SCRATCH_WRITEBACK, 4); if (!timeout--) Xe_Fatal(xe, "damn, the GPU seems to hang. There is no (known) way to recover, you have to reboot.\n"); // usleep(1000); } while (*(volatile u32*)(xe->rb + SCRATCH_WRITEBACK) != xe->frameidx) ; xe->frameidx++; /// printf("done\n"); } void Xe_Sync(struct XenosDevice *xe) { Xe_pDebugSync(xe); Xe_VBReclaim(xe); } int stat_alu_uploaded = 0; void Xe_pUploadALUConstants(struct XenosDevice *xe) { while (xe->alu_dirty) { int start, end; for (start = 0; start < 32; ++start) if (xe->alu_dirty & (1<alu_dirty & (1<alu_dirty &= ~(1<alu_constants + base * 4, num); } } void Xe_pUploadFetchConstants(struct XenosDevice *xe) { while (xe->fetch_dirty) { int start, end; for (start = 0; start < 32; ++start) if (xe->fetch_dirty & (1<fetch_dirty & (1<fetch_dirty &= ~(1<fetch_constants + base * 2, num); } } void Xe_pUploadClipPlane(struct XenosDevice *xe) { Xe_pAlign(xe); rput32(0x00172388); rput(xe->clipplane, 6*4); } void Xe_pUploadIntegerConstants(struct XenosDevice *xe) { Xe_pAlign(xe); rput32(0x00274900); rput(xe->integer_constants, 10*4); } void Xe_pUploadControl(struct XenosDevice *xe) { rput32(0x00082200); rput(xe->controlpacket, 
9); } void Xe_pUploadShader(struct XenosDevice *xe) { u32 program_control = 0, context_misc = 0; if (xe->ps) { Xe_pLoadShader(xe, xe->ps->shader_phys[0], SHADER_TYPE_PIXEL, xe->ps->shader_phys_size); Xe_pUploadShaderConstants(xe, xe->ps); program_control |= xe->ps->program_control; context_misc |= xe->ps->context_misc; } if (xe->vs) { Xe_pLoadShader(xe, xe->vs->shader_phys[xe->vs_index], SHADER_TYPE_VERTEX, xe->vs->shader_phys_size); Xe_pUploadShaderConstants(xe, xe->vs); program_control |= xe->vs->program_control; context_misc |= xe->vs->context_misc; } rput32(0x00022180); rput32(program_control); rput32(context_misc); rput32(0xFFFFFFFF); /* interpolation mode */ } void Xe_pInitControl(struct XenosDevice *xe) { xe->controlpacket[0] = 0x00700736|0x80; // DEPTH xe->controlpacket[1] = 0x00010001; // BLEND xe->controlpacket[2] = 0x87000007; // COLOR xe->controlpacket[3] = 0x00000000; // HI xe->controlpacket[4] = 0x00080000; // CLIP xe->controlpacket[5] = 0x00010006; // MODE if (xe->msaa_samples) xe->controlpacket[5] |= 1<<15; xe->controlpacket[6] = 0x0000043f; // VTE xe->controlpacket[7] = 0; xe->controlpacket[8] = 0x00000004; // EDRAM xe->stencildata[0] = 0xFFFF00; xe->stencildata[1] = 0xFFFF00; xe->dirty |= DIRTY_CONTROL|DIRTY_MISC; } void Xe_SetZFunc(struct XenosDevice *xe, int z_func) { xe->controlpacket[0] = (xe->controlpacket[0]&~0x70) | (z_func<<4); xe->dirty |= DIRTY_CONTROL; } void Xe_SetZWrite(struct XenosDevice *xe, int zw) { xe->controlpacket[0] = (xe->controlpacket[0]&~4) | (zw<<2); xe->dirty |= DIRTY_CONTROL; } void Xe_SetZEnable(struct XenosDevice *xe, int ze) { xe->controlpacket[0] = (xe->controlpacket[0]&~2) | (ze<<1); xe->dirty |= DIRTY_CONTROL; } void Xe_SetFillMode(struct XenosDevice *xe, int front, int back) { xe->controlpacket[5] &= ~(0x3f<<5); xe->controlpacket[5] |= front << 5; xe->controlpacket[5] |= back << 8; xe->controlpacket[5] |= 1<<3; xe->dirty |= DIRTY_CONTROL; } void Xe_SetBlendControl(struct XenosDevice *xe, int col_src, int col_op, 
int col_dst, int alpha_src, int alpha_op, int alpha_dst) { xe->controlpacket[1] = col_src | (col_op << 5) | (col_dst << 8) | (alpha_src << 16) | (alpha_op << 21) | (alpha_dst << 24); xe->dirty |= DIRTY_CONTROL; } void Xe_SetSrcBlend(struct XenosDevice *xe, unsigned int blend) { assert(blend < 32); xe->controlpacket[1] &= ~0x1F; xe->controlpacket[1] |= blend; xe->dirty |= DIRTY_CONTROL; } void Xe_SetDestBlend(struct XenosDevice *xe, unsigned int blend) { assert(blend < 32); xe->controlpacket[1] &= ~(0x1F<<8); xe->controlpacket[1] |= blend<<8; xe->dirty |= DIRTY_CONTROL; } void Xe_SetBlendOp(struct XenosDevice *xe, unsigned int blendop) { assert(blendop < 8); xe->controlpacket[1] &= ~(0x7<<5); xe->controlpacket[1] |= blendop<<5; xe->dirty |= DIRTY_CONTROL; } void Xe_SetSrcBlendAlpha(struct XenosDevice *xe, unsigned int blend) { assert(blend < 32); xe->controlpacket[1] &= ~(0x1F<<16); xe->controlpacket[1] |= blend << 16; xe->dirty |= DIRTY_CONTROL; } void Xe_SetDestBlendAlpha(struct XenosDevice *xe, unsigned int blend) { assert(blend < 32); xe->controlpacket[1] &= ~(0x1F<<24); xe->controlpacket[1] |= blend<< 24; xe->dirty |= DIRTY_CONTROL; } void Xe_SetBlendOpAlpha(struct XenosDevice *xe, unsigned int blendop) { assert(blendop < 8); xe->controlpacket[1] &= ~(0x7<<21); xe->controlpacket[1] |= blendop<<21; xe->dirty |= DIRTY_CONTROL; } void Xe_SetCullMode(struct XenosDevice *xe, unsigned int cullmode) { assert(cullmode < 8); xe->controlpacket[5] &= ~7; xe->controlpacket[5] |= cullmode; xe->dirty |= DIRTY_CONTROL; } void Xe_SetAlphaTestEnable(struct XenosDevice *xe, int enable) { xe->controlpacket[2] &= ~8; xe->controlpacket[2] |= (!!enable) << 3; xe->dirty |= DIRTY_CONTROL; } void Xe_SetAlphaFunc(struct XenosDevice *xe, unsigned int func) { assert(func <= 7); xe->controlpacket[2] &= ~7; xe->controlpacket[2] |= func; xe->dirty |= DIRTY_CONTROL; } void Xe_SetAlphaRef(struct XenosDevice *xe, float alpharef) { xe->alpharef = alpharef; xe->dirty |= DIRTY_MISC; } void 
Xe_SetStencilFunc(struct XenosDevice *xe, int bfff, unsigned int func) { assert(func <= 7); if (bfff & 1) { xe->controlpacket[0] &= ~(7<<8); xe->controlpacket[0] |= func << 8; } if (bfff & 2) { xe->controlpacket[0] &= ~(7<<20); xe->controlpacket[0] |= func << 20; } xe->dirty |= DIRTY_CONTROL; } void Xe_SetStencilEnable(struct XenosDevice *xe, unsigned int enable) { assert(enable <= 1); xe->controlpacket[0] &= ~1; xe->controlpacket[0] |= enable; xe->dirty |= DIRTY_CONTROL; } void Xe_SetStencilOp(struct XenosDevice *xe, int bfff, int fail, int zfail, int pass) { assert(fail <= 7); assert(zfail <= 7); assert(pass <= 7); if (bfff & 1) { if (fail >= 0) { xe->controlpacket[0] &= ~(7<<11); xe->controlpacket[0] |= fail << 11; } if (pass >= 0) { xe->controlpacket[0] &= ~(7<<14); xe->controlpacket[0] |= pass << 14; } if (zfail >= 0) { xe->controlpacket[0] &= ~(7<<17); xe->controlpacket[0] |= zfail << 17; } } if (bfff & 2) { if (fail >= 0) { xe->controlpacket[0] &= ~(7<<23); xe->controlpacket[0] |= fail << 23; } if (pass >= 0) { xe->controlpacket[0] &= ~(7<<26); xe->controlpacket[0] |= pass << 26; } if (zfail >= 0) { xe->controlpacket[0] &= ~(7<<29); xe->controlpacket[0] |= zfail << 29; } } xe->dirty |= DIRTY_CONTROL; } void Xe_SetStencilRef(struct XenosDevice *xe, int bfff, int ref) { if (bfff & 1) xe->stencildata[1] = (xe->stencildata[1] & ~0xFF) | ref; if (bfff & 2) xe->stencildata[0] = (xe->stencildata[0] & ~0xFF) | ref; xe->dirty |= DIRTY_MISC; } void Xe_SetStencilMask(struct XenosDevice *xe, int bfff, int mask) { if (bfff & 1) xe->stencildata[1] = (xe->stencildata[1] & ~0xFF00) | (mask<<8); if (bfff & 2) xe->stencildata[0] = (xe->stencildata[0] & ~0xFF00) | (mask<<8); xe->dirty |= DIRTY_MISC; } void Xe_SetStencilWriteMask(struct XenosDevice *xe, int bfff, int writemask) { if (bfff & 1) xe->stencildata[1] = (xe->stencildata[1] & ~0xFF0000) | (writemask<<16); if (bfff & 2) xe->stencildata[0] = (xe->stencildata[0] & ~0xFF0000) | (writemask<<16); xe->dirty |= DIRTY_MISC; } 
void Xe_InvalidateState(struct XenosDevice *xe) { xe->dirty = ~0; xe->alu_dirty = ~0; xe->fetch_dirty = ~0; Xe_pInitControl(xe); } void Xe_SetShader(struct XenosDevice *xe, int type, struct XenosShader *sh, int index) { struct XenosShader **s; int *i = 0; if (type == SHADER_TYPE_PIXEL) { s = &xe->ps; } else { s = &xe->vs; i = &xe->vs_index; assert(sh->shader_instance[index]); } if ((*s != sh) || (i && *i != index)) { *s = sh; if (i) *i = index; xe->dirty |= DIRTY_SHADER; } } void Xe_pSetState(struct XenosDevice *xe) { if (xe->dirty & DIRTY_CONTROL) Xe_pUploadControl(xe); if (xe->dirty & DIRTY_SHADER) Xe_pUploadShader(xe); if (xe->dirty & DIRTY_ALU) Xe_pUploadALUConstants(xe); if (xe->dirty & DIRTY_FETCH) { Xe_pUploadFetchConstants(xe); rput32(0x00025000); rput32(0x00000000); rput32(0x00025000); rput32(0x00000000); } if (xe->dirty & DIRTY_CLIP) Xe_pUploadClipPlane(xe); if (xe->dirty & DRITY_INTEGER) Xe_pUploadIntegerConstants(xe); // if (xe->dirty & DIRTY_MISC) { Xe_pSetSurfaceClip(xe, 0, 0, 0, 0, xe->vp_xres, xe->vp_yres); Xe_pSetEDRAMLayout(xe); rput32(0x0000200d); rput32(0x00000000); rput32(0x00012100); rput32(0x00ffffff); rput32(0x00000000); rput32(0x00002104); rput32(0x0000000f); rput32(0x0008210c); rput32(xe->stencildata[0]); rput32(xe->stencildata[1]); rputf(xe->alpharef); /* this does not work. 
*/ rputf(xe->vp_xres / 2.0); rputf(xe->vp_xres / 2.0); rputf(-xe->vp_yres / 2.0); rputf(xe->vp_yres / 2.0); rputf(1.0); rputf(0.0); int vals[] = {0, 2 | (4 << 13), 4 | (6 << 13)}; rput32(0x00002301); rput32(vals[xe->msaa_samples]); rput32(0x00002312); rput32(0x0000ffff); } xe->dirty = 0; } void Xe_SetTexture(struct XenosDevice *xe, int index, struct XenosSurface *tex) { TEXTURE_FETCH(xe->fetch_constants + index * 6, tex->ptr, tex->width - 1, tex->height - 1, tex->pitch, tex->tiled, tex->format, tex->ptr_mip, 2); Xe_DirtyFetch(xe, index + index * 3, 3); } void Xe_SetClearColor(struct XenosDevice *xe, u32 clearcolor) { xe->clearcolor = clearcolor; } struct XenosVertexBuffer *Xe_CreateVertexBuffer(struct XenosDevice *xe, int size) { struct XenosVertexBuffer *vb = malloc(sizeof(struct XenosVertexBuffer)); memset(vb, 0, sizeof(struct XenosVertexBuffer)); printf("--- alloc new vb, at %p\n", vb); vb->base = Xe_pAlloc(xe, &vb->phys_base, size, 0x1000); vb->size = 0; vb->space = size; vb->next = 0; vb->vertices = 0; return vb; } struct XenosVertexBuffer *Xe_VBPoolAlloc(struct XenosDevice *xe, int size) { struct XenosVertexBuffer **vbp = &xe->vb_pool; while (*vbp) { struct XenosVertexBuffer *vb = *vbp; if (vb->space >= size) { *vbp = vb->next; vb->next = 0; vb->size = 0; vb->vertices = 0; return vb; } vbp = &vb->next; } return Xe_CreateVertexBuffer(xe, size); } void Xe_VBPoolAdd(struct XenosDevice *xe, struct XenosVertexBuffer *vb) { struct XenosVertexBuffer **vbp = xe->vb_pool_after_frame ? &xe->vb_pool_after_frame->next : &xe->vb_pool_after_frame; while (*vbp) vbp = &(*vbp)->next; *vbp = vb; } void Xe_VBReclaim(struct XenosDevice *xe) { struct XenosVertexBuffer **vbp = xe->vb_pool ? 
&xe->vb_pool->next : &xe->vb_pool; while (*vbp) vbp = &(*vbp)->next; *vbp = xe->vb_pool_after_frame; xe->vb_pool_after_frame = 0; } void Xe_VBBegin(struct XenosDevice *xe, int pitch) { if (xe->vb_head || xe->vb_current) Xe_Fatal(xe, "FATAL: VertexBegin without VertexEnd!\n"); xe->vb_current_pitch = pitch; } void Xe_VBPut(struct XenosDevice *xe, void *data, int len) { if (len % xe->vb_current_pitch) Xe_Fatal(xe, "FATAL: VertexPut with non-even len\n"); while (len) { int remaining = xe->vb_current ? (xe->vb_current->space - xe->vb_current->size) : 0; remaining -= remaining % xe->vb_current_pitch; if (remaining > len) remaining = len; if (!remaining) { struct XenosVertexBuffer **n = xe->vb_head ? &xe->vb_current->next : &xe->vb_head; xe->vb_current = Xe_VBPoolAlloc(xe, 0x10000); *n = xe->vb_current; continue; } memcpy(xe->vb_current->base + xe->vb_current->size * 4, data, remaining * 4); xe->vb_current->size += remaining; xe->vb_current->vertices += remaining / xe->vb_current_pitch; data += remaining * 4; len -= remaining; } } struct XenosVertexBuffer *Xe_VBEnd(struct XenosDevice *xe) { struct XenosVertexBuffer *res; res = xe->vb_head; while (xe->vb_head) { Xe_pSyncToDevice(xe, xe->vb_head->base, xe->vb_head->space * 4); Xe_pInvalidateGpuCache(xe, xe->vb_head->phys_base, (xe->vb_head->space * 4) + 0x1000); xe->vb_head = xe->vb_head->next; } xe->vb_head = xe->vb_current = 0; return res; } void Xe_Draw(struct XenosDevice *xe, struct XenosVertexBuffer *vb, struct XenosIndexBuffer *ib) { Xe_pStuff(xe); if (vb->lock.start) Xe_Fatal(xe, "cannot draw locked VB"); if (ib && ib->lock.start) Xe_Fatal(xe, "cannot draw locked IB"); while (vb) { Xe_SetStreamSource(xe, 0, vb, 0, 0); Xe_pSetState(xe); rput32(0x00002007); rput32(0x00000000); Xe_pSetIndexOffset(xe, 0); if (!ib) { Xe_pDrawNonIndexed(xe, vb->vertices, XE_PRIMTYPE_TRIANGLELIST); } else Xe_pDrawIndexedPrimitive(xe, XE_PRIMTYPE_TRIANGLELIST, ib->indices, ib->phys_base, ib->indices, ib->fmt); xe->tris_drawn += vb->vertices 
/ 3; vb = vb->next; } } int Xe_pCalcVtxCount(struct XenosDevice *xe, int primtype, int primcnt) { switch (primtype) { case XE_PRIMTYPE_POINTLIST: return primcnt; case XE_PRIMTYPE_LINELIST: return primcnt * 2; case XE_PRIMTYPE_LINESTRIP: return 1 + primcnt; case XE_PRIMTYPE_TRIANGLELIST: return primcnt * 3; case XE_PRIMTYPE_TRIANGLESTRIP: /* fall trough */ case XE_PRIMTYPE_TRIANGLEFAN: return 2 + primcnt; case XE_PRIMTYPE_RECTLIST: return primcnt * 3; default: Xe_Fatal(xe, "unknown primitive type"); } } void Xe_DrawIndexedPrimitive(struct XenosDevice *xe, int type, int base_index, int min_index, int num_vertices, int start_index, int primitive_count) { int cnt; assert(xe->ps); assert(xe->vs); Xe_pStuff(xe); /* fixme */ Xe_pSetState(xe); rput32(0x00002007); rput32(0x00000000); Xe_pSetIndexOffset(xe, base_index); cnt = Xe_pCalcVtxCount(xe, type, primitive_count); int bpi = 2 << xe->current_ib->fmt; Xe_pDrawIndexedPrimitive(xe, type, cnt, xe->current_ib->phys_base + bpi * start_index, cnt, xe->current_ib->fmt); } void Xe_DrawPrimitive(struct XenosDevice *xe, int type, int start, int primitive_count) { int cnt; assert(xe->ps); assert(xe->vs); Xe_pStuff(xe); /* fixme */ Xe_pSetState(xe); rput32(0x00002007); rput32(0x00000000); Xe_pSetIndexOffset(xe, start); /* ?? 
*/ cnt = Xe_pCalcVtxCount(xe, type, primitive_count); Xe_pDrawNonIndexed(xe, 6, 4); // cnt, type); } void Xe_SetStreamSource(struct XenosDevice *xe, int index, struct XenosVertexBuffer *vb, int offset, int stride) { if (vb->lock.start) Xe_Fatal(xe, "cannot use locked VB"); xe->current_vb = vb; VERTEX_FETCH(xe->fetch_constants + (95 + index) * 2, vb->phys_base + offset, vb->space - offset); Xe_DirtyFetch(xe, 95 + index, 1); } void Xe_SetIndices(struct XenosDevice *xe, struct XenosIndexBuffer *ib) { xe->current_ib = ib; } struct XenosIndexBuffer *Xe_CreateIndexBuffer(struct XenosDevice *xe, int length, int format) { struct XenosIndexBuffer *ib = malloc(sizeof(struct XenosIndexBuffer)); memset(ib, 0, sizeof(struct XenosIndexBuffer)); ib->base = Xe_pAlloc(xe, &ib->phys_base, length, 32); ib->size = length; ib->indices = 0; ib->fmt = format; return ib; } void *Xe_VB_Lock(struct XenosDevice *xe, struct XenosVertexBuffer *vb, int offset, int size, int flags) { Xe_pLock(xe, &vb->lock, vb->base + offset, vb->phys_base + offset, size, flags); return vb->base + offset; } void Xe_VB_Unlock(struct XenosDevice *xe, struct XenosVertexBuffer *vb) { Xe_pUnlock(xe, &vb->lock); } void *Xe_IB_Lock(struct XenosDevice *xe, struct XenosIndexBuffer *ib, int offset, int size, int flags) { Xe_pLock(xe, &ib->lock, ib->base + offset, ib->phys_base + offset, size, flags); return ib->base + offset; } void Xe_IB_Unlock(struct XenosDevice *xe, struct XenosIndexBuffer *ib) { Xe_pUnlock(xe, &ib->lock); } void Xe_SetVertexShaderConstantF(struct XenosDevice *xe, int start, const float *data, int count) { // printf("SetVertexShaderConstantF\n"); memcpy(xe->alu_constants + start * 4, data, count * 16); Xe_DirtyAluConstant(xe, start, count); // while (count--) // { // printf("%.3f %.3f %.3f %.3f\n", data[0], data[1], data[2], data[3]); // data += 4; // } } void Xe_SetPixelShaderConstantF(struct XenosDevice *xe, int start, const float *data, int count) { start += 256; // printf("SetPixelShaderConstantF 
(%d+)\n", start); memcpy(xe->alu_constants + start * 4, data, count * 16); Xe_DirtyAluConstant(xe, start, count); // while (count--) // { // printf("%.3f %.3f %.3f %.3f\n", data[0], data[1], data[2], data[3]); // data += 4; // } } struct XenosSurface *Xe_CreateTexture(struct XenosDevice *xe, unsigned int width, unsigned int height, unsigned int levels, int format, int tiled) { struct XenosSurface *surface = malloc(sizeof(struct XenosSurface)); memset(surface, 0, sizeof(struct XenosSurface)); int bypp = 0; switch (format & XE_FMT_MASK) { case XE_FMT_8888: bypp = 4; break; case XE_FMT_16161616: bypp = 8; break; } assert(bypp); int pitch = (width * bypp + 127) &~127; surface->width = width; surface->height = height; surface->pitch = pitch; surface->tiled = tiled; surface->format = format; surface->ptr_mip = 0; surface->bypp = bypp; surface->base = Xe_pAlloc(xe, &surface->ptr, height * pitch, 1024 * bypp); // 4k seems right return surface; } void *Xe_Surface_LockRect(struct XenosDevice *xe, struct XenosSurface *surface, int x, int y, int w, int h, int flags) { #if 0 if (surface == xe->rt) /* current render target? sync. */ { Xe_Resolve(xe); Xe_Sync(xe); } #endif if (!w) w = surface->width; if (!h) h = surface->height; int offset = y * surface->pitch + x * surface->bypp; int size = h * surface->pitch; Xe_pLock(xe, &surface->lock, surface->base + offset, surface->ptr + offset, size, flags); return surface->base + offset; } void Xe_Surface_Unlock(struct XenosDevice *xe, struct XenosSurface *surface) { Xe_pUnlock(xe, &surface->lock); } gpu-0.0.5/ps.hlsl0000644000175000017500000000064610772605224013142 0ustar tmbinctmbincfloat4 lightDirection: register(c0); /* define the ps input. 
this must match the vertex shader output, except for oPosition (and other fixed-function things like fog) */ struct Input { float3 oNormal: NORMAL; float4 oUV: TEXCOORD0; }; sampler s; float4 main(Input input): COLOR { float4 tex = tex2D(s, input.oUV); float4 res; res.rgb = dot(input.oNormal, lightDirection) * tex; res.a = tex.a; return res; } gpu-0.0.5/xextex.o0000644000175000017500000003705010771344102013332 0ustar tmbinctmbincELF#4(|!p= 9)}#Kxۡxa HHx|exHxH= 8x;xH8H8cxH<8H8|vx~ijxH<8H8|wx~xH88~xH= =`t<`)= K8c H8H88|xx8xH8xHxH8H8H8H8|yx8$xH8HdxH$xH88889H88|zx899 DxH/@l8/@P}Cy9`| T<0T D T>}*x@U`<0C9kT >TD |Kx8cB88ADx;;`:H8`H= =`Ta:= =`)= Ëi = $= :i= é(```;?C0Ȑ;{HHH @`HHoА@`ؐ``!!(!$ H8H8H~L^P8~xD>H8aA !H8~ųx8H8~x8H88 x8H$xHEx8H8H8888H88H88H88H88H99 8888HH @`H|Г@`ؐАl`!!(!$ H8H8H8H8888H88H88H88H88H88899 8H8HHHH8`HAT~xHdx~cx;`L1HK49 })Kx% : ; I$ > $ > $ >   I : ;  : ;I8 : ; : ; I8 I !I/ : ; I : ; I8  : ; !I/55I.? : ; ' I@4: ; I4: ; I 4: ; I 4: ; I4: ; I  U4: ; I&I4: ; I? < 4: ; I?  R (0W2~@int8aQok-t tZ# # D#  # # # # e# # #$ #(  #, "#0 $#4 0&Z#8 *Z#< ,z#@ 0>#D R1L#F 92#G  6#H ?#P :H#X AI#\ HJ#` OK#d VL%#h NZ#l tP#p F  # # Z#{  t  'Mu32>0/ B1 C# DD# EZ#K L# M# yO# qO#H O#L O#P P#T        >Z# Z#fmtZ#|.numZ#e.# > 0 Z# Z# Z# _Z# Z#ptr# # Z# 9# #$b J# #>$u # Z# Z# Z# 9# # u#   # Z# Z# 9# fmtZ# #Ml # a#@ #F I#F #F #F #H 8#H 0#Hvs#Hps#H }Z#H ,Z#H #Hrb #H  #H l #H Z#H UZ#H rZ#H  #H ] #H d (#Q '>#rt 9# $Z# Z# >Z# ZZ# dZ# .# Z# u# u# Z# u#Ě *u#Ț Z#̚ m ?#К Cu#Ԛ Z#ؚ Z#ܚ NZ# Z# .Z#      '  1  0  (  9{  W  m   Z fb rttg$ u? uBf*BgvbNuhvOibT ?iiU texYjyZZ3xZZQt[ ogfhZiZ`d x_> } u Z{Zvbf    > #>  Q  L_xe xe Oi /usr/lib/gcc/powerpc-linux-gnu/4.2.1/include/usr/include/bits/usr/includexextex.cstddef.htypes.hlibio.hstdio.htime.hxe.hengine.hxee.h /0!=x 2.!M ?.A [#0"/)v,"gI"!"?"!D19#."o.."tbJS v<##!!K!=?/-/-K&YYg=L=gKKKK!K ===gKKKK?30#Z/ ?????????@??@????????????@@?????@@???@??@????@?????@?????@@????@@?????????????????????????????   999 %?I??CHYB?=Ϳps.psuvs.vsu%d fps |A HH P A@ At! 
Qq U8WW<[[<SStPtlkdtP& V mmain ,_xe >xe__off_t_IO_read_ptr_chainXenosShadersize_tXenosLock_shortbufcurrent_vbedram_color0basetiled_IO_buf_baserb_secondary_boundarylong long unsigned intphys_baseXenosIndexBuffervb_poolshaderrb_primary_wptrnextptr_mipalu_constantslong long intsigned charverticesalpharefeMatrix44sh_vs_filenostencildata_IO_read_endlong intvp_yresXenosRenderTarget_flagsnr_primitives_IO_buf_end_cur_column__quad_tdouble_old_offset_offsetmainxextex.cedram_depthbaseinteger_constantsXenosDeviceXenosVBFFormatedram_hizpitchrb_primaryvb_pool_after_framevp_xres_IO_markerstdinunsigned intframeidxcurrent_ib__bsxlong unsigned intrb_secondary_base_IO_write_ptrsh_ps_sbufwidthsizeshort unsigned intspacecontrolpacket_IO_save_basecontext_misc_lock_flags2_modestdoutalloc_ptrclearcolorbaseusagephysfetch_dirtyrb_secondary_wptrcuberb_secondaryshader_physshader_instance_IO_write_endpitch_IO_lock_t_IO_FILEGNU C 4.2.1 (Debian 4.2.1-5)indicestime_tfloat_posXenosVBFElement_markersmax_verticesedram_colorformatdirtyunsigned charshort intsurface_vtable_offsetfetch_constantsshader_phys_sizeformatprogram_controllightDirectionregscube_indicescharbyppindexXenosSurfacelast_wptrvb_current_pitch_nextalu_dirty__off64_t_IO_read_base_IO_save_endtex_fbedram_pitch__pad1__pad2__pad3__pad4__pad5ucode0ucode1__time_t_unused2vs_indexvb_current_IO_backup_baseg_projXenosVertexBufferframecountmsaa_samplesvb_headtris_drawnstartheightresolve_vb_physlock_IO_write_baseclipplane/home/tmbinc/wz3/gpuGCC: (GNU) 4.2.1 (Debian 4.2.1-5).symtab.strtab.shstrtab.rela.text.data.bss.debug_abbrev.rela.debug_info.rela.debug_line.rodata.rodata.cst4.rodata.str1.4.rela.debug_frame.debug_loc.rela.debug_pubnames.rela.debug_aranges.debug_ranges.debug_str.comment.note.GNU-stack @. 
&,,,1,D V?5X U mP=  axTi,v2\=  l_*>   > 05!#""'Pp +  H | 'D    16Ml:=E^t$/<Masx %9I[l~xextex.cC.25.3934C.26.3935vbf.3898C.27.3980main_xexeXe_InitXe_GetFramebufferSurfaceXe_CreateRenderTargetXe_SetRenderTargetmemcpyXe_LoadShaderXe_InstantiateShaderXe_ShaderApplyVFetchPatchesg_projM_BuildPerspXe_CreateVertexBufferXe_VB_LockXe_VB_UnlockXe_CreateIndexBufferXe_IB_LockXe_IB_UnlockXe_CreateTextureXe_Surface_LockRectXe_Surface_UnlocktimeXe_InvalidateStateglLoadIdentityglPushMatrixglTranslateglRotateM_LoadMVM_LoadMWXe_SetPixelShaderConstantFXe_SetShaderXe_SetStreamSourceXe_SetIndicesXe_SetTextureXe_SetStencilEnableXe_SetStencilOpXe_SetStencilFuncXe_SetStencilRefXe_SetStencilMaskXe_SetStencilWriteMaskXe_DrawIndexedPrimitiveglPopMatrixXe_SetZFuncXe_SetClearColorXe_ResolveXe_Syncprintf *v             &4! : > B F"J N  R V"Z  \# bh$ z%  & ' (  ) *  + , -               $ $ ( ((. ,/ 00 @1 D0 x2 ~3 4 5 6 6 7  8  9 &,: 2D; JT< Zd= jt> z? @ A 1 2 3 B  : (; .8= >H> NX? ^h< n@ C D E A - - F   !&(3W:2AH~OV@dkp{QktD  *e9HWfu0R9  :A)H8OGVVetu|F/D#2>LZyhqv>?KYgu_9bJ!/=K9Yg|9aI%4CR8a},lUr!]0d?'^$n~>Zd.*0mACRctN. H M o z ~   g   * 3 +Q 8o = G T Y c h` ld ux     {   Q   : Kgpu-0.0.5/ioremap.c0000644000175000017500000000247110760704133013426 0ustar tmbinctmbinc#include #include #include #include #include volatile void * ioremap(unsigned long physaddr, unsigned size, int sync) { int axs_mem_fd = -1; unsigned long page_addr, ofs_addr, reg, pgmask; void* reg_mem = NULL; /* * looks like mmap wants aligned addresses? */ pgmask = getpagesize()-1; page_addr = physaddr & ~pgmask; ofs_addr = physaddr & pgmask; /* * Don't forget O_SYNC, esp. if address is in RAM region. * Note: if you do know you'll access in Read Only mode, * pass O_RDONLY to open, and PROT_READ only to mmap */ if (axs_mem_fd == -1) { axs_mem_fd = open("/dev/mem", O_RDWR|(sync ? 
O_SYNC : 0)); if (axs_mem_fd < 0) { perror("AXS: can't open /dev/mem"); return NULL; } } /* memory map */ reg_mem = mmap( (caddr_t)reg_mem, size+ofs_addr, PROT_READ|PROT_WRITE, MAP_SHARED, axs_mem_fd, page_addr ); if (reg_mem == MAP_FAILED) { perror("AXS: mmap error"); close(axs_mem_fd); return NULL; } reg = (unsigned long )reg_mem + ofs_addr; return (volatile void *)reg; } int iounmap(volatile void *start, size_t length) { unsigned long ofs_addr; ofs_addr = (unsigned long)start & (getpagesize()-1); /* do some cleanup when you're done with it */ return munmap((unsigned char*)start-ofs_addr, length+ofs_addr); }