{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "82fd788f-d9ff-4343-9ea9-b069e12644ff", "metadata": {}, "outputs": [], "source": [ "import logging\n", "import struct\n", "logger = logging.getLogger('Gecko')" ] }, { "cell_type": "markdown", "id": "800c75a3-b632-46ea-ab1b-c72a6a1e709e", "metadata": {}, "source": [ "## Preparation\n", "- Obtain [MAP files](https://github.com/BitPatty/Super-Mario-Sunshine-C-Kit/tree/master/maps) of each version" ] }, { "cell_type": "markdown", "id": "dcf73577-87ae-4fe4-8c06-bfa3af0d1176", "metadata": {}, "source": [ "## Address converter" ] }, { "cell_type": "code", "execution_count": 2, "id": "cbf57f8a-c117-4123-8b0f-72801927fc5f", "metadata": {}, "outputs": [], "source": [ "class Symbol:\n", " def __init__(self, name, addr, size):\n", " self.name = name\n", " self.addr = addr\n", " self.size = size\n", " def __repr__(self):\n", " return '[%08x] %s (%x)'%(self.addr, self.name, self.size)\n", " \n", "def getSymbols(path):\n", " '''\n", " @param `path`: path to the map file\n", " @returns: (name, section, off, size, addr)[]\n", " '''\n", " syms = []\n", " section = None\n", " with open(path) as f:\n", " for line in f:\n", " if section is None:\n", " if line.strip().endswith('section layout'):\n", " section = line.split(maxsplit=1)[0]\n", " else:\n", " if line.strip() == '':\n", " section = None\n", " else:\n", " cols = line.split()\n", " if len(cols) < 5: continue\n", " off, size, addr, mode, name = cols[:5]\n", " if off != 'UNUSED' and mode == '4':\n", " syms.append((name, section, *(int(x, 16) for x in (off, size, addr))))\n", " return syms\n", "\n", "def bsearch(arr, q, fv=lambda x: x):\n", " '''\n", " @param `arr`: array to perform binary search\n", " @param `q`: query value\n", " @param `fv`: function that maps element to value for comparison\n", " @returns: (index, element). element is None if query is out of range\n", " '''\n", " l, r = 0, len(arr)\n", " if q < fv(arr[0]): return -1, None\n", " if q > fv(arr[-1]): return r, None\n", " while r-l > 1:\n", " m = (l+r)>>1\n", " v = fv(arr[m])\n", " if q < v: r = m\n", " elif q > v: l = m\n", " else: return m, arr[m]\n", " return l, arr[l]" ] }, { "cell_type": "code", "execution_count": 3, "id": "9adc600f-2d7f-4b19-a3dc-8e6e11f01f32", "metadata": {}, "outputs": [], "source": [ "# parse map files to symbol lists\n", "symLsts = {\n", " ver: sorted((\n", " Symbol(name, addr, size)\n", " # *FIXME* MAP file path\n", " for name, section, off, size, addr in getSymbols(f'maps/mario{ver}.MAP')\n", " ), key=lambda sym: sym.addr)\n", " for ver in ('JP', 'JPA', 'US', 'EU')\n", "}\n", "\n", "# make symbol maps\n", "symMaps = {\n", " ver: {\n", " sym.name: sym\n", " for sym in syms\n", " }\n", " for ver, syms in symLsts.items()\n", "}\n", "\n", "# utility functions\n", "searchSymbol = lambda addr, lang: bsearch(symLsts[lang], addr, lambda sym: sym.addr)[1]\n", "def convertAddr(addr, src, dst, verbose=False):\n", " sym0 = searchSymbol(addr, src)\n", " sym = symMaps[dst][sym0.name]\n", " addrN = addr-sym0.addr+sym.addr\n", " if verbose:\n", " print('%08x: %d + %s'%(addrN, addr-sym0.addr, sym))\n", " else:\n", " return addrN, sym" ] }, { "cell_type": "markdown", "id": "2d9d533c-4e1f-46d9-bf0d-a1476c1358cd", "metadata": {}, "source": [ "### Example usage" ] }, { "cell_type": "code", "execution_count": 4, "id": "71253d77-0c94-4097-8073-084a645e3da2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[8002f104] create__18J3DMaterialFactoryCFP11J3DMaterialiUl (924)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "searchSymbol(0x8002F270, 'JP')" ] }, { "cell_type": "code", "execution_count": 5, "id": "239736a9-ea71-48d2-8e16-6012bd2abe1d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "804158dc: 0 + [804158dc] @2861 (6)\n" ] } ], "source": [ "convertAddr(0x8040E178, 'JP', 'US', verbose=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "5d2143f5-ae7d-4fb9-a762-37f5c439acbc", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "e3d5ab74-2071-446a-9598-02138e382a04", "metadata": {}, "source": [ "## Gecko code converter" ] }, { "cell_type": "code", "execution_count": 6, "id": "6d21cda5-1b31-4368-9c81-6851d15bf821", "metadata": {}, "outputs": [], "source": [ "def splitGecko(raw):\n", " '''\n", " @param `raw`: raw gecko code text (hex string) or bytes (e.g. gct file)\n", " @returns: converted gecko code text (hex string)\n", " '''\n", " if type(raw) == str: raw = bytes.fromhex(raw)\n", " itr = struct.iter_unpack('>2I', raw)\n", " ans = []\n", " for a, b in itr:\n", " ctype = a>>24\n", " itype = ctype & 0xee\n", " if ctype == 0xC0 or itype == 0xC2: n = b\n", " elif ctype in [0xF2, 0xF4]: n = b&0xff\n", " elif ctype == 0xF6: n = a&0xff\n", " elif itype == 0x06: n = (b+7)>>3\n", " elif itype == 0x08: n = 1\n", " else: n = 0\n", " ans.append(((a, b), *(next(itr) for _ in range(n))))\n", " return ans\n", "\n", "ORI = 24\n", "LWZ = 32\n", "LHZ = 40\n", "LBZ = 34\n", "def handle_inst(code, srcSymLst, dstSymMap, knownAddrs={}):\n", " '''\n", " @param `code`: code to be converted with type `(int, int)[]`\n", " @param `srcSymLst`: symbol list of source version\n", " @param `dstSymMap`: symbol map of source version\n", " @param `knownAddrs`: known addresses that do not need convert\n", " @returns: converted code with type `(int, int)[]`\n", " '''\n", " insts = [x for p in code for x in p]\n", " # lis + lwz/lhz/lbz/ori\n", " for i in range(1, len(insts)):\n", " if insts[i-1]>>16 &0xfc1f == 15<<10:\n", " '''\n", " lis rA, xxxx\n", " lwz/lhz/lbz/ori rT, yyyy(rA)\n", " '''\n", " rA = insts[i-1]>>21 &31\n", " inst = insts[i]>>26\n", " ha = insts[i-1]&0xffff\n", " lo = insts[i]&0xffff\n", " if inst in [LWZ, LHZ, LBZ] and rA == insts[i]>>21 &31:\n", " addr = (ha if lo<0x8000 else ha-1)<<16 | lo\n", " sgn = True\n", " s = '[load]'\n", " elif inst in [ORI] and rA == insts[i]>>16 &31:\n", " addr = ha<<16 | lo\n", " sgn = False\n", " s = '[*ori]'\n", " else: # TODO\n", " assert rA not in [insts[i]>>21 &31, insts[i]>>16 &31]\n", " if not 0x80000000 <= addr < 0x81800000: continue\n", " sym0 = bsearch(srcSymLst, addr, lambda sym: sym.addr)[1]\n", " if sym0 is None:\n", " if addr not in knownAddrs:\n", " logger.warning('Unknown potential addr: %08x'%addr)\n", " else:\n", " sym = dstSymMap[sym0.name]\n", " addrN = addr + (sym.addr-sym0.addr)\n", " loN = addrN&0xffff\n", " haN = (addrN>>16) + (1 if sgn and loN>=0x8000 else 0)\n", " insts[i-1] = insts[i-1]&0xffff0000 | haN\n", " insts[i] = insts[i]&0xffff0000 | loN\n", " logger.info('%08x -> %08x %s'%(addr, addrN, s))\n", " # lwz/lhz/lbz r13/r2\n", " # TODO\n", " # DONE\n", " return tuple(\n", " tuple(insts[i:i+2])\n", " for i in range(0, len(insts), 2)\n", " )\n", "\n", "def convertGecko(raw, srcSymLst, dstSymMap, knownAddrs={}):\n", " '''\n", " @param `raw`: raw code (hex string or gct file) to be converted\n", " @param `srcSymLst`: symbol list of source version\n", " @param `dstSymMap`: symbol map of source version\n", " @param `knownAddrs`: known addresses that do not need convert\n", " @returns: converted code with type `(int, int)[]`\n", " '''\n", " gecko = splitGecko(raw)\n", " ans = []\n", " for code in gecko:\n", " inst = code[0][0]\n", " # handle instruction that do not involves RAM\n", " ctype = inst>>24\n", " if ctype in [0xC0, 0xE0]: # TODO\n", " ans.append(code)\n", " continue\n", " # handle instruction that involves RAM\n", " addr = 0x80000000|inst&0x1ffffff\n", " sym0 = bsearch(srcSymLst, addr, lambda sym: sym.addr)[1]\n", " instN = inst\n", " if sym0 is None:\n", " if addr not in knownAddrs:\n", " logger.warning('%08X: Addr out of Range'%inst)\n", " elif sym0.name not in dstSymMap:\n", " if addr not in knownAddrs:\n", " logger.error('%08X: Unknown symbol: %s'%(inst, sym0.name))\n", " continue\n", " else:\n", " sym = dstSymMap[sym0.name]\n", " if sym.size != sym0.size:\n", " logger.error(\n", " '%08X: different function size! (%d -> %d) %s',\n", " inst, sym0.size, sym.size, sym.name,\n", " )\n", " continue\n", " instN = inst&0xfe000000 | (addr+sym.addr-sym0.addr)&0x1ffffff\n", " ans.append((\n", " (instN, code[0][1]),\n", " *handle_inst(code[1:], srcSymLst, dstSymMap, knownAddrs=knownAddrs),\n", " ))\n", " logger.info('%08X -> %08X %s', inst, instN, '' if sym0 is None else sym0.name)\n", " return ans" ] }, { "cell_type": "code", "execution_count": null, "id": "e49e3307-91f4-497a-8b25-f02324b2bcbb", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 7, "id": "2a1645b1-0fa7-4603-b39f-c79ec42fd34c", "metadata": {}, "outputs": [], "source": [ "logger.setLevel(logging.WARN)\n", "\n", "# read the gecko code\n", "## change `input.txt` to the path to your code\n", "with open('input.txt') as f:\n", " gecko = f.read()\n", "\n", "# convert the code from US to JP\n", "ans = convertGecko(gecko, symLsts['US'], symMaps['JP'], knownAddrs={\n", " 0x800003E0,\n", " 0x800003E4,\n", " 0x800003E8,\n", " 0x800003B0,\n", " 0x800003E0,\n", "})" ] }, { "cell_type": "code", "execution_count": 8, "id": "d164c505-e970-418a-a8d0-bee0994ff0ae", "metadata": {}, "outputs": [], "source": [ "s = '\\n'.join(\n", " '%08X %08X'%(a, b)\n", " for code in ans\n", " for a, b in code\n", ")\n", "\n", "# copy to clipboard\n", "import win32clipboard\n", "win32clipboard.OpenClipboard()\n", "win32clipboard.EmptyClipboard()\n", "win32clipboard.SetClipboardText(s)\n", "win32clipboard.CloseClipboard()" ] }, { "cell_type": "code", "execution_count": null, "id": "ae9de60e-db13-4641-b16b-56ba9ed9c028", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "6ff45e8f-d9f7-4411-8129-e2427bc7327b", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "a09ea06c-c18b-4490-84b3-5157abe11a5c", "metadata": {}, "source": [ "## LICENSE" ] }, { "cell_type": "raw", "id": "239854dc-5235-4427-bba4-5336f616d78c", "metadata": {}, "source": [ "Copyright (c) 2022 sup39[サポミク]\n", "\n", "Permission is hereby granted, free of charge, to any person\n", "obtaining a copy of this software and associated documentation\n", "files (the \"Software\"), to deal in the Software without\n", "restriction, including without limitation the rights to use,\n", "copy, modify, merge, publish, distribute, sublicense, and/or sell\n", "copies of the Software, and to permit persons to whom the\n", "Software is furnished to do so, subject to the following\n", "conditions:\n", "\n", "The above copyright notice and this permission notice shall be\n", "included in all copies or substantial portions of the Software.\n", "\n", "THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n", "EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES\n", "OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n", "NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n", "HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,\n", "WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n", "FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR\n", "OTHER DEALINGS IN THE SOFTWARE." ] }, { "cell_type": "code", "execution_count": null, "id": "cdd5fe04-c12c-4e7e-9fc9-649e75a05082", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 5 }