In [1]:
import logging
import struct
import sys
# setup logger: stdout(<WARN), stderr(>=WARN)
def setupLogger(name='Gecko'):
  '''
  Configure the named logger with exactly two stream handlers.

  Records below WARN go to stdout, records at WARN or above go to stderr.
  Handlers left over from a previous run of this cell are removed first, so
  re-running the cell does not make every message print more than once.

  @param `name`: logger name
  @returns: the configured `logging.Logger`
  '''
  lg = logging.getLogger(name)
  # getLogger returns the same object on every call; drop stale handlers
  for old in list(lg.handlers):
    lg.removeHandler(old)
  out = logging.StreamHandler(sys.stdout)
  out.addFilter(lambda r: r.levelno < logging.WARN)
  lg.addHandler(out)
  # StreamHandler() without a stream writes to stderr
  err = logging.StreamHandler()
  err.setLevel(logging.WARN)
  lg.addHandler(err)
  return lg

logger = setupLogger()

## Preparation
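- Obtain the [MAP files](https://github.com/BitPatty/Super-Mario-Sunshine-C-Kit/tree/master/maps) of each game version (`JP`, `JPA`, `US`, `EU`) and place them at `maps/mario{ver}.MAP`, the path read by the symbol-parsing cell below (edit that path if your files live elsewhere).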

## Address converter

In [2]:
class Symbol:
  '''A named symbol with its address and size.'''
  def __init__(self, name, addr, size):
    self.name, self.addr, self.size = name, addr, size
  def __repr__(self):
    # address as 8 zero-padded hex digits; size is printed in hex too
    fields = (self.addr, self.name, self.size)
    return '[%08x] %s (%x)' % fields
  
def getSymbols(path):
  '''
  Collect symbol rows from the "section layout" tables of a map file.

  A table starts at a line ending with `section layout` (its first word is
  taken as the section name) and ends at the next blank line. Inside a table
  only rows with at least 5 columns, a first column other than `UNUSED` and a
  fourth column equal to `4` are kept.

  @param `path`: path to the map file
  @returns: (name, section, off, size, addr)[] with off/size/addr parsed as hex
  '''
  found = []
  current = None
  with open(path) as fh:
    for raw in fh:
      text = raw.strip()
      if current is None:
        # outside any table: only a header line is interesting
        if text.endswith('section layout'):
          current = raw.split(maxsplit=1)[0]
        continue
      if not text:
        # blank line closes the current table
        current = None
        continue
      fields = raw.split()
      if len(fields) < 5:
        continue
      off, size, addr, mode, name = fields[:5]
      if off == 'UNUSED' or mode != '4':
        continue
      found.append((name, current, int(off, 16), int(size, 16), int(addr, 16)))
  return found

def bsearch(arr, q, fv=lambda x: x):
  '''
  Binary search for the last element whose value is <= the query.

  `arr` must be sorted ascending by `fv`.

  @param `arr`: array to perform binary search
  @param `q`: query value
  @param `fv`: function that maps element to value for comparison
  @returns: (index, element). element is None if query is out of range:
    index is -1 when `arr` is empty or `q` is below the first value, and
    `len(arr)` when `q` is above the last value
  '''
  # an empty array has no first/last element to compare against
  if len(arr) == 0: return -1, None
  l, r = 0, len(arr)
  if q < fv(arr[0]): return -1, None
  if q > fv(arr[-1]): return r, None
  # invariant: fv(arr[l]) <= q, and every element at index >= r is > q
  while r-l > 1:
    m = (l+r)>>1
    v = fv(arr[m])
    if q < v: r = m
    elif q > v: l = m
    else: return m, arr[m]
  return l, arr[l]

In [3]:
# parse map files to symbol lists (one address-sorted list per version)
symLsts = {
  ver: sorted(
    # rows are (name, section, off, size, addr); only name/addr/size are kept
    # *FIXME* MAP file path
    [Symbol(row[0], row[4], row[3]) for row in getSymbols(f'maps/mario{ver}.MAP')],
    key=lambda entry: entry.addr,
  )
  for ver in ('JP', 'JPA', 'US', 'EU')
}

# make symbol maps (name -> Symbol; a repeated name keeps its last entry)
symMaps = {
  ver: dict((entry.name, entry) for entry in entries)
  for ver, entries in symLsts.items()
}

# utility functions
def searchSymbol(addr, lang):
  '''
  Find the last symbol of version `lang` whose address is <= `addr`.

  @param `addr`: address to look up
  @param `lang`: version key of `symLsts`
  @returns: the matching Symbol, or None if `addr` is out of range
  '''
  _, hit = bsearch(symLsts[lang], addr, lambda sym: sym.addr)
  return hit
def convertAddr(addr, src, dst, verbose=False):
  '''
  Translate an address from version `src` to version `dst` by keeping its
  offset from the symbol found for it in `src`.

  @param `addr`: address in the source version
  @param `src`: source version key
  @param `dst`: destination version key
  @param `verbose`: if true, print the result instead of returning it
  @returns: (converted address, destination Symbol), or None when `verbose`
  @raises ValueError: `addr` is outside the range of the source symbol list
  @raises KeyError: the source symbol name does not exist in `dst`
  '''
  sym0 = searchSymbol(addr, src)
  if sym0 is None:
    raise ValueError('%08x: address out of range of the %s symbol list'%(addr, src))
  if sym0.name not in symMaps[dst]:
    raise KeyError('%s: symbol not found in %s'%(sym0.name, dst))
  sym = symMaps[dst][sym0.name]
  addrN = addr-sym0.addr+sym.addr
  if verbose:
    print('%08x: %d + %s'%(addrN, addr-sym0.addr, sym))
  else:
    return addrN, sym

### Example usage

In [4]:
# look up the last JP symbol whose address is <= 0x8002F270
searchSymbol(0x8002F270, 'JP')

[8002f104] create__18J3DMaterialFactoryCFP11J3DMaterialiUl (924)

In [5]:
# translate a JP address to US; with verbose=True the result is printed and None is returned
convertAddr(0x8040E178, 'JP', 'US', verbose=True)

804158dc: 0 + [804158dc] @2861 (6)


## Gecko code converter

In [6]:
def splitGecko(raw):
  '''
  Split a raw gecko code list into individual codes.

  The input is read as big-endian pairs of 32-bit words. The first pair of a
  code decides how many following pairs belong to it.

  @param `raw`: raw gecko code text (hex string) or bytes (e.g. gct file)
  @returns: list of codes; each code is a tuple of (int, int) pairs whose
    first pair is the code header
  @raises ValueError: the input ends before a code has all of its pairs
  '''
  if isinstance(raw, str): raw = bytes.fromhex(raw)
  itr = struct.iter_unpack('>2I', raw)
  ans = []
  for a, b in itr:
    ctype = a>>24
    # mask 0xee ignores bits 0x01 and 0x10 of the code type
    itype = ctype & 0xee
    if ctype == 0xC0 or itype == 0xC2: n = b
    elif ctype in [0xF2, 0xF4]: n = b&0xff
    elif ctype == 0xF6: n = a&0xff
    elif itype == 0x06: n = (b+7)>>3
    elif itype == 0x08: n = 1
    else: n = 0
    body = []
    for _ in range(n):
      pair = next(itr, None)
      if pair is None:
        # report truncation explicitly instead of leaking StopIteration
        raise ValueError(
          '%08X %08X: truncated code, expected %d more line(s) but got %d'
          %(a, b, n, len(body))
        )
      body.append(pair)
    ans.append(((a, b), *body))
  return ans

# PowerPC primary opcodes (top 6 bits of the instruction word)
ORI = 24
LWZ = 32
LHZ = 40
LBZ = 34
def handle_inst(code, srcSymLst, dstSymMap, knownAddrs={}):
  '''
  Relocate absolute addresses that are built by an adjacent
  `lis` + `lwz/lhz/lbz/ori` instruction pair.

  @param `code`: code to be converted with type `(int, int)[]`
  @param `srcSymLst`: symbol list of source version
  @param `dstSymMap`: symbol map of destination version
  @param `knownAddrs`: known addresses that do not need convert
  @returns: converted code as a tuple of `(int, int)` tuples
  @raises AssertionError: the register set by `lis` is used by the next
    instruction in a way that is not handled yet
  '''
  insts = [x for p in code for x in p]
  # lis + lwz/lhz/lbz/ori
  for i in range(1, len(insts)):
    # `lis rD, ha` is `addis rD, 0, ha`: primary opcode 15 and RA field 0
    if (insts[i-1]>>16 &0xfc1f) != 15<<10: continue
    '''
    lis rD, ha
    lwz/lhz/lbz rT, lo(rA)   (rA == rD)
    ori rA, rS, lo           (rS == rD)
    '''
    rD = insts[i-1]>>21 &31
    inst = insts[i]>>26
    rS = insts[i]>>21 &31 # RT of a load / source RS of ori
    rA = insts[i]>>16 &31 # base RA of a load / destination RA of ori
    ha = insts[i-1]&0xffff
    lo = insts[i]&0xffff
    if inst in [LWZ, LHZ, LBZ] and rA == rD:
      # the load displacement is sign-extended, so undo the ha adjustment
      addr = (ha if lo<0x8000 else ha-1)<<16 | lo
      sgn = True
      s = '[load]'
    elif inst in [ORI] and rS == rD:
      addr = ha<<16 | lo
      sgn = False
      s = '[*ori]'
    else: # TODO
      assert rD not in [rS, rA], (
        'unhandled use of r%d after lis: %08x %08x'%(rD, insts[i-1], insts[i])
      )
      # nothing to relocate; never fall through with addr/sgn/s unset or stale
      continue
    if not 0x80000000 <= addr < 0x81800000: continue
    sym0 = bsearch(srcSymLst, addr, lambda sym: sym.addr)[1]
    if sym0 is None:
      if addr not in knownAddrs:
        logger.warning('Unknown potential addr: %08x'%addr)
    elif sym0.name not in dstSymMap:
      # leave the pair untouched, but report it like convertGecko does
      if addr not in knownAddrs:
        logger.error('%08x: Unknown symbol: %s', addr, sym0.name)
    else:
      sym = dstSymMap[sym0.name]
      addrN = addr + (sym.addr-sym0.addr)
      loN = addrN&0xffff
      # a load sign-extends loN, so compensate in the high half
      haN = (addrN>>16) + (1 if sgn and loN>=0x8000 else 0)
      insts[i-1] = insts[i-1]&0xffff0000 | haN
      insts[i] = insts[i]&0xffff0000 | loN
      logger.info('%08x -> %08x %s'%(addr, addrN, s))
  # lwz/lhz/lbz r13/r2
  # TODO
  # DONE
  return tuple(
    tuple(insts[i:i+2])
    for i in range(0, len(insts), 2)
  )

def convertGecko(raw, srcSymLst, dstSymMap, knownAddrs={}):
  '''
  Convert every code of a gecko code list from one version to another.

  Codes of type C0/E0 are copied unchanged. For the others the header
  address is moved by the offset between the source symbol and the symbol
  of the same name in the destination, and the remaining lines go through
  `handle_inst`. A code is dropped (after an error log) when its symbol is
  missing in the destination and the address is not in `knownAddrs`, or when
  the symbol sizes differ.

  @param `raw`: raw code (hex string or gct file) to be converted
  @param `srcSymLst`: symbol list of source version
  @param `dstSymMap`: symbol map of destination version
  @param `knownAddrs`: known addresses that do not need convert
  @returns: list of converted codes, each a tuple of `(int, int)` pairs
  '''
  converted = []
  for code in splitGecko(raw):
    head = code[0][0]
    # handle instruction that do not involves RAM
    if head>>24 in (0xC0, 0xE0): # TODO
      converted.append(code)
      continue
    # handle instruction that involves RAM
    addr = 0x80000000 | (head & 0x1ffffff)
    srcSym = bsearch(srcSymLst, addr, lambda sym: sym.addr)[1]
    newHead = head
    if srcSym is None:
      if addr not in knownAddrs:
        logger.warning('%08X: Addr out of Range'%head)
    elif srcSym.name in dstSymMap:
      dstSym = dstSymMap[srcSym.name]
      if dstSym.size != srcSym.size:
        logger.error(
          '%08X: different function size! (%d -> %d) %s',
          head, srcSym.size, dstSym.size, dstSym.name,
        )
        continue
      # keep the code-type bits, replace the 25 address bits
      newHead = (head & 0xfe000000) | ((addr + dstSym.addr - srcSym.addr) & 0x1ffffff)
    elif addr not in knownAddrs:
      logger.error('%08X: Unknown symbol: %s'%(head, srcSym.name))
      continue
    body = handle_inst(code[1:], srcSymLst, dstSymMap, knownAddrs=knownAddrs)
    converted.append(((newHead, code[0][1]), *body))
    logger.info('%08X -> %08X %s', head, newHead, '' if srcSym is None else srcSym.name)
  return converted

In [7]:
# only show warnings and errors while converting
logger.setLevel(logging.WARN)

# read the gecko code
## change `input.txt` to the path to your code
with open('input.txt') as f:
  gecko = f.read()

# convert the code from US (source symbol list) to EU (destination symbol map);
# lookup failures for the addresses in `knownAddrs` are not reported
ans = convertGecko(
  gecko,
  symLsts['US'],
  symMaps['EU'],
  knownAddrs={0x800003B0, 0x800003E0, 0x800003E4, 0x800003E8},
)

041AE6B0: Unknown symbol: render__22TBathWaterFlatRendererFPQ26JDrama9TGraphicsRC12TBathtubDataPP10TBathWaterPP16TBathWaterParamsi
041AE6B0: Unknown symbol: render__22TBathWaterFlatRendererFPQ26JDrama9TGraphicsRC12TBathtubDataPP10TBathWaterPP16TBathWaterParamsi


In [8]:
# format the converted codes as one `XXXXXXXX XXXXXXXX` line per pair
s = '\n'.join(
  '%08X %08X'%(a, b)
  for code in ans
  for a, b in code
)

# copy to clipboard
import win32clipboard
win32clipboard.OpenClipboard()
try:
  win32clipboard.EmptyClipboard()
  win32clipboard.SetClipboardText(s)
finally:
  # always release the clipboard, even if setting the text fails
  win32clipboard.CloseClipboard()

## LICENSE