#!/usr/pkg/bin/python
"""routines for creating and dumping ELF files

   this is my plan for the future direction of postForth, to ensure unique
   hashes for every newly defined word in a dictionary, no matter how many
   there may be"""

Copyright = """
    elf -- routines for creating and dumping ELF files
    Copyright (C) 2005  John Comeau <jc.jcomeau.com>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    """
# prefix of the message shown when one of the imports below fails; the
# module search path is appended to it once sys has been imported
errormessage = "Not all needed libraries found, upgrade or check path: "
try:
 True # not defined in older Python releases
except:
 True, False = 1, 0
try:
 import sys, os, types, re, pwd
 # also search lib/python under the home directory of the effective user
 sys.path.append(os.path.join(pwd.getpwuid(os.geteuid())[5], 'lib', 'python'))
 errormessage = errormessage + repr(sys.path)
 from com.jcomeau import gpl, jclicense
except:
 # report the problem on stderr if possible, otherwise on stdout, and then
 # re-raise the original exception
 try:
  sys.stderr.write("%s\n" % errormessage)
 except:
  print errormessage
 raise

# get name this program was called as
myself = os.path.split(sys.argv[0])[1]
command = os.path.splitext(myself)[0]  # chop any suffix (extension)
# now get name we gave it when we wrote it: the first run of alphanumeric
# characters in the Copyright text above, i.e. "elf"
originalself = re.compile('[0-9A-Za-z]+').search(Copyright).group()

# globals and routines that should be in every program
# (yes, you could import them, but there are problems in that approach too)

def DebugPrint(*whatever):
 """do-nothing debugging hook: accept any arguments and return False

    the pytest module defines the working version, use that for debugging"""
 return False

def join(*args):
 """join(separator, sequence): for pythons without str.join

    returns the items of sequence concatenated with separator between them;
    sequence may also be the source text of a Python list or tuple, which
    is evaluated first"""
 string, array = args
 # NOTE(review): a string argument is passed to eval so that a list can be
 # given as text; never hand this function text from an untrusted source
 if isinstance(array, str): array = eval(array)
 if hasattr(str, 'join'):
  return string.join(array)
 # fallback for interpreters whose str type has no join method
 joined = ''
 for index in range(len(array)):
  if index: joined = joined + string  # separator goes between items only
  joined = joined + array[index]
 return joined

def split(*args):
 """split(separator, string_to_split): for pythons without str.split

    returns the list of pieces of string_to_split; an empty (or None)
    separator means "split on runs of whitespace", as str.split does"""
 string, string_to_split = args
 if not string: string = None
 # probe a string instance: that is what the split method is called on
 if hasattr('', 'split'):
  return string_to_split.split(string)
 if string is None:
  # re.escape(None) would fail, so imitate whitespace splitting: break on
  # whitespace runs and drop the empty leading/trailing pieces
  pieces = re.compile(r'\s+').split(string_to_split)
  return [piece for piece in pieces if piece]
 return re.compile(re.escape(string)).split(string_to_split)

# other globals, specific to this program
import struct
# 32-bit data types, expressed as little-endian struct format strings
Elf32_Addr = '<I' # Unsigned program address
Elf32_Half = '<H' # Unsigned medium integer
Elf32_Off  = '<I' # Unsigned file offset
Elf32_Sword = '<i' # Signed large integer (lower-case i is the signed format)
Elf32_Word = '<I' # Unsigned large integer
uchar = '<B' # an unsigned char is 1 byte, same as unsigned small integer
# for e_ident: its length, then the index of each byte within it
EI_NIDENT = 16
EI_MAG0 = 0
EI_MAG1 = 1
EI_MAG2 = 2
EI_MAG3 = 3
EI_CLASS = 4
EI_DATA = 5
EI_VERSION = 6
EI_PAD = 7
# values for the EI_CLASS byte
ELFCLASSNONE = 0
ELFCLASS32 = 1
ELFCLASS64 = 2
# values for the EI_DATA byte
ELFDATANONE = 0
ELFDATA2LSB = 1
ELFDATA2MSB = 2
# for e_type
ET_NONE = 0
ET_REL = 1
ET_EXEC = 2
ET_DYN = 3
ET_CORE = 4
ET_LOPROC = 0xff00
ET_HIPROC = 0xffff
# for e_machine
EM_NONE = 0
EM_M32 = 1
EM_SPARC = 2
EM_386 = 3
EM_68K = 4
EM_88K = 5
EM_860 = 7
EM_MIPS = 8
# e_version
EV_NONE = 0
EV_CURRENT = 1
# section header table indices
SHN_UNDEF = 0
SHN_LORESERVE = 0xff00
SHN_LOPROC = 0xff00
SHN_HIPROC = 0xff1f
SHN_ABS = 0xfff1  # symbols defined relative to this section are absolute
SHN_COMMON = 0xfff2  # symbols defined relative to this section are common
SHN_HIRESERVE = 0xffff
# section types, sh_type
# most section types have sh_link = SHN_UNDEF, sh_info = 0
SHT_NULL = 0
SHT_PROGBITS = 1
SHT_SYMTAB = 2  # has sh_link of string table, sh_info of # local symbols
SHT_STRTAB = 3
SHT_RELA = 4
SHT_HASH = 5  # has sh_link of symbol table, sh_info of 0
SHT_DYNAMIC = 6  # has sh_link of string table, sh_info of 0
SHT_NOTE = 7
SHT_NOBITS = 8
SHT_REL = 9  # has sh_link of symbol table, sh_info of section header
SHT_SHLIB = 10
SHT_DYNSYM = 11  # has sh_link of string table, sh_info of 0
SHT_LOPROC = 0x70000000
SHT_HIPROC = 0x7fffffff
SHT_LOUSER = 0x80000000
SHT_HIUSER = 0xffffffff
# section attribute flags, sh_flags (bit values, combined with |)
SHF_WRITE = 0x1
SHF_ALLOC = 0x2
SHF_EXECINSTR = 0x4
SHF_MASKPROC = 0xf0000000

# symbol binding, ELF32_ST_BIND
STB_LOCAL = 0
STB_GLOBAL = 1
STB_WEAK = 2  # the ELF specification assigns 2 to STB_WEAK, 3 is unassigned
STB_LOPROC = 13
STB_HIPROC = 15
# symbol type, ELF32_ST_TYPE
STT_NOTYPE = 0
STT_OBJECT = 1  # for variables, arrays, etc.
STT_FUNC = 2  # for function name or other executable code
STT_SECTION = 3
STT_FILE = 4  # for source file associated with object file
STT_LOPROC = 13
STT_HIPROC = 15
# struct entries are usually of the form: [name, [pack pattern, default]],
# where default is my choice of a default, not necessarily a good working one

# field layout of the ELF file header, in file order; the three size fields
# default to 0 here and are filled in by elf_header()
elf_header_struct = [
 # identification: 4 magic bytes, then class, data encoding and version
 # bytes, then 9 NUL bytes to fill out the 16-byte field
 ['e_ident', ['<16s', '\x7fELF%c%c%c\0\0\0\0\0\0\0\0\0' % (
  ELFCLASS32, ELFDATA2LSB, EV_CURRENT)]],
 ['e_type', [Elf32_Half, ET_EXEC]],
 ['e_machine', [Elf32_Half, EM_386]],
 ['e_version', [Elf32_Word, EV_CURRENT]],
 ['e_entry', [Elf32_Addr, 0]],  # no entry point in non-executable file
 ['e_phoff', [Elf32_Off, 0]],  # program header offset
 ['e_shoff', [Elf32_Off, 0]],  # section header offset
 ['e_flags', [Elf32_Word, 0]],
 ['e_ehsize', [Elf32_Half, 0]],  # ELF header size
 ['e_phentsize', [Elf32_Half, 0]],  # program header entry size
 ['e_phnum', [Elf32_Half, 0]],  # number of program headers
 ['e_shentsize', [Elf32_Half, 0]],  # section header entry size
 ['e_shnum', [Elf32_Half, 0]],  # number of section headers
 ['e_shstrndx', [Elf32_Half, SHN_UNDEF]],  # index of section name string table
]

# field layout of one section header table entry, in file order; the
# defaults describe an all-zero SHT_NULL entry
section_header_struct = [
 ['sh_name', [Elf32_Word, 0]],  # offset of the name in the string table
 ['sh_type', [Elf32_Word, SHT_NULL]],  # one of the SHT_ values
 ['sh_flags', [Elf32_Word, 0]],  # SHF_ bits
 ['sh_addr', [Elf32_Addr, 0]],
 ['sh_offset', [Elf32_Off, 0]],  # file offset of the section contents
 ['sh_size', [Elf32_Word, 0]],  # size of the section contents in bytes
 ['sh_link', [Elf32_Word, SHN_UNDEF]],  # meaning depends on sh_type
 ['sh_info', [Elf32_Word, 0]],  # meaning depends on sh_type
 ['sh_addralign', [Elf32_Word, 0]],
 ['sh_entsize', [Elf32_Word, 0]],
]

# field layout of one symbol table entry, in file order
symtab_entry_struct = [
 ['st_name', [Elf32_Word, 0]],  # index to string in string table
 ['st_value', [Elf32_Addr, 0]],  # value of the symbol
 ['st_size', [Elf32_Word, 0]],  # associated size, varies with symbol type
 ['st_info', [uchar, 0]],  # packed field for symbol binding and type
 ['st_other', [uchar, 0]],  # reserved in spec version 1.1, should be 0
 ['st_shndx', [Elf32_Half, SHN_UNDEF]],  # relevant section header table index
]

# segment types, p_type
PT_NULL = 0
PT_LOAD = 1
PT_DYNAMIC = 2
PT_INTERP = 3
PT_NOTE = 4
PT_SHLIB = 5
PT_PHDR = 6
PT_LOPROC = 0x70000000
PT_HIPROC = 0x7fffffff
SysVloadaddr = 0x8048000  # not part of ELF spec but shown in example
# field layout of one program header table entry, in file order
program_header_struct = [
 ['p_type', [Elf32_Word, PT_LOAD]],
 ['p_offset', [Elf32_Off, 0]],
 ['p_vaddr', [Elf32_Addr, 0]],
 ['p_paddr', [Elf32_Addr, 0]],
 ['p_filesz', [Elf32_Word, 0]],
 ['p_memsz', [Elf32_Word, 0]],
 ['p_flags', [Elf32_Word, 0]],  # PF_ bits
 ['p_align', [Elf32_Word, 0]],
]
# program flags (bit values, combined with |)
PF_R = 4
PF_W = 2
PF_X = 1

class struct_element:
 """one named field of an ELF structure

    name is the field name; item is [pack pattern, default, ...] as found
    in the *_struct tables; an optional third argument is the initial value.
    The value, when set, takes precedence over the default."""
 def __init__(self, name, item, *init):
  self.name = name
  self.packinfo = item[0]
  self.default = item[1:]  # always a sequence, normally of one item
  if len(init): self.value = init[0]
  else: self.value = None  # None means "not set, use the default"
 def getvalue(self):
  "return the field packed into its binary form"
  DebugPrint('getvalue of %s' % self.name, self.packinfo, self.default)
  # compare against None: 0 and '' are legitimate explicit values and must
  # not fall through to the default
  if self.value is not None:
   return struct.pack(self.packinfo, self.value)
  # one default or several, they are all passed as pack arguments
  return struct.pack(self.packinfo, *self.default)
 def rawvalue(self):
  """return the field unpacked: the value if set, else the default

     a multi-item default is returned whole, as a sequence"""
  if self.value is not None:
   return self.value
  if len(self.default) == 1:
   return self.default[0]
  return self.default

class elf_file:
 """this class builds the structure of an ELF file

    dumping a gcc-created ELF file, one finds the sections in this order:
    .hash, .dynsym, .dynstr, .text, .data, .bss

    the image returned by getvalue() is laid out as: ELF header, program
    headers, contents of self.text, section name string table, section
    headers
 """
 def __init__(self):
  # strtab begins with a NUL byte, so name offset 0 is the empty string
  self.strtab, self.text = '\0', '',
  self.symtab, self.header, self.pheaders = symtab(), elf_header(), []
  self.phentsize = self.header.field['e_phentsize'].rawvalue()
  self.sheaders = [section_header()] # first is always SHT_NULL
  self.text_align = 2 ** 12  # find minimum alignment value that works
 def getvalue(self):
  """return the complete file image as a string

     when self.text is not empty, every call appends another PT_LOAD
     segment plus the .text and .shstrtab section headers, so this is
     meant to be called once per instance"""
  # program headers directly follow the ELF header (nopad and nopadlen
  # leave their argument unchanged)
  phoff = len(nopad(self.header.getvalue(), 16))
  DebugPrint('phoff: 0x%x' % phoff)
  if len(self.text):
   if False: # not necessary to run test.elf under DSL
    self.addsegment(PT_PHDR, phoff, SysVloadaddr + phoff,
     2 * self.phentsize,
     PF_R | PF_X, 2 ** 2)
   # one loadable segment starting at file offset 0; the "+ 1" counts the
   # program header that this very call is about to add
   self.addsegment(PT_LOAD, 0, SysVloadaddr,
    phoff + nopadlen(self.phentsize * (len(self.pheaders) + 1), 16) + \
    len(pad(self.strtab + '.text\0', 8)) + \
    len(pad(self.text, 8)),
    PF_R | PF_X, self.text_align)
   # .text starts right after the program headers
   self.addsection('.text', SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
    phoff + nopadlen(self.phentsize * len(self.pheaders), 16), 
    len(pad(self.text, 8)), 2 ** 4)
   # NOTE(review): the size argument is evaluated before addsection appends
   # '.shstrtab\0' to self.strtab, so sh_size looks smaller than the table
   # written below -- confirm whether that is intended
   self.addsection('.shstrtab', SHT_STRTAB, SHF_ALLOC,
    phoff + nopadlen(self.phentsize * len(self.pheaders), 16) + \
    len(pad(self.text, 8)),
    len(pad(self.strtab, 8)), 0x0)
  # section headers come last, after the padded text and string table
  shoff = phoff + nopadlen(self.phentsize * len(self.pheaders), 16) + \
   len(pad(self.text, 8)) + len(pad(self.strtab, 8))
  # entry point is the load address of the first byte after the program
  # headers, which is where the text is written
  self.header.field['e_entry'].value = SysVloadaddr + phoff + \
   nopadlen(self.phentsize * len(self.pheaders), 16)
  self.header.field['e_phoff'].value = phoff
  self.header.field['e_shoff'].value = shoff
  self.header.field['e_phnum'].value = len(self.pheaders)
  self.header.field['e_shnum'].value = len(self.sheaders)
  # the name string table is the section added last
  self.header.field['e_shstrndx'].value = len(self.sheaders) - 1
  # assemble the pieces in file order
  value = nopad(self.header.getvalue(), 16)
  for header in self.pheaders:
   value += header.getvalue()
  value = nopad(value, 16)
  value += pad(self.text, 8) + pad(self.strtab, 8)
  for header in self.sheaders:
   value += header.getvalue()
  return value
 def addsegment(self, *args):
  """append a program header to self.pheaders

     args: type, file offset, address (used for both p_vaddr and p_paddr),
     size (used for both p_filesz and p_memsz), flags, alignment"""
  self.pheaders += [program_header({'p_type': args[0],
   'p_offset': args[1], 'p_vaddr': args[2],
   'p_paddr': args[2], 'p_filesz': args[3], 'p_memsz': args[3],
   'p_flags': args[4], 'p_align': args[5]})]
 def addsection(self, *args):
  """append a section header to self.sheaders, registering its name

     args: name, type, flags, file offset, size, alignment; sh_addr is
     the file offset plus SysVloadaddr"""
  # reuse the name if the string table already holds it, else append it
  if args[0] + '\0' in self.strtab:
   sh_name = self.strtab.index(args[0] + '\0')
  else:
   sh_name = len(self.strtab)  # index of new entry will be current length
   self.strtab += args[0] + '\0'
  self.sheaders += [section_header({'sh_name': sh_name, 'sh_type': args[1],
   'sh_flags': args[2], 'sh_addr': args[3] + SysVloadaddr,
   'sh_offset': args[3], 'sh_size': args[4], 'sh_addralign': args[5],})]

class elf_structure:
 """an ordered collection of struct_element fields built from a descriptor

    descriptor is one of the *_struct tables; an optional second argument
    is packed data from which the leading getsize() bytes are unpacked
    into the field values"""
 def __init__(self, descriptor, *init):
  self.packinfo, self.field, self.names = '<', {}, []
  for entry in descriptor:
   name = entry[0]
   self.names.append(name)
   element = struct_element(name, entry[1])
   self.field[name] = element
   # drop the byte-order character, the combined pattern has its own
   self.packinfo += element.packinfo[1:]
  if not len(init):
   return
  initvalues = struct.unpack(self.packinfo, init[0][0:self.getsize()])
  DebugPrint('initvalues', initvalues)
  for index, name in enumerate(self.names):
   DebugPrint('initializing %s with %s' % (name, repr(initvalues[index])))
   self.field[name].value = initvalues[index]
 def getvalue(self):
  "return all fields packed and concatenated in descriptor order"
  packed = [self.field[name].getvalue() for name in self.names]
  return ''.join(packed)
 def getsize(self):
  "return the packed size of the whole structure in bytes"
  return struct.calcsize(self.packinfo)

def make_hashtable(*names):
 """make hashtable from strings

    found number of buckets equal to len(names)/2 in gcc-generated file
    chains of course is strings+1, the extra being the empty symtab entry"""
 symbols = ['']
 gcc_style = os.getenv('GCC_ELF_HASH')  # do it same as gcc if env var is set
 if len(names) and type(names[0]) != types.StringType:
  names = names[0]
 for name in names:
  symbols.append(name)
  if gcc_style:
   nbuckets = int(len(symbols) / 2)
  else:
   nbuckets = len(symbols) - 1  # just subtract one for empty symbol
  nchains = len(symbols)
  buckets, chains = [0] * nbuckets, [0] * nchains
  print 'before "%s": ' % name, nbuckets, nchains, buckets, chains
  for index in range(1, len(symbols)):
   symbol = symbols[index]
   hash = elf_hash(symbol)
   bucket = hash % nbuckets
   chains[index] = buckets[bucket]
   buckets[bucket] = index
  print 'after "%s": ' % name, nbuckets, nchains, buckets, chains

def pad(string, length):
 """return string extended with NUL bytes to a multiple of length

    a length of 0 means no padding"""
 if length == 0:
  return string
 excess = len(string) % length
 if excess > 0:
  return string + '\0' * (length - excess)
 return string

def padlen(strlen, length):
 "return the length that a string of strlen bytes has after pad()"
 filler = '\0' * strlen
 return len(pad(filler, length))

def nopad(string, length):
 """stand-in for pad that returns string unchanged and ignores length

    used for 'commenting out' padding during testing"""
 return string

def nopadlen(strlen, length):
 """stand-in for padlen that returns strlen unchanged and ignores length

    see nopad"""
 return strlen
 
def symtab_entry(*args):
 "return a symbol table entry holding the default values; args are ignored"
 return elf_structure(symtab_entry_struct)

def elf_hash(symbol):
 """hash a symbol according to this implementation of the algorithm from
    the spec in pmft11.pdf:

unsigned long elf_hash (const unsigned char *name)
{
 unsigned long h = 0, g;
 while (*name)
 {
  h=(h << 4) + *name++;
  if (g = h & 0xf0000000)
   h ^= g >> 24;
  h &= ~g;
 }
 return h;
}

 symbol is a string; the result is an integer that always fits in 32 bits.

 this will probably work with unicode characters above chr(127) also, but the
 standard doesn't allow that as far as I can see.
"""
 DebugPrint('hashing', symbol)
 value = 0
 for byte in symbol:
  # mask after the addition, not before it: the sum itself can carry past
  # bit 31 and has to wrap the way a 32-bit unsigned long does in the C code
  value = ((value << 4) + ord(byte)) & 0xffffffff
  high_nybble = value & 0xf0000000
  # fold the top four bits back into the low bits, then clear them
  if high_nybble: value ^= high_nybble >> 24
  value &= ~high_nybble
 DebugPrint('hash value of "%s": 0x%x' % (symbol, value))
 return value

def section_header(*init):
 """return a section header structure

    an optional dictionary argument maps field names to values, which
    replace the defaults of those fields"""
 header = elf_structure(section_header_struct)
 if init:
  for key, value in init[0].items():
   header.field[key].default = [value]
 return header

def program_header(*init):
 """return a program header structure

    an optional dictionary argument maps field names to values, which
    replace the defaults of those fields"""
 header = elf_structure(program_header_struct)
 if init:
  for key, value in init[0].items():
   header.field[key].default = [value]
 return header

def dump_section_header(section_header):
 "return a section header structure unpacked from the start of the data given"
 return elf_structure(section_header_struct, section_header)

def elf_header():
 """return an ELF header structure with its three size fields filled in

    e_ehsize, e_phentsize and e_shentsize default to the packed sizes of
    the ELF header, a program header and a section header respectively"""
 header = elf_structure(elf_header_struct)
 sized_fields = [
  ('e_ehsize', header.packinfo),
  ('e_phentsize', program_header().packinfo),
  ('e_shentsize', section_header().packinfo),
 ]
 for name, packinfo in sized_fields:
  header.field[name].default = [struct.calcsize(packinfo)]
 return header

def dump_elf_header(filename):
 """read the ELF header of the named file and return it re-packed

    the file is opened in binary mode, since an ELF file is not text, and
    is closed even when reading fails"""
 handle = open(filename, 'rb')
 try:
  data = handle.read()
 finally:
  handle.close()
 header = elf_structure(elf_header_struct, data)
 return header.getvalue()

def dump_name(data, offset):
 """return the NUL-terminated name that starts at offset in data

    the terminator is not included; ValueError is raised if none follows"""
 end = data.index('\0', offset)
 return data[offset:end]

def dump_elf_file(filename):
 file = open(filename)
 data = file.read()
 file.close()
 file = elf_structure(elf_header_struct, data)
 file.data = data
 file.name_section_index = file.field['e_shstrndx'].rawvalue()
 if file.field['e_shoff']:
  file.section_headers = section_headers(file)
 for section_header in file.section_headers:
  section_type = section_header.field['sh_type'].rawvalue()
  print dump_name(file.data[file.names_offset:],
   section_header.field['sh_name'].rawvalue()), 'type: %d' % section_type
  if section_type == SHT_HASH:
   dump_hash_table(file, section_header)
  elif section_type == SHT_DYNSYM or section_type == SHT_SYMTAB:
   dump_symbol_table(file, section_header)
  else:
   dump_raw(
    file.data[section_header.field['sh_offset'].rawvalue():],
    section_header.field['sh_size'].rawvalue())

def unpack_integers(packed):
 "return packed unpacked as a tuple of little-endian unsigned 32-bit integers"
 count = len(packed) // 4
 return struct.unpack('<%dI' % count, packed)

def section_headers(file):
 """return the list of section header structures of a dumped ELF file

    file is the header structure made by dump_elf_file; as a side effect
    file.names_offset is set to the file offset of the section name
    string table"""
 table = file.data[int(file.field['e_shoff'].rawvalue()):]
 count = file.field['e_shnum'].rawvalue()
 entry_size = file.field['e_shentsize'].rawvalue()
 # the header of the name string table tells where the names are stored
 names_header = dump_section_header(
  table[file.name_section_index * entry_size:])
 file.names_offset = names_header.field['sh_offset'].rawvalue()
 return [dump_section_header(table[index * entry_size:])
  for index in range(count)]

def dump_hash_table(file, section_header):
 print 'dumping hash table'
 table = file.data[section_header.field['sh_offset'].rawvalue():]
 linked_symbols = file.section_headers[
  section_header.field['sh_link'].rawvalue()]
 dump_symbol_table(file, linked_symbols)
 size = section_header.field['sh_size'].rawvalue()
 file.hash_table = unpack_integers(table[0:size])
 nbucket, nchain = file.hash_table[0], file.hash_table[1]
 buckets = file.hash_table[2:2 + nbucket]
 chains = file.hash_table[2 + nbucket:]
 print buckets, chains
 for entry in range(len(file.symbol_table)):
  string = dump_string(file.linked_strings,
   file.symbol_table[entry].field['st_name'].rawvalue())
  print 'symbol table entry %d: "%s", 0x%x, 0x%x, %s' % (entry, string,
   file.symbol_table[entry].field['st_value'].rawvalue(),
   file.symbol_table[entry].field['st_size'].rawvalue(),
   dump_hash(file, string, buckets, chains))

def dump_hash(file, string, buckets, chains):
 """return a description of where string hashes to

    the text shows the hash value, the bucket number, and the chain of
    symbol indices followed from that bucket until index 0 is reached"""
 hashed = elf_hash(string)
 bucket = hashed % len(buckets)
 link = buckets[bucket]
 visited = ['%d' % link]
 while True:
  link = chains[link]
  visited.append('%d' % link)
  if link == 0: break
 return '0x%x -> %d: %s' % (hashed, bucket, '->'.join(visited))

def dump_symbol_table(file, section_header):
 symtab_entry_size = elf_structure(symtab_entry_struct).getsize()
 print 'symtab entry size: %d' % symtab_entry_size
 symbol_table = file.data[section_header.field['sh_offset'].rawvalue():]
 local_symbols = section_header.field['sh_info'].rawvalue()
 print 'local symbols: %d' % local_symbols
 size = section_header.field['sh_size'].rawvalue()
 file.symbol_table = []
 for offset in range(0, len(symbol_table[0:size]), symtab_entry_size):
  file.symbol_table.append(elf_structure(symtab_entry_struct,
   symbol_table[offset:offset + symtab_entry_size]))
 linked_strings = file.section_headers[
  section_header.field['sh_link'].rawvalue()]
 strings_size = linked_strings.field['sh_size'].rawvalue()
 file.linked_strings = file.data[linked_strings.field['sh_offset'].rawvalue():]
 file.linked_strings = file.linked_strings[0:strings_size]
 print 'linked strings: "%s"' % repr(file.linked_strings)
 for entry in range(len(file.symbol_table)):
  if entry < local_symbols: continue
  print 'symbol table entry %d: "%s", 0x%x, 0x%x' % (entry,
   dump_string(file.linked_strings,
    file.symbol_table[entry].field['st_name'].rawvalue()),
   file.symbol_table[entry].field['st_value'].rawvalue(),
   file.symbol_table[entry].field['st_size'].rawvalue())

def dump_string(string_table, offset):
 """return the NUL-terminated string that starts at offset in string_table

    the terminator is not included; ValueError is raised if none follows"""
 end = string_table.index('\0', offset)
 return string_table[offset:end]

def dump_raw(data, size):
 print 'dumping raw data'
 print repr(data[0:size])

def symtab():
 "return a new symbol table: a list holding one default entry"
 first_entry = symtab_entry()
 return [first_entry]
  
def dump_symtab(*args):
 print 'dumping symbol table:'

def create_elf_file(objectfile, elffile):
 """wrap the raw machine code in objectfile into an ELF file named elffile

    trailing NOP bytes (0x90) are stripped from the code first; returns
    True. Both files are opened in binary mode and are closed even when an
    error occurs."""
 image = elf_file()
 source = open(objectfile, 'rb')
 try:
  image.text = source.read().rstrip(chr(0x90)) # strip trailing NOPs
 finally:
  source.close()
 # build the image before opening the output, so that a failure here does
 # not leave a truncated output file behind
 contents = image.getvalue()
 output = open(elffile, 'wb')
 try:
  output.write(contents)
 finally:
  output.close()
 return True

def elf():
 """default entry point, make symlinks to other entry points as needed"""
 print __doc__

if __name__ == '__main__':
 # if this program was imported by another, the above test will fail,
 # and this following code won't be used...
 # the function to run is named after the command this was invoked as
 function = command; args = sys.argv[1:]  # default case
 if command == originalself:
  # invoked under its original name: if the first argument evaluates to a
  # function, run that one and pass it the remaining arguments
  try:
   if len(args) and eval('type(%s) == types.FunctionType' % args[0]):
    function = sys.argv[1]; args = sys.argv[2:]
  except: pass
 # NOTE(review): both eval calls execute text taken from the command line;
 # only run this with arguments you trust
 # call the chosen function with the arguments as strings, print its result
 print eval('%s%s' % (function, repr(tuple(args)))) or ''
else:
 # if you want something to be done on import, do it here; otherwise pass
 pass