gllvm/examples/linux-kernel/built-in-parsing.py
2018-06-15 16:25:32 -07:00

162 lines
8.8 KiB
Python

## Building the Linux kernel from LLVM bitcode requires writing scripts to copy, extract and recompile a lot of files in the right order.
## Such a script is only valid for one configuration of the kernel.
## The goal of this Python script is to automate the script-building process for any configuration of the kernel.
## It takes one or more arguments: the first argument is the build directory (exported as $build_home in the generated script, and where the link-args file is written), and all the others are paths to folders whose bitcode should not be used. The generated script itself is written to build_script.sh in the current directory.
import sys,os
import subprocess
# This is where most of the script gets written.
# This function takes as arguments:
# - a list of paths (each one split into its components) that should not be translated into bitcode
# - the depth from the root (this is a recursive function)
# - the path from the root to the current folder (it should contain `depth` components)
# Log file that is inspected after each get-bc run.
_WRAPPER_LOG = "/vagrant/wrapper-logs/wrapper.log"
# Prefix of the log lines reporting an object whose bitcode could not be read.
_BC_ERROR_PREFIX = "WARNING:Error reading the .llvm_bc section of ELF file"


def _list_archive_members(base_dir):
    """Return the member names `ar -t` reports for base_dir + "built-in.o".

    The raw listing is also left on disk in base_dir + "arbi".
    """
    listing_path = base_dir + "arbi"
    with open(listing_path, "w") as listing:
        subprocess.call(["ar", "-t", base_dir + "built-in.o"], stdout=listing)
    with open(listing_path, "r") as listing:
        return [line.split('\n')[0] for line in listing]


def _entries_at_depth(members, depth):
    """Return, in first-seen order, the distinct path components found at `depth`."""
    entries = []
    for member in members:
        if not member:
            # A blank line would otherwise be treated as a folder named "".
            continue
        words = member.split('/')
        # Exception for the arch folder, which does not have a built-in.o at
        # its root: "arch/<name>" is folded into a single component.
        if words[0] == "arch":
            words[0] = words[0] + '/' + words[1]
            # Second exception for arch: not all folders are referenced in the
            # arch-level built-in.o. The ones named in the archbi list are
            # folded into the first component as well.
            if words[2] in archbi:
                words[0] = words[0] + '/' + words[2]
        # Some objects are not listed in a built-in.o except at the root, so
        # they are handled separately in order not to mess up the link order.
        if words[depth] not in entries and member not in standalone_objects:
            entries.append(words[depth])
    return entries


def _objects_without_bitcode():
    """Return the object paths the wrapper log reports as having no readable bitcode."""
    # Make sure the log exists even if get-bc did not write anything to it.
    subprocess.call(["touch", _WRAPPER_LOG])
    objects = []
    with open(_WRAPPER_LOG, "r") as llvm_log:
        for line in llvm_log:
            if line.startswith(_BC_ERROR_PREFIX):
                # Skip the separator after the prefix and drop the last two
                # characters (presumably a trailing '.' and the newline --
                # confirm against the wrapper's log format).
                path = line[len(_BC_ERROR_PREFIX) + 1:-2]
                if path:
                    objects.append(path)
    return objects


def _emit_excluded_folder(target):
    """Emit the commands that reuse an excluded folder's built-in.o as native code."""
    out.write("convert-thin-archive.sh " + target + "/built-in.o \n")
    out.write("cp " + target + "/built-in.o.new $build_home/built-ins/" + target + "bi.o \n \n")
    link_args.write("built-ins/" + target + "bi.o ")


def _emit_object(base_dir, direc):
    """Emit the commands that extract and recompile the bitcode of a single .o file."""
    objects_dir = "$build_home/built-ins/" + base_dir + "objects"
    out.write("get-bc -b " + base_dir + direc + "\n")
    out.write("mkdir -p " + objects_dir + " \n")
    out.write("cp " + base_dir + direc + ".bc " + objects_dir + " \n")
    out.write("clang -c -no-integrated-as -mcmodel=kernel -o $build_home/built-ins/"
              + base_dir + direc + " " + objects_dir + "/" + direc + ".bc \n \n")
    # The recompiled object is then added to the linker arguments file.
    link_args.write("built-ins/" + base_dir + direc + " ")


def _emit_folder(base_dir, direc):
    """Run get-bc on a folder's built-in.o now and emit the commands to rebuild it.

    Objects that the wrapper log reports as having no readable bitcode are
    copied as they are and added to the linker arguments; the extracted
    bitcode archive, if one was produced, is copied and recompiled.
    """
    flat_name = direc.replace('/', '_')
    dest = "built-ins/" + base_dir + flat_name

    # Set the previous log aside so that only this run's messages are parsed.
    if os.path.isfile(_WRAPPER_LOG):
        os.rename(_WRAPPER_LOG, "/vagrant/wrapper-logs/before_" + flat_name + ".log")
    subprocess.call(["get-bc", "-b", base_dir + direc + "/built-in.o"])

    assembly_objects = _objects_without_bitcode()
    if assembly_objects:
        out.write("mkdir -p $build_home/" + dest + "\n")
        for asf in assembly_objects:
            out.write("cp " + asf + " $build_home/" + dest + "\n")
            link_args.write(dest + '/' + asf.split('/')[-1] + " ")

    bitcode = base_dir + direc + "/built-in.o.a.bc"
    if os.path.isfile(bitcode):
        out.write("cp " + bitcode + " $build_home/" + dest + "bi.o.bc \n")
        out.write("clang -c -no-integrated-as -mcmodel=kernel -o $build_home/" + dest
                  + "bibc.o $build_home/" + dest + "bi.o.bc \n \n")
        link_args.write(dest + "bibc.o ")


def write_script(excluded_paths, depth, base_dir):
    """Write the build commands for everything listed in base_dir + "built-in.o".

    Shell commands go to the module-level `out` file and linker arguments to
    the module-level `link_args` file. The function also reads the
    module-level `archbi`, `standalone_objects` and `excluded_dirs` lists.

    Args:
        excluded_paths: paths that must not be translated into bitcode, each
            one given as a list of path components.
        depth: index of the path component handled by this call (0 at the
            root); the function recurses with depth + 1.
        base_dir: path from the root to the current folder, either "" or
            ending with '/'.

    NOTE(review): entries folded by the arch exception (e.g. "arch/x86") are
    compared against single path components of excluded_paths, so exclusions
    located under arch/ do not appear to match -- confirm whether that is
    intended.
    """
    members = _list_archive_members(base_dir)
    out.write("mkdir -p $build_home/built-ins/" + base_dir + " \n")

    # Folders containing excluded folders, which need to be handled recursively.
    excluded_roots = [path[depth] for path in excluded_paths]

    for direc in _entries_at_depth(members, depth):
        target = base_dir + direc
        print(target)
        if direc in excluded_roots:
            if target in excluded_dirs:
                # The folder itself is excluded: keep its native built-in.o.
                _emit_excluded_folder(target)
            else:
                # Only a sub-folder is excluded: keep the relevant exclusions
                # and recurse into the folder.
                relevant_excluded = [path for path in excluded_paths
                                     if path[depth] == direc and len(path) > depth + 1]
                if relevant_excluded:
                    write_script(relevant_excluded, depth + 1, target + "/")
        elif direc[-2:] == ".o":
            # The "directory" is actually a single object file.
            _emit_object(base_dir, direc)
        else:
            _emit_folder(base_dir, direc)
# Driver: reads the command line, calls write_script, then appends the
# commands for the lib archives, the standalone files and the final link.
if len(sys.argv) < 2 or not sys.argv[1]:
    sys.exit("usage: built-in-parsing.py BUILD_DIR [EXCLUDED_DIR ...]")

# excluded_dirs is the list of folders excluded from bitcode translation.
# A single trailing '/' is removed so that the entries compare equal to the
# paths built by write_script and split into clean components below.
excluded_dirs = [arg[:-1] if arg.endswith('/') else arg for arg in sys.argv[2:]]

# Build directory; it is exported as $build_home in the generated script.
build_dir = sys.argv[1]
if not build_dir.endswith('/'):
    build_dir += '/'

# excluded_dirs as a 2D list, for easier access to the individual folders of each path.
excluded = [path.split('/') for path in excluded_dirs]

out = open("build_script.sh", "w+")
link_args = open(build_dir + "link-args", "w+")
out.writelines("# Script written by the built-in-parsing.py script \n")
out.writelines("export build_home=" + build_dir + "\n")

# List exceptions ("usr/initramfs_data.o" was commented out of this list).
standalone_objects = ["arch/x86/kernel/head_64.o", "arch/x86/kernel/head64.o",
                      "arch/x86/kernel/ebda.o", "arch/x86/kernel/platform-quirks.o"]
archbi = ["lib", "pci", "video", "power"]

# Calling the main function
write_script(excluded, 0, "")

# Dealing with both lib files
out.writelines("get-bc -b lib/lib.a \n ")
out.writelines("mkdir -p $build_home/lib\n")
out.writelines("cp lib/lib.a.bc $build_home/lib/ \n")
out.writelines("clang -c -no-integrated-as -mcmodel=kernel -o $build_home/lib/lib.a.o $build_home/lib/lib.a.bc \n")

# Set the previous log aside so that only the arch/x86/lib messages are parsed.
if os.path.isfile("/vagrant/wrapper-logs/wrapper.log"):
    os.rename("/vagrant/wrapper-logs/wrapper.log", "/vagrant/wrapper-logs/before_lib.log")
out.writelines("mkdir -p $build_home/arch/x86/lib/objects\n")
subprocess.call(["get-bc", "-b", "arch/x86/lib/lib.a"])
# Make sure the log exists even if get-bc did not write anything to it.
subprocess.call(["touch", "/vagrant/wrapper-logs/wrapper.log"])

bc_error_prefix = "WARNING:Error reading the .llvm_bc section of ELF file"
assembly_objects = []
with open("/vagrant/wrapper-logs/wrapper.log", "r") as llvm_log:
    for line in llvm_log:
        if line.startswith(bc_error_prefix):
            # Skip the separator after the prefix and drop the last two
            # characters (presumably a trailing '.' and the newline --
            # confirm against the wrapper's log format).
            asf = line[len(bc_error_prefix) + 1:-2]
            if asf:
                assembly_objects.append(asf)

# These objects are deliberately not added to link-args: the final ld command
# picks them up through arch/x86/lib/objects/* to keep the order of linked files.
for asf in assembly_objects:
    out.writelines("cp " + asf + " $build_home/arch/x86/lib/objects/ \n")
out.writelines("cp arch/x86/lib/lib.a.bc $build_home/arch/x86/lib/lib.a.bc \n")
out.writelines("clang -c -no-integrated-as -mcmodel=kernel -o $build_home/arch/x86/lib/lib.a.o $build_home/arch/x86/lib/lib.a.bc \n \n")

# Deal with individual files
out.writelines("cp arch/x86/kernel/vmlinux.lds $build_home \n")
out.writelines("cp .tmp_kallsyms2.o $build_home \n")
for sto in standalone_objects:
    out.writelines("cp --parents " + sto + " $build_home \n")

out.writelines("\n#linking command \n")
out.writelines("cd $build_home \n")

# Final linking command
out.writelines("ld --build-id -T vmlinux.lds --whole-archive ")
for sto in standalone_objects:
    out.writelines(sto + " ")
out.writelines("@link-args ")
out.writelines("--no-whole-archive --start-group lib/lib.a.o arch/x86/lib/lib.a.o arch/x86/lib/objects/* .tmp_kallsyms2.o --end-group -o vmlinux")

# Flush and release both output files explicitly.
link_args.close()
out.close()