Hello world code

Using the C code from hello-jni as an example, main.c is:

#include <stdio.h>

int main(void) {
    printf("hello world\n");
    return 0;
}

How main function is called

We set the executable to be statically linked, compile it with ndk-build, and run readelf -h on the result to get the entry point:

  Entry point address:               0x4002a4

Note that in kernel v3.18, the execve system call follows the call chain SyS_execve -> do_execve -> do_execve_common -> exec_binprm -> search_binary_handler -> load_elf_binary -> start_thread. In start_thread, the pc in pt_regs is set to the entry point of the ELF file.

/*
 * Excerpt of the ELF loader; parts not relevant here are elided and marked
 * with "......". The visible code picks the user-space entry address
 * (elf_entry) and passes it to start_thread() together with bprm->p.
 */
static int load_elf_binary(struct linux_binprm *bprm)
{
	......
	if (elf_interpreter) {
	......
	} else {
		/* No interpreter: take the entry point recorded in the ELF header. */
		elf_entry = loc->elf_ex.e_entry;
		/* If BAD_ADDR() flags the entry address, fail with -EINVAL. */
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}
	......
	/* elf_entry becomes the new pc and bprm->p the new sp (see start_thread). */
	start_thread(regs, elf_entry, bprm->p);
}

/*
 * Reset a saved register frame and aim it at a new program counter.
 *
 * @regs: register frame to reinitialise; it is zeroed in full first, so every
 *        field not assigned below ends up as 0.
 * @pc:   address stored into regs->pc.
 *
 * regs->syscallno is set to all-ones (~0UL).
 */
static inline void start_thread_common(struct pt_regs *regs, unsigned long pc)
{
	/* Wipe the whole frame before filling in the individual fields. */
	memset(regs, 0, sizeof *regs);

	regs->pc = pc;
	regs->syscallno = ~0UL;
}

/*
 * Prepare a register frame so that execution resumes at @pc with stack
 * pointer @sp.
 *
 * @regs: register frame to set up; it is first reset by start_thread_common().
 * @pc:   program counter handed to start_thread_common().
 * @sp:   value stored into regs->sp.
 *
 * regs->pstate is set to PSR_MODE_EL0t (judging by the name, the EL0
 * processor mode -- confirm against the arm64 headers).
 */
static inline void start_thread(struct pt_regs *regs, unsigned long pc,
				unsigned long sp)
{
	/* Must run first: it clears the entire frame. */
	start_thread_common(regs, pc);

	regs->sp = sp;
	regs->pstate = PSR_MODE_EL0t;
}

elf_interpreter refers to the dynamic loader. For a dynamically linked binary, elf_entry is the entry address of the loader; for a statically linked binary, elf_entry is the entry point address from the ELF header, which is 0x4002a4 in our example. Therefore, when the execve syscall finishes, execution returns to user-space address 0x4002a4, which is the _start function. _start copies sp into x0 and branches to _start_main, which in turn calls __libc_init, passing the raw argument pointer (the initial sp) as the first parameter and a pointer to the main function as the third parameter.

00000000004002a4 <_start>:
  4002a4:	910003e0 	mov	x0, sp
  4002a8:	14000007 	b	4002c4 <_start_main>

00000000004002ac <__atexit_handler_wrapper>:
  4002ac:	a9bf7bfd 	stp	x29, x30, [sp,#-16]!
  4002b0:	910003fd 	mov	x29, sp
  4002b4:	b4000040 	cbz	x0, 4002bc <__atexit_handler_wrapper+0x10>
  4002b8:	d63f0000 	blr	x0
  4002bc:	a8c17bfd 	ldp	x29, x30, [sp],#16
  4002c0:	d65f03c0 	ret

00000000004002c4 <_start_main>:
  4002c4:	d00002e2 	adrp	x2, 45e000 <prof_node+0xe0>
  4002c8:	d00002e6 	adrp	x6, 45e000 <prof_node+0xe0>
  4002cc:	d00002e5 	adrp	x5, 45e000 <prof_node+0xe0>
  4002d0:	d00002e4 	adrp	x4, 45e000 <prof_node+0xe0>
  4002d4:	a9bd7bfd 	stp	x29, x30, [sp,#-48]!
  4002d8:	d2800001 	mov	x1, #0x0                   	// #0
  4002dc:	910003fd 	mov	x29, sp
  4002e0:	f947d8c6 	ldr	x6, [x6,#4016]
  4002e4:	910063a3 	add	x3, x29, #0x18
  4002e8:	f94788a5 	ldr	x5, [x5,#3856]
  4002ec:	f9473884 	ldr	x4, [x4,#3696]
  4002f0:	f947ac42 	ldr	x2, [x2,#3928]
  4002f4:	f9000fa6 	str	x6, [x29,#24]
  4002f8:	f90013a5 	str	x5, [x29,#32]
  4002fc:	f90017a4 	str	x4, [x29,#40]
  400300:	94000c0b 	bl	40332c <__libc_init>

......

0000000000400328 <main>:
  400328:	a9bf7bfd 	stp	x29, x30, [sp,#-16]!
  40032c:	910003fd 	mov	x29, sp
  400330:	d00001e0 	adrp	x0, 43e000 <__sfp_handle_exceptions+0x20>
  400334:	91020000 	add	x0, x0, #0x80
  400338:	9400027a 	bl	400d20 <puts>
  40033c:	2a1f03e0 	mov	w0, wzr
  400340:	a8c17bfd 	ldp	x29, x30, [sp],#16
  400344:	d65f03c0 	ret

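The corresponding C code for _start and __libc_init is shown below. Note that in this listing the C helper that _start branches to is named do_arm64_start, whereas the disassembly above shows it as _start_main.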

/*
 * C half of the program entry: gathers the addresses of the preinit/init/fini
 * arrays into a structors_array_t and passes control to __libc_init() along
 * with the raw argument pointer and the address of main().
 *
 * raw_args: pointer received from _start (the value _start places in x0).
 */
__LIBC_HIDDEN__ void do_arm64_start(void* raw_args) {
  structors_array_t structors;

  structors.fini_array = &__FINI_ARRAY__;
  structors.init_array = &__INIT_ARRAY__;
  structors.preinit_array = &__PREINIT_ARRAY__;

  /* Second argument (onexit) is passed as NULL. */
  __libc_init(raw_args, NULL, main, &structors);
}

/*
 * Assembly definition of _start, placed in .text and declared as a global,
 * hidden, function-typed symbol. It copies the stack pointer into x0
 * (sp + xzr) and then branches to do_arm64_start without linking, so
 * do_arm64_start receives the initial sp in x0.
 */
__asm__ (
"        .text                      \n"
"        .align  2                  \n"
"        .global _start             \n"
"        .hidden _start             \n"
"        .type   _start, %function  \n"
"_start:                            \n"
"        add     x0, sp, xzr        \n"
"        b       do_arm64_start   \n"
"        .size   _start, .-_start   \n"
);

// Static-executable libc entry point: sets up the C library, runs the
// program's constructors, invokes the program's main function and exits
// with its return value. Never returns.
//
//   raw_args   pointer used to construct the KernelArgumentBlock
//   onexit     unused (see the note inside the body)
//   slingshot  the program's main(argc, argv, envp)
//   structors  preinit/init/fini arrays supplied by the startup code
__noreturn void __libc_init(void* raw_args,
                           void (*onexit)(void) __unused,
                           int (*slingshot)(int, char**, char**),
                           structors_array_t const * const structors) {
	KernelArgumentBlock kernel_args(raw_args);
	__libc_init_main_thread(kernel_args);

	// errno lives in TLS, so the globals can only be initialized once the
	// main thread has been set up above.
	__init_thread_stack_guard(__get_thread());
	__libc_init_globals(kernel_args);

	__libc_init_AT_SECURE(kernel_args);
	__libc_init_common(kernel_args);

	apply_gnu_relro();

	// onexit is deliberately ignored: several Linux ABIs never pass it, and
	// the ones that do never use it.

	// Pre-initializers run before the regular initializers.
	call_array(structors->preinit_array);
	call_array(structors->init_array);

	// If the executable lists destructors in .fini_array, register them so
	// that they run when the program exits normally.
	if (structors->fini_array) {
		__cxa_atexit(__libc_fini, structors->fini_array, NULL);
	}

	// Run the program and terminate with whatever status it returns.
	const int status = slingshot(kernel_args.argc, kernel_args.argv, kernel_args.envp);
	exit(status);
}

References

  1. Understanding the Execution of a C Program - x86 Version