The U-Boot boot process, using the Xilinx ZCU102 (Rev 1.0) as an example.


 * Startup Code (reset vector)
        bl    _main

arch/arm/lib/crt0_64.S [ENTRY(_main)]


 * This file handles the target-independent stages of the U-Boot
 * start-up where a C runtime environment is needed. Its entry point
 * is _main and is branched into from the target's start.S file.
 * _main execution sequence is:
 * 1. Set up initial environment for calling board_init_f().
 *    This environment only provides a stack and a place to store
 *    the GD ('global data') structure, both located in some readily
 *    available RAM (SRAM, locked cache...). In this context, VARIABLE
 *    global data, initialized or not (BSS), are UNAVAILABLE; only
 *    CONSTANT initialized data are available. GD should be zeroed
 *    before board_init_f() is called.

 * Set up initial C runtime environment and call board_init_f(0).
        ldr     x0, =(CONFIG_TPL_STACK)
#elif defined(CONFIG_SPL_BUILD) && defined(CONFIG_SPL_STACK)
        ldr     x0, =(CONFIG_SPL_STACK)
        adr     x0, __bss_start
        add     x0, x0, #CONFIG_SYS_INIT_SP_BSS_OFFSET
        ldr     x0, =(CONFIG_SYS_INIT_SP_ADDR)
        bic     sp, x0, #0xf    /* 16-byte alignment for ABI compliance */
        mov     x0, sp
        bl      board_init_f_alloc_reserve
        mov     sp, x0
        /* set up gd here, outside any C code */
        mov     x18, x0
        bl      board_init_f_init_reserve


 * 2. Call board_init_f(). This function prepares the hardware for
 *    execution from system RAM (DRAM, DDR...). As system RAM may not
 *    be available yet, board_init_f() must use the current GD to
 *    store any data which must be passed on to later stages. These
 *    data include the relocation destination, the future stack, and
 *    the future GD location.
        mov     x0, #0
        bl      board_init_f


 * 3. Set up intermediate environment where the stack and GD are the
 *    ones allocated by board_init_f() in system RAM, but BSS and
 *    initialized non-const data are still not available.
#if !defined(CONFIG_SPL_BUILD)
 * Set up intermediate environment (new sp and gd) and call
 * relocate_code(addr_moni). The trick here is that we'll return
 * 'here', but relocated.
        ldr     x0, [x18, #GD_START_ADDR_SP]    /* x0 <- gd->start_addr_sp */
        bic     sp, x0, #0xf    /* 16-byte alignment for ABI compliance */
        ldr     x18, [x18, #GD_NEW_GD]          /* x18 <- gd->new_gd */

        adr     lr, relocation_return
        /* Add in link-vs-runtime offset */
        adr     x0, _start              /* x0 <- Runtime value of _start */
        ldr     x9, _TEXT_BASE          /* x9 <- Linked value of _start */
        sub     x9, x9, x0              /* x9 <- Run-vs-link offset */
        add     lr, lr, x9
        /* Add in link-vs-relocation offset */
        ldr     x9, [x18, #GD_RELOC_OFF]        /* x9 <- gd->reloc_off */
        add     lr, lr, x9      /* new return address after relocation */
        ldr     x0, [x18, #GD_RELOCADDR]        /* x0 <- gd->relocaddr */


 * 4a. For U-Boot proper (not SPL), call relocate_code(). This function
 *     relocates U-Boot from its current location into the relocation
 *     destination computed by board_init_f().
        b       relocate_code



 * 4b. For SPL, board_init_f() just returns (to crt0). There is no
 *     code relocation in SPL.


 * 5. Set up final environment for calling board_init_r(). This
 *    environment has BSS (initialized to 0), initialized non-const
 *    data (initialized to their intended value), and stack in system
 *    RAM (for SPL moving the stack and GD into RAM is optional - see
 *    CONFIG_SPL_STACK_R). GD has retained values set by board_init_f().


 * 6. For U-Boot proper (not SPL), some CPUs have some work left to do
 *    at this point regarding memory, so call c_runtime_cpu_setup.
 * Set up final (full) environment
        bl      c_runtime_cpu_setup             /* still call old routine */
#endif /* !CONFIG_SPL_BUILD */
#if defined(CONFIG_SPL_BUILD)
        bl      spl_relocate_stack_gd           /* may return NULL */
        /* set up gd here, outside any C code, if new stack is returned */
        cmp     x0, #0
        csel    x18, x0, x18, ne
         * Perform 'sp = (x0 != NULL) ? x0 : sp' while working
         * around the constraint that conditional moves can not
         * have 'sp' as an operand
        mov     x1, sp
        cmp     x0, #0
        csel    x0, x0, x1, ne
        mov     sp, x0

 * Clear BSS section
        ldr     x0, =__bss_start                /* this is auto-relocated! */
        ldr     x1, =__bss_end                  /* this is auto-relocated! */
        str     xzr, [x0], #8
        cmp     x0, x1
        b.lo    clear_loop

        /* call board_init_r(gd_t *id, ulong dest_addr) */
        mov     x0, x18                         /* gd_t */
        ldr     x1, [x18, #GD_RELOCADDR]        /* dest_addr */


 * 7. Branch to board_init_r().
        b       board_init_r                    /* PC relative jump */

        /* NOTREACHED - board_init_r() does not return */


void board_init_f(ulong boot_flags)
        gd->flags = boot_flags;
        gd->have_console = 0;

        if (initcall_run_list(init_sequence_f))


void board_init_r(gd_t *new_gd, ulong dest_addr)
         * Set up the new global data pointer. So far only x86 does this
         * here.
         * TODO: Consider doing this for all archs, or
         * dropping the new_gd parameter.

        int i;

#if !defined(CONFIG_X86) && !defined(CONFIG_ARM) && !defined(CONFIG_ARM64)
        gd = new_gd;
        gd->flags &= ~GD_FLG_LOG_READY;

        for (i = 0; i < ARRAY_SIZE(init_sequence_r); i++)
                init_sequence_r[i] += gd->reloc_off;

        if (initcall_run_list(init_sequence_r))

        /* NOTREACHED - run_main_loop() does not return */
