S3C2440 code relocation experiment (Part 3)

Publisher:水云间梦Latest update time:2020-04-19 Source: eefocusKeywords:S3C2440 Reading articles on mobile phones Scan QR code
Read articles on your mobile phone anytime, anywhere

Code relocation

We will now solve the code relocation problem introduced by the code relocation experiment (I).

For S3C2440:


When the BIN file is smaller than 4KB:

If Nand mode is used for startup, there will be no problem.

If it is started in Nor mode, we can just relocate the .data segment

When the BIN file is larger than 4KB:

If Nand mode is used for booting, the entire program needs to be relocated, including the code segment and data segment.

If it is started in Nor mode, only the .data segment needs to be relocated.

Only relocate the .data segment and clear the .bss segment

Normally, the code that relocates the .data segment should be written in assembly language; for simplicity, I wrote it in C. Strictly speaking, no C function should be called before the .data segment has been relocated and the .bss segment has been cleared. However, I made sure that these two functions do not access any global variables, so as long as the stack pointer has been set up correctly, they can be called safely.

The linker script relocate.lds used is as follows:


/*
 * relocate.lds - .text and .rodata are linked to run in place starting at
 * address 0; only .data gets a separate run-time address (0x30000000).
 * The symbols defined here are consumed by relocate.c.
 */
SECTIONS {

/* Code is linked at address 0. */
.text 0 : {*(.text)}

.rodata : {*(.rodata)}

/* Location right after .rodata: this is where the initial contents of
 * .data are stored in the image (its load address). */
_data_offset = .;

/* Run-time address 0x30000000, load address _data_offset. */
.data 0x30000000 : AT(_data_offset) { 

/* Same value as _data_offset, obtained through LOADADDR(). */
_data_LMA = LOADADDR(.data);

/* Run-time start of .data. */
_data_start = .;

*(.data)

/* First address after the last byte of .data. */
_data_end = .;

}

/* No alignment is applied here, so _bss_start is simply the address at
 * which .data ends, whatever its low two bits happen to be. */
_bss_start = .;

.bss : { *(.bss) }

/* First address after the last byte of .bss. */
_bss_end = .;

}


relocate.c


/*
 * Symbols defined by the linker script relocate.lds.  They only mark
 * addresses: relocate.c takes their addresses and never reads or writes
 * them as ordinary variables.
 */
extern unsigned char _data_offset;           /* load address of .data    */
extern unsigned char _data_start, _data_end; /* run-time bounds of .data */
extern unsigned char _bss_start, _bss_end;   /* bounds of .bss           */


/*
 * copyDataSection - copy the initial contents of .data from its load
 * address to its run-time address, one byte at a time.
 *
 * All bounds come from linker-script symbols: _data_offset is where the
 * initial values are stored in the image, and [_data_start, _data_end) is
 * where the program expects .data at run time.
 *
 * The function touches no global variables of its own, so it can be called
 * before .data/.bss have been set up, provided the stack pointer is valid.
 */
void copyDataSection(void){
    const volatile unsigned char *src = &_data_offset;
    volatile unsigned char *dst = &_data_start;
    volatile unsigned char *end = &_data_end;

    /*
     * _data_end is the first address AFTER the section, so the comparison
     * must be '<'.  With '<=' one extra byte was copied past the end of
     * .data (and nothing at all should be copied when .data is empty).
     */
    while (dst < end) {
        *dst++ = *src++;
    }
}


/*
 * clearBSS - write zero to every byte in [_bss_start, _bss_end).
 *
 * Both bounds are linker-script symbols; _bss_end itself is not written.
 * Like copyDataSection(), this uses no global variables of its own.
 */
void clearBSS(void){
    volatile unsigned char *const limit = &_bss_end;
    volatile unsigned char *cursor;

    for (cursor = &_bss_start; cursor < limit; cursor++) {
        *cursor = 0;
    }
}


CRT0.S


/*
 * CRT0.S - entry code: disable the watchdog, set up the clocks, pick a
 * stack depending on the boot mode, initialise SDRAM, relocate .data,
 * clear .bss and finally call main.
 */
.text 

.global _start

_start:

/* 1. Turn off the watchdog: write 0 to the register at 0x53000000 */

ldr r0, =0x53000000

ldr r1, =0

str r1, [r0]

/* 2. Set the clock */

/* 2.1 Set LOCKTIME(0x4C000000)=0xFFFFFFFF */

ldr r0, =0x4C000000 

ldr r1, =0xFFFFFFFF

str r1, [r0]

/* 2.2 Set CLKDIVN(0x4C000014) = 0x5, i.e. FCLK : HCLK : PCLK = 400 MHz : 100 MHz : 50 MHz */

ldr r0, =0x4C000014

ldr r1, =0x5

str r1, [r0]

/* 2.3 Set the CPU to asynchronous mode: read-modify-write CP15 c1, setting bits 31 and 30 */

mrc p15,0,r0,c1,c0,0

orr r0,r0,#0xc0000000 /* #R1_nF:OR:R1_iA */

mcr p15,0,r0,c1,c0,0

/* 2.4 Set MPLLCON(0x4C000004)=(92<<12) | (1 << 4) | (1 << 0)
 *   m = MDIV + 8 = 100
 *   p = PDIV + 2 = 3
 *   s = SDIV = 1
 *   Mpll = (2 * m * Fin) / (p * 2 ^ s) = (2 * 100 * 12) / (3 * 2 ^ 1) = 400 MHz
 */

ldr r0, =0x4C000004

ldr r1, =(92<<12) | (1 << 4) | (1 << 0)

str r1, [r0]

/* Once the PLL has been programmed, the clock is held for the lock time
 * until the PLL output is stable; after that the CPU runs at the new
 * frequency FCLK.
 */


/* 3. Set up the stack.
 * NOR boot and NAND boot are told apart automatically: write 0 to
 * address 0 and read it back. If the write took effect, address 0 is
 * writable memory (NAND boot); otherwise it is NOR flash (NOR boot).
 */


ldr r0, =0

ldr r1, [r0] /* back up the original value at address 0 */

str r0, [r0] /* write 0 to address 0 */

ldr r2, [r0] /* read it back */

cmp r1, r2 /* ne: the value changed, so the write took effect (NAND boot) */

ldr sp, =0x40000000 + 4096 /* NOR boot: default stack top; this load leaves the flags from cmp untouched */

movne sp, #4096 /* NAND boot: override the stack top with 4096 */

strne r1, [r0] /* NAND boot: restore the original value at address 0 */


// Initialize the SDRAM memory controller (sdram_init is defined in another file)

bl sdram_init

/* C helpers from relocate.c: they need a valid stack but use no global variables */
bl copyDataSection

bl clearBSS


/* .data and .bss are ready: run the C program */
bl main


/* Spin forever if main ever returns */
halt:


b halt


main.c:


#include "myprintf.h"

#include "uart.h"

#include "util.h"


/* Initialised global: lives in .data, so it only holds 'A' at run time if
 * the .data segment has been relocated to writable memory. */
char gCh = 'A';

/* Uninitialised global: lives in .bss. */
char gCh1;

/*
 * main - relocation test.  Prints a banner once, then keeps incrementing
 * the global gCh and printing it as a character and as a hex value.
 * Being able to modify gCh shows that .data now lives in writable memory.
 *
 * uart0_init(), printf() and wait() come from the project headers
 * included at the top of this file.
 */
int main(void) {
    uart0_init();

    /* The banner must end with real escape sequences: the previous
     * format string "%snr" printed the literal letters 'n' and 'r'. */
    printf("%s\n\r", "NorFlash Relocate Test.");

    while(1) {
        gCh++;
        printf("%c(0x%x)", gCh, gCh);
        wait(800000); /* presumably a busy-wait delay; defined elsewhere */
    }

    /* Not reached: the loop above never terminates. */
    return 0;
}


Burn the compiled BIN file into NorFlash, start the development board, and find that the main function can modify the global variables normally, as shown in the following figure:

insert image description here

Improving the efficiency of the above code

As can be seen from the copy function, we copy only one byte at a time. However, the SDRAM data bus of the JZ2440 is 32 bits wide, so copying byte by byte is very inefficient. We therefore copy 4 bytes at a time instead, and modify the relocate.c file as follows:

relocate.c


/*
 * Symbols defined by the linker script relocate.lds, declared as 32-bit
 * words so that relocate.c can walk the sections 4 bytes at a time.
 * Only their addresses are used; they are never read or written as
 * ordinary variables.
 */
extern unsigned int _data_offset;           /* load address of .data    */
extern unsigned int _data_start, _data_end; /* run-time bounds of .data */
extern unsigned int _bss_start, _bss_end;   /* bounds of .bss           */


/*
 * copyDataSection - copy the initial contents of .data from its load
 * address to its run-time address, one 32-bit word at a time.
 *
 * All bounds come from linker-script symbols: _data_offset is where the
 * initial values are stored in the image, and [_data_start, _data_end) is
 * where the program expects .data at run time.  Because whole words are
 * copied, up to 3 bytes beyond _data_end are transferred when the section
 * size is not a multiple of 4.
 *
 * The function touches no global variables of its own, so it can be called
 * before .data/.bss have been set up, provided the stack pointer is valid.
 */
void copyDataSection(void){
    const volatile unsigned int *src = &_data_offset;
    volatile unsigned int *dst = &_data_start;
    volatile unsigned int *end = &_data_end;

    /*
     * _data_end is the first address AFTER the section, so the comparison
     * must be '<'.  With '<=' a whole extra word was copied whenever the
     * section ended on a word boundary (including when .data is empty).
     */
    while (dst < end) {
        *dst++ = *src++;
    }
}


/*
 * clearBSS - zero the .bss segment one 32-bit word at a time, starting at
 * _bss_start and stopping once the cursor reaches or passes _bss_end.
 *
 * Both bounds are linker-script symbols.  The stores are word-sized, so
 * _bss_start is expected to be 4-byte aligned; the function itself does
 * not check this.
 */
void clearBSS(void){
    volatile unsigned int *const limit = &_bss_end;
    volatile unsigned int *cursor;

    for (cursor = &_bss_start; cursor < limit; cursor++) {
        *cursor = 0;
    }
}


Compile and burn the program to the development board again, start it in Nor mode, and power on to observe.

insert image description here

From the animation above, we can see that the global variable gCh was initialised to 'A' (0x41), but it has now been cleared to 0. Why is this? Why does a value in the .data segment get wiped when the .bss segment is cleared?

Let's look at the disassembly of the .data and .bss sections and of the function that clears the .bss segment, shown below:


Disassembly of section .data:


30000000 <_data_start>:

30000000: 41 .byte 0x41

Disassembly of section .bss:


30000001 <gCh1>:

...


00000d2c <clearBSS>:

 d2c: e52db004 push {fp} ; (str fp, [sp, #-4]!)

 d30: e28db000 add fp, sp, #0 ; 0x0

 d34: e24dd00c sub sp, sp, #12 ; 0xc

 d38: e59f3040 ldr r3, [pc, #64] ; d80

 d3c: e50b300c str r3, [fp, #-12]

 d40: e59f303c ldr r3, [pc, #60] ; d84

 d44: e50b3008 str r3, [fp, #-8]

 d48:ea000005 b d64

 d4c: e51b200c ldr r2, [fp, #-12]

 d50: e3a03000 mov r3, #0 ; 0x0

 d54: e5c23000 strb r3, [r2]

 d58: e51b300c ldr r3, [fp, #-12]

 d5c: e2833001 add r3, r3, #1 ; 0x1

 d60: e50b300c str r3, [fp, #-12]

 d64: e51b200c ldr r2, [fp, #-12]

 d68: e51b3008 ldr r3, [fp, #-8]

 d6c: e1520003 cmp r2, r3

 d70: 3afffff5 bcc d4c

 d74: e28bd000 add sp, fp, #0 ; 0x0

 d78:e8bd0800 pop {fp}

 d7c:e12fff1e bx lr

 d80: 30000001 .word 0x30000001

 d84: 30000002 .word 0x30000002


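From this assembly code, we can see that the function clears the range from 0x30000001 up to, but not including, 0x30000002. The .data segment is at 0x30000000, so in theory it should not be touched. (Note that this particular listing uses strb and advances the pointer by 1, i.e. it shows the byte-wise version of the loop; in the word-wise version the store is 4 bytes wide and the pointer advances by 4, but the start and end addresses are the same.)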


If we look at the circuit diagram of SDRAM again, we can see that:

insert image description here

The lowest two address lines from the CPU are not connected to the SDRAM chips. Because the two SDRAM chips together form a 32-bit data bus, the memory is addressed in units of 4 bytes: the lowest two address bits are ignored and treated as 0.


So when 4 bytes are accessed at a time, whether the access address is 0x30000001, 0x30000002 or 0x30000003, the access actually lands on the four bytes starting at 0x30000000. That is why the data in the .data segment is cleared when the .bss segment is cleared.


Now that the cause of the problem has been found, the solution is simple. We proactively align the data segment and BSS segment to 4 bytes in the link script, and there will be no problem. Modify the link script as follows:

relocate.lds:


/*
 * relocate.lds (aligned version) - same layout as before: .text and
 * .rodata run in place from address 0 and .data runs at 0x30000000, but
 * the load address of .data and the start of .bss are now both aligned
 * to 4 bytes so that relocate.c can work a word at a time.
 */
SECTIONS {

/* Code is linked at address 0. */
.text 0 : {*(.text)}

.rodata : {*(.rodata)}

/* Align the load address of .data to 4 bytes so that the initial values
 * can be read one word at a time. */

. = ALIGN(4);

/* Where the initial contents of .data are stored in the image. */
_data_offset = .;

/* Run-time address 0x30000000, load address _data_offset. */
.data 0x30000000 : AT(_data_offset) { 

/* Same value as _data_offset, obtained through LOADADDR(). */
_data_LMA = LOADADDR(.data);

/* Run-time start of .data. */
_data_start = .;

*(.data)

/* First address after the last byte of .data (not aligned). */
_data_end = .;

}

/* Align the start of .bss to 4 bytes so that clearing it one word at a
 * time can no longer overwrite the end of .data. */

. = ALIGN(4);

_bss_start = .;

.bss : { *(.bss) }

/* First address after the last byte of .bss (not aligned). */
_bss_end = .;

}


Recompile, start in Nor mode, power on and observe, and it returns to normal.

insert image description here

Relocate the entire program

When our development board is started in Nand mode and the BIN file exceeds 4KB, the entire program needs to be relocated, including the code segment and the data segment. To do this, we need to introduce a concept: position-independent code. Position-independent code (PIC) is code that works correctly no matter which address it is loaded at.


So how do you write position-independent programs?


Use b or bl relative jump instructions when calling a program

Before relocation, you cannot use absolute addresses and cannot access global or static variables.

Arrays with initializers must not be used (their initial values are stored in the .rodata segment rather than on the stack, and accessing them there is position-dependent)

After relocation, you need to use an absolute jump instruction to jump to the runtime address (link address) to start execution, such as ldr pc, =main

Since we have not yet covered the Nand Flash operation experiment (next article), and Nand Flash cannot be read like ordinary memory, we still burn the BIN file to Nor Flash first and start in Nor Flash mode to relocate the entire program.


Jump Instructions

When calling the main function in an assembly file, note that you must use an absolute jump instruction:


/* Absolute jump: load the link-time (run-time) address of main into pc */
ldr pc, =main


A PC-relative (position-independent) instruction such as bl main must not be used here; otherwise the program keeps running in Nor Flash or SRAM instead of jumping to the relocated copy.

Because the ldr pc, =main instruction does not update the return address stored in the lr register, lr must be set to the address of halt before jumping to main. Otherwise, when main returns, execution resumes at the stale address left in lr and the ldr pc, =main instruction is executed again. The code is as follows:


/* Set the return address by hand, because ldr pc does not update lr:
 * when main returns, execution continues at halt */
ldr lr, =halt

/* Absolute jump to the link-time (run-time) address of main */
ldr pc, =main


All the files are as follows:

insert image description here

We list the main source files:

CRT0.S


.text 

.global _start


_start:

/* 1. Turn off the watchdog */

ldr r0, =0x53000000

ldr r1, =0

str r1, [r0]

/* 2. Set the clock */

/* 2.1 Set LOCKTIME(0x4C000000)=0xFFFFFFFF */

ldr r0, =0x4C000000 

ldr r1, =0xFFFFFFFF

str r1, [r0]

/* 2.2 Set CLKDIVN(0x4C000014) = 0x5 FCLK : HCLK : PCLK = 400m : 100m : 50m*/

ldr r0, =0x4C000014

ldr r1, =0x5

str r1, [r0]

/* 2.3 Set the CPU to asynchronous mode */

mrc p15,0,r0,c1,c0,0

orr r0,r0,#0xc0000000 /* #R1_nF:OR:R1_iA */

mcr p15,0,r0,c1,c0,0

/* 2.4 Set MPLLCON(0x4C000004)=(92<<12) | (1 << 4) | (1 << 0)

* m = MDIV + 8 = 100

* p = PDIV + 2 = 3

* s = SDIV = 1

* Mpll = (2 * m * Fin) / (p * 2 ^ s) = (2 * 100 * 12) / (3 * 2 ^ 1) = 400MHZ

*/

ldr r0, =0x4C000004

ldr r1, =(92<<12) | (1 << 4) | (1 << 0)

[1] [2]
Keywords:S3C2440 Reference address:S3C2440 code relocation experiment (Part 3)

Previous article:S3C2440 code relocation experiment (Part 2)
Next article:s3c2440 study notes - relocation and linking scripts

Recommended ReadingLatest update time:2024-11-16 09:02

fedora26 make menuconfig *** Unable to find the ncurses libraries when compiling s3c2440 kernel
# make menuconfig *** Unable to find the ncurses libraries or the *** required header files. *** 'make menuconfig' requires the ncurses libraries. *** *** Install ncurses (ncurses-devel) and try again. *** make : ***   fedora install ncurses library # yum provides "/*/ncurses"   # yum install -y ncurses-devel-6.0-8.
[Microcontroller]
fedora26 make menuconfig *** Unable to find the ncurses libraries when compiling s3c2440 kernel
【ARM】s3c2440 bare metal RTC digital clock
Function Bare metal program to realize LCD display digital clock Main code 1) Background drawing void Brush_ U32 c) {     int x,y ;     for ( y = 0 ; y LCD_HEIGHT ; y++ )     {         for ( x = 0 ; x LCD_WIDTH ; x++ )         {             LCD_BUFFER = c ;         }     } } 2) Text drawing void Draw_Te
[Microcontroller]
【ARM】s3c2440 bare metal RTC digital clock
Port QtEmbedded 4.6.3 tslib1.4 to S3C2440
1. Download the source code package: Go to the QT official website to download the latest version of QT FOR EMBEDDED Download the latest version of tslib1.4   2. Configure the cross-compilation environment Download the cross-compilation tool arm-linux-gcc 4.3.2 (for the version of the cross-compilation tool, please
[Microcontroller]
Yangtze Memory improves 3D NAND storage technology to support 128-layer QLC
Not long ago, China's Yangtze Memory announced the successful development of 128-layer QLC 3D NAND, which has been verified on SSDs of two controller manufacturers, Phison and Maxim Integrated. It can be applied to consumer-grade SSDs and gradually enter enterprise-level servers, data centers and other fields to meet
[Mobile phone portable]
Yangtze Memory improves 3D NAND storage technology to support 128-layer QLC
s3c2440 mpll
S3C2440 has two PLLs (phase locked loops), one is MPLL and the other is UPLL. MPLL is used for CPU and other peripheral devices, and UPLL is used for USB. It is used to generate three frequencies: FCLK, HCLK, and PCLK, which have different uses: FCLK is the clock signal provided by the CPU. HCLK is the clo
[Microcontroller]
s3c2440 bare metal-I2c programming-3.i2c program framework
1. Functions of iiC devices Obviously, the IIC controller provides the ability to transmit data. As for the meaning of the data, the IIC controller does not know. The meaning of the data is an external i2c slave device. We need to read the chip manual to know what kind of data the IIC controller should send. data. The
[Microcontroller]
s3c2440 bare metal-I2c programming-3.i2c program framework
Porting of LWIP-1.3.0 and DM9000 based on S3C2440 on UCOS-II-2.8.6
I usually run on Linux, and I feel a little uncomfortable jumping to UCOS for a while. First of all, the compiler, GCC is a good choice, but GDB debugging is too troublesome. ADS combined with the simulator is a very good debugging environment, so I moved the code to ADS, which is completely independent of Linux, an
[Microcontroller]
Porting of LWIP-1.3.0 and DM9000 based on S3C2440 on UCOS-II-2.8.6
S3C2440 serial communication basic function code (comments + learning)
Function: Send out the content received through the serial port. What you see on the PC is that what you input in the serial communication software will be displayed in real time. Main file: serial.c  1 //The most commonly used registers are ULCON, UCON, UBRDIV, UTRSTAT, UTXH, and URXH  2 #define ULCON0 (*(volatil
[Microcontroller]
Latest Microcontroller Articles
  • Download from the Internet--ARM Getting Started Notes
    A brief introduction: From today on, the ARM notebook of the rookie is open, and it can be regarded as a place to store these notes. Why publish it? Maybe you are interested in it. In fact, the reason for these notes is ...
  • Learn ARM development(22)
    Turning off and on interrupts Interrupts are an efficient dialogue mechanism, but sometimes you don't want to interrupt the program while it is running. For example, when you are printing something, the program suddenly interrupts and another ...
  • Learn ARM development(21)
    First, declare the task pointer, because it will be used later. Task pointer volatile TASK_TCB* volatile g_pCurrentTask = NULL;volatile TASK_TCB* vol ...
  • Learn ARM development(20)
    With the previous Tick interrupt, the basic task switching conditions are ready. However, this "easterly" is also difficult to understand. Only through continuous practice can we understand it. ...
  • Learn ARM development(19)
    After many days of hard work, I finally got the interrupt working. But in order to allow RTOS to use timer interrupts, what kind of interrupts can be implemented in S3C44B0? There are two methods in S3C44B0. ...
  • Learn ARM development(14)
  • Learn ARM development(15)
  • Learn ARM development(16)
  • Learn ARM development(17)
Change More Related Popular Components

EEWorld
subscription
account

EEWorld
service
account

Automotive
development
circle

About Us Customer Service Contact Information Datasheet Sitemap LatestNews


Room 1530, 15th Floor, Building B, No.18 Zhongguancun Street, Haidian District, Beijing, Postal Code: 100190 China Telephone: 008610 8235 0740

Copyright © 2005-2024 EEWORLD.com.cn, Inc. All rights reserved 京ICP证060456号 京ICP备10001474号-1 电信业务审批[2006]字第258号函 京公网安备 11010802033920号