int   x[100];   // 100 elements of 4 bytes each (400 bytes)
short y[100];   // 100 elements of 2 bytes each (200 bytes)
byte  z[100];   // 100 elements of 1 byte each  (100 bytes)
int   i;        // 4-byte scalar
short j;        // 2-byte scalar
byte  k;        // 1-byte scalar
// Static storage for x, y, z, i, j, k.
// Each label is aligned to the size of its element type before space is
// reserved; byte-sized objects (z, k) need no alignment directive.
        .section ".data"        // Start writable section

        .align  4               // Start at int address boundary
x:      .skip   400             // 100 elements * 4 bytes

        .align  2               // Start at short address boundary
y:      .skip   200             // 100 elements * 2 bytes

z:      .skip   100             // 100 elements * 1 byte

        .align  4               // Start at int address boundary
i:      .skip   4

        .align  2               // Start at short address boundary
j:      .skip   2

k:      .skip   1
// Statement to translate into assembly: the operands come from a short array
// (y) and a byte array (z), and the sum is stored into an int array (x).
x[i + j] = y[i + k] + z[j + k];
// Translation of:  x[i + j] = y[i + k] + z[j + k];
//
// Register roles:
//   %l0 = base address of the array currently being accessed
//   %l1 = index, then byte offset, into that array
//   %l2 = second index term (sign-extended to 32 bits by the load)
//   %l6 = z[j + k] (sign-extended to 32 bits)
//   %l7 = y[i + k], then the final sum
//
// The signed loads ldsh/ldsb extend 16-bit and 8-bit values to 32 bits,
// so all index arithmetic and the final addition are done in 32 bits.

// (1) Get y[i + k]:
        sethi   %hi(y), %l0
        add     %l0, %lo(y), %l0        // l0 = #y (address y)

        sethi   %hi(i), %l1
        ld      [%l1 + %lo(i)], %l1     // l1 = i

        sethi   %hi(k), %l2
        ldsb    [%l2 + %lo(k)], %l2     // l2 (32 bits) = k (8 bits)

        add     %l1, %l2, %l1           // l1 = i + k (32 bits),
                                        // it is an index, NOT offset
        smul    %l1, 2, %l1             // Because elements in array
                                        // "y" are short
        ldsh    [%l0 + %l1], %l7        // l7 (32 bits) = y[i + k] (16 bits)

// (2) Get z[j + k]:
        sethi   %hi(z), %l0
        add     %l0, %lo(z), %l0        // l0 = #z (address z)

        sethi   %hi(j), %l1
        ldsh    [%l1 + %lo(j)], %l1     // l1 (32 bits) = j (16 bits)

        sethi   %hi(k), %l2
        ldsb    [%l2 + %lo(k)], %l2     // l2 (32 bits) = k (8 bits)

        add     %l1, %l2, %l1           // l1 = j + k (32 bits),
                                        // it is an index, NOT offset
        smul    %l1, 1, %l1             // Because elements in array
                                        // "z" are bytes
                                        // (This instruction is not needed)
        ldsb    [%l0 + %l1], %l6        // l6 (32 bits) = z[j + k] (8 bits)

// (3) Add them:
        add     %l7, %l6, %l7           // l7 (32 bits) = y[i + k] + z[j + k]

// (4) Get the address of x[i + j]:
        sethi   %hi(x), %l0
        add     %l0, %lo(x), %l0        // l0 = #x (address x)

        sethi   %hi(i), %l1
        ld      [%l1 + %lo(i)], %l1     // l1 (32 bits) = i (32 bits)

        sethi   %hi(j), %l2
        ldsh    [%l2 + %lo(j)], %l2     // l2 (32 bits) = j (16 bits)

        add     %l1, %l2, %l1           // l1 = i + j (32 bits),
                                        // it is an index, NOT offset
        smul    %l1, 4, %l1             // Because elements in array
                                        // "x" are integers

// (5) Put the value of the sum in memory at address l0 + l1:
        st      %l7, [%l0 + %l1]        // x[i + j] = sum (32-bit store)

// DONE !!!
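The moral: the "technique" is the same no matter what the operand sizes are: compute the address of each operand, load it into a register, do the arithmetic in registers, and store the result back to memory.
The most noticeable difference is which instructions are used to get and store operands from memory: the load must match the size of the operand (ld for an int, ldsh for a short, ldsb for a byte), and the index must be scaled by the element size to obtain the byte offset.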