/*	$OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
/*
 * Written by J.T. Conklin.
 * Public domain.
 */

/*
 * NOTE(review): the header name was missing from this directive; it is
 * presumably <machine/asm.h>, which is expected to supply ENTRY().
 * Confirm against the build tree.
 */
#include <machine/asm.h>

/*
 * On the i486, this code is negligibly faster than the code generated
 * by gcc at about half the size.  If my i386 databook is correct, it
 * should be considerably faster than the gcc code on a i386.
 */

/*
 * swab -- copy 16-bit words from one buffer to another, exchanging the
 * two bytes of every word on the way.
 *
 * Syntax: AT&T (GAS), 32-bit x86, arguments passed on the stack.
 * C view (per POSIX): void swab(const void *src, void *dst, ssize_t len)
 *
 * In (offsets valid after the two pushes below):
 *	12(%esp)  source pointer       -> %esi
 *	16(%esp)  destination pointer  -> %edi
 *	20(%esp)  length in bytes      -> %ecx
 *
 * Only len / 2 whole words are processed; a trailing odd byte is neither
 * read nor written.  A zero or negative length returns without touching
 * either buffer.
 *
 * Clobbers: %eax, %ecx, flags.  %esi and %edi are saved and restored.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi
	movl	16(%esp),%edi
	movl	20(%esp),%ecx

	/*
	 * The length is signed.  Without this guard the unsigned shift
	 * below would turn a negative length into a huge word count and
	 * the copy loops would run far beyond both buffers.
	 */
	testl	%ecx,%ecx
	jle	L4

	cld				/* set direction forward */

	shrl	$1,%ecx			/* bytes -> 16-bit words */
	testl	$7,%ecx			/* copy first group of 1 to 7 words */
	jz	L2			/* while swapping alternate bytes. */
	.align	2,0x90
L1:	lodsw
	rorw	$8,%ax			/* rotate by 8 = exchange the two bytes */
	stosw
	decl	%ecx
	testl	$7,%ecx			/* stop once the count is a multiple of 8 */
	jnz	L1

L2:	shrl	$3,%ecx			/* copy remainder 8 words at a time */
	jz	L4			/* while swapping alternate bytes. */
	.align	2,0x90
L3:	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	decl	%ecx			/* one group of 8 words done */
	jnz	L3

L4:	popl	%edi
	popl	%esi
	ret