/* $OpenBSD: bcopy.S,v 1.5 2005/08/07 16:40:13 espie Exp $ */ /* $NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $ */ /* * Copyright (c) 1995 Carnegie-Mellon University. * All rights reserved. * * Author: Trevor Blackwell. Support for use as memcpy() and memmove() * added by Chris Demetriou. * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include #if defined(MEMCOPY) || defined(MEMMOVE) #ifdef MEMCOPY #define FUNCTION memcpy #else #define FUNCTION memmove #endif #define SRCREG a1 #define DSTREG a0 #else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */ #define FUNCTION bcopy #define SRCREG a0 #define DSTREG a1 #endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */ #define SIZEREG a2 /* * Copy bytes. * * void bcopy(char *from, char *to, size_t len); * char *memcpy(void *to, const void *from, size_t len); * char *memmove(void *to, const void *from, size_t len); * * No matter how invoked, the source and destination registers * are used directly for calculation. There's no point in copying them to "working" * registers, since the code uses their values "in place," and * copying them would be slower. 
*/

/*
 * FUNCTION -- assembled as bcopy, memcpy or memmove, depending on whether
 * MEMCOPY / MEMMOVE is defined (see the FUNCTION, SRCREG, DSTREG and
 * SIZEREG macros above).
 *
 * In:	SRCREG	= source address
 *	DSTREG	= destination address
 *	SIZEREG	= byte count
 * Out:	v0	= original destination address (memcpy/memmove builds only)
 * Uses:	t0-t7, a3 and a4 as scratch; SRCREG, DSTREG and SIZEREG are
 *	modified in place.
 *
 * Strategy (all three entry points share the same code, including the
 * overlap check):
 *   - dst inside [src, src+len): copy backwards (bcopy_overlap).
 *   - src and dst equally aligned modulo 8: aligned quadword loop with a
 *     masked head and tail.
 *   - otherwise: every destination quadword is assembled from two source
 *     quadwords with extql/extqh.
 *   - 8 bytes or fewer with different alignment or overlap: a single
 *     read-modify-write of at most two quadwords (bcopy_da_finish).
 *
 * Memory outside the destination range is never changed: partial
 * quadwords are merged with the existing destination bytes before being
 * stored.
 *
 * NOTE(review): LEAF, END and RET are macros that are not defined in the
 * visible part of this file (presumably they come from the header
 * included above); the 3 passed to LEAF is presumably the argument
 * count -- confirm against that header.
 */
LEAF(FUNCTION,3)

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can (DSTREG is modified below) */
	mov	DSTREG,v0
#endif

	/* Nothing to do for a zero or negative length (signed test) */
	ble	SIZEREG,bcopy_done

	/*
	 * Check for overlap: t5 = dst - src.  If that is below len as an
	 * unsigned value, dst lies inside [src, src+len) and the copy is
	 * done backwards instead.  dst == src also takes that path.
	 */
	subq	DSTREG,SRCREG,t5
	cmpult	t5,SIZEREG,t5
	bne	t5,bcopy_overlap

	/* a3 = end address of the source (src + len) */
	addq	SRCREG,SIZEREG,a3

	/* Get the first word: the aligned quadword holding the first source byte */
	ldq_u	t2,0(SRCREG)

	/* Do they have the same alignment?  t0 = (src ^ dst) & 7, t1 = dst & 7 */
	xor	SRCREG,DSTREG,t0
	and	t0,7,t0
	and	DSTREG,7,t1
	bne	t0,bcopy_different_alignment

	/* src & dst have same alignment; no head merge needed if it is 0 */
	beq	t1,bcopy_all_aligned

	/*
	 * Unaligned head.  Build the first quadword to store from the
	 * destination bytes below the starting offset (preserved) and the
	 * source bytes from the offset upward.  The length is increased by
	 * the offset so that everything below can count from the quadword
	 * boundary.
	 */
	ldq_u	t3,0(DSTREG)
	addq	SIZEREG,t1,SIZEREG
	mskqh	t2,SRCREG,t2		/* keep source bytes at/above the offset */
	mskql	t3,SRCREG,t3		/* keep dest bytes below the offset */
	or	t2,t3,t2

	/*
	 * From here on the copy works on whole aligned quadwords.  ldq_u and
	 * stq_u ignore the low three address bits, so SRCREG and DSTREG are
	 * not rounded down.
	 */

bcopy_all_aligned:
	/*
	 * t0 = (len - 1) & ~7: bytes stored by the loop, chosen so that the
	 * last 1..8 bytes are always left for the tail code.
	 * SIZEREG = len & 7: bytes in the final quadword (0 means all 8).
	 * If 8 bytes or fewer remain, skip the loop.
	 */
	subq	SIZEREG,1,t0
	and	SIZEREG,7,SIZEREG
	bic	t0,7,t0
	beq	t0,bcopy_samealign_lp_end

bcopy_samealign_lp:
	/* Store the quadword held in t2, then fetch the next source quadword */
	stq_u	t2,0(DSTREG)
	addq	DSTREG,8,DSTREG
	ldq_u	t2,8(SRCREG)
	subq	t0,8,t0
	addq	SRCREG,8,SRCREG
	bne	t0,bcopy_samealign_lp

bcopy_samealign_lp_end:
	/*
	 * t2 holds the final quadword of data (also entered from
	 * bcopy_da_finish1).  If the tail is a full quadword store it and
	 * exit; otherwise merge it with the destination.
	 */
	bne	SIZEREG,bcopy_small_left
	stq_u	t2,0(DSTREG)
	RET

bcopy_small_left:
	/* Partial tail: low SIZEREG bytes come from t2, the rest from dest */
	mskql	t2,SIZEREG,t4		/* keep the low SIZEREG data bytes */
	ldq_u	t3,0(DSTREG)
	mskqh	t3,SIZEREG,t3		/* keep dest bytes above them */
	or	t4,t3,t4
	stq_u	t4,0(DSTREG)
	RET

bcopy_different_alignment:
	/*
	 * this is the fun part
	 *
	 * On entry t1 = dst & 7 and t2 = quadword holding the first source
	 * byte.  a3 is recomputed here; SRCREG and SIZEREG have not changed
	 * since it was first set, so the value is the same.
	 */
	addq	SRCREG,SIZEREG,a3
	/* 8 bytes or fewer: handled entirely by bcopy_da_finish */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_da_finish

	/* Destination already quadword aligned: no head to do */
	beq	t1,bcopy_da_noentry

	/*
	 * Do the initial partial word.
	 * t0 = (-dst) & 7 = bytes needed to reach the next destination
	 * quadword boundary.  Fetch 8 unaligned source bytes, shift them to
	 * the destination offset, merge with the destination bytes below that
	 * offset and store.  Then advance src/dst by t0 and reload t2 with
	 * the quadword holding the new source position.
	 */
	subq	zero,DSTREG,t0
	and	t0,7,t0
	ldq_u	t3,7(SRCREG)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t5		/* t5 = 8 source bytes starting at src */
	insql	t5,DSTREG,t5		/* move them up to the dest offset */
	ldq_u	t6,0(DSTREG)
	mskql	t6,DSTREG,t6		/* keep dest bytes below the offset */
	or	t5,t6,t5
	stq_u	t5,0(DSTREG)
	addq	SRCREG,t0,SRCREG
	addq	DSTREG,t0,DSTREG
	subq	SIZEREG,t0,SIZEREG
	ldq_u	t2,0(SRCREG)

bcopy_da_noentry:
	/*
	 * Destination is quadword aligned from here on.
	 * t0 = (len - 1) & ~7 bytes for the loop, SIZEREG = len & 7 for the
	 * tail (0 means a full quadword), as in the same-alignment case.
	 */
	subq	SIZEREG,1,t0
	bic	t0,7,t0
	and	SIZEREG,7,SIZEREG
	beq	t0,bcopy_da_finish2

bcopy_da_lp:
	/*
	 * The loop is unrolled twice so that t2 and t3 alternate as the
	 * "previous" and "next" source quadword without a register move.
	 * Only the low three bits of SRCREG matter to extql/extqh, so adding
	 * 8 before the extracts does not change their result.
	 */
	ldq_u	t3,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t2,SRCREG,t4
	extqh	t3,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	/* Leaving here, the most recent source quadword is in t3 */
	beq	t0,bcopy_da_finish1
	ldq_u	t2,7(SRCREG)
	addq	SRCREG,8,SRCREG
	extql	t3,SRCREG,t4
	extqh	t2,SRCREG,t5
	subq	t0,8,t0
	or	t4,t5,t5
	stq	t5,0(DSTREG)
	addq	DSTREG,8,DSTREG
	bne	t0,bcopy_da_lp

bcopy_da_finish2:
	/* Do the last new word: the most recent quadword is in t2, move it to t3 */
	mov	t2,t3

bcopy_da_finish1:
	/*
	 * Do the last partial word: t2 = quadword holding the last source
	 * byte, combined with t3 into the final 8 bytes of data.  The
	 * same-alignment tail code then stores all or part of it.
	 */
	ldq_u	t2,-1(a3)
	extql	t3,SRCREG,t3
	extqh	t2,SRCREG,t2
	or	t2,t3,t2
	br	zero,bcopy_samealign_lp_end

bcopy_da_finish:
	/*
	 * Short copy, 1..8 bytes in total.  Reached from the
	 * different-alignment case and from bcopy_ov_short.
	 * Expects t2 = quadword holding the first source byte and
	 * a3 = src + len.  All loads are issued before the two stores.
	 */
	/* Do the last word in the next source word: t3 holds the last source byte */
	ldq_u	t3,-1(a3)
	extql	t2,SRCREG,t2
	extqh	t3,SRCREG,t3
	or	t2,t3,t2		/* t2 = source bytes, first byte lowest */
	/* Split the data at the destination offset into high and low parts */
	insqh	t2,DSTREG,t3
	insql	t2,DSTREG,t2
	/*
	 * Build a byte mask with the low len bytes set: t5 covers len & 7
	 * bytes; when that is zero (len == 8) t4 stays all ones.
	 */
	lda	t4,-1(zero)
	mskql	t4,SIZEREG,t5
	cmovne	t5,t5,t4
	/* Shift the mask the same way as the data */
	insqh	t4,DSTREG,t5
	insql	t4,DSTREG,t4
	addq	DSTREG,SIZEREG,a4	/* a4 = dst + len */
	ldq_u	t6,0(DSTREG)		/* quadword with the first dest byte */
	ldq_u	t7,-1(a4)		/* quadword with the last dest byte */
	bic	t6,t4,t6		/* clear the dest bytes being replaced */
	bic	t7,t5,t7
	and	t2,t4,t2		/* keep only the data bytes being copied */
	and	t3,t5,t3
	or	t2,t6,t2
	or	t3,t7,t3
	/*
	 * Store the high part first.  If both addresses fall in the same
	 * quadword, the store of t2 below is the one that remains.
	 */
	stq_u	t3,-1(a4)
	stq_u	t2,0(DSTREG)
	RET

bcopy_overlap:
	/*
	 * Basically equivalent to previous case, only backwards.
	 * Not quite as highly optimized
	 *
	 * a3 = src + len and a4 = dst + len walk down from the ends.  Every
	 * source quadword is fetched with the unaligned ldq_u/extql/extqh
	 * sequence, whatever the relative alignment is.
	 */
	addq	SRCREG,SIZEREG,a3
	addq	DSTREG,SIZEREG,a4

	/* 8 bytes or fewer - don't worry about overlap */
	cmpule	SIZEREG,8,t0
	bne	t0,bcopy_ov_short

	/*
	 * Possibly do a partial first word.
	 * t4 = a4 & 7 = bytes of the copy that lie in the last destination
	 * quadword.  Back both pointers up by t4 (a4 becomes aligned), fetch
	 * 8 unaligned source bytes and merge the low t4 of them with the
	 * destination bytes above.
	 */
	and	a4,7,t4
	beq	t4,bcopy_ov_nostart2
	subq	a3,t4,a3
	subq	a4,t4,a4
	ldq_u	t1,0(a3)
	subq	SIZEREG,t4,SIZEREG
	ldq_u	t2,7(a3)
	ldq	t3,0(a4)
	extql	t1,a3,t1
	extqh	t2,a3,t2
	or	t1,t2,t1
	mskqh	t3,t4,t3		/* keep dest bytes t4..7 */
	mskql	t1,t4,t1		/* keep the low t4 source bytes */
	or	t1,t3,t1
	stq	t1,0(a4)

bcopy_ov_nostart2:
	/* t4 = bytes to copy as whole quadwords, SIZEREG = len & 7 left over */
	bic	SIZEREG,7,t4
	and	SIZEREG,7,SIZEREG
	beq	t4,bcopy_ov_lp_end

bcopy_ov_lp:
	/* This could be more pipelined, but it doesn't seem worth it */
	/* Copy one quadword per iteration, from high addresses to low */
	ldq_u	t0,-8(a3)
	subq	a4,8,a4
	ldq_u	t1,-1(a3)
	subq	a3,8,a3
	extql	t0,a3,t0
	extqh	t1,a3,t1
	subq	t4,8,t4
	or	t0,t1,t0
	stq	t0,0(a4)
	bne	t4,bcopy_ov_lp

bcopy_ov_lp_end:
	/* No leftover bytes at the start of the range: finished */
	beq	SIZEREG,bcopy_done

	/*
	 * The SIZEREG leftover bytes start at DSTREG and run up to the
	 * quadword boundary (a4, aligned at this point, equals
	 * DSTREG + SIZEREG).  Fetch 8 unaligned source bytes from the start
	 * of the source, shift them to the destination offset and merge with
	 * the destination bytes below that offset.
	 */
	ldq_u	t0,0(SRCREG)
	ldq_u	t1,7(SRCREG)
	ldq_u	t2,0(DSTREG)
	extql	t0,SRCREG,t0
	extqh	t1,SRCREG,t1
	or	t0,t1,t0
	insql	t0,DSTREG,t0
	mskql	t2,DSTREG,t2
	or	t2,t0,t2
	stq_u	t2,0(DSTREG)

bcopy_done:
	RET

bcopy_ov_short:
	/*
	 * Overlapping copy of 8 bytes or fewer: load the first source
	 * quadword as bcopy_da_finish expects (a3 is already src + len) and
	 * let it do the whole copy.
	 */
	ldq_u	t2,0(SRCREG)
	br	zero,bcopy_da_finish

END(FUNCTION)