123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346 |
- /* Name: usbdrvasm16.inc
- * Project: V-USB, virtual USB port for Atmel's(r) AVR(r) microcontrollers
- * Author: Christian Starkjohann
- * Creation Date: 2007-06-15
- * Tabsize: 4
- * Copyright: (c) 2007 by OBJECTIVE DEVELOPMENT Software GmbH
- * License: GNU GPL v2 (see License.txt), GNU GPL v3 or proprietary (CommercialLicense.txt)
- * Revision: $Id: usbdrvasm16.inc 760 2009-08-09 18:59:43Z cs $
- */
-
- /* Do not link this file! Link usbdrvasm.S instead, which includes the
- * appropriate implementation!
- */
-
- /*
- General Description:
- This file is the 16 MHz version of the asssembler part of the USB driver. It
- requires a 16 MHz crystal (not a ceramic resonator and not a calibrated RC
- oscillator).
-
- See usbdrv.h for a description of the entire driver.
-
- Since almost all of this code is timing critical, don't change unless you
- really know what you are doing! Many parts require not only a maximum number
- of CPU cycles, but even an exact number of cycles!
- */
-
- ;max stack usage: [ret(2), YL, SREG, YH, bitcnt, shift, x1, x2, x3, x4, cnt] = 12 bytes
- ;nominal frequency: 16 MHz -> 10.6666666 cycles per bit, 85.333333333 cycles per byte
- ; Numbers in brackets are clocks counted from center of last sync bit
- ; when instruction starts
-
- USB_INTR_VECTOR:
- ;order of registers pushed: YL, SREG YH, [sofError], bitcnt, shift, x1, x2, x3, x4, cnt
- push YL ;[-25] push only what is necessary to sync with edge ASAP
- in YL, SREG ;[-23]
- push YL ;[-22]
- push YH ;[-20]
- ;----------------------------------------------------------------------------
- ; Synchronize with sync pattern:
- ;----------------------------------------------------------------------------
- ;sync byte (D-) pattern LSb to MSb: 01010100 [1 = idle = J, 0 = K]
- ;sync up with J to K edge during sync pattern -- use fastest possible loops
- ;The first part waits at most 1 bit long since we must be in sync pattern.
- ;YL is guarenteed to be < 0x80 because I flag is clear. When we jump to
- ;waitForJ, ensure that this prerequisite is met.
- waitForJ:
- inc YL
- sbis USBIN, USBMINUS
- brne waitForJ ; just make sure we have ANY timeout
- waitForK:
- ;The following code results in a sampling window of < 1/4 bit which meets the spec.
- sbis USBIN, USBMINUS ;[-15]
- rjmp foundK ;[-14]
- sbis USBIN, USBMINUS
- rjmp foundK
- sbis USBIN, USBMINUS
- rjmp foundK
- sbis USBIN, USBMINUS
- rjmp foundK
- sbis USBIN, USBMINUS
- rjmp foundK
- sbis USBIN, USBMINUS
- rjmp foundK
- #if USB_COUNT_SOF
- lds YL, usbSofCount
- inc YL
- sts usbSofCount, YL
- #endif /* USB_COUNT_SOF */
- #ifdef USB_SOF_HOOK
- USB_SOF_HOOK
- #endif
- rjmp sofError
- foundK: ;[-12]
- ;{3, 5} after falling D- edge, average delay: 4 cycles [we want 5 for center sampling]
- ;we have 1 bit time for setup purposes, then sample again. Numbers in brackets
- ;are cycles from center of first sync (double K) bit after the instruction
- push bitcnt ;[-12]
- ; [---] ;[-11]
- lds YL, usbInputBufOffset;[-10]
- ; [---] ;[-9]
- clr YH ;[-8]
- subi YL, lo8(-(usbRxBuf));[-7] [rx loop init]
- sbci YH, hi8(-(usbRxBuf));[-6] [rx loop init]
- push shift ;[-5]
- ; [---] ;[-4]
- ldi bitcnt, 0x55 ;[-3] [rx loop init]
- sbis USBIN, USBMINUS ;[-2] we want two bits K (sample 2 cycles too early)
- rjmp haveTwoBitsK ;[-1]
- pop shift ;[0] undo the push from before
- pop bitcnt ;[2] undo the push from before
- rjmp waitForK ;[4] this was not the end of sync, retry
- ; The entire loop from waitForK until rjmp waitForK above must not exceed two
- ; bit times (= 21 cycles).
-
- ;----------------------------------------------------------------------------
- ; push more registers and initialize values while we sample the first bits:
- ;----------------------------------------------------------------------------
- haveTwoBitsK:
- push x1 ;[1]
- push x2 ;[3]
- push x3 ;[5]
- ldi shift, 0 ;[7]
- ldi x3, 1<<4 ;[8] [rx loop init] first sample is inverse bit, compensate that
- push x4 ;[9] == leap
-
- in x1, USBIN ;[11] <-- sample bit 0
- andi x1, USBMASK ;[12]
- bst x1, USBMINUS ;[13]
- bld shift, 7 ;[14]
- push cnt ;[15]
- ldi leap, 0 ;[17] [rx loop init]
- ldi cnt, USB_BUFSIZE;[18] [rx loop init]
- rjmp rxbit1 ;[19] arrives at [21]
-
- ;----------------------------------------------------------------------------
- ; Receiver loop (numbers in brackets are cycles within byte after instr)
- ;----------------------------------------------------------------------------
-
- ; duration of unstuffing code should be 10.66666667 cycles. We adjust "leap"
- ; accordingly to approximate this value in the long run.
-
- unstuff6:
- andi x2, USBMASK ;[03]
- ori x3, 1<<6 ;[04] will not be shifted any more
- andi shift, ~0x80;[05]
- mov x1, x2 ;[06] sampled bit 7 is actually re-sampled bit 6
- subi leap, -1 ;[07] total duration = 11 bits -> subtract 1/3
- rjmp didUnstuff6 ;[08]
-
- unstuff7:
- ori x3, 1<<7 ;[09] will not be shifted any more
- in x2, USBIN ;[00] [10] re-sample bit 7
- andi x2, USBMASK ;[01]
- andi shift, ~0x80;[02]
- subi leap, 2 ;[03] total duration = 10 bits -> add 1/3
- rjmp didUnstuff7 ;[04]
-
- unstuffEven:
- ori x3, 1<<6 ;[09] will be shifted right 6 times for bit 0
- in x1, USBIN ;[00] [10]
- andi shift, ~0x80;[01]
- andi x1, USBMASK ;[02]
- breq se0 ;[03]
- subi leap, -1 ;[04] total duration = 11 bits -> subtract 1/3
- nop2 ;[05]
- rjmp didUnstuffE ;[06]
-
- unstuffOdd:
- ori x3, 1<<5 ;[09] will be shifted right 4 times for bit 1
- in x2, USBIN ;[00] [10]
- andi shift, ~0x80;[01]
- andi x2, USBMASK ;[02]
- breq se0 ;[03]
- subi leap, -1 ;[04] total duration = 11 bits -> subtract 1/3
- nop2 ;[05]
- rjmp didUnstuffO ;[06]
-
- rxByteLoop:
- andi x1, USBMASK ;[03]
- eor x2, x1 ;[04]
- subi leap, 1 ;[05]
- brpl skipLeap ;[06]
- subi leap, -3 ;1 one leap cycle every 3rd byte -> 85 + 1/3 cycles per byte
- nop ;1
- skipLeap:
- subi x2, 1 ;[08]
- ror shift ;[09]
- didUnstuff6:
- cpi shift, 0xfc ;[10]
- in x2, USBIN ;[00] [11] <-- sample bit 7
- brcc unstuff6 ;[01]
- andi x2, USBMASK ;[02]
- eor x1, x2 ;[03]
- subi x1, 1 ;[04]
- ror shift ;[05]
- didUnstuff7:
- cpi shift, 0xfc ;[06]
- brcc unstuff7 ;[07]
- eor x3, shift ;[08] reconstruct: x3 is 1 at bit locations we changed, 0 at others
- st y+, x3 ;[09] store data
- rxBitLoop:
- in x1, USBIN ;[00] [11] <-- sample bit 0/2/4
- andi x1, USBMASK ;[01]
- eor x2, x1 ;[02]
- andi x3, 0x3f ;[03] topmost two bits reserved for 6 and 7
- subi x2, 1 ;[04]
- ror shift ;[05]
- cpi shift, 0xfc ;[06]
- brcc unstuffEven ;[07]
- didUnstuffE:
- lsr x3 ;[08]
- lsr x3 ;[09]
- rxbit1:
- in x2, USBIN ;[00] [10] <-- sample bit 1/3/5
- andi x2, USBMASK ;[01]
- breq se0 ;[02]
- eor x1, x2 ;[03]
- subi x1, 1 ;[04]
- ror shift ;[05]
- cpi shift, 0xfc ;[06]
- brcc unstuffOdd ;[07]
- didUnstuffO:
- subi bitcnt, 0xab;[08] == addi 0x55, 0x55 = 0x100/3
- brcs rxBitLoop ;[09]
-
- subi cnt, 1 ;[10]
- in x1, USBIN ;[00] [11] <-- sample bit 6
- brcc rxByteLoop ;[01]
- rjmp overflow
-
- macro POP_STANDARD ; 14 cycles
- pop cnt
- pop x4
- pop x3
- pop x2
- pop x1
- pop shift
- pop bitcnt
- endm
- macro POP_RETI ; 7 cycles
- pop YH
- pop YL
- out SREG, YL
- pop YL
- endm
-
- #include "asmcommon.inc"
-
- ; USB spec says:
- ; idle = J
- ; J = (D+ = 0), (D- = 1)
- ; K = (D+ = 1), (D- = 0)
- ; Spec allows 7.5 bit times from EOP to SOP for replies
-
- bitstuffN:
- eor x1, x4 ;[5]
- ldi x2, 0 ;[6]
- nop2 ;[7]
- nop ;[9]
- out USBOUT, x1 ;[10] <-- out
- rjmp didStuffN ;[0]
-
- bitstuff6:
- eor x1, x4 ;[5]
- ldi x2, 0 ;[6] Carry is zero due to brcc
- rol shift ;[7] compensate for ror shift at branch destination
- rjmp didStuff6 ;[8]
-
- bitstuff7:
- ldi x2, 0 ;[2] Carry is zero due to brcc
- rjmp didStuff7 ;[3]
-
-
- sendNakAndReti:
- ldi x3, USBPID_NAK ;[-18]
- rjmp sendX3AndReti ;[-17]
- sendAckAndReti:
- ldi cnt, USBPID_ACK ;[-17]
- sendCntAndReti:
- mov x3, cnt ;[-16]
- sendX3AndReti:
- ldi YL, 20 ;[-15] x3==r20 address is 20
- ldi YH, 0 ;[-14]
- ldi cnt, 2 ;[-13]
- ; rjmp usbSendAndReti fallthrough
-
- ;usbSend:
- ;pointer to data in 'Y'
- ;number of bytes in 'cnt' -- including sync byte [range 2 ... 12]
- ;uses: x1...x4, btcnt, shift, cnt, Y
- ;Numbers in brackets are time since first bit of sync pattern is sent
- ;We don't match the transfer rate exactly (don't insert leap cycles every third
- ;byte) because the spec demands only 1.5% precision anyway.
- usbSendAndReti: ; 12 cycles until SOP
- in x2, USBDDR ;[-12]
- ori x2, USBMASK ;[-11]
- sbi USBOUT, USBMINUS;[-10] prepare idle state; D+ and D- must have been 0 (no pullups)
- in x1, USBOUT ;[-8] port mirror for tx loop
- out USBDDR, x2 ;[-7] <- acquire bus
- ; need not init x2 (bitstuff history) because sync starts with 0
- ldi x4, USBMASK ;[-6] exor mask
- ldi shift, 0x80 ;[-5] sync byte is first byte sent
- txByteLoop:
- ldi bitcnt, 0x35 ;[-4] [6] binary 0011 0101
- txBitLoop:
- sbrs shift, 0 ;[-3] [7]
- eor x1, x4 ;[-2] [8]
- out USBOUT, x1 ;[-1] [9] <-- out N
- ror shift ;[0] [10]
- ror x2 ;[1]
- didStuffN:
- cpi x2, 0xfc ;[2]
- brcc bitstuffN ;[3]
- lsr bitcnt ;[4]
- brcc txBitLoop ;[5]
- brne txBitLoop ;[6]
-
- sbrs shift, 0 ;[7]
- eor x1, x4 ;[8]
- didStuff6:
- out USBOUT, x1 ;[-1] [9] <-- out 6
- ror shift ;[0] [10]
- ror x2 ;[1]
- cpi x2, 0xfc ;[2]
- brcc bitstuff6 ;[3]
- ror shift ;[4]
- didStuff7:
- ror x2 ;[5]
- sbrs x2, 7 ;[6]
- eor x1, x4 ;[7]
- nop ;[8]
- cpi x2, 0xfc ;[9]
- out USBOUT, x1 ;[-1][10] <-- out 7
- brcc bitstuff7 ;[0] [11]
- ld shift, y+ ;[1]
- dec cnt ;[3]
- brne txByteLoop ;[4]
- ;make SE0:
- cbr x1, USBMASK ;[5] prepare SE0 [spec says EOP may be 21 to 25 cycles]
- lds x2, usbNewDeviceAddr;[6]
- lsl x2 ;[8] we compare with left shifted address
- subi YL, 20 + 2 ;[9] Only assign address on data packets, not ACK/NAK in x3
- sbci YH, 0 ;[10]
- out USBOUT, x1 ;[11] <-- out SE0 -- from now 2 bits = 22 cycles until bus idle
- ;2006-03-06: moved transfer of new address to usbDeviceAddr from C-Code to asm:
- ;set address only after data packet was sent, not after handshake
- breq skipAddrAssign ;[0]
- sts usbDeviceAddr, x2; if not skipped: SE0 is one cycle longer
- skipAddrAssign:
- ;end of usbDeviceAddress transfer
- ldi x2, 1<<USB_INTR_PENDING_BIT;[2] int0 occurred during TX -- clear pending flag
- USB_STORE_PENDING(x2) ;[3]
- ori x1, USBIDLE ;[4]
- in x2, USBDDR ;[5]
- cbr x2, USBMASK ;[6] set both pins to input
- mov x3, x1 ;[7]
- cbr x3, USBMASK ;[8] configure no pullup on both pins
- ldi x4, 4 ;[9]
- se0Delay:
- dec x4 ;[10] [13] [16] [19]
- brne se0Delay ;[11] [14] [17] [20]
- out USBOUT, x1 ;[21] <-- out J (idle) -- end of SE0 (EOP signal)
- out USBDDR, x2 ;[22] <-- release bus now
- out USBOUT, x3 ;[23] <-- ensure no pull-up resistors are active
- rjmp doReturn
|