diff --git a/fw/winding-driver.c b/fw/winding-driver.c
index f2ae2fff894400c5e64756f117fc57899031f35e..8e25898833d2920075b7a2627871d676289db56a 100644
--- a/fw/winding-driver.c
+++ b/fw/winding-driver.c
@@ -3,60 +3,69 @@
 #include <avr/sleep.h>
 #include <util/delay.h>
 
-
-#define CLOCK_DIV_bm  TC_CLKSEL_DIV64_gc
-#define CLOCK_DIV     64
-
 #define LED1_mask PIN2_bm
 #define LED2_mask PIN3_bm
 
-#define encoder_mask PIN0_bm
-
 //#define wire_d 202000 //32 AWG, magnet wire diameter in nanometers
-#define wire_d 180000 //33 AWG, magnet wire diameter in nanometers
+uint32_t wire_d = 180000; //33 AWG, magnet wire diameter in nanometers; x_desired advances by this much per wrap
 //#define wire_d 160000 //34 AWG, magnet wire diameter in nanometers
-#define n_ticks_per_rev 400 //number of encoder edges per revolution
-#define nm_per_step 1270 //full steps for now
-#define n_layers 10 //how many layers total
-#define n_wraps_per_layer 8 //how many coil wraps per layer before reversing
-
-//unsigned uint16_t wire_d = 202000; //32 AWG, magnet wire diameter in nanometers
-//const uint32_t wire_d = 180000; //33 AWG, magnet wire diameter in nanometers
-//unsigned uint16_t wire_d = 160000; //34 AWG, magnet wire diameter in nanometers
-//const uint32_t nm_per_step = 1270; //full steps, TODO: measure this, right now its approximate.
-
-//const uint32_t n_wraps_per_layer = 8; //how many coil wraps per layer before reversing
-//const uint32_t n_layers = 10; //how many layers total
-//const uint32_t n_ticks_per_rev = 200; //number of encoder ticks per revolution
-uint32_t max_x = n_wraps_per_layer*wire_d; //maximum distance traversed
-uint32_t n_ticks_per_layer = n_ticks_per_rev * n_wraps_per_layer; //how many encoder ticks per layer
+uint32_t n_ticks_per_rev = 400; //number of encoder edges per revolution
+uint32_t nm_per_step = 1270; //nanometers added to or subtracted from x_current per step pulse; full steps for now
+uint32_t n_layers = 10; //how many layers total; when n_layers_done reaches this, update_desired_x() sets x_desired to 0
+uint32_t n_wraps_per_layer = 8; //how many coil wraps per layer before reversing
+
+//max_x (maximum distance traversed) is now computed inline in update_desired_x() as n_wraps_per_layer*wire_d
+//n_ticks_per_layer (encoder ticks per layer) is now a state variable recomputed in update_desired_x()
 
 //state variables
+uint32_t n_ticks_per_layer = 0; //encoder ticks per layer; recomputed on every update_desired_x() call
+uint32_t count = 0; //combined encoder tick count built from TCC0.CNT and TCC1.CNT in update_desired_x()
 uint32_t n_layers_done = 0;
 uint32_t n_wraps_this_layer = 0;
 uint32_t n_ticks_this_rev = 0;
 uint32_t x_desired = 0; //where we want the tip, measured in nanometers
+uint32_t x_current = 0; //where we are currently, measured in nanometers; changed by step_forward()/step_backward()
 
 void update_desired_x(){
 	//function to implement wraps and layering.
-	//ticks: current number of encoder ticks as measured from the starting point.  
-	// one tick is an edge of the encoder signal
-	//TODO: measure ticks per rev of the coil winder.  It's about 100.
-	n_layers_done = TCC0.CNT / n_ticks_per_layer;
-	n_wraps_this_layer = (TCC0.CNT % n_ticks_per_layer) / n_ticks_per_rev;
-	n_ticks_this_rev = (TCC0.CNT % n_ticks_per_rev);
-	x_desired = n_wraps_this_layer * wire_d + (n_ticks_this_rev * wire_d) / n_ticks_per_rev;
-	if (n_layers_done % 2 == 1){ 
-	  x_desired = max_x - x_desired; //flip odd layer directions
-	} 
+	n_ticks_per_layer = n_ticks_per_rev * n_wraps_per_layer; //how many encoder ticks per layer
+  count = ((uint32_t)TCC1.CNT << 16) | (uint32_t)TCC0.CNT; //TCC1 counts TCC0 overflows, so it is the high 16 bits (weight 65536, not 65535); assumes TCC0.PER is 0xFFFF
+  n_layers_done = count / n_ticks_per_layer; //whole layers covered by the tick count so far
+  if (n_layers_done >= n_layers){ //>= rather than ==, so winding does not resume if the encoder keeps turning past the last layer
+    x_desired = 0;  //stay at home if we're done
+  } else {
+    n_wraps_this_layer = (count % n_ticks_per_layer) / n_ticks_per_rev; //whole wraps completed in the current layer
+    n_ticks_this_rev = (count % n_ticks_per_rev); //encoder ticks into the current wrap
+    x_desired = n_wraps_this_layer * wire_d + (n_ticks_this_rev * wire_d) / n_ticks_per_rev; //one wire diameter per wrap, interpolated linearly within the wrap
+    if (n_layers_done % 2 == 1){
+      x_desired = n_wraps_per_layer*wire_d - x_desired; //flip odd layer directions
+    }
+  }
 }
 
-void step(){
-  PORTD.OUTSET = PIN5_bm;
-  PORTC.OUTSET = PIN4_bm;
+void step_forward(){ //pulse both stepper drivers once with dir set forward, then add nm_per_step to x_current
+  PORTD.OUTCLR = PIN6_bm; //stepper 1 (PORTD): dir pin low = forward
+  PORTC.OUTCLR = PIN5_bm; //stepper 2 (PORTC): dir pin low = forward
+  PORTD.OUTSET = PIN5_bm; //stepper 1: step pin high
+  PORTC.OUTSET = PIN4_bm; //stepper 2: step pin high
+  //PORTE.OUTSET = LED1_mask; //light led
   _delay_us(10);
-  PORTD.OUTCLR = PIN5_bm;
-  PORTC.OUTCLR = PIN4_bm;
+  PORTD.OUTCLR = PIN5_bm; //stepper 1: step pin low
+  PORTC.OUTCLR = PIN4_bm; //stepper 2: step pin low
+  x_current += nm_per_step; //update current x
+  //PORTE.OUTCLR = LED1_mask; //dark led
+}
+void step_backward(){ //pulse both stepper drivers once with dir set backward, then subtract nm_per_step from x_current
+  PORTD.OUTSET = PIN6_bm; //stepper 1 (PORTD): dir pin high = backward
+  PORTC.OUTSET = PIN5_bm; //stepper 2 (PORTC): dir pin high = backward
+  PORTD.OUTSET = PIN5_bm; //stepper 1: step pin high
+  PORTC.OUTSET = PIN4_bm; //stepper 2: step pin high
+  //PORTE.OUTSET = LED2_mask; //light led
+  _delay_us(10); //hold the step pins high for 10 us
+  PORTD.OUTCLR = PIN5_bm; //stepper 1: step pin low
+  PORTC.OUTCLR = PIN4_bm; //stepper 2: step pin low
+  x_current -= nm_per_step; //update current x; unsigned, so callers must only step back while x_current > 0
+  //PORTE.OUTCLR = LED2_mask; //dark led
 }
 
 int main(void) {
@@ -83,9 +92,9 @@ int main(void) {
   //cascade tcc0 overflow into tcc1 using event channel 1 so we get 32 bit counter of encoder pulses
   //with 400 ticks per revolution, that's ~160 turns before 16-bit overflow
   //only issue is we can't discriminate overflow from underflow...
-  //EVSYS.CH1MUX = EVSYS_CHMUX_TCC0_OVF_gc;
-  //TCC1.CTRLA = TC_CLKSEL_EVCH1_gc;
-  //TCC1.PER = 0xFFFF;
+  EVSYS.CH1MUX = EVSYS_CHMUX_TCC0_OVF_gc;
+  TCC1.CTRLA = TC_CLKSEL_EVCH1_gc;
+  TCC1.PER = 0xFFFF;
 
   //DAC for current control, PB3 = DAC CHannel 1
   DACB.CTRLA = DAC_CH1EN_bm | DAC_ENABLE_bm;
@@ -98,13 +107,20 @@ int main(void) {
   //stepper 1
   PORTD.DIRSET = PIN3_bm | PIN4_bm | PIN5_bm | PIN6_bm | PIN7_bm; //set outputs
   PORTD.OUTSET = PIN3_bm | PIN4_bm; //full steps
-  //PORTD.OUTSET = PIN7_bm; //enable
+  PORTD.OUTSET = PIN7_bm; //enable
   //stepper 2
   PORTC.DIRSET = PIN2_bm | PIN3_bm | PIN4_bm | PIN5_bm | PIN6_bm; //set outputs
   PORTC.OUTSET = PIN2_bm | PIN3_bm; //full steps
-  //PORTC.OUTSET = PIN6_bm; //enable
+  PORTC.OUTSET = PIN6_bm; //enable
 
   while (1) {
+    update_desired_x();
+    if (x_desired > x_current){
+      step_forward();
+    } else if (x_desired < x_current){
+      step_backward();
+    }
+
 
   	/*if (TCC0.CNT % 100 == 0){ PORTE.OUTSET = LED1_mask; step();}
   	if (TCC0.CNT % 100 == 25){ PORTE.OUTSET = LED2_mask; step();}