/*
 *
 * Each card DMAs the packets it receives. The terminology is from the master's
 * POV, so common->tx is used for packets from master to slave, and common->rx
 * is used for packets from slave to master.
 *
 * The LDT layer driver provides 64KB of shared memory, of which we use the
 * upper 32KB for the tx/rx rings: 32K-48K for tx and 48K-64K for rx. This
 * allows a ring size of up to 1024 elements.
 *
 * 
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/interrupt.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/sibyte/swarm.h>
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_uart.h>
#include <asm/sibyte/sb1250_int.h>
#include <asm/sibyte/sb1250_dma.h>
#include <asm/sibyte/64bit.h>

#include <asm/system.h>   /* cli(), *_flags */
#include <asm/segment.h>  /* memcpy and such */
#include <asm/pgtable.h>

#include "bcm-internal.h"

/* Prefix used in every printk message emitted by this file. */
#define DEVNAME "BCM"

/*
 * NOTE(review): R_MBOX / R_MBOX_CLEAR are not referenced in the visible part
 * of this file; presumably mailbox register addresses -- confirm.
 */
#define R_MBOX			0xb00200c0
#define R_MBOX_CLEAR	0xb00200d0

/* OR-ed into a ring entry's physical address to form the DMA source address. */
#define PCI_MAP			0xf800000000ULL
/*
 * Address handed to in64()/out64() to read the data-mover status and to
 * program a transfer; DM+8 and DM+0x18 are accessed as well.
 */
#define DM				0xb0020b40

/*
 * Ring sizes (module parameters, default 16).  On the master they are checked
 * and published through the shared "common" area in s(); on the slave they
 * are overwritten from that area by callback().
 */
static int tx_size=16, rx_size=16;
MODULE_PARM(tx_size, "i");
MODULE_PARM(rx_size, "i");

/* Log verbosity (module parameter): >1 and >2 enable extra printk output. */
int verbose=1;
MODULE_PARM(verbose, "i");

/*
 * DMA descriptor used for receive transfers: dmdesc[0] and dmdesc[1] are
 * filled in before each transfer, dmphys is the bus address of dmdesc.
 * Both are set up in s().
 */
u64 *dmdesc;
u64 dmphys;

/* Shared control area; pointed at ldt_buf in s(). */
static struct common_data * common;
/*
 * Task-queue entries initialised in s().
 * NOTE(review): rx_dm_handler() schedules bdev->tq_rx, not this tq_rx --
 * confirm which one is meant to be used.
 */
static struct tq_struct tq_rx;
static struct tq_struct tq_tx;

/*
 * Set to 1 by s() on the master after the init interrupt is raised, and by
 * callback() on the slave once the shared-area signature has been verified.
 */
static int initialized=0;

static void rx_task(void *data){
	struct bcm_dev *bdev;
	struct bcm_transfer *rx;
	
	bdev=(struct bcm_dev *)data;
	rx=bdev->rx_finished;
	bdev->rx_finished=NULL;
	bdev->rx_finished_last=NULL;
	while(rx!=NULL){
		struct protocol *p;
		struct bcm_transfer *tr;
		p=find_protocol(bdev, rx->proto);
		
		if(p) {
			tr=kmalloc(sizeof(struct bcm_transfer), GFP_KERNEL);
			memcpy(tr, rx, sizeof(struct bcm_transfer));
			tr->dev=bdev->info;
			p->payload->rx_action(bdev->info, tr);
		} else {
			kfree(rx->buffer);
		}
		
		tr=rx;
		rx=rx->next;
		kfree(tr);
	}
	bdev->common->msg=__cpu_to_le32(3);
	pciconfig[0x24]=1;
}
		

/*
 * rx_dm_handler - interrupt handler for the receive data-mover channel.
 *
 * @irq:    interrupt number (unused).
 * @dev_id: the struct bcm_dev this handler was registered for.
 * @regs:   saved registers (unused).
 *
 * Returns immediately when the status register reads 0x1000000000000000.
 * Otherwise the channel is stopped and:
 *   - if status bit 61 is set, the data mover is reset and the buffer of the
 *     failed transfer is released;
 *   - else a copy of bdev->rx_in_progress is appended to the
 *     bdev->rx_finished list and bdev->tq_rx is scheduled.
 * In both cases the shared tx_tail index is advanced by one (wrapping at
 * bdev->tx_size) and, if the ring still holds entries, the DMA for the next
 * entry is programmed.
 *
 * If a receive buffer cannot be allocated the DMA is not started and
 * rx_dma_running stays 0, so mbox_rx_task() can retry the same ring entry on
 * its next run.
 */
static void rx_dm_handler(int irq, void *dev_id, struct pt_regs *regs) {
	int tail, head;
	int had_transfer;
	u64 status;
	struct bcm_dev *bdev;
	
	bdev=(struct bcm_dev *)dev_id;
	status=in64(DM);
	if(status==0x1000000000000000ULL)return;
	if(verbose>2)printk(KERN_ERR DEVNAME ": rx_dm_handler, status=%016Lx\n",status);
	/* remember whether rx_in_progress.buffer really belongs to a transfer */
	had_transfer=bdev->rx_dma_running;
	bdev->rx_dma_running=0;
	out64(0, DM);
	if(status&(1ULL<<61)) {
		out64(1ULL<<62, DM);	/* reset DM */
		/* the failed transfer's buffer was never handed to anybody */
		if(had_transfer) {
			kfree(bdev->rx_in_progress.buffer);
			bdev->rx_in_progress.buffer=NULL;
		}
	} else {
		struct bcm_transfer *tr;
		tr=kmalloc(sizeof(*tr), GFP_ATOMIC);
		if(tr!=NULL) {
			memcpy(tr, &bdev->rx_in_progress, sizeof(*tr));
			tr->next=NULL;
			if(bdev->rx_finished_last==NULL) {
				bdev->rx_finished=tr;
			} else {
				bdev->rx_finished_last->next=tr;
			}
			bdev->rx_finished_last=tr;
			schedule_task(&bdev->tq_rx);
		} else {
			printk(KERN_ERR DEVNAME ": out of memory, dropping received packet\n");
			if(had_transfer) {
				kfree(bdev->rx_in_progress.buffer);
				bdev->rx_in_progress.buffer=NULL;
			}
		}
	}
			
	tail=__le32_to_cpu(bdev->common->tx_tail);
	tail++;
	if(tail==bdev->tx_size)tail=0;
	/* storing into the shared area: convert cpu -> little-endian */
	bdev->common->tx_tail=__cpu_to_le32(tail);
	head=__le32_to_cpu(bdev->common->tx_head);
		
	if(verbose>2)printk(KERN_ERR DEVNAME": tail=%i  head=%i\n",tail, head);
	if(head != tail) { /* queue not empty yet */
		int size;
		u64 src, dest;
		void *buf;
		
		size=__le32_to_cpu(bdev->tx_ring[tail].len);
		src=__le64_to_cpu(bdev->tx_ring[tail].phys) | PCI_MAP;
		
		buf=kmalloc(size, GFP_ATOMIC);
		if(buf==NULL) {
			/* never program the DMA with the address of a NULL buffer */
			printk(KERN_ERR DEVNAME ": out of memory, cannot start receive DMA\n");
			return;
		}
		bdev->rx_in_progress.buffer=buf;
		bdev->rx_in_progress.len=size;
		/* keep in step with mbox_rx_task(), which also records the flags */
		bdev->rx_in_progress.flags=__le32_to_cpu(bdev->tx_ring[tail].flags);
		bdev->rx_in_progress.proto=__le32_to_cpu(bdev->tx_ring[tail].proto);
		
		dest=virt_to_phys(bdev->rx_in_progress.buffer);
		dmdesc[0]=dest | (0x0406ULL<<40); /* dest cachable, interrupt */
		dmdesc[1]=src | ((u64)size << 40);
		out64(dmphys | (1ULL<<40) | (1ULL<<61) | (1ULL<<63), DM);
		out64(1, DM+8);
		bdev->rx_dma_running=1;
	}
}

void mbox_rx_task(unsigned long data) {
	int head, tail;
	struct bcm_dev *bdev;

	if(verbose>1)printk(KERN_DEBUG DEVNAME ": data tx interrupt\n");
	bdev=*(struct bcm_dev **)data;
	head=__le32_to_cpu(bdev->common->tx_head);
	tail=__le32_to_cpu(bdev->common->tx_tail);
	if(head==tail) {
		if(verbose>1)
			printk(KERN_WARNING DEVNAME ": data tx interrupt while tx queue is empty.\n");
	} else if(!bdev->rx_dma_running){
		int size;
		u64 src, dest;
		
		size=__le32_to_cpu(bdev->tx_ring[tail].len);
		src=__le64_to_cpu(bdev->tx_ring[tail].phys) | PCI_MAP;
		bdev->rx_in_progress.buffer=kmalloc(size, GFP_ATOMIC);
		bdev->rx_in_progress.len=size;
		bdev->rx_in_progress.flags=__le32_to_cpu(bdev->tx_ring[tail].flags);
		bdev->rx_in_progress.proto=__le32_to_cpu(bdev->tx_ring[tail].proto);
		
		dest=virt_to_phys(bdev->rx_in_progress.buffer);
		dmdesc[0]=dest | (0x0406ULL<<40); /* dest cachable, interrupt */
		dmdesc[1]=src | ((u64)size << 40);
		out64(dmphys | (1ULL<<40) | (1ULL<<61) | (1ULL<<63), DM);
		out64(1, DM+8);
		bdev->rx_dma_running=1;
		if(verbose>2)printk(KERN_ERR DEVNAME ": status is %016Lx   %016Lx   %016Lx\n",
				in64(DM+0x18),dmdesc[0],dmdesc[1]);
	}
}

DECLARE_TASKLET(mbox_rx_tasklet, mbox_rx_task, (unsigned long)&bdev);

/*
 * callback - notification hook installed into ldt_callback by s().
 *
 * @irq: notification code.  Only code 2 (init interrupt, from master to
 *       slave) causes any action; codes 4 (packet ready in tx ring) and
 *       8 (packet was freed from transmit ring) are accepted and ignored,
 *       as is every other value.
 *
 * On a slave, code 2 verifies the 'B','C','M',175 signature in the shared
 * area and, if it matches, adopts the ring sizes stored there and marks the
 * driver initialized.  A master receiving code 2 only logs a warning.
 */
static void callback(int irq) {
	if(irq!=2)
		return;

	if(ldt_master) {
		printk(KERN_WARNING DEVNAME ": Master card received init interrupt.\n");
		return;
	}

	/* shared area not (yet) set up by the master: ignore the interrupt */
	if(common->signature[0]!='B')
		return;
	if(common->signature[1]!='C')
		return;
	if(common->signature[2]!='M')
		return;
	if(common->signature[3]!=175)
		return;

	tx_size=common->tx_size;
	rx_size=common->rx_size;
	initialized=1;
}

/*
 * s - module initialisation.
 *
 * Allocates the DMA descriptor, clears the DMA-running flags, points
 * "common" at the LDT shared buffer and, on the master, validates the ring
 * sizes and writes the signature, sizes and zeroed ring indices into the
 * shared area.  It then sets up the task-queue entries, installs
 * rx_dm_handler() on K_INT_DM_CH_2 and registers callback() with the LDT
 * layer.  On the master it finally raises the init interrupt and marks the
 * driver initialized.
 *
 * Returns 0 on success, -ENOMEM if the descriptor cannot be allocated,
 * -EINVAL for an out-of-range ring size, or the error from request_irq().
 */
static int __init s(void) {
	int err;

	dmdesc=kmalloc(32, GFP_KERNEL);
	if(dmdesc==NULL)
		return -ENOMEM;
	dmphys=virt_to_bus(dmdesc);
	bdev->tx_dma_running=0;
	bdev->rx_dma_running=0;

	common = (struct common_data *) ldt_buf;

	if(ldt_master) {
		if(tx_size>1023 || rx_size>1023) {
			printk(KERN_ERR DEVNAME ": queue size is limited to 1023\n");
			err=-EINVAL;
			goto out_free;
		}
		/* the ring indices wrap at the size, so it must be positive */
		if(tx_size<1 || rx_size<1) {
			printk(KERN_ERR DEVNAME ": queue size must be at least 1\n");
			err=-EINVAL;
			goto out_free;
		}
		common->signature[0]='B';
		common->signature[1]='C';
		common->signature[2]='M';
		common->signature[3]=175;
		/*
		 * NOTE(review): the sizes are stored in native byte order while the
		 * ring indices are accessed through __le32_to_cpu() elsewhere;
		 * callback() reads them back natively as well -- confirm.
		 */
		common->tx_size=tx_size;
		common->rx_size=rx_size;
		
		common->tx_head=0;
		common->tx_tail=0;
		common->rx_head=0;
		common->rx_tail=0;
	}
	

	/*
	 * NOTE(review): rx_dm_handler() schedules bdev->tq_rx rather than the
	 * file-level tq_rx initialised here -- confirm which one is intended.
	 */
	tq_rx.data=bdev;
	tq_rx.routine=rx_task;
	tq_rx.sync=0;

	tq_tx.data=bdev;
	tq_tx.routine=tx_task;
	tq_tx.sync=0;
	/*
	 * NOTE(review): these unqualified names are not declared in the visible
	 * part of this file, while other functions use bdev->tx_card and
	 * bdev->rx_finished -- confirm which objects are meant.
	 */
	tx_card=NULL;
	rx_card=NULL;
	tx_finished=NULL;
	tx_finished_last=NULL;
	rx_finished=NULL;
	rx_finished_last=NULL;
		
	/*
	 * rx_dm_handler() casts dev_id back to a struct bcm_dev *, so the device
	 * itself must be passed as the cookie; e() releases with the same value.
	 */
	err=request_irq(K_INT_DM_CH_2, rx_dm_handler, SA_SHIRQ, DEVNAME, bdev);
	if(err) {
		printk(KERN_ERR DEVNAME ": cannot get irq %d\n", K_INT_DM_CH_2);
		goto out_free;
	}
	ldt_callback=callback;

	if(ldt_master) {
		ldt_interrupt(2<<13);
		initialized=1;
	}
	
	return 0;

out_free:
	kfree(dmdesc);
	dmdesc=NULL;
	return err;
}

/*
 * e - module cleanup: releases the interrupt taken in s() (same dev_id
 * cookie) and frees the DMA descriptor.
 *
 * NOTE(review): ldt_callback still points at callback() after unload --
 * confirm whether the LDT layer expects it to be cleared here.
 */
static void __exit e(void) {
	free_irq(K_INT_DM_CH_2, bdev);
	kfree(dmdesc);
	return ;
}
module_init(s);
module_exit(e);


/*
 * external interface
 */
/*
 * NOTE(review): di is not referenced in the visible part of this file;
 * presumably the dev_info handed to payload modules -- confirm.
 */
struct dev_info di;

int bcm_register_payload(struct dev_info *dev, struct bcm_payload *pl) {
	struct protocol *p;
	struct bcm_dev * bdev=dev->bcm;
	
	p=find_protocol(bdev, pl->proto);
	if(p!=NULL) {
		p->payload=pl;
	} else {
		p=bdev->prot;
		bdev->prot=kmalloc(sizeof(struct protocol), GFP_KERNEL);
		bdev->prot->next=p;
		bdev->prot->payload=pl;
		bdev->prot->proto=pl->proto;
	}
	return 0;
}

/*
 * bcm_unregister_payload - remove a payload handler from a device.
 *
 * @dev: device whose ->bcm protocol list is searched.
 * @pl:  payload to remove.
 *
 * Unlinks and frees the first list entry whose payload pointer equals @pl.
 *
 * Returns 0 if an entry was removed, -1 if @pl was not registered.
 */
int bcm_unregister_payload(struct dev_info *dev, struct bcm_payload *pl) {
	struct bcm_dev *bdev=dev->bcm;
	struct protocol **link;

	/* walk the links rather than the nodes so the head needs no special case */
	for(link=&bdev->prot; *link!=NULL; link=&(*link)->next) {
		struct protocol *victim=*link;

		if(victim->payload!=pl)
			continue;
		*link=victim->next;
		kfree(victim);
		return 0;
	}
	return -1;
}

int bcm_transfer_data(struct bcm_transfer *tr) {
	struct bcm_dev *bdev;
	struct bcm_transfer *t, *s;

	bdev=tr->dev->bcm;
	s=kmalloc(sizeof(struct bcm_transfer), GFP_KERNEL);
	memcpy(s, tr, sizeof(struct bcm_transfer));
	s->next=NULL;
	
	disable_irq(K_INT_DM_CH_1);
	t=bdev->tx_card;
	if(t==NULL) {
		bdev->tx_card=s;
	} else {
		while(t->next!=NULL){
			t=t->next;
		}
		t->next=s;
	}
	enable_irq(K_INT_DM_CH_1);
	if(!bdev->tx_dma_running)start_tx(bdev);
	return 0;
}

/* Make the external interface above available to other kernel modules. */
EXPORT_SYMBOL(bcm_register_payload);
EXPORT_SYMBOL(bcm_unregister_payload);
EXPORT_SYMBOL(bcm_transfer_data);
