#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/sibyte/swarm.h>
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_uart.h>
#include <asm/sibyte/sb1250_int.h>
#include <asm/sibyte/sb1250_dma.h>
#include <asm/sibyte/64bit.h>

#include <asm/system.h>   /* cli(), *_flags */
#include <asm/segment.h>  /* memcpy and such */
#include <asm/pgtable.h>

#include "bcm.h"

/* Prefix used in log messages and as the name passed to request_irq(). */
#define DEVNAME "BCM"

/*
 * Mailbox registers: mbox_handler() reads pending bits from R_MBOX (only
 * bits 31:16 are examined) and acknowledges them by writing the handled
 * bit to R_MBOX_CLEAR.
 */
#define R_MBOX			0xb00200c0
#define R_MBOX_CLEAR	0xb00200d0

/*
 * PCI_MAP is combined (OR-ed or added) with the physical addresses taken
 * from the shared area and the ring entries before they are used as DMA
 * addresses or ioremap() targets.
 * DM is the base of the data mover registers used in this file: DM and
 * DM+8 drive the receive channel, DM+0x20 and DM+0x28 the transmit
 * channel, and DM+0x18 is polled as a status register.
 */
#define PCI_MAP			0xf800000000ULL
#define DM				0xb0020b00

static struct protocol *find_protocol(struct bcm_dev *bdev, u32 proto);

/* The single device instance; allocated in the module init function. */
struct bcm_dev *bdev;

/*
 * Window obtained with ioremap(0xfe000000, 0x100) at init time; this file
 * writes the 32-bit words at indices 0x11 and 0x24.
 */
u32 * pciconfig=0;

/* Non-zero enables the diagnostic printk() calls; settable at load time. */
int verbose=1;
MODULE_PARM(verbose, "i");

/*
 * dmdesc is a 32-byte block holding four 64-bit words: entries 0-1 are
 * written for receive DMA, entries 2-3 for transmit DMA.
 * dmphys is the bus address of dmdesc as returned by virt_to_bus().
 */
u64 *dmdesc;
u64 dmphys;
/* Set to 1 when a DMA is started on the channel, cleared by its handler. */
static int rx_dma_running;
static int tx_dma_running;
/* Transfer currently handed to the transmit DMA by start_tx(). */
struct bcm_transfer * tx_in_progress;
/* Buffer, length, flags and protocol of the receive DMA being performed. */
struct bcm_transfer rx_in_progress;

/*
 * Drain the list of completed transmit transfers.
 *
 * data is the struct bcm_dev whose tx_finished list is processed.  The list
 * is detached from the device first and then walked once.  A transfer that
 * has FLAGS_NOTIFY set and whose protocol is registered is handed to that
 * protocol's tx_notify() callback (the transfer is not freed here in that
 * case); every other transfer and its buffer are released with kfree().
 */
static void tx_task(void *data)
{
	struct bcm_dev *dev = data;
	struct bcm_transfer *cur;
	struct bcm_transfer *following;

	cur = dev->tx_finished;
	dev->tx_finished = NULL;

	for (; cur != NULL; cur = following) {
		struct protocol *handler = NULL;

		/* Remember the successor: cur may be freed or handed off below. */
		following = cur->next;

		/* The protocol lookup only happens for transfers asking for it. */
		if (cur->flags & FLAGS_NOTIFY)
			handler = find_protocol(dev, cur->proto);

		if (handler != NULL) {
			handler->payload->tx_notify(dev, cur);
			continue;
		}

		kfree(cur->buffer);
		kfree(cur);
	}
}

/*
 * Deliver the list of completed receive transfers to their protocols.
 *
 * data is the struct bcm_dev whose rx_finished list is processed.  The list
 * is detached from the device and walked once.  For each entry whose
 * protocol is registered, a private copy of the transfer descriptor is
 * allocated and passed (together with the data buffer) to the protocol's
 * rx_action() callback.  If no protocol is registered, or the copy cannot
 * be allocated, the data buffer is freed and the transfer is dropped.
 * The list node itself is always freed here.
 *
 * After the list is drained, message code 3 is stored in the shared area
 * and pciconfig[0x24] is set to 1.
 */
static void rx_task(void *data)
{
	struct bcm_dev *bdev = data;
	struct bcm_transfer *rx;

	rx = bdev->rx_finished;
	bdev->rx_finished = NULL;
	printk(KERN_ERR DEVNAME ": in rx_task\n");

	while (rx != NULL) {
		struct bcm_transfer *done = rx;
		struct bcm_transfer *tr = NULL;
		struct protocol *p = find_protocol(bdev, rx->proto);

		if (p != NULL) {
			/* GFP_ATOMIC: this function is also called from interrupt handlers. */
			tr = kmalloc(sizeof(*tr), GFP_ATOMIC);
			if (tr == NULL)
				printk(KERN_ERR DEVNAME ": rx_task: out of memory, dropping transfer\n");
		}

		if (tr != NULL) {
			memcpy(tr, rx, sizeof(*tr));
			tr->dev = bdev;
			/* The list nodes are freed below; do not leak a dangling link. */
			tr->next = NULL;
			p->payload->rx_action(bdev, tr);
		} else {
			kfree(rx->buffer);
		}

		rx = rx->next;
		kfree(done);
	}

	bdev->common->msg = __cpu_to_le32(3);
	pciconfig[0x24] = 1;
}

/*
 * Start a transmit DMA for the first transfer queued on bdev->tx_card.
 *
 * Nothing is started when the shared rx_tail and rx_head indices are equal
 * or when the tx_card queue is empty.  Otherwise the transfer's buffer is
 * used as DMA source and the address stored in rx_ring[rx_tail] (combined
 * with PCI_MAP) as destination, the descriptor is written to dmdesc[2..3],
 * the transmit channel is started, the transfer is moved from the queue to
 * tx_in_progress, and rx_tail is advanced (wrapping at rx_size) and written
 * back to the shared area.
 *
 * NOTE(review): tr->len is not compared against the size of the
 * destination ring buffer; confirm that callers guarantee it fits.
 */
void start_tx(struct bcm_dev *bdev) {
	int tail, head;
	struct bcm_transfer *tr;
	int size;
	u64 src, dest;

	tail=__le32_to_cpu(bdev->common->rx_tail);
	head=__le32_to_cpu(bdev->common->rx_head);
	printk(KERN_ERR DEVNAME ": start_tx, tail=%i, head=%i\n",tail, head);
	if(tail==head)return;

	tr=bdev->tx_card;
	if(tr==NULL)return;

	size=tr->len;
	dest=__le64_to_cpu(bdev->rx_ring[tail].phys) | PCI_MAP;
	src=virt_to_phys(tr->buffer);
	dmdesc[2]=dest | (0x0406ULL<<40); /* dest cachable, interrupt */
	dmdesc[3]=src | ((u64)size << 40);
	/*
	 * Bits 40, 61 and 63 are control bits and must be produced with shifts,
	 * exactly as the receive path does; "1ULL<40" is a comparison that
	 * evaluates to 1 and would only corrupt bit 0 of the descriptor address.
	 */
	out64((dmphys+16) | (1ULL<<40) | (1ULL<<61) | (1ULL<<63), DM+0x20);
	out64(1, DM+8+0x20);
	tx_dma_running=1;

	/* Unlink the transfer from the queue; the DMA handler completes it. */
	bdev->tx_card=tr->next;
	tx_in_progress=tr;

	tail++;
	if(tail==bdev->rx_size)tail=0;
	bdev->common->rx_tail=__cpu_to_le32(tail);
}

/*
 * Interrupt handler for the transmit data mover channel (registers at
 * DM+0x20).
 *
 * dev_id is interpreted as the struct bcm_dev * of the device.
 * NOTE(review): the request_irq() call for this handler must therefore
 * pass the struct bcm_dev pointer as its cookie - verify the registration.
 *
 * If status bit 61 is set the channel is reset (bit 62 written) and the
 * transfer in progress is freed together with its buffer; no further
 * transfer is started in that case.  Otherwise the transfer is pushed onto
 * tx_finished, the tx task routine is run synchronously, the channel
 * register is cleared and the next queued transfer, if any, is started.
 * In both cases message code 2 is stored in the shared area and
 * pciconfig[0x24] is set to 1.
 *
 * This handler deliberately leaves the receive channel (registers at DM)
 * and rx_dma_running alone: clearing them here could cancel a receive DMA
 * that is still in flight.
 */
static void tx_dm_handler(int irq, void *dev_id, struct pt_regs *regs) {
	u64 status;
	struct bcm_dev *bdev=(struct bcm_dev *)dev_id;

	status=in64(DM+0x20);
	if(verbose)printk(KERN_ERR DEVNAME ": tx_dm_handler, status=%016Lx\n",status);

	if(status&(1ULL<<61)) {
		out64(1ULL<<62, DM+0x20);
		/* The IRQ is shared, so there may be no transfer in progress. */
		if(tx_in_progress!=NULL) {
			kfree(tx_in_progress->buffer);
			kfree(tx_in_progress);
		}
		tx_in_progress=NULL;
		tx_dma_running=0;
	} else {
		if(tx_in_progress!=NULL) {
			tx_in_progress->next=bdev->tx_finished;
			bdev->tx_finished=tx_in_progress;
			/* Run the completion routine directly instead of scheduling it. */
			bdev->tq_tx.routine(bdev);
		}
		tx_in_progress=NULL;
		tx_dma_running=0;
		out64(0, DM+0x20);
		if(bdev->tx_card)start_tx(bdev);
	}
	bdev->common->msg=__cpu_to_le32(2);
	pciconfig[0x24]=1;
}
		

/*
 * Interrupt handler for the receive data mover channel (registers at DM).
 *
 * The channel register is read and cleared.  If status bit 61 is set the
 * channel is reset (bit 62 written) and the buffer that was being filled
 * is freed.  Otherwise a copy of rx_in_progress is pushed onto the device's
 * rx_finished list and rx_task() is run synchronously; if that copy cannot
 * be allocated the received buffer is dropped.
 *
 * In every case the shared tx_tail index is advanced (wrapping at tx_size)
 * and written back.  If it then differs from tx_head, a buffer is allocated
 * for the ring entry at the new tail and the next receive DMA is started.
 *
 * dev_id is not used; the handler operates on the global device.
 */
static void rx_dm_handler(int irq, void *dev_id, struct pt_regs *regs) {
	int tail, head;
	u64 status;

	status=in64(DM);
	if(verbose)printk(KERN_ERR DEVNAME ": rx_dm_handler, status=%016Lx\n",status);
	rx_dma_running=0;
	out64(0, DM);
	if(status&(1ULL<<61)) {
		out64(1ULL<<62, DM);	/* reset DM */
		/* Nobody will consume the partially filled buffer. */
		kfree(rx_in_progress.buffer);
	} else {
		struct bcm_transfer *tr;

		tr=kmalloc(sizeof(*tr), GFP_ATOMIC);
		if(tr==NULL) {
			printk(KERN_ERR DEVNAME ": rx_dm_handler: out of memory, dropping transfer\n");
			kfree(rx_in_progress.buffer);
		} else {
			memcpy(tr, &rx_in_progress, sizeof(*tr));
			tr->next=bdev->rx_finished;
			bdev->rx_finished=tr;
			rx_task(bdev);
		}
	}
	/* The buffer was either handed off or freed; forget the stale pointer. */
	rx_in_progress.buffer=NULL;

	tail=__le32_to_cpu(bdev->common->tx_tail);
	tail++;
	if(tail==bdev->tx_size)tail=0;
	/* Storing into the shared area: convert CPU order to little endian. */
	bdev->common->tx_tail=__cpu_to_le32(tail);
	head=__le32_to_cpu(bdev->common->tx_head);

	if(verbose)printk(KERN_ERR DEVNAME": tail=%i  head=%i\n",tail, head);
	if(head != tail) { /* queue not empty yet */
		int size;
		u64 src, dest;

		size=__le32_to_cpu(bdev->tx_ring[tail].len);
		src=__le64_to_cpu(bdev->tx_ring[tail].phys) | PCI_MAP;

		rx_in_progress.buffer=kmalloc(size, GFP_ATOMIC);
		if(rx_in_progress.buffer==NULL) {
			/* Without a destination buffer no DMA may be started. */
			printk(KERN_ERR DEVNAME ": rx_dm_handler: out of memory for %i byte buffer\n", size);
			return;
		}
		rx_in_progress.len=size;
		/* Take the flags from the ring entry, as the mailbox path does. */
		rx_in_progress.flags=__le32_to_cpu(bdev->tx_ring[tail].flags);
		rx_in_progress.proto=__le32_to_cpu(bdev->tx_ring[tail].proto);

		dest=virt_to_phys(rx_in_progress.buffer);
		dmdesc[0]=dest | (0x0406ULL<<40); /* dest cachable, interrupt */
		dmdesc[1]=src | ((u64)size << 40);
		/* Control bits 40, 61 and 63 are shifts, not comparisons. */
		out64(dmphys | (1ULL<<40) | (1ULL<<61) | (1ULL<<63), DM);
		out64(1, DM+8);
		rx_dma_running=1;
	}
}

static void mbox_handler(int irq, void *dev_id, struct pt_regs *regs) {
	u64 mbox;

	mbox=in64(R_MBOX) & 0xffff0000ULL;
	
	while(mbox) {	
	
		if(mbox&INT_ACK) {
			bdev->common->msg=0;
			pciconfig[0x24]=0;
			out64(INT_ACK, R_MBOX_CLEAR);
			if(verbose)printk(KERN_DEBUG DEVNAME ": acknowledge interrupt\n");
		}
		if(mbox&INT_INIT) {
			bdev->tx_size=__le32_to_cpu(bdev->common->tx_size);
			bdev->rx_size=__le32_to_cpu(bdev->common->rx_size);
			bdev->tx_ring=ioremap(__le64_to_cpu(bdev->common->tx_phys)+ PCI_MAP, 
					bdev->tx_size*sizeof(ring_element));
			bdev->rx_ring=ioremap(__le64_to_cpu(bdev->common->rx_phys)+ PCI_MAP, 
					bdev->tx_size*sizeof(ring_element));
			if(verbose)printk(KERN_DEBUG DEVNAME ": init interrupt  rx=%08Lx:%i  tx=%08Lx:%i\n",
					__le64_to_cpu(bdev->common->rx_phys), bdev->rx_size, 
					__le64_to_cpu(bdev->common->tx_phys), bdev->tx_size);
			out64(INT_INIT, R_MBOX_CLEAR);
			bdev->common->msg=__cpu_to_le32(1);
			pciconfig[0x24]=1; /* all ready */

		}
		if(mbox&0x20000) {
			int tail, head;
			if(verbose)printk(KERN_DEBUG DEVNAME ": data tx interrupt\n");
			out64(0x20000, R_MBOX_CLEAR);
			head=__le32_to_cpu(bdev->common->tx_head);
			tail=bdev->tx_dev_tail;
			if(head==tail) {
				printk(KERN_WARNING DEVNAME ": data tx interrupt while tx queue is empty.\n");
			} else if(!tx_dma_running){
				int size;
				u64 src, dest;
				
				size=__le32_to_cpu(bdev->tx_ring[tail].len);
				src=__le64_to_cpu(bdev->tx_ring[tail].phys) | PCI_MAP;
				printk("SIZE=%i\n",size);	
				rx_in_progress.buffer=kmalloc(size, GFP_ATOMIC);
				rx_in_progress.len=size;
				rx_in_progress.flags=__le32_to_cpu(bdev->tx_ring[tail].flags);
				rx_in_progress.proto=__le32_to_cpu(bdev->tx_ring[tail].proto);
				
				dest=virt_to_phys(rx_in_progress.buffer);
				dmdesc[0]=dest | (0x0406ULL<<40); /* dest cachable, interrupt */
				dmdesc[1]=src | ((u64)size << 40);
				out64(dmphys | (1ULL<<40) | (1ULL<<61) | (1ULL<<63), DM);
				out64(1, DM+8);
				rx_dma_running=1;
				if(verbose)printk(KERN_ERR DEVNAME ": status is %016Lx   %016Lx   %016Lx\n",in64(DM+0x18),dmdesc[0],dmdesc[1]);
				while (in64(DM+0x18)&(0x8ULL<<48))
				if(verbose)printk(KERN_ERR DEVNAME ": status is %016Lx\n",in64(DM+0x18));
			}
		}
		if(mbox&INT_UNK) {
			if(verbose)printk(KERN_WARNING DEVNAME 
					": unexpected interrupt, mbox[31:16]=%04Lx\n", mbox>>16);
			out64(INT_UNK, R_MBOX_CLEAR);
		}
		mbox=in64(R_MBOX) & 0xffff0000ULL;
	}
		
}

/*
 * Module initialisation.
 *
 * Allocates the shared area (an order-8 page allocation), the device
 * structure and the 32-byte DMA descriptor block, maps the configuration
 * window, publishes the shared area's address through pciconfig[0x11],
 * writes the signature, initialises the device fields and finally
 * registers the mailbox and the two data mover interrupt handlers.
 *
 * All three handlers are registered with the struct bcm_dev pointer as
 * cookie: tx_dm_handler() casts its dev_id to struct bcm_dev *, and the
 * exit function releases the IRQs with the same cookie.
 *
 * Returns 0 on success or a negative errno value; on failure everything
 * acquired so far is released again.
 */
static int __init s(void) {
	unsigned long addr;
	unsigned long p;
	int err;

	addr=__get_free_pages(GFP_KERNEL, 8);
	if(!addr) {
		printk(KERN_ERR "Can't allocate bdev->common 1MB\n");
		return -ENOMEM;
	}

	err=-ENOMEM;
	bdev=kmalloc(sizeof(struct bcm_dev), GFP_KERNEL);
	if(!bdev)
		goto out_pages;

	dmdesc=kmalloc(32, GFP_KERNEL);
	if(!dmdesc)
		goto out_bdev;
	dmphys=virt_to_bus(dmdesc);
	tx_dma_running=0;
	rx_dma_running=0;

	bdev->common = (struct common_data *) addr;

	p=virt_to_phys(bdev->common);
	pciconfig=ioremap(0xfe000000ULL, 0x100);
	if(!pciconfig)
		goto out_dmdesc;

	pciconfig[0x11]=((p>>8)&0xfffff00)|1;
	pciconfig[0x24]=0;
	bdev->common->signature[0]='B';
	bdev->common->signature[1]='C';
	bdev->common->signature[2]='M';
	bdev->common->signature[3]=174;

	bdev->tx_size=-1;
	bdev->rx_size=-1;
	bdev->tx_dev_tail=0;
	/* The rings are mapped later by the mailbox handler. */
	bdev->tx_ring=NULL;
	bdev->rx_ring=NULL;

	bdev->tq_rx.data=bdev;
	bdev->tq_rx.routine=rx_task;

	bdev->tq_tx.data=bdev;
	bdev->tq_tx.routine=tx_task;
	bdev->tx_card=NULL;
	bdev->rx_card=NULL;
	bdev->tx_finished=NULL;
	bdev->rx_finished=NULL;

	bdev->prot=NULL;

	/* Handlers go live last, once every field they touch is initialised. */
	err=request_irq(K_INT_MBOX_2, mbox_handler, SA_SHIRQ, DEVNAME, bdev);
	if(err)
		goto out_unmap;
	err=request_irq(K_INT_DM_CH_0, rx_dm_handler, SA_SHIRQ, DEVNAME, bdev);
	if(err)
		goto out_irq_mbox;
	err=request_irq(K_INT_DM_CH_1, tx_dm_handler, SA_SHIRQ, DEVNAME, bdev);
	if(err)
		goto out_irq_rx;

	return 0;

out_irq_rx:
	free_irq(K_INT_DM_CH_0, bdev);
out_irq_mbox:
	free_irq(K_INT_MBOX_2, bdev);
out_unmap:
	/* Withdraw the shared area before its pages are given back. */
	pciconfig[0x11]=0;
	if(bdev->tx_ring)
		iounmap(bdev->tx_ring);
	if(bdev->rx_ring)
		iounmap(bdev->rx_ring);
	iounmap(pciconfig);
	pciconfig=NULL;
out_dmdesc:
	kfree(dmdesc);
	dmdesc=NULL;
out_bdev:
	kfree(bdev);
	bdev=NULL;
out_pages:
	free_pages(addr, 8);
	return err;
}

/*
 * Module cleanup.
 *
 * Releases all three interrupt lines first so that no handler can run
 * against memory that is about to be freed, then withdraws the shared
 * area from pciconfig[0x11], unmaps the I/O mappings and frees the shared
 * pages, the descriptor block and the device structure.
 *
 * NOTE(review): transfers still queued on the device lists and registered
 * protocol entries are not released here.
 */
static void __exit e(void) {
	free_irq(K_INT_DM_CH_1, bdev);
	free_irq(K_INT_DM_CH_0, bdev);
	free_irq(K_INT_MBOX_2, bdev);

	if(pciconfig) {
		pciconfig[0x11]=0;
		iounmap(pciconfig);
		pciconfig=NULL;
	}
	if(bdev->tx_ring)
		iounmap(bdev->tx_ring);
	if(bdev->rx_ring)
		iounmap(bdev->rx_ring);
	if(bdev->common)
		free_pages((unsigned long)bdev->common, 8);
	kfree(dmdesc);
	dmdesc=NULL;
	kfree(bdev);
	bdev=NULL;
}
module_init(s);
module_exit(e);


/*
 * Look up a registered protocol by number.
 *
 * Walks the device's protocol list and returns the first entry whose proto
 * field equals proto, or NULL when there is none.
 */
static struct protocol *find_protocol(struct bcm_dev *bdev, u32 proto)
{
	struct protocol *node;

	for (node = bdev->prot; node != NULL; node = node->next) {
		if (node->proto == proto)
			return node;
	}
	return NULL;
}

/*
 * external interface
 */

int bcm_register_driver(struct bcm_driver *dr) {
        int i;
        struct dev_info di;

        for(i=0;i<1;i++) {
                di.bcm=bdev;
                strcpy(di.identity, "BCM");
                dr->init(&di);
        }
        return 0;
}

/*
 * Register a payload handler for its protocol number on a device.
 *
 * If an entry for pl->proto already exists its payload is replaced;
 * otherwise a new entry is allocated and pushed onto the front of the
 * device's protocol list.
 *
 * Returns 0 on success or -ENOMEM if the new entry cannot be allocated.
 */
int bcm_register_payload(struct bcm_dev *bdev, struct bcm_payload *pl) {
	struct protocol *p;

	p=find_protocol(bdev, pl->proto);
	if(p!=NULL) {
		p->payload=pl;
		return 0;
	}

	p=kmalloc(sizeof(*p), GFP_KERNEL);
	if(p==NULL)
		return -ENOMEM;

	/*
	 * Fill the entry in completely before linking it: the list is also
	 * walked by find_protocol() from the interrupt handlers.
	 */
	p->proto=pl->proto;
	p->payload=pl;
	p->next=bdev->prot;
	bdev->prot=p;
	return 0;
}

/*
 * Remove a payload handler from a device.
 *
 * The first protocol entry whose payload pointer equals pl is unlinked from
 * the device's protocol list and freed.
 *
 * Returns 0 if an entry was removed, -1 if pl is not registered.
 */
int bcm_unregister_payload(struct bcm_dev *bdev, struct bcm_payload *pl)
{
	struct protocol **link;

	/* link always points at the pointer that refers to the current entry. */
	for (link = &bdev->prot; *link != NULL; link = &(*link)->next) {
		struct protocol *victim = *link;

		if (victim->payload != pl)
			continue;

		*link = victim->next;
		kfree(victim);
		return 0;
	}
	return -1;
}

/*
 * Queue a transfer for transmission.
 *
 * The descriptor *tr is copied (the data buffer itself is not), the copy is
 * appended to the tx_card queue of tr->dev, and transmission is started
 * immediately when no transmit DMA is flagged as running.
 *
 * Returns 0 on success or -ENOMEM if the copy cannot be allocated.
 *
 * NOTE(review): the queue is also modified from the transmit interrupt
 * path via start_tx(); no locking is done here - confirm callers cannot
 * race with that handler.
 */
int bcm_transfer_data(struct bcm_transfer *tr) {
	struct bcm_dev *bdev;
	struct bcm_transfer *copy;
	struct bcm_transfer *t;

	bdev=tr->dev;

	copy=kmalloc(sizeof(*copy), GFP_KERNEL);
	if(copy==NULL)
		return -ENOMEM;
	memcpy(copy, tr, sizeof(*copy));
	/* Terminate after copying so the caller's next pointer is not inherited. */
	copy->next=NULL;

	t=bdev->tx_card;
	if(t==NULL) {
		bdev->tx_card=copy;
	} else {
		while(t->next!=NULL)t=t->next;
		t->next=copy;
	}
	if(!tx_dma_running)start_tx(bdev);
	return 0;
}

/* Entry points made available to other kernel modules. */
EXPORT_SYMBOL(bcm_register_driver);
EXPORT_SYMBOL(bcm_register_payload);
EXPORT_SYMBOL(bcm_unregister_payload);
EXPORT_SYMBOL(bcm_transfer_data);
