[PATCH][v4][9/24] Add Mellanox HCA low-level driver

From: Roland Dreier (roland_at_topspin.com)
Date: 12/20/04

  • Next message: Roland Dreier: "[PATCH][v4][10/24] Add Mellanox HCA low-level driver (midlayer interface)"
    Date:	Sun, 19 Dec 2004 22:14:59 -0800
    To: linux-kernel@vger.kernel.org
    
    

    Add a low-level driver for Mellanox MT23108 and MT25208 HCAs. The
    MT25208 is only fully supported when in MT23108 compatibility mode;
    only the very beginnings of support for native MT25208 mode (required
    for HCAs without local memory) is present.

    (As a side note, I believe this driver would be the first in-tree
    consumer of the PCI MSI/MSI-X API)

    Signed-off-by: Roland Dreier <roland@topspin.com>

    --- linux-bk.orig/drivers/infiniband/Kconfig 2004-12-19 22:04:11.935940222 -0800
    +++ linux-bk/drivers/infiniband/Kconfig 2004-12-19 22:04:14.496562875 -0800
    @@ -7,4 +7,6 @@
               any protocols you wish to use as well as drivers for your
               InfiniBand hardware.
     
    +source "drivers/infiniband/hw/mthca/Kconfig"
    +
     endmenu
    --- linux-bk.orig/drivers/infiniband/Makefile 2004-12-19 22:04:11.960936538 -0800
    +++ linux-bk/drivers/infiniband/Makefile 2004-12-19 22:04:14.472566412 -0800
    @@ -1 +1,2 @@
     obj-$(CONFIG_INFINIBAND) += core/
    +obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-bk/drivers/infiniband/hw/mthca/Kconfig 2004-12-19 22:04:14.520559339 -0800
    @@ -0,0 +1,26 @@
    +config INFINIBAND_MTHCA
    + tristate "Mellanox HCA support"
    + depends on PCI && INFINIBAND
    + ---help---
    + This is a low-level driver for Mellanox InfiniHost host
    + channel adapters (HCAs), including the MT23108 PCI-X HCA
    + ("Tavor") and the MT25208 PCI Express HCA ("Arbel").
    +
    +config INFINIBAND_MTHCA_DEBUG
    + bool "Verbose debugging output"
    + depends on INFINIBAND_MTHCA
    + default n
    + ---help---
    + This option causes the mthca driver produce a bunch of debug
    + messages. Select this is you are developing the driver or
    + trying to diagnose a problem.
    +
    +config INFINIBAND_MTHCA_SSE_DOORBELL
    + bool "SSE doorbell code"
    + depends on INFINIBAND_MTHCA && X86 && !X86_64
    + default n
    + ---help---
    + This option will have the mthca driver use SSE instructions
    + to ring hardware doorbell registers. This may improve
    + performance for some workloads, but the driver will not run
    + on processors without SSE instructions.
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-bk/drivers/infiniband/hw/mthca/Makefile 2004-12-19 22:04:14.543555950 -0800
    @@ -0,0 +1,12 @@
    +EXTRA_CFLAGS += -Idrivers/infiniband/include
    +
    +ifdef CONFIG_INFINIBAND_MTHCA_DEBUG
    +EXTRA_CFLAGS += -DDEBUG
    +endif
    +
    +obj-$(CONFIG_INFINIBAND_MTHCA) += ib_mthca.o
    +
    +ib_mthca-y := mthca_main.o mthca_cmd.o mthca_profile.o mthca_reset.o \
    + mthca_allocator.o mthca_eq.o mthca_pd.o mthca_cq.o \
    + mthca_mr.o mthca_qp.o mthca_av.o mthca_mcg.o mthca_mad.o \
    + mthca_provider.o
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-bk/drivers/infiniband/hw/mthca/mthca_allocator.c 2004-12-19 22:04:14.567552414 -0800
    @@ -0,0 +1,179 @@
    +/*
    + * Copyright (c) 2004 Topspin Communications. All rights reserved.
    + *
    + * This software is available to you under a choice of one of two
    + * licenses. You may choose to be licensed under the terms of the GNU
    + * General Public License (GPL) Version 2, available from the file
    + * COPYING in the main directory of this source tree, or the
    + * OpenIB.org BSD license below:
    + *
    + * Redistribution and use in source and binary forms, with or
    + * without modification, are permitted provided that the following
    + * conditions are met:
    + *
    + * - Redistributions of source code must retain the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer.
    + *
    + * - Redistributions in binary form must reproduce the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer in the documentation and/or other materials
    + * provided with the distribution.
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
    + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
    + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    + * SOFTWARE.
    + *
    + * $Id: mthca_allocator.c 1349 2004-12-16 21:09:43Z roland $
    + */
    +
    +#include <linux/errno.h>
    +#include <linux/slab.h>
    +#include <linux/bitmap.h>
    +
    +#include "mthca_dev.h"
    +
    +/* Trivial bitmap-based allocator */
    +u32 mthca_alloc(struct mthca_alloc *alloc)
    +{
    + u32 obj;
    +
    + spin_lock(&alloc->lock);
    + obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
    + if (obj >= alloc->max) {
    + alloc->top = (alloc->top + alloc->max) & alloc->mask;
    + obj = find_first_zero_bit(alloc->table, alloc->max);
    + }
    +
    + if (obj < alloc->max) {
    + set_bit(obj, alloc->table);
    + obj |= alloc->top;
    + } else
    + obj = -1;
    +
    + spin_unlock(&alloc->lock);
    +
    + return obj;
    +}
    +
    +void mthca_free(struct mthca_alloc *alloc, u32 obj)
    +{
    + obj &= alloc->max - 1;
    + spin_lock(&alloc->lock);
    + clear_bit(obj, alloc->table);
    + alloc->last = min(alloc->last, obj);
    + alloc->top = (alloc->top + alloc->max) & alloc->mask;
    + spin_unlock(&alloc->lock);
    +}
    +
    +int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
    + u32 reserved)
    +{
    + int i;
    +
    + /* num must be a power of 2 */
    + if (num != 1 << (ffs(num) - 1))
    + return -EINVAL;
    +
    + alloc->last = 0;
    + alloc->top = 0;
    + alloc->max = num;
    + alloc->mask = mask;
    + spin_lock_init(&alloc->lock);
    + alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof (long),
    + GFP_KERNEL);
    + if (!alloc->table)
    + return -ENOMEM;
    +
    + bitmap_zero(alloc->table, num);
    + for (i = 0; i < reserved; ++i)
    + set_bit(i, alloc->table);
    +
    + return 0;
    +}
    +
    +void mthca_alloc_cleanup(struct mthca_alloc *alloc)
    +{
    + kfree(alloc->table);
    +}
    +
    +/*
    + * Array of pointers with lazy allocation of leaf pages. Callers of
    + * _get, _set and _clear methods must use a lock or otherwise
    + * serialize access to the array.
    + */
    +
    +void *mthca_array_get(struct mthca_array *array, int index)
    +{
    + int p = (index * sizeof (void *)) >> PAGE_SHIFT;
    +
    + if (array->page_list[p].page) {
    + int i = index & (PAGE_SIZE / sizeof (void *) - 1);
    + return array->page_list[p].page[i];
    + } else
    + return NULL;
    +}
    +
    +int mthca_array_set(struct mthca_array *array, int index, void *value)
    +{
    + int p = (index * sizeof (void *)) >> PAGE_SHIFT;
    +
    + /* Allocate with GFP_ATOMIC because we'll be called with locks held. */
    + if (!array->page_list[p].page)
    + array->page_list[p].page = (void **) get_zeroed_page(GFP_ATOMIC);
    +
    + if (!array->page_list[p].page)
    + return -ENOMEM;
    +
    + array->page_list[p].page[index & (PAGE_SIZE / sizeof (void *) - 1)] =
    + value;
    + ++array->page_list[p].used;
    +
    + return 0;
    +}
    +
    +void mthca_array_clear(struct mthca_array *array, int index)
    +{
    + int p = (index * sizeof (void *)) >> PAGE_SHIFT;
    +
    + if (--array->page_list[p].used == 0) {
    + free_page((unsigned long) array->page_list[p].page);
    + array->page_list[p].page = NULL;
    + }
    +
    + if (array->page_list[p].used < 0)
    + pr_debug("Array %p index %d page %d with ref count %d < 0\n",
    + array, index, p, array->page_list[p].used);
    +}
    +
    +int mthca_array_init(struct mthca_array *array, int nent)
    +{
    + int npage = (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE;
    + int i;
    +
    + array->page_list = kmalloc(npage * sizeof *array->page_list, GFP_KERNEL);
    + if (!array->page_list)
    + return -ENOMEM;
    +
    + for (i = 0; i < npage; ++i) {
    + array->page_list[i].page = NULL;
    + array->page_list[i].used = 0;
    + }
    +
    + return 0;
    +}
    +
    +void mthca_array_cleanup(struct mthca_array *array, int nent)
    +{
    + int i;
    +
    + for (i = 0; i < (nent * sizeof (void *) + PAGE_SIZE - 1) / PAGE_SIZE; ++i)
    + free_page((unsigned long) array->page_list[i].page);
    +
    + kfree(array->page_list);
    +}
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-bk/drivers/infiniband/hw/mthca/mthca_config_reg.h 2004-12-19 22:04:14.591548878 -0800
    @@ -0,0 +1,55 @@
    +/*
    + * Copyright (c) 2004 Topspin Communications. All rights reserved.
    + *
    + * This software is available to you under a choice of one of two
    + * licenses. You may choose to be licensed under the terms of the GNU
    + * General Public License (GPL) Version 2, available from the file
    + * COPYING in the main directory of this source tree, or the
    + * OpenIB.org BSD license below:
    + *
    + * Redistribution and use in source and binary forms, with or
    + * without modification, are permitted provided that the following
    + * conditions are met:
    + *
    + * - Redistributions of source code must retain the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer.
    + *
    + * - Redistributions in binary form must reproduce the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer in the documentation and/or other materials
    + * provided with the distribution.
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
    + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
    + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    + * SOFTWARE.
    + *
    + * $Id: mthca_config_reg.h 1349 2004-12-16 21:09:43Z roland $
    + */
    +
    +#ifndef MTHCA_CONFIG_REG_H
    +#define MTHCA_CONFIG_REG_H
    +
    +#include <asm/page.h>
    +
    +#define MTHCA_HCR_BASE 0x80680
    +#define MTHCA_HCR_SIZE 0x0001c
    +#define MTHCA_ECR_BASE 0x80700
    +#define MTHCA_ECR_SIZE 0x00008
    +#define MTHCA_ECR_CLR_BASE 0x80708
    +#define MTHCA_ECR_CLR_SIZE 0x00008
    +#define MTHCA_ECR_OFFSET (MTHCA_ECR_BASE - MTHCA_HCR_BASE)
    +#define MTHCA_ECR_CLR_OFFSET (MTHCA_ECR_CLR_BASE - MTHCA_HCR_BASE)
    +#define MTHCA_CLR_INT_BASE 0xf00d8
    +#define MTHCA_CLR_INT_SIZE 0x00008
    +
    +#define MTHCA_MAP_HCR_SIZE (MTHCA_ECR_CLR_BASE + \
    + MTHCA_ECR_CLR_SIZE - \
    + MTHCA_HCR_BASE)
    +
    +#endif /* MTHCA_CONFIG_REG_H */
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-bk/drivers/infiniband/hw/mthca/mthca_dev.h 2004-12-19 22:04:14.615545341 -0800
    @@ -0,0 +1,391 @@
    +/*
    + * Copyright (c) 2004 Topspin Communications. All rights reserved.
    + *
    + * This software is available to you under a choice of one of two
    + * licenses. You may choose to be licensed under the terms of the GNU
    + * General Public License (GPL) Version 2, available from the file
    + * COPYING in the main directory of this source tree, or the
    + * OpenIB.org BSD license below:
    + *
    + * Redistribution and use in source and binary forms, with or
    + * without modification, are permitted provided that the following
    + * conditions are met:
    + *
    + * - Redistributions of source code must retain the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer.
    + *
    + * - Redistributions in binary form must reproduce the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer in the documentation and/or other materials
    + * provided with the distribution.
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
    + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
    + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    + * SOFTWARE.
    + *
    + * $Id: mthca_dev.h 1349 2004-12-16 21:09:43Z roland $
    + */
    +
    +#ifndef MTHCA_DEV_H
    +#define MTHCA_DEV_H
    +
    +#include <linux/spinlock.h>
    +#include <linux/kernel.h>
    +#include <linux/pci.h>
    +#include <linux/dma-mapping.h>
    +#include <asm/semaphore.h>
    +#include <asm/scatterlist.h>
    +
    +#include "mthca_provider.h"
    +#include "mthca_doorbell.h"
    +
    +#define DRV_NAME "ib_mthca"
    +#define PFX DRV_NAME ": "
    +#define DRV_VERSION "0.06-pre"
    +#define DRV_RELDATE "November 8, 2004"
    +
    +/* Types of supported HCA */
    +enum {
    + TAVOR, /* MT23108 */
    + ARBEL_COMPAT, /* MT25208 in Tavor compat mode */
    + ARBEL_NATIVE /* MT25208 with extended features */
    +};
    +
    +enum {
    + MTHCA_FLAG_DDR_HIDDEN = 1 << 1,
    + MTHCA_FLAG_SRQ = 1 << 2,
    + MTHCA_FLAG_MSI = 1 << 3,
    + MTHCA_FLAG_MSI_X = 1 << 4,
    + MTHCA_FLAG_NO_LAM = 1 << 5
    +};
    +
    +enum {
    + MTHCA_KAR_PAGE = 1,
    + MTHCA_MAX_PORTS = 2
    +};
    +
    +enum {
    + MTHCA_MPT_ENTRY_SIZE = 0x40,
    + MTHCA_EQ_CONTEXT_SIZE = 0x40,
    + MTHCA_CQ_CONTEXT_SIZE = 0x40,
    + MTHCA_QP_CONTEXT_SIZE = 0x200,
    + MTHCA_AV_SIZE = 0x20,
    + MTHCA_MGM_ENTRY_SIZE = 0x40
    +};
    +
    +enum {
    + MTHCA_EQ_CMD,
    + MTHCA_EQ_ASYNC,
    + MTHCA_EQ_COMP,
    + MTHCA_NUM_EQ
    +};
    +
    +struct mthca_cmd {
    + int use_events;
    + struct semaphore hcr_sem;
    + struct semaphore poll_sem;
    + struct semaphore event_sem;
    + int max_cmds;
    + spinlock_t context_lock;
    + int free_head;
    + struct mthca_cmd_context *context;
    + u16 token_mask;
    +};
    +
    +struct mthca_limits {
    + int num_ports;
    + int vl_cap;
    + int mtu_cap;
    + int gid_table_len;
    + int pkey_table_len;
    + int local_ca_ack_delay;
    + int max_sg;
    + int num_qps;
    + int reserved_qps;
    + int num_srqs;
    + int reserved_srqs;
    + int num_eecs;
    + int reserved_eecs;
    + int num_cqs;
    + int reserved_cqs;
    + int num_eqs;
    + int reserved_eqs;
    + int num_mpts;
    + int num_mtt_segs;
    + int mtt_seg_size;
    + int reserved_mtts;
    + int reserved_mrws;
    + int num_rdbs;
    + int reserved_uars;
    + int num_mgms;
    + int num_amgms;
    + int reserved_mcgs;
    + int num_pds;
    + int reserved_pds;
    +};
    +
    +struct mthca_alloc {
    + u32 last;
    + u32 top;
    + u32 max;
    + u32 mask;
    + spinlock_t lock;
    + unsigned long *table;
    +};
    +
    +struct mthca_array {
    + struct {
    + void **page;
    + int used;
    + } *page_list;
    +};
    +
    +struct mthca_pd_table {
    + struct mthca_alloc alloc;
    +};
    +
    +struct mthca_mr_table {
    + struct mthca_alloc mpt_alloc;
    + int max_mtt_order;
    + unsigned long **mtt_buddy;
    + u64 mtt_base;
    +};
    +
    +struct mthca_eq_table {
    + struct mthca_alloc alloc;
    + void __iomem *clr_int;
    + u32 clr_mask;
    + struct mthca_eq eq[MTHCA_NUM_EQ];
    + int have_irq;
    + u8 inta_pin;
    +};
    +
    +struct mthca_cq_table {
    + struct mthca_alloc alloc;
    + spinlock_t lock;
    + struct mthca_array cq;
    +};
    +
    +struct mthca_qp_table {
    + struct mthca_alloc alloc;
    + int sqp_start;
    + spinlock_t lock;
    + struct mthca_array qp;
    +};
    +
    +struct mthca_av_table {
    + struct pci_pool *pool;
    + int num_ddr_avs;
    + u64 ddr_av_base;
    + void __iomem *av_map;
    + struct mthca_alloc alloc;
    +};
    +
    +struct mthca_mcg_table {
    + struct semaphore sem;
    + struct mthca_alloc alloc;
    +};
    +
    +struct mthca_dev {
    + struct ib_device ib_dev;
    + struct pci_dev *pdev;
    +
    + int hca_type;
    + unsigned long mthca_flags;
    +
    + u32 rev_id;
    +
    + /* firmware info */
    + u64 fw_ver;
    + union {
    + struct {
    + u64 fw_start;
    + u64 fw_end;
    + } tavor;
    + struct {
    + u64 clr_int_base;
    + u64 eq_arm_base;
    + u64 eq_set_ci_base;
    + struct scatterlist *mem;
    + u16 fw_pages;
    + } arbel;
    + } fw;
    +
    + u64 ddr_start;
    + u64 ddr_end;
    +
    + MTHCA_DECLARE_DOORBELL_LOCK(doorbell_lock)
    +
    + void __iomem *hcr;
    + void __iomem *clr_base;
    + void __iomem *kar;
    +
    + struct mthca_cmd cmd;
    + struct mthca_limits limits;
    +
    + struct mthca_pd_table pd_table;
    + struct mthca_mr_table mr_table;
    + struct mthca_eq_table eq_table;
    + struct mthca_cq_table cq_table;
    + struct mthca_qp_table qp_table;
    + struct mthca_av_table av_table;
    + struct mthca_mcg_table mcg_table;
    +
    + struct mthca_pd driver_pd;
    + struct mthca_mr driver_mr;
    +
    + struct ib_mad_agent *send_agent[MTHCA_MAX_PORTS][2];
    + struct ib_ah *sm_ah[MTHCA_MAX_PORTS];
    + spinlock_t sm_lock;
    +};
    +
    +#define mthca_dbg(mdev, format, arg...) \
    + dev_dbg(&mdev->pdev->dev, format, ## arg)
    +#define mthca_err(mdev, format, arg...) \
    + dev_err(&mdev->pdev->dev, format, ## arg)
    +#define mthca_info(mdev, format, arg...) \
    + dev_info(&mdev->pdev->dev, format, ## arg)
    +#define mthca_warn(mdev, format, arg...) \
    + dev_warn(&mdev->pdev->dev, format, ## arg)
    +
    +extern void __buggy_use_of_MTHCA_GET(void);
    +extern void __buggy_use_of_MTHCA_PUT(void);
    +
    +#define MTHCA_GET(dest, source, offset) \
    + do { \
    + void *__p = (char *) (source) + (offset); \
    + switch (sizeof (dest)) { \
    + case 1: (dest) = *(u8 *) __p; break; \
    + case 2: (dest) = be16_to_cpup(__p); break; \
    + case 4: (dest) = be32_to_cpup(__p); break; \
    + case 8: (dest) = be64_to_cpup(__p); break; \
    + default: __buggy_use_of_MTHCA_GET(); \
    + } \
    + } while (0)
    +
    +#define MTHCA_PUT(dest, source, offset) \
    + do { \
    + __typeof__(source) *__p = \
    + (__typeof__(source) *) ((char *) (dest) + (offset)); \
    + switch (sizeof(source)) { \
    + case 1: *__p = (source); break; \
    + case 2: *__p = cpu_to_be16(source); break; \
    + case 4: *__p = cpu_to_be32(source); break; \
    + case 8: *__p = cpu_to_be64(source); break; \
    + default: __buggy_use_of_MTHCA_PUT(); \
    + } \
    + } while (0)
    +
    +int mthca_reset(struct mthca_dev *mdev);
    +
    +u32 mthca_alloc(struct mthca_alloc *alloc);
    +void mthca_free(struct mthca_alloc *alloc, u32 obj);
    +int mthca_alloc_init(struct mthca_alloc *alloc, u32 num, u32 mask,
    + u32 reserved);
    +void mthca_alloc_cleanup(struct mthca_alloc *alloc);
    +void *mthca_array_get(struct mthca_array *array, int index);
    +int mthca_array_set(struct mthca_array *array, int index, void *value);
    +void mthca_array_clear(struct mthca_array *array, int index);
    +int mthca_array_init(struct mthca_array *array, int nent);
    +void mthca_array_cleanup(struct mthca_array *array, int nent);
    +
    +int mthca_init_pd_table(struct mthca_dev *dev);
    +int mthca_init_mr_table(struct mthca_dev *dev);
    +int mthca_init_eq_table(struct mthca_dev *dev);
    +int mthca_init_cq_table(struct mthca_dev *dev);
    +int mthca_init_qp_table(struct mthca_dev *dev);
    +int mthca_init_av_table(struct mthca_dev *dev);
    +int mthca_init_mcg_table(struct mthca_dev *dev);
    +
    +void mthca_cleanup_pd_table(struct mthca_dev *dev);
    +void mthca_cleanup_mr_table(struct mthca_dev *dev);
    +void mthca_cleanup_eq_table(struct mthca_dev *dev);
    +void mthca_cleanup_cq_table(struct mthca_dev *dev);
    +void mthca_cleanup_qp_table(struct mthca_dev *dev);
    +void mthca_cleanup_av_table(struct mthca_dev *dev);
    +void mthca_cleanup_mcg_table(struct mthca_dev *dev);
    +
    +int mthca_register_device(struct mthca_dev *dev);
    +void mthca_unregister_device(struct mthca_dev *dev);
    +
    +int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd);
    +void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd);
    +
    +int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
    + u32 access, struct mthca_mr *mr);
    +int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
    + u64 *buffer_list, int buffer_size_shift,
    + int list_len, u64 iova, u64 total_size,
    + u32 access, struct mthca_mr *mr);
    +void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr);
    +
    +int mthca_poll_cq(struct ib_cq *ibcq, int num_entries,
    + struct ib_wc *entry);
    +void mthca_arm_cq(struct mthca_dev *dev, struct mthca_cq *cq,
    + int solicited);
    +int mthca_init_cq(struct mthca_dev *dev, int nent,
    + struct mthca_cq *cq);
    +void mthca_free_cq(struct mthca_dev *dev,
    + struct mthca_cq *cq);
    +void mthca_cq_event(struct mthca_dev *dev, u32 cqn);
    +void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn);
    +
    +void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
    + enum ib_event_type event_type);
    +int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask);
    +int mthca_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
    + struct ib_send_wr **bad_wr);
    +int mthca_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
    + struct ib_recv_wr **bad_wr);
    +int mthca_free_err_wqe(struct mthca_qp *qp, int is_send,
    + int index, int *dbd, u32 *new_wqe);
    +int mthca_alloc_qp(struct mthca_dev *dev,
    + struct mthca_pd *pd,
    + struct mthca_cq *send_cq,
    + struct mthca_cq *recv_cq,
    + enum ib_qp_type type,
    + enum ib_sig_type send_policy,
    + enum ib_sig_type recv_policy,
    + struct mthca_qp *qp);
    +int mthca_alloc_sqp(struct mthca_dev *dev,
    + struct mthca_pd *pd,
    + struct mthca_cq *send_cq,
    + struct mthca_cq *recv_cq,
    + enum ib_sig_type send_policy,
    + enum ib_sig_type recv_policy,
    + int qpn,
    + int port,
    + struct mthca_sqp *sqp);
    +void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
    +int mthca_create_ah(struct mthca_dev *dev,
    + struct mthca_pd *pd,
    + struct ib_ah_attr *ah_attr,
    + struct mthca_ah *ah);
    +int mthca_destroy_ah(struct mthca_dev *dev, struct mthca_ah *ah);
    +int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
    + struct ib_ud_header *header);
    +
    +int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
    +int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
    +
    +int mthca_process_mad(struct ib_device *ibdev,
    + int mad_flags,
    + u8 port_num,
    + u16 slid,
    + struct ib_mad *in_mad,
    + struct ib_mad *out_mad);
    +int mthca_create_agents(struct mthca_dev *dev);
    +void mthca_free_agents(struct mthca_dev *dev);
    +
    +static inline struct mthca_dev *to_mdev(struct ib_device *ibdev)
    +{
    + return container_of(ibdev, struct mthca_dev, ib_dev);
    +}
    +
    +#endif /* MTHCA_DEV_H */
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-bk/drivers/infiniband/hw/mthca/mthca_doorbell.h 2004-12-19 22:04:14.639541805 -0800
    @@ -0,0 +1,123 @@
    +/*
    + * Copyright (c) 2004 Topspin Communications. All rights reserved.
    + *
    + * This software is available to you under a choice of one of two
    + * licenses. You may choose to be licensed under the terms of the GNU
    + * General Public License (GPL) Version 2, available from the file
    + * COPYING in the main directory of this source tree, or the
    + * OpenIB.org BSD license below:
    + *
    + * Redistribution and use in source and binary forms, with or
    + * without modification, are permitted provided that the following
    + * conditions are met:
    + *
    + * - Redistributions of source code must retain the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer.
    + *
    + * - Redistributions in binary form must reproduce the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer in the documentation and/or other materials
    + * provided with the distribution.
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
    + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
    + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    + * SOFTWARE.
    + *
    + * $Id: mthca_doorbell.h 1349 2004-12-16 21:09:43Z roland $
    + */
    +
    +#include <linux/config.h>
    +#include <linux/types.h>
    +#include <linux/preempt.h>
    +
    +#define MTHCA_RD_DOORBELL 0x00
    +#define MTHCA_SEND_DOORBELL 0x10
    +#define MTHCA_RECEIVE_DOORBELL 0x18
    +#define MTHCA_CQ_DOORBELL 0x20
    +#define MTHCA_EQ_DOORBELL 0x28
    +
    +#if BITS_PER_LONG == 64
    +/*
    + * Assume that we can just write a 64-bit doorbell atomically. s390
    + * actually doesn't have writeq() but S/390 systems don't even have
    + * PCI so we won't worry about it.
    + */
    +
    +#define MTHCA_DECLARE_DOORBELL_LOCK(name)
    +#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
    +#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
    +
    +static inline void mthca_write64(u32 val[2], void __iomem *dest,
    + spinlock_t *doorbell_lock)
    +{
    + __raw_writeq(*(u64 *) val, dest);
    +}
    +
    +#elif defined(CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL)
    +/* Use SSE to write 64 bits atomically without a lock. */
    +
    +#define MTHCA_DECLARE_DOORBELL_LOCK(name)
    +#define MTHCA_INIT_DOORBELL_LOCK(ptr) do { } while (0)
    +#define MTHCA_GET_DOORBELL_LOCK(ptr) (NULL)
    +
    +static inline unsigned long mthca_get_fpu(void)
    +{
    + unsigned long cr0;
    +
    + preempt_disable();
    + asm volatile("mov %%cr0,%0; clts" : "=r" (cr0));
    + return cr0;
    +}
    +
    +static inline void mthca_put_fpu(unsigned long cr0)
    +{
    + asm volatile("mov %0,%%cr0" : : "r" (cr0));
    + preempt_enable();
    +}
    +
    +static inline void mthca_write64(u32 val[2], void __iomem *dest,
    + spinlock_t *doorbell_lock)
    +{
    + /* i386 stack is aligned to 8 bytes, so this should be OK: */
    + u8 xmmsave[8] __attribute__((aligned(8)));
    + unsigned long cr0;
    +
    + cr0 = mthca_get_fpu();
    +
    + asm volatile (
    + "movlps %%xmm0,(%0); \n\t"
    + "movlps (%1),%%xmm0; \n\t"
    + "movlps %%xmm0,(%2); \n\t"
    + "movlps (%0),%%xmm0; \n\t"
    + :
    + : "r" (xmmsave), "r" (val), "r" (dest)
    + : "memory" );
    +
    + mthca_put_fpu(cr0);
    +}
    +
    +#else
    +/* Just fall back to a spinlock to protect the doorbell */
    +
    +#define MTHCA_DECLARE_DOORBELL_LOCK(name) spinlock_t name;
    +#define MTHCA_INIT_DOORBELL_LOCK(ptr) spin_lock_init(ptr)
    +#define MTHCA_GET_DOORBELL_LOCK(ptr) (ptr)
    +
    +static inline void mthca_write64(u32 val[2], void __iomem *dest,
    + spinlock_t *doorbell_lock)
    +{
    + unsigned long flags;
    +
    + spin_lock_irqsave(doorbell_lock, flags);
    + __raw_writel(val[0], dest);
    + __raw_writel(val[1], dest + 4);
    + spin_unlock_irqrestore(doorbell_lock, flags);
    +}
    +
    +#endif
    --- /dev/null 1970-01-01 00:00:00.000000000 +0000
    +++ linux-bk/drivers/infiniband/hw/mthca/mthca_main.c 2004-12-19 22:04:14.663538269 -0800
    @@ -0,0 +1,933 @@
    +/*
    + * Copyright (c) 2004 Topspin Communications. All rights reserved.
    + *
    + * This software is available to you under a choice of one of two
    + * licenses. You may choose to be licensed under the terms of the GNU
    + * General Public License (GPL) Version 2, available from the file
    + * COPYING in the main directory of this source tree, or the
    + * OpenIB.org BSD license below:
    + *
    + * Redistribution and use in source and binary forms, with or
    + * without modification, are permitted provided that the following
    + * conditions are met:
    + *
    + * - Redistributions of source code must retain the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer.
    + *
    + * - Redistributions in binary form must reproduce the above
    + * copyright notice, this list of conditions and the following
    + * disclaimer in the documentation and/or other materials
    + * provided with the distribution.
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
    + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
    + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
    + * SOFTWARE.
    + *
    + * $Id: mthca_main.c 1349 2004-12-16 21:09:43Z roland $
    + */
    +
    +#include <linux/config.h>
    +#include <linux/version.h>
    +#include <linux/module.h>
    +#include <linux/init.h>
    +#include <linux/errno.h>
    +#include <linux/pci.h>
    +#include <linux/interrupt.h>
    +
    +#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
    +#include <asm/cpufeature.h>
    +#endif
    +
    +#include "mthca_dev.h"
    +#include "mthca_config_reg.h"
    +#include "mthca_cmd.h"
    +#include "mthca_profile.h"
    +
    +MODULE_AUTHOR("Roland Dreier");
    +MODULE_DESCRIPTION("Mellanox InfiniBand HCA low-level driver");
    +MODULE_LICENSE("Dual BSD/GPL");
    +MODULE_VERSION(DRV_VERSION);
    +
    +#ifdef CONFIG_PCI_MSI
    +
    +static int msi_x = 0;
    +module_param(msi_x, int, 0444);
    +MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
    +
    +static int msi = 0;
    +module_param(msi, int, 0444);
    +MODULE_PARM_DESC(msi, "attempt to use MSI if nonzero");
    +
    +#else /* CONFIG_PCI_MSI */
    +
    +#define msi_x (0)
    +#define msi (0)
    +
    +#endif /* CONFIG_PCI_MSI */
    +
    +static const char mthca_version[] __devinitdata =
    + "ib_mthca: Mellanox InfiniBand HCA driver v"
    + DRV_VERSION " (" DRV_RELDATE ")\n";
    +
    +static int __devinit mthca_tune_pci(struct mthca_dev *mdev)
    +{
    + int cap;
    + u16 val;
    +
    + /* First try to max out Read Byte Count */
    + cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
    + if (cap) {
    + if (pci_read_config_word(mdev->pdev, cap + PCI_X_CMD, &val)) {
    + mthca_err(mdev, "Couldn't read PCI-X command register, "
    + "aborting.\n");
    + return -ENODEV;
    + }
    + val = (val & ~PCI_X_CMD_MAX_READ) | (3 << 2);
    + if (pci_write_config_word(mdev->pdev, cap + PCI_X_CMD, val)) {
    + mthca_err(mdev, "Couldn't write PCI-X command register, "
    + "aborting.\n");
    + return -ENODEV;
    + }
    + } else if (mdev->hca_type == TAVOR)
    + mthca_info(mdev, "No PCI-X capability, not setting RBC.\n");
    +
    + cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
    + if (cap) {
    + if (pci_read_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, &val)) {
    + mthca_err(mdev, "Couldn't read PCI Express device control "
    + "register, aborting.\n");
    + return -ENODEV;
    + }
    + val = (val & ~PCI_EXP_DEVCTL_READRQ) | (5 << 12);
    + if (pci_write_config_word(mdev->pdev, cap + PCI_EXP_DEVCTL, val)) {
    + mthca_err(mdev, "Couldn't write PCI Express device control "
    + "register, aborting.\n");
    + return -ENODEV;
    + }
    + } else if (mdev->hca_type == ARBEL_NATIVE ||
    + mdev->hca_type == ARBEL_COMPAT)
    + mthca_info(mdev, "No PCI Express capability, "
    + "not setting Max Read Request Size.\n");
    +
    + return 0;
    +}
    +
    +static int __devinit mthca_dev_lim(struct mthca_dev *mdev, struct mthca_dev_lim *dev_lim)
    +{
    + int err;
    + u8 status;
    +
    + err = mthca_QUERY_DEV_LIM(mdev, dev_lim, &status);
    + if (err) {
    + mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
    + return err;
    + }
    + if (status) {
    + mthca_err(mdev, "QUERY_DEV_LIM returned status 0x%02x, "
    + "aborting.\n", status);
    + return -EINVAL;
    + }
    + if (dev_lim->min_page_sz > PAGE_SIZE) {
    + mthca_err(mdev, "HCA minimum page size of %d bigger than "
    + "kernel PAGE_SIZE of %ld, aborting.\n",
    + dev_lim->min_page_sz, PAGE_SIZE);
    + return -ENODEV;
    + }
    + if (dev_lim->num_ports > MTHCA_MAX_PORTS) {
    + mthca_err(mdev, "HCA has %d ports, but we only support %d, "
    + "aborting.\n",
    + dev_lim->num_ports, MTHCA_MAX_PORTS);
    + return -ENODEV;
    + }
    +
    + mdev->limits.num_ports = dev_lim->num_ports;
    + mdev->limits.vl_cap = dev_lim->max_vl;
    + mdev->limits.mtu_cap = dev_lim->max_mtu;
    + mdev->limits.gid_table_len = dev_lim->max_gids;
    + mdev->limits.pkey_table_len = dev_lim->max_pkeys;
    + mdev->limits.local_ca_ack_delay = dev_lim->local_ca_ack_delay;
    + mdev->limits.max_sg = dev_lim->max_sg;
    + mdev->limits.reserved_qps = dev_lim->reserved_qps;
    + mdev->limits.reserved_srqs = dev_lim->reserved_srqs;
    + mdev->limits.reserved_eecs = dev_lim->reserved_eecs;
    + mdev->limits.reserved_cqs = dev_lim->reserved_cqs;
    + mdev->limits.reserved_eqs = dev_lim->reserved_eqs;
    + mdev->limits.reserved_mtts = dev_lim->reserved_mtts;
    + mdev->limits.reserved_mrws = dev_lim->reserved_mrws;
    + mdev->limits.reserved_uars = dev_lim->reserved_uars;
    + mdev->limits.reserved_pds = dev_lim->reserved_pds;
    +
    + if (dev_lim->flags & DEV_LIM_FLAG_SRQ)
    + mdev->mthca_flags |= MTHCA_FLAG_SRQ;
    +
    + return 0;
    +}
    +
    +static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
    +{
    + u8 status;
    + int err;
    + struct mthca_dev_lim dev_lim;
    + struct mthca_init_hca_param init_hca;
    + struct mthca_adapter adapter;
    +
    + err = mthca_SYS_EN(mdev, &status);
    + if (err) {
    + mthca_err(mdev, "SYS_EN command failed, aborting.\n");
    + return err;
    + }
    + if (status) {
    + mthca_err(mdev, "SYS_EN returned status 0x%02x, "
    + "aborting.\n", status);
    + return -EINVAL;
    + }
    +
    + err = mthca_QUERY_FW(mdev, &status);
    + if (err) {
    + mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
    + goto err_out_disable;
    + }
    + if (status) {
    + mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
    + "aborting.\n", status);
    + err = -EINVAL;
    + goto err_out_disable;
    + }
    + err = mthca_QUERY_DDR(mdev, &status);
    + if (err) {
    + mthca_err(mdev, "QUERY_DDR command failed, aborting.\n");
    + goto err_out_disable;
    + }
    + if (status) {
    + mthca_err(mdev, "QUERY_DDR returned status 0x%02x, "
    + "aborting.\n", status);
    + err = -EINVAL;
    + goto err_out_disable;
    + }
    +
    + err = mthca_dev_lim(mdev, &dev_lim);
    +
    + err = mthca_make_profile(mdev, &dev_lim, &init_hca);
    + if (err)
    + goto err_out_disable;
    +
    + err = mthca_INIT_HCA(mdev, &init_hca, &status);
    + if (err) {
    + mthca_err(mdev, "INIT_HCA command failed, aborting.\n");
    + goto err_out_disable;
    + }
    + if (status) {
    + mthca_err(mdev, "INIT_HCA returned status 0x%02x, "
    + "aborting.\n", status);
    + err = -EINVAL;
    + goto err_out_disable;
    + }
    +
    + err = mthca_QUERY_ADAPTER(mdev, &adapter, &status);
    + if (err) {
    + mthca_err(mdev, "QUERY_ADAPTER command failed, aborting.\n");
    + goto err_out_disable;
    + }
    + if (status) {
    + mthca_err(mdev, "QUERY_ADAPTER returned status 0x%02x, "
    + "aborting.\n", status);
    + err = -EINVAL;
    + goto err_out_close;
    + }
    +
    + mdev->eq_table.inta_pin = adapter.inta_pin;
    + mdev->rev_id = adapter.revision_id;
    +
    + return 0;
    +
    +err_out_close:
    + mthca_CLOSE_HCA(mdev, 0, &status);
    +
    +err_out_disable:
    + mthca_SYS_DIS(mdev, &status);
    +
    + return err;
    +}
    +
    +static int __devinit mthca_load_fw(struct mthca_dev *mdev)
    +{
    + u8 status;
    + int err;
    + int num_ent, num_sg, fw_pages, cur_order;
    + int i;
    +
    + /* FIXME: use HCA-attached memory for FW if present */
    +
    + mdev->fw.arbel.mem = kmalloc(sizeof *mdev->fw.arbel.mem *
    + mdev->fw.arbel.fw_pages,
    + GFP_KERNEL);
    + if (!mdev->fw.arbel.mem) {
    + mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
    + return -ENOMEM;
    + }
    +
    + memset(mdev->fw.arbel.mem, 0,
    + sizeof *mdev->fw.arbel.mem * mdev->fw.arbel.fw_pages);
    +
    + fw_pages = mdev->fw.arbel.fw_pages;
    + num_ent = 0;
    + /*
    + * We allocate in as big chunks as we can, up to a maximum of
    + * 256 KB per chunk.
    + */
    + cur_order = get_order(1 << 18);
    + while (fw_pages > 0) {
    + while (1 << cur_order > fw_pages)
    + --cur_order;
    +
    + /*
    + * We allocate with GFP_HIGHUSER because only the
    + * firmware is going to touch these pages, so there's
    + * no need for a kernel virtual address. We use
    + * __GFP_NOWARN because we'll deal with any allocation
    + * failures ourselves.
    + */
    + mdev->fw.arbel.mem[num_ent].page = alloc_pages(GFP_HIGHUSER | __GFP_NOWARN,
    + cur_order);
    + mdev->fw.arbel.mem[num_ent].length = PAGE_SIZE << cur_order;
    + if (!mdev->fw.arbel.mem[num_ent].page) {
    + --cur_order;
    + if (cur_order < 0) {
    + mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
    + err = -ENOMEM;
    + goto err_free;
    + }
    + } else {
    + ++num_ent;
    + fw_pages -= 1 << cur_order;
    + }
    + }
    + num_sg = pci_map_sg(mdev->pdev, mdev->fw.arbel.mem, num_ent,
    + PCI_DMA_BIDIRECTIONAL);
    + if (num_sg <= 0) {
    + mthca_err(mdev, "Couldn't allocate FW area, aborting.\n");
    + err = -ENOMEM;
    + goto err_free;
    + }
    +
    + err = mthca_MAP_FA(mdev, num_sg, mdev->fw.arbel.mem, &status);
    + if (err) {
    + mthca_err(mdev, "MAP_FA command failed, aborting.\n");
    + goto err_unmap;
    + }
    + if (status) {
    + mthca_err(mdev, "MAP_FA returned status 0x%02x, aborting.\n", status);
    + err = -EINVAL;
    + goto err_unmap;
    + }
    + err = mthca_RUN_FW(mdev, &status);
    + if (err) {
    + mthca_err(mdev, "RUN_FW command failed, aborting.\n");
    + goto err_unmap_fa;
    + }
    + if (status) {
    + mthca_err(mdev, "RUN_FW returned status 0x%02x, aborting.\n", status);
    + err = -EINVAL;
    + goto err_unmap_fa;
    + }
    +
    + return 0;
    +
    +err_unmap_fa:
    + mthca_UNMAP_FA(mdev, &status);
    +
    +err_unmap:
    + pci_unmap_sg(mdev->pdev, mdev->fw.arbel.mem,
    + mdev->fw.arbel.fw_pages, PCI_DMA_BIDIRECTIONAL);
    +err_free:
    + for (i = 0; i < mdev->fw.arbel.fw_pages; ++i)
    + if (mdev->fw.arbel.mem[i].page)
    + __free_pages(mdev->fw.arbel.mem[i].page,
    + get_order(mdev->fw.arbel.mem[i].length));
    + kfree(mdev->fw.arbel.mem);
    + return err;
    +}
    +
    +static int __devinit mthca_init_arbel(struct mthca_dev *mdev)
    +{
    + struct mthca_dev_lim dev_lim;
    + u8 status;
    + int err;
    +
    + err = mthca_QUERY_FW(mdev, &status);
    + if (err) {
    + mthca_err(mdev, "QUERY_FW command failed, aborting.\n");
    + return err;
    + }
    + if (status) {
    + mthca_err(mdev, "QUERY_FW returned status 0x%02x, "
    + "aborting.\n", status);
    + return -EINVAL;
    + }
    +
    + err = mthca_ENABLE_LAM(mdev, &status);
    + if (err) {
    + mthca_err(mdev, "ENABLE_LAM command failed, aborting.\n");
    + return err;
    + }
    + if (status == MTHCA_CMD_STAT_LAM_NOT_PRE) {
    + mthca_dbg(mdev, "No HCA-attached memory (running in MemFree mode)\n");
    + mdev->mthca_flags |= MTHCA_FLAG_NO_LAM;
    + } else if (status) {
    + mthca_err(mdev, "ENABLE_LAM returned status 0x%02x, "
    + "aborting.\n", status);
    + return -EINVAL;
    + }
    +
    + err = mthca_load_fw(mdev);
    + if (err) {
    + mthca_err(mdev, "Failed to start FW, aborting.\n");
    + goto err_out_disable;
    + }
    +
    + err = mthca_dev_lim(mdev, &dev_lim);
    + if (err) {
    + mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
    + goto err_out_disable;
    + }
    +
    + mthca_warn(mdev, "Sorry, native MT25208 mode support is not done, "
    + "aborting.\n");
    + err = -ENODEV;
    +
    +err_out_disable:
    + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
    + mthca_DISABLE_LAM(mdev, &status);
    + return err;
    +}
    +
    +static int __devinit mthca_init_hca(struct mthca_dev *mdev)
    +{
    + if (mdev->hca_type == ARBEL_NATIVE)
    + return mthca_init_arbel(mdev);
    + else
    + return mthca_init_tavor(mdev);
    +}
    +
    +static int __devinit mthca_setup_hca(struct mthca_dev *dev)
    +{
    + int err;
    +
    + MTHCA_INIT_DOORBELL_LOCK(&dev->doorbell_lock);
    +
    + err = mthca_init_pd_table(dev);
    + if (err) {
    + mthca_err(dev, "Failed to initialize "
    + "protection domain table, aborting.\n");
    + return err;
    + }
    +
    + err = mthca_init_mr_table(dev);
    + if (err) {
    + mthca_err(dev, "Failed to initialize "
    + "memory region table, aborting.\n");
    + goto err_out_pd_table_free;
    + }
    +
    + err = mthca_pd_alloc(dev, &dev->driver_pd);
    + if (err) {
    + mthca_err(dev, "Failed to create driver PD, "
    + "aborting.\n");
    + goto err_out_mr_table_free;
    + }
    +
    + err = mthca_init_eq_table(dev);
    + if (err) {
    + mthca_err(dev, "Failed to initialize "
    + "event queue table, aborting.\n");
    + goto err_out_pd_free;
    + }
    +
    + err = mthca_cmd_use_events(dev);
    + if (err) {
    + mthca_err(dev, "Failed to switch to event-driven "
    + "firmware commands, aborting.\n");
    + goto err_out_eq_table_free;
    + }
    +
    + err = mthca_init_cq_table(dev);
    + if (err) {
    + mthca_err(dev, "Failed to initialize "
    + "completion queue table, aborting.\n");
    + goto err_out_cmd_poll;
    + }
    +
    + err = mthca_init_qp_table(dev);
    + if (err) {
    + mthca_err(dev, "Failed to initialize "
    + "queue pair table, aborting.\n");
    + goto err_out_cq_table_free;
    + }
    +
    + err = mthca_init_av_table(dev);
    + if (err) {
    + mthca_err(dev, "Failed to initialize "
    + "address vector table, aborting.\n");
    + goto err_out_qp_table_free;
    + }
    +
    + err = mthca_init_mcg_table(dev);
    + if (err) {
    + mthca_err(dev, "Failed to initialize "
    + "multicast group table, aborting.\n");
    + goto err_out_av_table_free;
    + }
    +
    + return 0;
    +
    +err_out_av_table_free:
    + mthca_cleanup_av_table(dev);
    +
    +err_out_qp_table_free:
    + mthca_cleanup_qp_table(dev);
    +
    +err_out_cq_table_free:
    + mthca_cleanup_cq_table(dev);
    +
    +err_out_cmd_poll:
    + mthca_cmd_use_polling(dev);
    +
    +err_out_eq_table_free:
    + mthca_cleanup_eq_table(dev);
    +
    +err_out_pd_free:
    + mthca_pd_free(dev, &dev->driver_pd);
    +
    +err_out_mr_table_free:
    + mthca_cleanup_mr_table(dev);
    +
    +err_out_pd_table_free:
    + mthca_cleanup_pd_table(dev);
    + return err;
    +}
    +
    +static int __devinit mthca_request_regions(struct pci_dev *pdev,
    + int ddr_hidden)
    +{
    + int err;
    +
    + /*
    + * We request our first BAR in two chunks, since the MSI-X
    + * vector table is right in the middle.
    + *
    + * This is why we can't just use pci_request_regions() -- if
    + * we did then setting up MSI-X would fail, since the PCI core
    + * wants to do request_mem_region on the MSI-X vector table.
    + */
    + if (!request_mem_region(pci_resource_start(pdev, 0) +
    + MTHCA_HCR_BASE,
    + MTHCA_MAP_HCR_SIZE,
    + DRV_NAME))
    + return -EBUSY;
    +
    + if (!request_mem_region(pci_resource_start(pdev, 0) +
    + MTHCA_CLR_INT_BASE,
    + MTHCA_CLR_INT_SIZE,
    + DRV_NAME)) {
    + err = -EBUSY;
    + goto err_out_bar0_beg;
    + }
    +
    + err = pci_request_region(pdev, 2, DRV_NAME);
    + if (err)
    + goto err_out_bar0_end;
    +
    + if (!ddr_hidden) {
    + err = pci_request_region(pdev, 4, DRV_NAME);
    + if (err)
    + goto err_out_bar2;
    + }
    +
    + return 0;
    +
    +err_out_bar0_beg:
    + release_mem_region(pci_resource_start(pdev, 0) +
    + MTHCA_HCR_BASE,
    + MTHCA_MAP_HCR_SIZE);
    +
    +err_out_bar0_end:
    + release_mem_region(pci_resource_start(pdev, 0) +
    + MTHCA_CLR_INT_BASE,
    + MTHCA_CLR_INT_SIZE);
    +
    +err_out_bar2:
    + pci_release_region(pdev, 2);
    + return err;
    +}
    +
    +static void mthca_release_regions(struct pci_dev *pdev,
    + int ddr_hidden)
    +{
    + release_mem_region(pci_resource_start(pdev, 0) +
    + MTHCA_HCR_BASE,
    + MTHCA_MAP_HCR_SIZE);
    + release_mem_region(pci_resource_start(pdev, 0) +
    + MTHCA_CLR_INT_BASE,
    + MTHCA_CLR_INT_SIZE);
    + pci_release_region(pdev, 2);
    + if (!ddr_hidden)
    + pci_release_region(pdev, 4);
    +}
    +
    +static int __devinit mthca_enable_msi_x(struct mthca_dev *mdev)
    +{
    + struct msix_entry entries[3];
    + int err;
    +
    + entries[0].entry = 0;
    + entries[1].entry = 1;
    + entries[2].entry = 2;
    +
    + err = pci_enable_msix(mdev->pdev, entries, ARRAY_SIZE(entries));
    + if (err) {
    + if (err > 0)
    + mthca_info(mdev, "Only %d MSI-X vectors available, "
    + "not using MSI-X\n", err);
    + return err;
    + }
    +
    + mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
    + mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
    + mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector;
    +
    + return 0;
    +}
    +
    +static void mthca_close_hca(struct mthca_dev *mdev)
    +{
    + u8 status;
    + int i;
    +
    + mthca_CLOSE_HCA(mdev, 0, &status);
    +
    + if (mdev->hca_type == ARBEL_NATIVE) {
    + mthca_UNMAP_FA(mdev, &status);
    +
    + pci_unmap_sg(mdev->pdev, mdev->fw.arbel.mem,
    + mdev->fw.arbel.fw_pages, PCI_DMA_BIDIRECTIONAL);
    +
    + for (i = 0; i < mdev->fw.arbel.fw_pages; ++i)
    + if (mdev->fw.arbel.mem[i].page)
    + __free_pages(mdev->fw.arbel.mem[i].page,
    + get_order(mdev->fw.arbel.mem[i].length));
    + kfree(mdev->fw.arbel.mem);
    +
    + if (!(mdev->mthca_flags & MTHCA_FLAG_NO_LAM))
    + mthca_DISABLE_LAM(mdev, &status);
    + } else
    + mthca_SYS_DIS(mdev, &status);
    +}
    +
    +static int __devinit mthca_init_one(struct pci_dev *pdev,
    + const struct pci_device_id *id)
    +{
    + static int mthca_version_printed = 0;
    + int ddr_hidden = 0;
    + int err;
    + unsigned long mthca_base;
    + struct mthca_dev *mdev;
    +
    + if (!mthca_version_printed) {
    + printk(KERN_INFO "%s", mthca_version);
    + ++mthca_version_printed;
    + }
    +
    + printk(KERN_INFO PFX "Initializing %s (%s)\n",
    + pci_pretty_name(pdev), pci_name(pdev));
    +
    + err = pci_enable_device(pdev);
    + if (err) {
    + dev_err(&pdev->dev, "Cannot enable PCI device, "
    + "aborting.\n");
    + return err;
    + }
    +
    + /*
    + * Check for BARs. We expect 0: 1MB, 2: 8MB, 4: DDR (may not
    + * be present)
    + */
    + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
    + pci_resource_len(pdev, 0) != 1 << 20) {
    + dev_err(&pdev->dev, "Missing DCS, aborting.");
    + err = -ENODEV;
    + goto err_out_disable_pdev;
    + }
    + if (!(pci_resource_flags(pdev, 2) & IORESOURCE_MEM) ||
    + pci_resource_len(pdev, 2) != 1 << 23) {
    + dev_err(&pdev->dev, "Missing UAR, aborting.");
    + err = -ENODEV;
    + goto err_out_disable_pdev;
    + }
    + if (!(pci_resource_flags(pdev, 4) & IORESOURCE_MEM))
    + ddr_hidden = 1;
    +
    + err = mthca_request_regions(pdev, ddr_hidden);
    + if (err) {
    + dev_err(&pdev->dev, "Cannot obtain PCI resources, "
    + "aborting.\n");
    + goto err_out_disable_pdev;
    + }
    +
    + pci_set_master(pdev);
    +
    + err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
    + if (err) {
    + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n");
    + err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
    + if (err) {
    + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n");
    + goto err_out_free_res;
    + }
    + }
    + err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
    + if (err) {
    + dev_warn(&pdev->dev, "Warning: couldn't set 64-bit "
    + "consistent PCI DMA mask.\n");
    + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
    + if (err) {
    + dev_err(&pdev->dev, "Can't set consistent PCI DMA mask, "
    + "aborting.\n");
    + goto err_out_free_res;
    + }
    + }
    +
    + mdev = (struct mthca_dev *) ib_alloc_device(sizeof *mdev);
    + if (!mdev) {
    + dev_err(&pdev->dev, "Device struct alloc failed, "
    + "aborting.\n");
    + err = -ENOMEM;
    + goto err_out_free_res;
    + }
    +
    + mdev->pdev = pdev;
    + mdev->hca_type = id->driver_data;
    +
    + if (ddr_hidden)
    + mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
    +
    + /*
    + * Now reset the HCA before we touch the PCI capabilities or
    + * attempt a firmware command, since a boot ROM may have left
    + * the HCA in an undefined state.
    + */
    + err = mthca_reset(mdev);
    + if (err) {
    + mthca_err(mdev, "Failed to reset HCA, aborting.\n");
    + goto err_out_free_dev;
    + }
    +
    + if (msi_x && !mthca_enable_msi_x(mdev))
    + mdev->mthca_flags |= MTHCA_FLAG_MSI_X;
    + if (msi && !(mdev->mthca_flags & MTHCA_FLAG_MSI_X) &&
    + !pci_enable_msi(pdev))
    + mdev->mthca_flags |= MTHCA_FLAG_MSI;
    +
    + sema_init(&mdev->cmd.hcr_sem, 1);
    + sema_init(&mdev->cmd.poll_sem, 1);
    + mdev->cmd.use_events = 0;
    +
    + mthca_base = pci_resource_start(pdev, 0);
    + mdev->hcr = ioremap(mthca_base + MTHCA_HCR_BASE, MTHCA_MAP_HCR_SIZE);
    + if (!mdev->hcr) {
    + mthca_err(mdev, "Couldn't map command register, "
    + "aborting.\n");
    + err = -ENOMEM;
    + goto err_out_free_dev;
    + }
    + mdev->clr_base = ioremap(mthca_base + MTHCA_CLR_INT_BASE,
    + MTHCA_CLR_INT_SIZE);
    + if (!mdev->clr_base) {
    + mthca_err(mdev, "Couldn't map command register, "
    + "aborting.\n");
    + err = -ENOMEM;
    + goto err_out_iounmap;
    + }
    +
    + mthca_base = pci_resource_start(pdev, 2);
    + mdev->kar = ioremap(mthca_base + PAGE_SIZE * MTHCA_KAR_PAGE, PAGE_SIZE);
    + if (!mdev->kar) {
    + mthca_err(mdev, "Couldn't map kernel access region, "
    + "aborting.\n");
    + err = -ENOMEM;
    + goto err_out_iounmap_clr;
    + }
    +
    + err = mthca_tune_pci(mdev);
    + if (err)
    + goto err_out_iounmap_kar;
    +
    + err = mthca_init_hca(mdev);
    + if (err)
    + goto err_out_iounmap_kar;
    +
    + err = mthca_setup_hca(mdev);
    + if (err)
    + goto err_out_close;
    +
    + err = mthca_register_device(mdev);
    + if (err)
    + goto err_out_cleanup;
    +
    + err = mthca_create_agents(mdev);
    + if (err)
    + goto err_out_unregister;
    +
    + pci_set_drvdata(pdev, mdev);
    +
    + return 0;
    +
    +err_out_unregister:
    + mthca_unregister_device(mdev);
    +
    +err_out_cleanup:
    + mthca_cleanup_mcg_table(mdev);
    + mthca_cleanup_av_table(mdev);
    + mthca_cleanup_qp_table(mdev);
    + mthca_cleanup_cq_table(mdev);
    + mthca_cmd_use_polling(mdev);
    + mthca_cleanup_eq_table(mdev);
    +
    + mthca_pd_free(mdev, &mdev->driver_pd);
    +
    + mthca_cleanup_mr_table(mdev);
    + mthca_cleanup_pd_table(mdev);
    +
    +err_out_close:
    + mthca_close_hca(mdev);
    +
    +err_out_iounmap_kar:
    + iounmap(mdev->kar);
    +
    +err_out_iounmap_clr:
    + iounmap(mdev->clr_base);
    +
    +err_out_iounmap:
    + iounmap(mdev->hcr);
    +
    +err_out_free_dev:
    + if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
    + pci_disable_msix(pdev);
    + if (mdev->mthca_flags & MTHCA_FLAG_MSI)
    + pci_disable_msi(pdev);
    +
    + ib_dealloc_device(&mdev->ib_dev);
    +
    +err_out_free_res:
    + mthca_release_regions(pdev, ddr_hidden);
    +
    +err_out_disable_pdev:
    + pci_disable_device(pdev);
    + pci_set_drvdata(pdev, NULL);
    + return err;
    +}
    +
    +static void __devexit mthca_remove_one(struct pci_dev *pdev)
    +{
    + struct mthca_dev *mdev = pci_get_drvdata(pdev);
    + u8 status;
    + int p;
    +
    + if (mdev) {
    + mthca_free_agents(mdev);
    + mthca_unregister_device(mdev);
    +
    + for (p = 1; p <= mdev->limits.num_ports; ++p)
    + mthca_CLOSE_IB(mdev, p, &status);
    +
    + mthca_cleanup_mcg_table(mdev);
    + mthca_cleanup_av_table(mdev);
    + mthca_cleanup_qp_table(mdev);
    + mthca_cleanup_cq_table(mdev);
    + mthca_cmd_use_polling(mdev);
    + mthca_cleanup_eq_table(mdev);
    +
    + mthca_pd_free(mdev, &mdev->driver_pd);
    +
    + mthca_cleanup_mr_table(mdev);
    + mthca_cleanup_pd_table(mdev);
    +
    + mthca_close_hca(mdev);
    +
    + iounmap(mdev->hcr);
    + iounmap(mdev->clr_base);
    +
    + if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
    + pci_disable_msix(pdev);
    + if (mdev->mthca_flags & MTHCA_FLAG_MSI)
    + pci_disable_msi(pdev);
    +
    + ib_dealloc_device(&mdev->ib_dev);
    + mthca_release_regions(pdev, mdev->mthca_flags &
    + MTHCA_FLAG_DDR_HIDDEN);
    + pci_disable_device(pdev);
    + pci_set_drvdata(pdev, NULL);
    + }
    +}
    +
    +static struct pci_device_id mthca_pci_table[] = {
    + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
    + .driver_data = TAVOR },
    + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_TAVOR),
    + .driver_data = TAVOR },
    + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
    + .driver_data = ARBEL_COMPAT },
    + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT),
    + .driver_data = ARBEL_COMPAT },
    + { PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_ARBEL),
    + .driver_data = ARBEL_NATIVE },
    + { PCI_DEVICE(PCI_VENDOR_ID_TOPSPIN, PCI_DEVICE_ID_MELLANOX_ARBEL),
    + .driver_data = ARBEL_NATIVE },
    + { 0, }
    +};
    +
    +MODULE_DEVICE_TABLE(pci, mthca_pci_table);
    +
    +static struct pci_driver mthca_driver = {
    + .name = "ib_mthca",
    + .id_table = mthca_pci_table,
    + .probe = mthca_init_one,
    + .remove = __devexit_p(mthca_remove_one)
    +};
    +
    +static int __init mthca_init(void)
    +{
    + int ret;
    +
    + /*
    + * TODO: measure whether dynamically choosing doorbell code at
    + * runtime affects our performance. Is there a "magic" way to
    + * choose without having to follow a function pointer every
    + * time we ring a doorbell?
    + */
    +#ifdef CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL
    + if (!cpu_has_xmm) {
    + printk(KERN_ERR PFX "mthca was compiled with SSE doorbell code, but\n");
    + printk(KERN_ERR PFX "the current CPU does not support SSE.\n");
    + printk(KERN_ERR PFX "Turn off CONFIG_INFINIBAND_MTHCA_SSE_DOORBELL "
    + "and recompile.\n");
    + return -ENODEV;
    + }
    +#endif
    +
    + ret = pci_register_driver(&mthca_driver);
    + return ret < 0 ? ret : 0;
    +}
    +
    +static void __exit mthca_cleanup(void)
    +{
    + pci_unregister_driver(&mthca_driver);
    +}
    +
    +module_init(mthca_init);
    +module_exit(mthca_cleanup);

    -
    To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
    the body of a message to majordomo@vger.kernel.org
    More majordomo info at http://vger.kernel.org/majordomo-info.html
    Please read the FAQ at http://www.tux.org/lkml/


  • Next message: Roland Dreier: "[PATCH][v4][10/24] Add Mellanox HCA low-level driver (midlayer interface)"

    Relevant Pages