[PATCH 06/13] [RFC] ipath LLD core, part 3



Last part of core driver

---

drivers/infiniband/hw/ipath/ipath_driver.c | 2380 ++++++++++++++++++++++++++++
1 files changed, 2380 insertions(+), 0 deletions(-)

f7ffc0cabd62be5e13ad84027d5712e6f92d9cc1
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 0dee4ce..87b6dae 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -4877,3 +4877,2383 @@ static int ipath_wait_intr(ipath_portdat
}
return 0;
}
+
+/*
+ * The new implementation as of Oct 2004 is that the driver assigns
+ * the tid and returns it to the caller. To make it easier to
+ * catch bugs, and to reduce search time, we keep a cursor for
+ * each port, walking the shadow tid array to find one that's not
+ * in use.
+ *
+ * For now, if we can't allocate the full list, we fail, although
+ * in the long run, we'll allocate as many as we can, and the
+ * caller will deal with that by trying the remaining pages later.
+ * That means that when we fail, we have to mark the tids as not in
+ * use again, in our shadow copy.
+ *
+ * It's up to the caller to free the tids when they are done.
+ * We'll unlock the pages as they free them.
+ *
+ * Also, right now we are locking one page at a time, but since
+ * the intended use of this routine is for a single group of
+ * virtually contiguous pages, that should change to improve
+ * performance.
+ *
+ * pd is the port the TIDs are allocated for; tidu is the user address
+ * of a struct _tidupd giving the page count (tidcnt), the first user
+ * virtual address (tidvaddr), and the user buffers that receive the
+ * list of assigned TIDs (tidlist) and a bitmap of them (tidmap).
+ * A request for more than the per-port TID count is silently trimmed
+ * to that count.
+ *
+ * Returns 0 on success. Returns -ENOMEM if there is no page shadow
+ * array or not enough free TIDs, -EFAULT for a zero tidcnt, a failed
+ * access_ok() or a failed user copy, or the error from ipath_mlock()
+ * (except -EBUSY, which is logged and then ignored). If the failure
+ * happens after TIDs were handed out, every TID set in the local
+ * bitmap is rewritten as invalid, its shadow entry is cleared, and
+ * the page list is passed to ipath_munlock().
+ *
+ * NOTE(review): the local tidmap holds 8 * 64 = 512 bits, but
+ * __set_bit() below is called with any tid below ipath_rcvtidcnt;
+ * this is only safe if ipath_rcvtidcnt never exceeds 512 - confirm.
+ */
+static int ipath_tid_update(ipath_portdata * pd, struct _tidupd *tidu)
+{
+ int ret = 0, ntids;
+ uint32_t tid, porttid, cnt, i, tidcnt;
+ struct _tidupd tu;
+ uint16_t *tidlist;
+ ipath_devdata *dd = &devdata[pd->port_unit];
+ uint64_t vaddr, physaddr, lenvalid;
+ volatile uint64_t *tidbase;
+ uint64_t tidmap[8];
+ struct page **pagep = NULL;
+
+ tu.tidcnt = 0; /* for early errors */
+ if (!dd->ipath_pageshadow) {
+ ret = -ENOMEM;
+ goto done;
+ }
+ if (copy_from_user(&tu, tidu, sizeof tu)) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ if (!(cnt = tu.tidcnt)) {
+ _IPATH_DBG("After copyin, tidcnt 0, tidlist %llx\n",
+ tu.tidlist);
+ /* or should we treat as success? likely a bug */
+ ret = -EFAULT;
+ goto done;
+ }
+ tidcnt = dd->ipath_rcvtidcnt;
+ if (cnt >= tidcnt) { /* make sure it all fits in port_tid_pg_list */
+ _IPATH_INFO
+ ("Process tried to allocate %u TIDs, only trying max (%u)\n",
+ cnt, tidcnt);
+ cnt = tidcnt;
+ }
+ pagep = (struct page **)pd->port_tid_pg_list;
+ tidlist = (uint16_t *) (&pagep[cnt]); /* tid list starts right after the cnt page pointers */
+
+ memset(tidmap, 0, sizeof(tidmap));
+ tid = pd->port_tidcursor; /* resume the search where the last successful call stopped */
+ /* before decrement; chip actual # */
+ porttid = pd->port_port * tidcnt;
+ ntids = tidcnt; /* number of shadow slots we may still probe, for the whole request */
+ tidbase = (volatile uint64_t *)((volatile char *)
+ (devdata[pd->port_unit].
+ ipath_kregbase) +
+ devdata[pd->port_unit].
+ ipath_rcvtidbase +
+ porttid * sizeof(*tidbase));
+
+ _IPATH_VDBG("Port%u %u tids, cursor %u, tidbase %p\n", pd->port_port,
+ cnt, tid, tidbase);
+
+ vaddr = tu.tidvaddr; /* virtual address of first page in transfer */
+ if (!access_ok(VERIFY_WRITE, (void *)vaddr, cnt * PAGE_SIZE)) {
+ _IPATH_DBG("Fail vaddr %llx, %u pages, !access_ok\n",
+ vaddr, cnt);
+ ret = -EFAULT;
+ goto done;
+ }
+ if ((ret = ipath_mlock((unsigned long)vaddr, cnt, pagep))) {
+ if (ret == -EBUSY) {
+ _IPATH_DBG
+ ("Failed to lock addr %p, %u pages (already locked)\n",
+ (void *)vaddr, cnt);
+ /*
+ * for now, continue, and see what happens
+ * but with the new implementation, this should
+ * never happen, unless perhaps the user has
+ * mpin'ed the pages themselves (something we
+ * need to test)
+ */
+ ret = 0;
+ } else {
+ _IPATH_INFO
+ ("Failed to lock addr %p, %u pages: errno %d\n",
+ (void *)vaddr, cnt, -ret);
+ goto done;
+ }
+ }
+ for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
+ for (; ntids--; tid++) { /* find the next free shadow slot, wrapping at tidcnt */
+ if (tid == tidcnt)
+ tid = 0;
+ if (!dd->ipath_pageshadow[porttid + tid])
+ break;
+ }
+ if (ntids < 0) {
+ /*
+ * oops, wrapped all the way through their TIDs,
+ * and didn't have enough free; see comments at
+ * start of routine
+ */
+ _IPATH_DBG
+ ("Not enough free TIDs for %u pages (index %d), failing\n",
+ cnt, i);
+ i--; /* last tidlist[i] not filled in; i is unsigned, so
+ * this wraps when i == 0, but after this point i is
+ * only used in the debug message in the cleanup code */
+ ret = -ENOMEM;
+ break;
+ }
+ tidlist[i] = tid;
+ _IPATH_VDBG("Updating idx %u to TID %u, vaddr %llx\n",
+ i, tid, vaddr);
+ /* for now we "know" system pages and TID pages are same size */
+ /* for ipath_free_tid */
+ dd->ipath_pageshadow[porttid + tid] = pagep[i];
+ __set_bit(tid, tidmap); /* don't need atomic or its overhead */
+ physaddr = page_to_phys(pagep[i]);
+ ipath_stats.sps_pagelocks++;
+ _IPATH_VDBG("TID %u, vaddr %llx, physaddr %llx pgp %p\n",
+ tid, vaddr, physaddr, pagep[i]);
+ /*
+ * in words (fixed, full page). could make less for very last
+ * page in transfer, but for now we won't worry about it.
+ */
+ lenvalid = PAGE_SIZE >> 2;
+ lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
+ physaddr |= lenvalid | INFINIPATH_RT_VALID;
+ ipath_kput_memq(pd->port_unit, &tidbase[tid], physaddr);
+ /*
+ * don't check this tid in ipath_portshadow, since we
+ * just filled it in; start with the next one.
+ */
+ tid++;
+ }
+
+ if (ret) {
+ uint32_t limit;
+ uint64_t tidval;
+ /*
+ * chip errata bug 7358, try to work around it by
+ * marking invalid tids as having max length
+ * (this assignment is repeated after the cleanup label,
+ * because the gotos from the copy-out path skip it)
+ */
+ tidval =
+ (~0ULL & INFINIPATH_RT_BUFSIZE_MASK) <<
+ INFINIPATH_RT_BUFSIZE_SHIFT;
+ cleanup:
+ /* jump here if copy out of updated info failed... */
+ _IPATH_DBG("After failure (ret=%d), undo %d of %d entries\n",
+ -ret, i, cnt);
+ /* same code that's in ipath_free_tid() */
+ if ((limit = sizeof(tidmap) * _BITS_PER_BYTE) > tidcnt)
+ /* just in case size changes in future */
+ limit = tidcnt;
+ tid = find_first_bit((const unsigned long *)tidmap, limit);
+ /*
+ * chip errata bug 7358, try to work around it by
+ * marking invalid tids as having max length
+ */
+ tidval =
+ (~0ULL & INFINIPATH_RT_BUFSIZE_MASK) <<
+ INFINIPATH_RT_BUFSIZE_SHIFT;
+ for (; tid < limit; tid++) { /* undo only the TIDs this call set in tidmap */
+ if (!test_bit(tid, tidmap))
+ continue;
+ if (dd->ipath_pageshadow[porttid + tid]) {
+ _IPATH_VDBG("Freeing TID %u\n", tid);
+ ipath_kput_memq(pd->port_unit, &tidbase[tid],
+ tidval);
+ dd->ipath_pageshadow[porttid + tid] = NULL;
+ ipath_stats.sps_pageunlocks++;
+ }
+ }
+ (void)ipath_munlock(cnt, pagep); /* hand the whole page list back in one call */
+ } else {
+ /*
+ * copy the updated array, with ipath_tid's filled in,
+ * back to user. Since we did the copy in already, this
+ * "should never fail"
+ * If it does, we have to clean up...
+ */
+ int r;
+ if ((r =
+ copy_to_user((void *)tu.tidlist, tidlist,
+ cnt * sizeof(*tidlist)))) {
+ _IPATH_DBG
+ ("Failed to copy out %d TIDs (%lx bytes) to %llx (ret %x)\n",
+ cnt, cnt * sizeof(*tidlist), tu.tidlist, r);
+ ret = -EFAULT;
+ goto cleanup;
+ }
+ if (copy_to_user((void *)tu.tidmap, tidmap, sizeof tidmap)) {
+ _IPATH_DBG("Failed to copy out TID map to %llx\n",
+ tu.tidmap);
+ ret = -EFAULT;
+ goto cleanup;
+ }
+ if (tid == tidcnt)
+ tid = 0;
+ pd->port_tidcursor = tid; /* the cursor only advances on full success */
+ }
+
+done:
+ if (ret)
+ _IPATH_DBG
+ ("Failed to map %u TID pages, failing with %d, tidu %p\n",
+ tu.tidcnt, -ret, tidu);
+ return ret;
+}
+
+/*
+ * Release the TIDs named by a user-supplied bitmap.
+ *
+ * pd is the owning port. tidu is the user address of a struct _tidupd;
+ * its tidmap member is the user address of a bitmap with one bit per TID
+ * to free, and its tidcnt member is only compared against the number of
+ * bits actually found set (a mismatch is logged, it is not an error).
+ *
+ * Only bits below the smaller of the local bitmap size and the per-port
+ * TID count are examined, so the TID is always in range for this port.
+ * Beyond that validity isn't checked; if the user frees the wrong tid,
+ * it's only their own data that can thereby be corrupted. A TID whose
+ * shadow entry is empty is logged and skipped. The page that gets
+ * unlocked is always the one saved in our shadow array, never an address
+ * passed in by the user.
+ *
+ * Right now the pages are unlocked one at a time; since the intended use
+ * is a single group of virtually contiguous pages, that should change to
+ * improve performance.
+ *
+ * Returns 0 on success, -ENOMEM if there is no page shadow array, or
+ * -EFAULT if either copy from user space fails.
+ */
+static int ipath_tid_free(ipath_portdata * pd, struct _tidupd *tidu)
+{
+	ipath_devdata *dd = &devdata[pd->port_unit];
+	struct _tidupd tu;
+	uint64_t tidmap[8];
+	uint64_t *tidbase;
+	uint64_t invalid_entry;
+	uint32_t tid, porttid, nset, maxtid, tidcnt;
+	int ret;
+
+	tu.tidcnt = 0;	/* so the failure message is sane on early errors */
+	if (!dd->ipath_pageshadow) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	if (copy_from_user(&tu, tidu, sizeof tu)) {
+		_IPATH_DBG("copy of tidupd structure failed\n");
+		ret = -EFAULT;
+		goto fail;
+	}
+	if (copy_from_user(tidmap, (void *)tu.tidmap, sizeof tidmap)) {
+		_IPATH_DBG("copy of tidmap failed\n");
+		ret = -EFAULT;
+		goto fail;
+	}
+
+	/* locate this port's slice of the chip TID array */
+	tidcnt = dd->ipath_rcvtidcnt;
+	porttid = pd->port_port * tidcnt;
+	tidbase = (uint64_t *) ((char *)(dd->ipath_kregbase) +
+				dd->ipath_rcvtidbase +
+				porttid * sizeof(*tidbase));
+
+	/* never look past the bitmap or past this port's TIDs */
+	maxtid = sizeof(tidmap) * _BITS_PER_BYTE;
+	if (maxtid > tidcnt)
+		maxtid = tidcnt;	/* just in case size changes in future */
+
+	tid = find_first_bit((const unsigned long *)tidmap, maxtid);
+	_IPATH_VDBG
+	    ("Port%u free %u tids; first bit (max=%d) set is %d, porttid %u\n",
+	     pd->port_port, tu.tidcnt, maxtid, tid, porttid);
+
+	/*
+	 * chip errata bug 7358, try to work around it by marking invalid
+	 * tids as having max length
+	 */
+	invalid_entry =
+	    (~0ULL & INFINIPATH_RT_BUFSIZE_MASK) << INFINIPATH_RT_BUFSIZE_SHIFT;
+
+	for (nset = 0; tid < maxtid; tid++) {
+		if (test_bit(tid, tidmap)) {
+			nset++;
+			if (!dd->ipath_pageshadow[porttid + tid]) {
+				_IPATH_DBG("Unused tid %u, ignoring\n", tid);
+			} else {
+				_IPATH_VDBG("Freeing TID %u\n", tid);
+				/* invalidate on the chip before unlocking */
+				ipath_kput_memq(pd->port_unit, &tidbase[tid],
+						invalid_entry);
+				ipath_munlock(1,
+					      &dd->ipath_pageshadow[porttid + tid]);
+				dd->ipath_pageshadow[porttid + tid] = NULL;
+				ipath_stats.sps_pageunlocks++;
+			}
+		}
+	}
+	if (nset != tu.tidcnt)
+		_IPATH_DBG("passed in tidcnt %d, only %d bits set in map\n",
+			   tu.tidcnt, nset);
+	return 0;
+
+fail:
+	_IPATH_DBG("Failed to unmap %u TID pages, failing with %d\n",
+		   tu.tidcnt, -ret);
+	return ret;
+}
+
+/*
+ * Set the protocol receive header size for a unit.
+ * Called from user init code, and also layered driver init.
+ *
+ * mdev indexes devdata[]; rhdrsize is the requested size. The size can
+ * be set only once per unit: asking again for the same value succeeds
+ * without touching the chip, asking for a different value fails with
+ * -EAGAIN. A value larger than the unit's receive header entry size
+ * minus sizeof(uint64_t)/sizeof(uint32_t) fails with -EOVERFLOW.
+ * Otherwise the value is recorded, written to kr_rcvhdrsize, and 0 is
+ * returned.
+ */
+int ipath_setrcvhdrsize(const ipath_type mdev, unsigned rhdrsize)
+{
+	ipath_devdata *dd = &devdata[mdev];
+
+	if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
+		if (dd->ipath_rcvhdrsize == rhdrsize) {
+			/* OK if set already, with same value, nothing to do */
+			_IPATH_VDBG("Reuse same protocol header size %u\n",
+				    dd->ipath_rcvhdrsize);
+			return 0;
+		}
+		_IPATH_INFO
+		    ("Error: can't set protocol header size %u, already %u\n",
+		     rhdrsize, dd->ipath_rcvhdrsize);
+		return -EAGAIN;
+	}
+
+	if (rhdrsize >
+	    (dd->ipath_rcvhdrentsize -
+	     (sizeof(uint64_t) / sizeof(uint32_t)))) {
+		_IPATH_DBG
+		    ("Error: can't set protocol header size %u (> max %u)\n",
+		     rhdrsize,
+		     dd->ipath_rcvhdrentsize -
+		     (uint32_t) (sizeof(uint64_t) / sizeof(uint32_t)));
+		return -EOVERFLOW;
+	}
+
+	/* first time, and the value fits: remember it and tell the chip */
+	dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
+	dd->ipath_rcvhdrsize = rhdrsize;
+	ipath_kput_kreg(mdev, kr_rcvhdrsize, dd->ipath_rcvhdrsize);
+	_IPATH_VDBG("Set protocol header size to %u\n",
+		    dd->ipath_rcvhdrsize);
+	return 0;
+}
+
+/*
+ * find an available pio buffer, and do appropriate marking as busy, etc.
+ * returns buffer number if one found (>=0), negative number is error.
+ * Used by ipath_send_smapkt and ipath_layer_send
+ *
+ * mdev is the unit index into devdata[]. Only buffers from
+ * ipath_lastport_piobuf up to (not including) ipath_piobcnt are
+ * considered. In the shadow bitmap each buffer i uses two bits: bit
+ * 2*i+1 is tested and set here to claim the buffer, and bit 2*i is the
+ * generation bit that gets flipped on every claim.
+ *
+ * If the shadow shows nothing free, ipath_update_pio_bufs() is called
+ * once and the scan is repeated; if that also fails, -EBUSY is returned
+ * and ipath_upd_pio_shadow is set so the next call refreshes the shadow
+ * before scanning.
+ */
+int ipath_getpiobuf(int mdev)
+{
+ int i, j, starti, updated = 0;
+ unsigned piobcnt, iter;
+ unsigned long flags;
+ ipath_devdata *dd = &devdata[mdev];
+ uint64_t *shadow = dd->ipath_pioavailshadow;
+
+ piobcnt = (unsigned)dd->ipath_piobcnt;
+ starti = dd->ipath_lastport_piobuf; /* first buffer index this routine may hand out */
+ iter = piobcnt - starti; /* number of candidate buffers, i.e. one full lap */
+ if (dd->ipath_upd_pio_shadow) {
+ /*
+ * minor optimization. If we had no buffers on last call,
+ * start out by doing the update; continue and do scan
+ * even if no buffers were updated, to be paranoid
+ */
+ ipath_update_pio_bufs(mdev);
+ /* we scanned here, don't do it at end of scan */
+ updated = 1;
+ i = starti;
+ } else
+ i = dd->ipath_lastpioindex; /* continue just past the buffer handed out last time */
+
+rescan:
+ /*
+ * while test_and_set_bit() is atomic,
+ * we do that and then the change_bit(), and the pair is not.
+ * See if this is the cause of the remaining armlaunch errors.
+ * (hence the whole scan runs under ipath_pioavail_lock)
+ */
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ for (j = 0; j < iter; j++, i++) { /* j counts probes, i is the buffer index */
+ if (i >= piobcnt)
+ i = starti; /* wrap back to the first candidate */
+ /*
+ * To avoid bus lock overhead, we first find a candidate
+ * buffer, then do the test and set, and continue if
+ * that fails.
+ */
+ if (test_bit((2 * i) + 1, shadow) ||
+ test_and_set_bit((2 * i) + 1, shadow)) {
+ continue;
+ }
+ /* flip generation bit */
+ change_bit(2 * i, shadow);
+ break;
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+
+ if (j == iter) { /* a full lap without claiming anything */
+ /*
+ * first time through; shadow exhausted, but may be
+ * real buffers available, so go see; if any updated,
+ * rescan (once)
+ */
+ if (!updated) {
+ ipath_update_pio_bufs(mdev);
+ updated = 1;
+ i = starti;
+ goto rescan;
+ }
+ dd->ipath_upd_pio_shadow = 1; /* next call refreshes the shadow before scanning */
+ /* not atomic, but if we lose one once in a while, that's OK */
+ ipath_stats.sps_nopiobufs++;
+ if (!(++dd->ipath_consec_nopiobuf % 100000)) { /* log only every 100000th consecutive failure */
+ _IPATH_DBG
+ ("%u pio sends with no bufavail; dmacopy: %llx %llx %llx %llx; shadow: %llx %llx %llx %llx\n",
+ dd->ipath_consec_nopiobuf,
+ dd->ipath_pioavailregs_dma[0],
+ dd->ipath_pioavailregs_dma[1],
+ dd->ipath_pioavailregs_dma[2],
+ dd->ipath_pioavailregs_dma[3],
+ shadow[0], shadow[1], shadow[2], shadow[3]);
+ /*
+ * 4 buffers per byte, 4 registers above, cover
+ * rest below
+ */
+ if (dd->ipath_piobcnt > (sizeof(shadow[0]) * 4 * 4))
+ _IPATH_DBG
+ ("2nd group: dmacopy: %llx %llx %llx %llx; shadow: %llx %llx %llx %llx\n",
+ dd->ipath_pioavailregs_dma[4],
+ dd->ipath_pioavailregs_dma[5],
+ dd->ipath_pioavailregs_dma[6],
+ dd->ipath_pioavailregs_dma[7],
+ shadow[4], shadow[5], shadow[6],
+ shadow[7]);
+ }
+ return -EBUSY;
+ }
+
+ if (updated && dd->ipath_layer.l_intr) {
+ /*
+ * ran out of bufs, now some (at least this one we just got)
+ * are now available, so tell the layered driver.
+ * (updated is also set when the shadow was refreshed up
+ * front because ipath_upd_pio_shadow was set)
+ */
+ dd->ipath_layer.l_intr(mdev, IPATH_LAYER_INT_SEND_CONTINUE);
+ }
+
+ /*
+ * set next starting place. Since it's just an optimization,
+ * it doesn't matter who wins on this, so no locking
+ */
+ dd->ipath_lastpioindex = i + 1;
+ if(dd->ipath_upd_pio_shadow)
+ dd->ipath_upd_pio_shadow = 0;
+ if(dd->ipath_consec_nopiobuf)
+ dd->ipath_consec_nopiobuf = 0;
+ return i;
+}
+
+/*
+ * Probe-only counterpart of ipath_getpiobuf(): report whether some pio
+ * buffer looks free, without allocating it. Because nothing is claimed,
+ * the buffer may be gone again by the time the caller tries to send.
+ *
+ * The shadow bitmap is scanned as it stands; if that finds nothing, it
+ * is refreshed with ipath_update_pio_bufs() and scanned once more.
+ * Returns 1 if a free buffer was seen, 0 otherwise.
+ *
+ * NOTE: This can be called from interrupt context by ipath_intr()
+ * and from non-interrupt context by layer_send_getpiobuf().
+ */
+int ipath_bufavail(int mdev)
+{
+	ipath_devdata *dd = &devdata[mdev];
+	uint64_t *shadow = dd->ipath_pioavailshadow;
+	unsigned piobcnt = (unsigned)dd->ipath_piobcnt;
+	int pass, i;
+
+	for (pass = 0; pass < 2; pass++) {
+		/* second pass only: check for update, then rescan */
+		if (pass)
+			ipath_update_pio_bufs(mdev);
+
+		for (i = dd->ipath_lastport_piobuf; i < piobcnt; i++) {
+			if (!test_bit((2 * i) + 1, shadow))
+				return 1;
+		}
+	}
+
+	_IPATH_PDBG("No bufs avail\n");
+	return 0;
+}
+
+/*
+ * Send one SMA packet described by a user-space struct ipath_sendpkt.
+ *
+ * This routine is no longer on any critical paths; it is used only
+ * for sending SMA packets, but that could change in the future, so it
+ * should be kept pretty tight, with anything that
+ * increases the cache footprint, adds branches, etc. carefully
+ * examined, and if needed only for unusual cases, should be moved out to
+ * a separate routine, or out of the main execution path.
+ * Because it's currently sma only, there are no checks to see if the
+ * link is up; sma must be able to send in the not fully initialized state
+ *
+ * upkt is the user address of the request: sps_flags carries the unit
+ * number, sps_cnt the number of entries used in sps_iov[], and each
+ * sps_iov[] entry a user buffer (iov_base, iov_len) holding part of the
+ * packet.
+ *
+ * Returns 0 on success, -EFAULT if the request or any piece of the
+ * packet cannot be copied in, -ENODEV if the unit is out of range, not
+ * present or not initialized, -EINVAL if sps_cnt exceeds the sps_iov[]
+ * array, if the pieces do not fit the staging buffer, or if the packet
+ * is longer than ipath_ibmaxlen, or the (negative) result of
+ * ipath_getpiobuf() if no pio buffer could be obtained.
+ */
+int ipath_send_smapkt(struct ipath_sendpkt * upkt)
+{
+	/*
+	 * If this code ever gets used for anything performance
+	 * oriented, or that isn't inherently single-threaded,
+	 * then I need to implement the original idea of our
+	 * own equivalent of copy_from_user that uses only dword
+	 * or qword copies. copy_from_user() can use byte copies,
+	 * and that is a problem for our chip.
+	 * Until then the packet is staged in this static (and therefore
+	 * shared, non-reentrant) buffer and copied out with dword copies.
+	 */
+	static uint32_t tmpbuf[2176 / sizeof(uint32_t)];
+	int i, ret = 0, whichpb;
+	uint32_t *piobuf, plen = 0, clen;
+	uint64_t pboff;
+	struct ipath_sendpkt kpkt;
+	struct ipath_iovec *iov = kpkt.sps_iov;
+	ipath_type t;
+
+	if (unlikely((copy_from_user(&kpkt, upkt, sizeof kpkt))))
+		ret = -EFAULT;
+	if (ret) {
+		_IPATH_VDBG("Send failed: error %d\n", -ret);
+		goto done;
+	}
+	t = kpkt.sps_flags;
+	if (t >= infinipath_max || !(devdata[t].ipath_flags & IPATH_PRESENT) ||
+	    !devdata[t].ipath_kregbase) {
+		_IPATH_SMADBG("illegal unit %u for sma send\n", t);
+		return -ENODEV;
+	}
+	if (!(devdata[t].ipath_flags & IPATH_INITTED)) {
+		/* no hardware, freeze, etc. */
+		_IPATH_SMADBG("unit %u not usable\n", t);
+		return -ENODEV;
+	}
+
+	/*
+	 * sps_cnt comes straight from user space; never index past the
+	 * iovec array that was actually copied in.
+	 */
+	if (kpkt.sps_cnt > ARRAY_SIZE(kpkt.sps_iov)) {
+		_IPATH_DBG("Too many iovecs (%u) for sma send\n",
+			   (unsigned)kpkt.sps_cnt);
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/* need total length before first word written */
+	plen = sizeof(uint32_t);	/* +1 word is for the qword padding */
+	for (i = 0; i < kpkt.sps_cnt; i++) {
+		/* each must be dword multiple */
+		uint64_t len = kpkt.sps_iov[i].iov_len;
+		/*
+		 * Space left in tmpbuf behind the 2-word pbc and the pieces
+		 * counted so far. Checking every piece against it keeps the
+		 * copies below inside tmpbuf and keeps plen from wrapping,
+		 * which would otherwise defeat the ibmaxlen check.
+		 */
+		uint64_t room = sizeof(tmpbuf) - sizeof(uint64_t) -
+		    (plen - sizeof(uint32_t));
+
+		if (len > room) {
+			_IPATH_DBG("iovec %d len 0x%llx won't fit send buffer\n",
+				   i, (unsigned long long)len);
+			ret = -EINVAL;
+			goto done;	/* before writing pbc */
+		}
+		plen += len;
+	}
+
+	if ((plen + 4) > devdata[t].ipath_ibmaxlen) {
+		_IPATH_DBG("Pkt len 0x%x > ibmaxlen %x!\n", plen - 4,
+			   devdata[t].ipath_ibmaxlen);
+		ret = -EINVAL;
+		goto done;	/* before writing pbc */
+	}
+	plen >>= 2;		/* in words */
+
+	whichpb = ipath_getpiobuf(t);
+	if (whichpb < 0) {
+		ret = whichpb;
+		devdata[t].ipath_nosma_bufs++;
+		_IPATH_SMADBG("No PIO buffers available unit %u %u times\n",
+			      t, devdata[t].ipath_nosma_bufs);
+		goto done;
+	}
+	if(devdata[t].ipath_nosma_bufs) {
+		_IPATH_SMADBG(
+			"Unit %u got SMA send buffer after %u failures, %u seconds\n",
+			t, devdata[t].ipath_nosma_bufs, devdata[t].ipath_nosma_secs);
+		devdata[t].ipath_nosma_bufs = 0;
+		devdata[t].ipath_nosma_secs = 0;
+	}
+	if((devdata[t].ipath_lastibcstat & 0x11) != 0x11 &&
+	   (devdata[t].ipath_lastibcstat & 0x21) != 0x21) {
+		/* we need to be at least at INIT for SMA packets to go out. If we
+		 * aren't, something has gone wrong, and SMA hasn't noticed.
+		 * Therefore we'll try to go to INIT here, in hopes of fixing up the
+		 * problem. First we verify that indeed the state is still "bad"
+		 * (that is, that lastibcstat isn't "stale") */
+		uint64_t val;
+		val = ipath_kget_kreg64(t, kr_ibcstatus);
+		if((val & 0x11) != 0x11 && (val & 0x21) != 0x21) {
+			_IPATH_SMADBG("Invalid Link state 0x%llx unit %u for send, try INIT\n",
+				      val, t);
+			ipath_set_ib_lstate(t, INFINIPATH_IBCC_LINKCMD_INIT);
+			val = ipath_kget_kreg64(t, kr_ibcstatus);
+			if((val & 0x11) != 0x11 && (val & 0x21) != 0x21)
+				_IPATH_SMADBG("Link state still not OK unit %u (0x%llx) after INIT\n",
+					      t, val);
+			else
+				_IPATH_SMADBG("Link state OK unit %u (0x%llx) after INIT\n",
+					      t, val);
+		}
+		/* and continue, regardless */
+	}
+
+	pboff = devdata[t].ipath_piobufbase;
+	piobuf = (uint32_t *) (((char *)(devdata[t].ipath_kregbase)) + pboff
+			       + whichpb * devdata[t].ipath_palign);
+
+	if(infinipath_debug & __IPATH_PKTDBG) // SMA and PKT, both
+		_IPATH_SMADBG("unit %u 0x%x+1w pio%d, (scnt %d)\n",
+			      t, plen - 1, whichpb, kpkt.sps_cnt);
+
+	ret = 0;
+	clen = 2;		/* size of the pbc */
+	/* the pbc (first qword of the buffer) carries the length in words */
+	*(uint64_t *) tmpbuf = (uint64_t) plen;
+	for (i = 0; i < kpkt.sps_cnt; i++) {
+		if (unlikely
+		    (copy_from_user
+		     (tmpbuf + clen, (void *)iov->iov_base,
+		      iov->iov_len)))
+			ret = -EFAULT;	/* no break */
+		clen += iov->iov_len >> 2;
+		iov++;
+	}
+	ipath_dwordcpy(piobuf, tmpbuf, clen);
+
+	/* flush the packet out now, don't leave it waiting around */
+	mb();
+
+	if (ret) {
+		/*
+		 * Packet is bad, so we need to use the PIO abort mechanism to
+		 * abort the packet
+		 */
+		uint32_t sendctrl;
+		sendctrl = devdata[t].ipath_sendctrl | INFINIPATH_S_DISARM |
+		    (whichpb << INFINIPATH_S_DISARMPIOBUF_SHIFT);
+		_IPATH_DBG("Doing PIO abort on buffer %u after error\n",
+			   whichpb);
+		ipath_kput_kreg(t, kr_sendctrl, sendctrl);
+	}
+
+done:
+	return ret;
+}
+
+/*
+ * implementation of the ioctl to get the counter values from the chip
+ * For the time being, we get all of them when asked, no shadowing.
+ * We need to shadow the byte counters at a minimum, because otherwise
+ * they will wrap in just a few seconds at full bandwidth
+ *
+ * t is the unit to read; ucounters is the user address to which we do
+ * the copy_to_user(). The structure is treated as an array of 64-bit
+ * values, and entry N receives ipath_snap_cntr(t, N).
+ *
+ * Returns 0 on success, or -EFAULT as soon as one counter cannot be
+ * copied out (the remaining counters are then not read).
+ */
+static int ipath_get_counters(ipath_type t,
+			      struct infinipath_counters * ucounters)
+{
+	int ret = 0;
+	uint64_t val;
+	uint64_t *ucreg;
+	uint16_t vcreg;
+
+	ucreg = (uint64_t *) ucounters;
+	/*
+	 * for now, let's do this one at a time. It's not the most
+	 * optimal method, but it is simple, and has no intermediate
+	 * memory requirements.
+	 */
+	for (vcreg = 0;
+	     vcreg < (sizeof(struct infinipath_counters) / sizeof(val));
+	     vcreg++, ucreg++) {
+		ipath_creg creg = vcreg;
+		val = ipath_snap_cntr(t, creg);
+		/*
+		 * copy_to_user() returns the number of bytes it could NOT
+		 * copy, not an errno, so translate any failure to -EFAULT
+		 * rather than handing a positive count back to the caller.
+		 */
+		if (copy_to_user(ucreg, &val, sizeof(val))) {
+			_IPATH_DBG("copy_to_user error on counter %d\n", creg);
+			ret = -EFAULT;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * implementation of the ioctl to get the stats values from the driver
+ * The argument is the user address to which we do the copy_to_user()
+ * of the whole ipath_stats structure.
+ *
+ * Returns 0 on success, or -EFAULT if the copy to user space fails.
+ */
+static int ipath_get_stats(struct infinipath_stats *ustats)
+{
+	/*
+	 * copy_to_user() returns the number of bytes left uncopied, which
+	 * is not an errno; report any failure as -EFAULT.
+	 */
+	if (copy_to_user(ustats, &ipath_stats, sizeof(ipath_stats))) {
+		_IPATH_DBG("copy_to_user error on driver stats\n");
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/* set a partition key. We can have up to 4 active at a time (other than
+ * the default, which is always allowed). This is somewhat tricky, since
+ * multiple ports may set the same key, so we reference count them, and
+ * clean up at exit. All 4 partition keys are packed into a single
+ * infinipath register. It's an error for a process to set the same
+ * pkey multiple times. We provide no mechanism to de-allocate a pkey
+ * at this time, we may eventually need to do that.
+ * I've used the atomic operations, and no locking, and only make a single
+ * pass through what's available. This should be more than adequate for
+ * some time. I'll think about spinlocks or the like if and as it's necessary
+ *
+ * pd is the port setting the key; key is the requested partition key.
+ * The 0x8000 bit is always forced on before the key is stored.
+ *
+ * Returns 0 if the key is the default key, if it was already programmed
+ * on the device and its reference count was taken, or if it was written
+ * into an empty device slot. Returns -EINVAL for key 0 (ignoring the
+ * 0x8000 bit), -EEXIST if this port already holds the key or a device
+ * slot holds a key with the same low 15 bits, and -EBUSY if the port or
+ * the device has no empty slot left.
+ *
+ * NOTE(review): in the "lost race" branch below the comment says the
+ * case is caught further down, but execution falls straight through to
+ * the low-15-bit comparison, which matches the same slot and returns
+ * -EEXIST (unless the slot changed concurrently) - confirm intent.
+ */
+static int ipath_set_partkey(ipath_portdata *pd, uint16_t key)
+{
+ ipath_devdata *dd;
+ int i, any = 0, pidx = -1;
+ uint16_t lkey = key & 0x7FFF; /* the key without the 0x8000 membership bit */
+
+ dd = &devdata[pd->port_unit];
+
+ if (lkey == (IPS_DEFAULT_P_KEY & 0x7FFF)) {
+ /* nothing to do; this key always valid */
+ return 0;
+ }
+
+ _IPATH_VDBG
+ ("p%u try to set pkey %hx, current keys %hx:%x %hx:%x %hx:%x %hx:%x\n",
+ pd->port_port, key, dd->ipath_pkeys[0],
+ atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
+ atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
+ atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
+ atomic_read(&dd->ipath_pkeyrefs[3]));
+
+ if (!lkey) {
+ _IPATH_PRDBG("p%u tries to set key 0, not allowed\n",
+ pd->port_port);
+ return -EINVAL;
+ }
+
+ /*
+ * Set the full membership bit, because it has to be
+ * set in the register or the packet, and it seems
+ * cleaner to set in the register than to force all
+ * callers to set it. (see bug 4331)
+ */
+ key |= 0x8000;
+
+ for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { /* pass 1: this port's own table */
+ if (!pd->port_pkeys[i] && pidx == -1)
+ pidx = i; /* remember the first empty per-port slot */
+ if (pd->port_pkeys[i] == key) {
+ _IPATH_VDBG
+ ("p%u tries to set same pkey (%x) more than once\n",
+ pd->port_port, key);
+ return -EEXIST;
+ }
+ }
+ if (pidx == -1) {
+ _IPATH_DBG
+ ("All pkeys for port %u already in use, can't set %x\n",
+ pd->port_port, key);
+ return -EBUSY;
+ }
+ for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { /* pass 2: is the key already on the device? */
+ if (!dd->ipath_pkeys[i]) {
+ any++; /* any counts device slots that look usable */
+ continue;
+ }
+ if (dd->ipath_pkeys[i] == key) {
+ if (atomic_inc_return(&dd->ipath_pkeyrefs[i]) > 1) {
+ pd->port_pkeys[pidx] = key;
+ _IPATH_VDBG
+ ("p%u set key %x matches #%d, count now %d\n",
+ pd->port_port, key, i,
+ atomic_read(&dd->ipath_pkeyrefs[i]));
+ return 0;
+ } else {
+ /* lost race, decrement count, catch below */
+ atomic_dec(&dd->ipath_pkeyrefs[i]);
+ _IPATH_VDBG
+ ("Lost race, count was 0, after dec, it's %d\n",
+ atomic_read(&dd->ipath_pkeyrefs[i]));
+ any++;
+ }
+ }
+ if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
+ /*
+ * It makes no sense to have both the limited and full
+ * membership PKEY set at the same time since the
+ * unlimited one will disable the limited one.
+ */
+ return -EEXIST;
+ }
+ }
+ if (!any) {
+ _IPATH_DBG
+ ("port %u, all pkeys already in use, can't set %x\n",
+ pd->port_port, key);
+ return -EBUSY;
+ }
+ for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) { /* pass 3: claim an empty device slot */
+ if (!dd->ipath_pkeys[i] &&
+ atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
+ uint64_t pkey;
+
+ /* for ipathstats, etc. */
+ ipath_stats.sps_pkeys[i] = lkey;
+ pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
+ pkey = /* all four 16-bit keys packed into one register value */
+ (uint64_t) dd->ipath_pkeys[0] |
+ ((uint64_t) dd->ipath_pkeys[1] << 16) |
+ ((uint64_t) dd->ipath_pkeys[2] << 32) |
+ ((uint64_t) dd->ipath_pkeys[3] << 48);
+ _IPATH_PRDBG
+ ("p%u set key %x in #%d, portidx %d, new pkey reg %llx\n",
+ pd->port_port, key, i, pidx, pkey);
+ ipath_kput_kreg(pd->port_unit, kr_partitionkey, pkey);
+
+ return 0;
+ }
+ }
+ _IPATH_DBG
+ ("port %u, all pkeys already in use 2nd pass, can't set %x\n",
+ pd->port_port, key);
+ return -EBUSY;
+}
+
+/*
+ * Enable or disable receive on a port.
+ * start_stop == 0 disables receive on the port, for use in queue overflow
+ * conditions. A nonzero start_stop re-enables it; in that case the
+ * in-memory copy of the tail register is forced to 0 first.
+ *
+ * pd is the port to change. The function always returns 0.
+ *
+ * NOTE(review): this comment used to say that the enable case returns
+ * the value of the tail register, to be used to re-init the software
+ * copy of the head register. The code reads the tail register but only
+ * to make sure the update has happened; the value is not returned.
+ */

+static int ipath_manage_rcvq(ipath_portdata * pd, uint16_t start_stop)
+{
+ ipath_devdata *dd;
+ /*
+ * This needs to be volatile, so that the compiler doesn't
+ * optimize away the read to the device's mapped memory.
+ */
+ volatile uint64_t tval;
+
+ dd = &devdata[pd->port_unit];
+ _IPATH_PRDBG("%sabling rcv for unit %u port %u\n",
+ start_stop ? "en" : "dis", pd->port_unit, pd->port_port);
+ /* atomically set or clear the receive enable bit of this port. */
+ if (start_stop) {
+ /*
+ * on enable, force in-memory copy of the tail register
+ * to 0, so that protocol code doesn't have to worry
+ * about whether or not the chip has yet updated
+ * the in-memory copy or not on return from the system
+ * call. The chip always resets its tail register back
+ * to 0 on a transition from disabled to enabled.
+ * This could cause a problem if software was broken,
+ * and did the enable w/o the disable, but eventually
+ * the in-memory copy will be updated and correct
+ * itself, even in the face of software bugs.
+ */
+ *pd->port_rcvhdrtail_kvaddr = 0;
+ atomic_set_mask(1U <<
+ (INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port),
+ &dd->ipath_rcvctrl);
+ } else
+ atomic_clear_mask(1U <<
+ (INFINIPATH_R_PORTENABLE_SHIFT +
+ pd->port_port), &dd->ipath_rcvctrl);
+ ipath_kput_kreg(pd->port_unit, kr_rcvctrl, dd->ipath_rcvctrl); /* push the updated shadow value to the chip */
+ /* now be sure chip saw it before we return */
+ tval = ipath_kget_kreg64(pd->port_unit, kr_scratch);
+ if (start_stop) {
+ /*
+ * and try to be sure that tail reg update has happened
+ * too. This should in theory interlock with the RXE
+ * changes to the tail register. Don't assign it to
+ * the tail register in memory copy, since we could
+ * overwrite an update by the chip if we did.
+ */
+ tval =
+ ipath_kget_ureg32(pd->port_unit, ur_rcvhdrtail,
+ pd->port_port);
+ }
+ /* always; new head should be equal to new tail; see above */
+ return 0;
+}
+
+/*
+ * This routine is now quite different for user and kernel, because
+ * the kernel uses skb's, for the accelerated network performance
+ * This is the user port version
+ *
+ * allocate the eager TID buffers and program them into infinipath
+ * They are no longer completely contiguous, we do multiple
+ * alloc_pages() calls.
+ *
+ * pd is the user port to set up. On success the port records the chunk
+ * count, buffers per chunk, allocation order, the per-chunk page and
+ * kernel-virtual pointer arrays, and the physical address of the first
+ * chunk, and every eager TID entry of the port has been written.
+ *
+ * Returns 0 on success or -ENOMEM if either pointer array or any chunk
+ * cannot be allocated; in that case everything allocated here is freed
+ * again and both array pointers are left NULL.
+ */
+static int ipath_create_user_egr(ipath_portdata * pd)
+{
+	char *buf;
+	ipath_devdata *dd = &devdata[pd->port_unit];
+	uint64_t *egrbase, egroff, lenvalid;
+	unsigned e, egrcnt, order, egrperchunk, chunk;
+	unsigned long pa, pent;
+
+	egrcnt = dd->ipath_rcvegrcnt;
+	egroff =
+	    dd->ipath_rcvegrbase + pd->port_port * egrcnt * sizeof(*egrbase);
+	egrbase = (uint64_t *) ((char *)(dd->ipath_kregbase) + egroff);
+	_IPATH_VDBG("Allocating %d egr buffers, at chip offset %llx (%p)\n",
+		    egrcnt, egroff, egrbase);
+
+	/*
+	 * to avoid wasting a lot of memory, we allocate 32KB chunks of
+	 * physically contiguous memory, advance through it until used up
+	 * and then allocate more. Of course, we need memory to store
+	 * those extra pointers, now. Started out with 256KB, but under
+	 * heavy memory pressure (creating large files and then copying
+	 * them over NFS while doing lots of MPI jobs), we hit some
+	 * alloc_pages() failures, even though we can sleep... (2.6.10)
+	 * Still get failures at 64K. 32K is the lowest we can go without
+	 * wasting more memory again. It seems likely that the coalescing
+	 * in free_pages, etc. still has issues (as it has had previously
+	 * during 2.6.x development).
+	 */
+	order = get_order(0x8000);
+	egrperchunk = ((1 << order) * PAGE_SIZE) / dd->ipath_rcvegrbufsize;
+	chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
+	pd->port_rcvegrbuf_chunks = chunk;
+	pd->port_rcvegrbufs_perchunk = egrperchunk;
+	pd->port_rcvegrbuf_order = order;
+	pd->port_rcvegrbuf_pages =
+	    vmalloc(chunk * sizeof(pd->port_rcvegrbuf_pages[0]));
+	pd->port_rcvegrbuf_virt =
+	    vmalloc(chunk * sizeof(pd->port_rcvegrbuf_virt[0]));
+	/* both arrays are needed; check (and on failure release) each one */
+	if (!pd->port_rcvegrbuf_pages || !pd->port_rcvegrbuf_virt) {
+		_IPATH_UNIT_ERROR(pd->port_unit,
+				  "Unable to allocate %u EGR buffer array pointers\n",
+				  chunk);
+		if (pd->port_rcvegrbuf_pages) {
+			vfree(pd->port_rcvegrbuf_pages);
+			pd->port_rcvegrbuf_pages = NULL;
+		}
+		if (pd->port_rcvegrbuf_virt) {
+			vfree(pd->port_rcvegrbuf_virt);
+			pd->port_rcvegrbuf_virt = NULL;
+		}
+		return -ENOMEM;
+	}
+	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
+		/*
+		 * GFP_USER, but without GFP_FS, so buffer cache can
+		 * be coalesced (we hope); otherwise, even at order 4, heavy
+		 * filesystem activity makes these fail
+		 */
+		if (!
+		    (pd->port_rcvegrbuf_pages[e] =
+		     alloc_pages(__GFP_WAIT | __GFP_IO, order))) {
+			_IPATH_UNIT_ERROR(pd->port_unit,
+					  "Unable to allocate EGR buffer array %u/%u\n",
+					  e, pd->port_rcvegrbuf_chunks);
+			/*
+			 * give back the chunks we did get; once the pointer
+			 * array is freed nothing else could find them
+			 */
+			while (e != 0)
+				__free_pages(pd->port_rcvegrbuf_pages[--e],
+					     order);
+			vfree(pd->port_rcvegrbuf_pages);
+			pd->port_rcvegrbuf_pages = NULL;
+			vfree(pd->port_rcvegrbuf_virt);
+			pd->port_rcvegrbuf_virt = NULL;
+			return -ENOMEM;
+		}
+	}
+
+	/*
+	 * calculate physical, then phys_to_virt()
+	 * so that we get an address that fits in 64 bits, so we can use
+	 * mmap64 from 32 bit programs on the chip and kernel virtual
+	 * addresses (mmap64 for 32 bit programs on i386 and x86_64
+	 * only has 44 bits of address, because it uses mmap2())
+	 * We do this with the first chunk; We don't need a kernel
+	 * virtually contiguous address to give the user virtually
+	 * contiguous mappings. It just complicates the nopage routine
+	 * a little tiny bit ;)
+	 */
+	buf = page_address(pd->port_rcvegrbuf_pages[0]);
+	pa = virt_to_phys(buf);
+	pd->port_rcvegr_phys = pa;
+
+	/* in words */
+	lenvalid = (dd->ipath_rcvegrbufsize - pd->port_egrskip) >> 2;
+	_IPATH_VDBG
+	    ("port%u egrbuf vaddr %p, cpu %d, egrskip %u, len %llx words\n",
+	     pd->port_port, buf, smp_processor_id(), pd->port_egrskip,
+	     lenvalid);
+	lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
+	lenvalid |= INFINIPATH_RT_VALID;
+
+	/* e runs over all eager entries of the port, chunk over the chunks */
+	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
+		int i, n;
+		struct page *p;
+		p = pd->port_rcvegrbuf_pages[chunk];
+		pa = page_to_phys(p);
+		buf = page_address(p);
+		/*
+		 * stash away for later use, since page_address() lookup
+		 * is not cheap
+		 */
+		pd->port_rcvegrbuf_virt[chunk] = buf;
+		if (pa & ~INFINIPATH_RT_ADDR_MASK)
+			_IPATH_INFO
+			    ("physaddr %lx has more than 40 bits, using only 40!\n",
+			     pa);
+		n = 1 << pd->port_rcvegrbuf_order;
+		for (i = 0; i < n; i++)
+			SetPageReserved(virt_to_page(buf + (i * PAGE_SIZE)));
+
+		/* clear buffer for security, sanity, and, debugging */
+		memset(buf, 0, PAGE_SIZE * n);
+
+		/* program one TID entry per eager buffer in this chunk */
+		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
+			pent =
+			    ((pa +
+			      pd->
+			      port_egrskip) & INFINIPATH_RT_ADDR_MASK) |
+			    lenvalid;
+
+			ipath_kput_memq(pd->port_unit, &egrbase[e], pent);
+			_IPATH_VDBG("egr %u phys %lx val %lx\n", e, pa, pent);
+			pa += dd->ipath_rcvegrbufsize;
+		}
+		yield();	/* don't hog the cpu */
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the eager TID buffers for the kernel port (port 0) and program
+ * them into infinipath.
+ *
+ * This routine is quite different for user and kernel, because the kernel
+ * uses skb's, for the accelerated network performance.  The memory comes
+ * from the network layer skb allocator, and the buffers are either used as
+ * is for things like SMA packets, or passed up to the ipath layered driver
+ * and thence the network layer, being replaced as we do so (see
+ * ipath_kreceive()).
+ *
+ * dd->ipath_port0_skbs is always overwritten: on success it points at an
+ * array of ipath_rcvegrcnt skb pointers, on failure it is set to NULL and
+ * everything allocated here has been released again, so a non-NULL value
+ * can be used as a "ready to use" test by the receive code.
+ *
+ * Returns 0 on success, -ENOMEM if the pointer array or any skb could not
+ * be allocated (the chip is not programmed in that case).
+ */
+static int ipath_create_port0_egr(ipath_portdata * pd)
+{
+	int ret = 0;
+	uint64_t *egrbase, egroff;
+	unsigned e, egrcnt;
+	ipath_devdata *dd;
+	struct sk_buff **skbs;
+
+	dd = &devdata[pd->port_unit];
+	egrcnt = dd->ipath_rcvegrcnt;
+	/* this port's slice of the chip's eager TID array */
+	egroff =
+	    dd->ipath_rcvegrbase + pd->port_port * egrcnt * sizeof(*egrbase);
+	egrbase = (uint64_t *) ((char *)(dd->ipath_kregbase) + egroff);
+	_IPATH_VDBG
+	    ("unit%u Allocating %d egr buffers, at chip offset %llx (%p)\n",
+	     pd->port_unit, egrcnt, egroff, egrbase);
+
+	skbs = vmalloc(sizeof(*dd->ipath_port0_skbs) * egrcnt);
+	if (skbs == NULL)
+		ret = -ENOMEM;
+	else {
+		for (e = 0; e < egrcnt; e++) {
+			/*
+			 * This is a bit tricky in that we allocate
+			 * extra space for 2 bytes of the 14 byte
+			 * ethernet header.  These two bytes are passed
+			 * in the ipath header so the rest of the data
+			 * is word aligned.  We allocate 4 bytes so that the
+			 * data buffer stays word aligned.
+			 * See ipath_kreceive() for more details.
+			 */
+			skbs[e] =
+			    __dev_alloc_skb(dd->ipath_ibmaxlen + 4, GFP_KERNEL);
+			if (skbs[e] == NULL) {
+				_IPATH_UNIT_ERROR(pd->port_unit,
+						  "SKB allocation error for eager TID %u\n",
+						  e);
+				/* undo the skbs we did get ... */
+				while (e != 0)
+					dev_kfree_skb(skbs[--e]);
+				/*
+				 * ... and the array itself, so that we
+				 * never publish a table of freed skbs
+				 */
+				vfree(skbs);
+				skbs = NULL;
+				ret = -ENOMEM;
+				break;
+			}
+			skb_reserve(skbs[e], 4);
+		}
+	}
+	/*
+	 * after loop above, so we can test non-NULL
+	 * to see if ready to use at receive, etc.  On any failure above
+	 * skbs is NULL here.
+	 */
+	dd->ipath_port0_skbs = skbs;
+
+	/*
+	 * have to tell chip each time we init it
+	 * even if we are re-using previous memory.
+	 */
+	if (!ret) {
+		uint64_t lenvalid;	/* in words */
+
+		lenvalid = (dd->ipath_ibmaxlen - pd->port_egrskip) >> 2;
+		lenvalid <<= INFINIPATH_RT_BUFSIZE_SHIFT;
+		lenvalid |= INFINIPATH_RT_VALID;
+		for (e = 0; e < egrcnt; e++) {
+			unsigned long pa, pent;
+
+			pa = virt_to_phys(dd->ipath_port0_skbs[e]->data);
+			pa += pd->port_egrskip;
+			/* only complain once, for the first buffer */
+			if (!e && (pa & ~INFINIPATH_RT_ADDR_MASK))
+				_IPATH_INFO
+				    ("phys addr %lx has more than 40 bits, using only 40!!!\n",
+				     pa);
+			pent = (pa & INFINIPATH_RT_ADDR_MASK) | lenvalid;
+			/*
+			 * don't need this except extreme debugging,
+			 * but leaving to save future typing.
+			 * _IPATH_VDBG("egr[%d] %p <- %lx\n", e, &egrbase[e], pent);
+			 */
+			ipath_kput_memq(pd->port_unit, &egrbase[e], pent);
+		}
+		yield();	/* don't hog the cpu */
+	}
+
+	return ret;
+}
+
+/*
+ * Set up the receive header queue for a port and tell the chip where it is.
+ *
+ * The queue *must* be physically contiguous memory, so it comes from a
+ * single alloc_pages() call, which limits it to what one high-order
+ * allocation can provide.  The allocation is only done when the port does
+ * not have a queue yet; an existing queue is re-used.  Either way the queue
+ * is zeroed and its physical address is written to the chip.
+ *
+ * Returns 0 on success, -ENOMEM if the pages can't be allocated.
+ */
+static int ipath_create_rcvhdrq(ipath_portdata * pd)
+{
+	ipath_devdata *dd = &devdata[pd->port_unit];
+	int idx, nbytes, order, npages;
+	unsigned long phys, base_phys;
+	struct page *first;
+	char *va;
+
+	nbytes = round_up(dd->ipath_rcvhdrcnt
+			  * dd->ipath_rcvhdrentsize *
+			  sizeof(uint32_t), PAGE_SIZE);
+
+	if (pd->port_rcvhdrq)
+		goto program;	/* already have one, just re-init it */
+
+	order = get_order(nbytes);
+	/*
+	 * not using REPEAT isn't viable; at 128KB, we can easily fail
+	 * this.  The problem with REPEAT is we can block here
+	 * "forever".  There isn't an inbetween, unfortunately.
+	 * We could reduce the risk by never freeing the rcvhdrq
+	 * except at unload, but even then, the first time a
+	 * port is used, we could delay for some time...
+	 */
+	first = alloc_pages(GFP_USER, order);
+	if (!first) {
+		_IPATH_UNIT_ERROR(pd->port_unit,
+				  "attempt to allocate order %u memory for port %u rcvhdrq failed\n",
+				  order, pd->port_port);
+		return -ENOMEM;
+	}
+
+	/*
+	 * should use kmap (and later kunmap), even though high mem will
+	 * always be mapped on x86_64, to play it safe, but for some
+	 * bizarre reason these aren't exported symbols...
+	 */
+	pd->port_rcvhdrq = page_address(first);
+	if (!virt_addr_valid(pd->port_rcvhdrq)) {
+		_IPATH_DBG
+		    ("weird, virt_addr_valid false right after alloc_pages\n");
+		_IPATH_DBG("__pa(%p) is %lx, num_physpages %lx\n",
+			   pd->port_rcvhdrq, __pa(pd->port_rcvhdrq),
+			   num_physpages);
+	}
+	pd->port_rcvhdrq_phys = virt_to_phys(pd->port_rcvhdrq);
+	pd->port_rcvhdrq_order = order;
+
+	base_phys = pd->port_rcvhdrq_phys;
+	npages = nbytes >> PAGE_SHIFT;
+	_IPATH_VDBG
+	    ("%d pages at %p (phys %lx) order=%u for port %u rcvhdr Q\n",
+	     npages, pd->port_rcvhdrq, base_phys, pd->port_rcvhdrq_order,
+	     pd->port_port);
+
+	/*
+	 * Mark every page of the queue as reserved, to avoid problems
+	 * when a user process with them mapped then exits, and while
+	 * walking them verify that they really are physically
+	 * contiguous, to be paranoid.  The first page defines the base
+	 * address, so there is nothing to compare it against.
+	 */
+	va = pd->port_rcvhdrq;
+	SetPageReserved(virt_to_page(va));
+	for (idx = 1; idx < npages; idx++) {
+		va += PAGE_SIZE;
+		SetPageReserved(virt_to_page(va));
+		phys = virt_to_phys(va);
+		if (phys == (base_phys + (idx * PAGE_SIZE)))
+			_IPATH_VDBG("pg %d at %p phys %lx\n", idx, va,
+				    phys);
+		else
+			_IPATH_INFO
+			    ("pg %d at %p phys %lx not contiguous\n", idx,
+			     va, phys);
+	}
+
+program:
+	/*
+	 * clear for security, sanity, and/or debugging (each time we
+	 * use/reuse)
+	 */
+	memset(pd->port_rcvhdrq, 0, nbytes);
+
+	/*
+	 * tell chip each time we init it, even if we are re-using previous
+	 * memory (we zero it at process close)
+	 */
+	_IPATH_VDBG("writing port %d rcvhdraddr as %lx\n", pd->port_port,
+		    pd->port_rcvhdrq_phys);
+	ipath_kput_kreg_port(pd->port_unit, kr_rcvhdraddr, pd->port_port,
+			     pd->port_rcvhdrq_phys);
+
+	return 0;
+}
+
+#ifdef _IPATH_EXTRA_DEBUG
+/*
+ * Debug aid: log the caller's label, then read kernel registers 0 through
+ * 0x100 (inclusive) of unit t and print them in hex, four to a row, each
+ * row prefixed with the number of its first register.
+ */
+static void ipath_dump_allregs(char *what, ipath_type t)
+{
+	uint16_t regno = 0;
+
+	_IPATH_DBG("%s\n", what);
+	while (regno <= 0x100) {
+		uint64_t regval = ipath_kget_kreg64(t, regno);
+
+		/* start a new row every fourth register */
+		if ((regno & 3) == 0)
+			printk("\n%3x: ", regno);
+		printk("%16llx ", regval);
+		regno++;
+	}
+	printk("\n");
+}
+#endif /* _IPATH_EXTRA_DEBUG */
+
+/*
+ * Do the actual initialization sequence on the chip.  For the real
+ * hardware, this is done from the init routine called from the PCI
+ * infrastructure.
+ *
+ * t is the unit number, used as the index into devdata[].
+ *
+ * Roughly in order, this: reads the revision and the register/buffer
+ * layout from the chip, allocates the port data array and the port 0
+ * (kernel) port data, clears all TIDs, divides the PIO buffers between
+ * the kernel and the user ports, sets up the in-memory copy of the
+ * PIOavail registers and the port 0 rcvhdrtail location, initializes the
+ * PIO buffers, brings up the IB link, sets the error masks, creates the
+ * port 0 rcvhdrq and eager buffers, enables send and port 0 receive, and
+ * starts the stats timer.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -ENODEV    chip registers read back as all ones, or the unit number is
+ *              too large for the port 0 rcvhdrtail buffer
+ *   -ENOMEM    an allocation failed
+ *   -ENOSYS    chip software version is not IPATH_CHIP_SWVERSION
+ *   -EPERM     ipath_validate_rev() reported a hardware-disabled chip
+ *   -EINVAL    an address register did not read back as written
+ *   -ENETDOWN  the IB link could not be brought up
+ * or whatever ipath_create_rcvhdrq()/ipath_create_port0_egr() returned.
+ * Nothing allocated here is released again on the failure paths.
+ */
+int ipath_init_chip(const ipath_type t)
+{
+	int ret = 0, i;
+	uint32_t val32, kpiobufs;
+	uint64_t val, atmp;
+	volatile uint32_t *piobuf;
+	uint32_t pioincr;
+	ipath_devdata *dd = &devdata[t];
+	ipath_portdata *pd;
+	struct page *vpage;
+	char boardn[32];
+
+	/* first time only, set after static version info */
+	if (!chip_driver_version) {
+		i = strlen(ipath_core_version);
+		chip_driver_version = ipath_core_version + i;
+		chip_driver_size = sizeof ipath_core_version - i;
+	}
+
+	/*
+	 * have to clear shadow copies of registers at init that are not
+	 * otherwise set here, or all kinds of bizarre things happen with
+	 * driver on chip reset
+	 */
+	dd->ipath_rcvhdrsize = 0;
+
+	/*
+	 * don't clear ipath_flags as 8bit mode was set before entering
+	 * this func.  However, we do set the linkstate to unknown
+	 */
+
+	/* so we can watch for a transition */
+	dd->ipath_flags |= IPATH_LINKUNK;
+	dd->ipath_flags &= ~(IPATH_LINKACTIVE | IPATH_LINKARMED | IPATH_LINKDOWN
+			     | IPATH_LINKINIT);
+
+	_IPATH_VDBG("Try to read spc chip revision\n");
+	dd->ipath_revision = ipath_kget_kreg64(t, kr_revision);
+
+	/*
+	 * set up fundamental info we need to use the chip; we assume if
+	 * the revision reg and these regs are OK, we don't need to special
+	 * case the rest
+	 */
+	dd->ipath_sregbase = ipath_kget_kreg32(t, kr_sendregbase);
+	dd->ipath_cregbase = ipath_kget_kreg32(t, kr_counterregbase);
+	dd->ipath_uregbase = ipath_kget_kreg32(t, kr_userregbase);
+	_IPATH_VDBG("ipath_kregbase %p, sendbase %x usrbase %x, cntrbase %x\n",
+		    dd->ipath_kregbase, dd->ipath_sregbase, dd->ipath_uregbase,
+		    dd->ipath_cregbase);
+	/* all ones in the low 32 bits is taken to mean the read failed */
+	if ((dd->ipath_revision & 0xffffffff) == 0xffffffff ||
+	    (dd->ipath_sregbase & 0xffffffff) == 0xffffffff ||
+	    (dd->ipath_cregbase & 0xffffffff) == 0xffffffff ||
+	    (dd->ipath_uregbase & 0xffffffff) == 0xffffffff) {
+		_IPATH_UNIT_ERROR(t,
+				  "Register read failures from chip, giving up initialization\n");
+		ret = -ENODEV;
+		goto done;
+	}
+
+	/* clear the initial reset flag, in case first driver load */
+	ipath_kput_kreg(t, kr_errorclear, INFINIPATH_E_RESET);
+
+	/*
+	 * use the configured port count if one was given and the chip has
+	 * that many, otherwise everything the chip has
+	 */
+	dd->ipath_portcnt = ipath_kget_kreg32(t, kr_portcnt);
+	if (!infinipath_cfgports)
+		dd->ipath_cfgports = dd->ipath_portcnt;
+	else if (infinipath_cfgports <= dd->ipath_portcnt) {
+		dd->ipath_cfgports = infinipath_cfgports;
+		_IPATH_DBG("Configured to use %u ports out of %u in chip\n",
+			   dd->ipath_cfgports, dd->ipath_portcnt);
+	} else {
+		dd->ipath_cfgports = dd->ipath_portcnt;
+		_IPATH_DBG
+		    ("Tried to configured to use %u ports; chip only supports %u\n",
+		     infinipath_cfgports, dd->ipath_portcnt);
+	}
+	dd->ipath_pd = kmalloc(sizeof(*dd->ipath_pd) * dd->ipath_cfgports,
+			       GFP_KERNEL);
+	if (!dd->ipath_pd) {
+		_IPATH_UNIT_ERROR(t,
+				  "Unable to allocate portdata array, failing\n");
+		ret = -ENOMEM;
+		goto done;
+	}
+	memset(dd->ipath_pd, 0, sizeof(*dd->ipath_pd) * dd->ipath_cfgports);
+
+	dd->ipath_lastegrheads = kmalloc(sizeof(*dd->ipath_lastegrheads)
+					 * dd->ipath_cfgports, GFP_KERNEL);
+	dd->ipath_lastrcvhdrqtails = kmalloc(sizeof(*dd->ipath_lastrcvhdrqtails)
+					     * dd->ipath_cfgports, GFP_KERNEL);
+	if (!dd->ipath_lastegrheads || !dd->ipath_lastrcvhdrqtails) {
+		_IPATH_UNIT_ERROR(t,
+				  "Unable to allocate head arrays, failing\n");
+		ret = -ENOMEM;
+		goto done;
+	}
+	memset(dd->ipath_lastrcvhdrqtails, 0,
+	       sizeof(*dd->ipath_lastrcvhdrqtails)
+	       * dd->ipath_cfgports);
+	memset(dd->ipath_lastegrheads, 0, sizeof(*dd->ipath_lastegrheads)
+	       * dd->ipath_cfgports);
+
+	dd->ipath_pd[0] = kmalloc(sizeof(ipath_portdata), GFP_KERNEL);
+	if (!dd->ipath_pd[0]) {
+		_IPATH_UNIT_ERROR(t,
+				  "Unable to allocate portdata for port 0, failing\n");
+		ret = -ENOMEM;
+		goto done;
+	}
+	memset(dd->ipath_pd[0], 0, sizeof(ipath_portdata));
+
+	pd = dd->ipath_pd[0];
+	pd->port_unit = t;
+	pd->port_port = 0;
+	pd->port_cnt = 1;
+	/* The port 0 pkey table is used by the layer interface. */
+	pd->port_pkeys[0] = IPS_DEFAULT_P_KEY;
+
+	dd->ipath_rcvtidcnt = ipath_kget_kreg32(t, kr_rcvtidcnt);
+	dd->ipath_rcvtidbase = ipath_kget_kreg32(t, kr_rcvtidbase);
+	dd->ipath_rcvegrcnt = ipath_kget_kreg32(t, kr_rcvegrcnt);
+	dd->ipath_rcvegrbase = ipath_kget_kreg32(t, kr_rcvegrbase);
+	dd->ipath_palign = ipath_kget_kreg32(t, kr_pagealign);
+	dd->ipath_piobufbase = ipath_kget_kreg32(t, kr_sendpiobufbase);
+	dd->ipath_piosize = ipath_kget_kreg32(t, kr_sendpiosize);
+	dd->ipath_ibmtu = 4096;	/* default to largest legal MTU */
+	dd->ipath_piobcnt = ipath_kget_kreg32(t, kr_sendpiobufcnt);
+
+	_IPATH_VDBG
+	    ("Revision %llx (PCI %x), %u ports, %u tids, %u egrtids, %u piobufs\n",
+	     dd->ipath_revision, dd->ipath_pcirev, dd->ipath_portcnt,
+	     dd->ipath_rcvtidcnt, dd->ipath_rcvegrcnt, dd->ipath_piobcnt);
+
+	if (((dd->ipath_revision >> INFINIPATH_R_SOFTWARE_SHIFT) & INFINIPATH_R_SOFTWARE_MASK) != IPATH_CHIP_SWVERSION) {	/* >= maybe, someday */
+		/*
+		 * the cast covers the whole masked field, so that the
+		 * argument really is an int for %d whatever the type of
+		 * the mask constant
+		 */
+		_IPATH_UNIT_ERROR(t,
+				  "Driver only handles version %d, chip swversion is %d (%llx), failng\n",
+				  IPATH_CHIP_SWVERSION,
+				  (int)((dd->ipath_revision >>
+					 INFINIPATH_R_SOFTWARE_SHIFT) &
+					INFINIPATH_R_SOFTWARE_MASK),
+				  dd->ipath_revision);
+		ret = -ENOSYS;
+		goto done;
+	}
+	dd->ipath_majrev = (uint8_t) ((dd->ipath_revision >>
+				       INFINIPATH_R_CHIPREVMAJOR_SHIFT) &
+				      INFINIPATH_R_CHIPREVMAJOR_MASK);
+	dd->ipath_minrev =
+	    (uint8_t) ((dd->
+			ipath_revision >> INFINIPATH_R_CHIPREVMINOR_SHIFT) &
+		       INFINIPATH_R_CHIPREVMINOR_MASK);
+	dd->ipath_boardrev =
+	    (uint8_t) ((dd->
+			ipath_revision >> INFINIPATH_R_BOARDID_SHIFT) &
+		       INFINIPATH_R_BOARDID_MASK);
+
+	ipath_get_boardname(t, boardn, sizeof boardn);
+
+	{
+		/* appended after the static part of ipath_core_version */
+		snprintf(chip_driver_version, chip_driver_size,
+			 "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, SW Compat %u\n",
+			 IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
+			 (unsigned)((dd->ipath_revision >>
+				     INFINIPATH_R_ARCH_SHIFT) &
+				    INFINIPATH_R_ARCH_MASK), dd->ipath_majrev,
+			 dd->ipath_minrev, dd->ipath_pcirev,
+			 (unsigned)((dd->ipath_revision >>
+				     INFINIPATH_R_SOFTWARE_SHIFT) &
+				    INFINIPATH_R_SOFTWARE_MASK));
+
+	}
+
+	_IPATH_DBG("%s", chip_driver_version);
+
+	/*
+	 * we ignore most issues after reporting them, but have to specially
+	 * handle hardware-disabled chips.
+	 */
+	if(ipath_validate_rev(dd) == 2) {
+		ret = -EPERM;	/* unique error, known to infinipath_init_one() */
+		goto done;
+	}
+
+	/*
+	 * zero all the TID entries at startup.  We do this for sanity,
+	 * in case of a previous driver crash of some kind, and also
+	 * because the chip powers up with these memories in an unknown
+	 * state.  Use portcnt, not cfgports, since this is for the full chip,
+	 * not for current (possibly different) configuration value
+	 * Chip Errata bug 6447
+	 */
+	for (val32 = 0; val32 < dd->ipath_portcnt; val32++)
+		ipath_clear_tids(t, val32);
+
+	dd->ipath_rcvhdrentsize = IPATH_RCVHDRENTSIZE;
+	/* we could bump this
+	 * to allow for full rcvegrcnt + rcvtidcnt, but then it no
+	 * longer nicely fits power of two, and since we now use
+	 * alloc_pages, the rest would be wasted.
+	 */
+	dd->ipath_rcvhdrcnt = dd->ipath_rcvegrcnt;
+	/*
+	 * setup offset of last valid entry in rcvhdrq, for various tests, to
+	 * avoid calculating each time we need it
+	 */
+	dd->ipath_hdrqlast =
+	    dd->ipath_rcvhdrentsize * (dd->ipath_rcvhdrcnt - 1);
+	ipath_kput_kreg(t, kr_rcvhdrentsize, dd->ipath_rcvhdrentsize);
+	ipath_kput_kreg(t, kr_rcvhdrcnt, dd->ipath_rcvhdrcnt);
+	/*
+	 * not in ipath_rcvhdrsize, so user programs can set differently, but
+	 * so any early packets see the default size.
+	 */
+	ipath_kput_kreg(t, kr_rcvhdrsize, IPATH_DFLT_RCVHDRSIZE);
+
+	/*
+	 * we "know" that this works
+	 * out OK.  It's actually a bit more than we need, but 2048+64 isn't
+	 * quite enough for full size, and we want the +N to be a power of 2
+	 * to give us reasonable alignment and fit within page_alloc()'ed
+	 * memory
+	 */
+	dd->ipath_rcvegrbufsize = dd->ipath_piosize;
+
+	/*
+	 * the min() check here is currently a nop, but it may not always be,
+	 * depending on just how we do ipath_rcvegrbufsize
+	 */
+	dd->ipath_ibmaxlen = min(dd->ipath_piosize, dd->ipath_rcvegrbufsize);
+	dd->ipath_init_ibmaxlen = dd->ipath_ibmaxlen;
+
+	/*
+	 * set up the shadow copies of the piobufavail registers, which
+	 * we compare against the chip registers for now, and the in
+	 * memory DMA'ed copies of the registers.  This has to be done
+	 * early, before we calculate lastport, etc.
+	 */
+	val = dd->ipath_piobcnt;
+	/*
+	 * calc number of pioavail registers, and save it; we have 2 bits
+	 * per buffer
+	 */
+	dd->ipath_pioavregs =
+	    round_up(val, sizeof(uint64_t) * _BITS_PER_BYTE / 2) /
+	    (sizeof(uint64_t) * _BITS_PER_BYTE / 2);
+	/* never use more registers than the shadow array can hold */
+	if (dd->ipath_pioavregs >
+	    (sizeof(dd->ipath_pioavailshadow) /
+	     sizeof(dd->ipath_pioavailshadow[0]))) {
+		dd->ipath_pioavregs =
+		    sizeof(dd->ipath_pioavailshadow) /
+		    sizeof(dd->ipath_pioavailshadow[0]);
+		dd->ipath_piobcnt = dd->ipath_pioavregs * sizeof(uint64_t) * _BITS_PER_BYTE >> 1;	/* 2 bits/reg */
+		_IPATH_INFO
+		    ("Warning: %lld piobufs is too many to fit in shadow, only using %d\n",
+		     val, dd->ipath_piobcnt);
+	}
+
+	if (!infinipath_kpiobufs) {
+		/* have to have at least one, for SMA */
+		kpiobufs = infinipath_kpiobufs = 1;
+	} else if (dd->ipath_piobcnt <
+		   (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT)) {
+		_IPATH_INFO
+		    ("Too few PIO buffers (%u) for %u ports to have %u each!\n",
+		     dd->ipath_piobcnt, dd->ipath_cfgports,
+		     IPATH_MIN_USER_PORT_BUFCNT);
+		kpiobufs = 1;	/* reserve just the minimum for SMA/ether */
+	} else
+		kpiobufs = infinipath_kpiobufs;
+
+	if (kpiobufs >
+	    (dd->ipath_piobcnt -
+	     (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT))) {
+		i = dd->ipath_piobcnt -
+		    (dd->ipath_cfgports * IPATH_MIN_USER_PORT_BUFCNT);
+		if (i < 0)
+			i = 0;
+		_IPATH_INFO
+		    ("Allocating %d PIO bufs for kernel leaves too few for %d user ports (%d each); using %u\n",
+		     kpiobufs, dd->ipath_cfgports - 1,
+		     IPATH_MIN_USER_PORT_BUFCNT, i);
+		/*
+		 * shouldn't change infinipath_kpiobufs, because could be
+		 * different for different devices...
+		 */
+		kpiobufs = i;
+	}
+	/*
+	 * the user ports (cfgports - 1 of them) share the buffers below
+	 * ipath_lastport_piobuf equally; whatever doesn't divide evenly
+	 * goes to the kernel as well
+	 */
+	dd->ipath_lastport_piobuf = dd->ipath_piobcnt - kpiobufs;
+	dd->ipath_pbufsport = dd->ipath_cfgports > 1 ?
+	    dd->ipath_lastport_piobuf / (dd->ipath_cfgports - 1) : 0;
+	val32 = dd->ipath_lastport_piobuf -
+	    (dd->ipath_pbufsport * (dd->ipath_cfgports - 1));
+	if (val32 > 0) {
+		_IPATH_DBG
+		    ("allocating %u pbufs/port leaves %u unused, add to kernel\n",
+		     dd->ipath_pbufsport, val32);
+		dd->ipath_lastport_piobuf -= val32;
+		_IPATH_DBG("%u pbufs/port leaves %u unused, add to kernel\n",
+			   dd->ipath_pbufsport, val32);
+	}
+	dd->ipath_lastpioindex = dd->ipath_lastport_piobuf;
+	_IPATH_VDBG
+	    ("%d PIO bufs %u - %u, %u each for %u user ports\n",
+	     kpiobufs, dd->ipath_lastport_piobuf, dd->ipath_piobcnt, dd->ipath_pbufsport,
+	     dd->ipath_cfgports - 1);
+
+	/*
+	 * this has to be page aligned, and on a page of its own, so we
+	 * can map it into user space.  We also use it to give processes
+	 * a copy of ipath_statusp, on a separate cacheline, followed by
+	 * a copy of the freeze error string, if it's happened.  Might also
+	 * use that space for other things.
+	 *
+	 * NOTE(review): val is already a byte count (a multiple of
+	 * 2 * PAGE_SIZE), so multiplying it by sizeof(uint64_t) below looks
+	 * like it allocates 8 times what is needed - confirm before changing.
+	 */
+	val = round_up(2 * L1_CACHE_BYTES + sizeof(*dd->ipath_statusp) +
+		       dd->ipath_pioavregs * sizeof(uint64_t), 2 * PAGE_SIZE);
+	if (!(dd->ipath_pioavailregs_dma = kmalloc(val * sizeof(uint64_t),
+						   GFP_KERNEL))) {
+		_IPATH_UNIT_ERROR(t,
+				  "failed to allocate PIOavail reg area in memory\n");
+		ret = -ENOMEM;
+		goto done;
+	}
+	/*
+	 * remember what kmalloc returned in __ipath_pioavailregs_base, and
+	 * use the first page boundary inside it as the working pointer
+	 */
+	if ((PAGE_SIZE - 1) & (uint64_t) dd->ipath_pioavailregs_dma) {
+		dd->__ipath_pioavailregs_base = dd->ipath_pioavailregs_dma;
+		dd->ipath_pioavailregs_dma = (uint64_t *)
+		    round_up((uint64_t) dd->ipath_pioavailregs_dma, PAGE_SIZE);
+	} else
+		dd->__ipath_pioavailregs_base = dd->ipath_pioavailregs_dma;
+	/*
+	 * zero initial, since whole thing mapped
+	 * into user space, and don't want info leak, or confusing garbage
+	 */
+	memset((void *)dd->ipath_pioavailregs_dma, 0, PAGE_SIZE);
+
+	/*
+	 * we really want L2 cache aligned, but for current CPUs of interest,
+	 * they are the same.
+	 *
+	 * NOTE(review): "& ~L1_CACHE_BYTES" clears only a single bit; an
+	 * alignment mask would be ~(L1_CACHE_BYTES - 1).  Left as is,
+	 * because the resulting offset is part of the page layout that is
+	 * mapped into user space - confirm what user code expects first.
+	 */
+	dd->ipath_statusp = (uint64_t *) ((char *)dd->ipath_pioavailregs_dma +
+					  ((2 * L1_CACHE_BYTES +
+					    dd->ipath_pioavregs *
+					    sizeof(uint64_t)) &
+					   ~L1_CACHE_BYTES));
+	/* copy the current value now that it's really allocated */
+	*dd->ipath_statusp = dd->_ipath_status;
+	/*
+	 * setup buffer to hold freeze msg, accessible to apps, following
+	 * statusp
+	 */
+	dd->ipath_freezemsg = (char *)&dd->ipath_statusp[1];
+	/* and its length */
+	dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
+
+	atmp = virt_to_phys(dd->ipath_pioavailregs_dma);
+	/* stash physical address for user progs */
+	dd->ipath_pioavailregs_phys = atmp;
+	(void)ipath_kput_kreg(t, kr_sendpioavailaddr, atmp);
+	/*
+	 * this is to detect s/w errors, which the h/w works around by
+	 * ignoring the low 6 bits of address, if it wasn't aligned.
+	 */
+	val = ipath_kget_kreg64(t, kr_sendpioavailaddr);
+	if (val != atmp) {
+		_IPATH_UNIT_ERROR(t,
+				  "Catastrophic software error, SendPIOAvailAddr written as %llx, read back as %llx\n",
+				  atmp, val);
+		ret = -EINVAL;
+		goto done;
+	}
+
+	/*
+	 * each unit gets 64 bytes of ipath_port0_rcvhdrtail.  Must stop
+	 * here if this unit doesn't fit: the offset is used below, and ret
+	 * would otherwise be overwritten by the later setup calls.
+	 */
+	if (t * 64 > (sizeof(ipath_port0_rcvhdrtail) - 64)) {
+		_IPATH_UNIT_ERROR(t,
+				  "unit %u too large for port 0 rcvhdrtail buffer size\n",
+				  t);
+		ret = -ENODEV;
+		goto done;
+	}
+
+	/*
+	 * kernel modules loaded into vmalloc'ed memory,
+	 * verify that when we assume that, map to phys, and back to virt,
+	 * that we get the right contents, so we did the mapping right.
+	 */
+	vpage = vmalloc_to_page((void *)ipath_port0_rcvhdrtail);
+	/* a NULL page must be caught as well, before page_to_phys() */
+	if (!vpage || vpage == NOPAGE_SIGBUS || vpage == NOPAGE_OOM) {
+		_IPATH_UNIT_ERROR(t, "vmalloc_to_page for rcvhdrtail fails!\n");
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	/*
+	 * 64 is driven by cache line size, and also by chip requirement
+	 * that low 6 bits be 0
+	 */
+	val = page_to_phys(vpage) + t * 64;
+
+	/* verify that the alignment requirement was met */
+	ipath_kput_kreg_port(t, kr_rcvhdrtailaddr, 0, val);
+	atmp = ipath_kget_kreg64_port(t, kr_rcvhdrtailaddr, 0);
+	if (val != atmp) {
+		_IPATH_UNIT_ERROR(t,
+				  "Catastrophic software error, RcvHdrTailAddr0 written as %llx, read back as %llx from %x\n",
+				  val, atmp, kr_rcvhdrtailaddr);
+		ret = -EINVAL;
+		goto done;
+	}
+	/* so we can get current tail in ipath_kreceive(), per chip */
+	dd->ipath_hdrqtailptr =
+	    &ipath_port0_rcvhdrtail[t *
+				    (64 / sizeof(ipath_port0_rcvhdrtail[0]))];
+
+	ipath_kput_kreg(t, kr_rcvbthqp, IPATH_KD_QP);
+
+	/*
+	 * make sure we are not in freeze, and PIO send enabled, so
+	 * writes to pbc happen
+	 */
+	ipath_kput_kreg(t, kr_hwerrmask, 0ULL);
+	ipath_kput_kreg(t, kr_hwerrclear, ~0ULL);
+	ipath_kput_kreg(t, kr_control, 0ULL);
+	ipath_kput_kreg(t, kr_sendctrl, INFINIPATH_S_PIOENABLE);
+
+	/*
+	 * write the pbc of each buffer, to be sure it's initialized, then
+	 * cancel all the buffers, and also abort any packets that might
+	 * have been in flight for some reason (the latter is for driver
+	 * unload/reload, but isn't a bad idea at first init).
+	 * PIO send isn't enabled at this point, so there is no danger
+	 * of sending these out on the wire.
+	 * Chip Errata bug 6610
+	 */
+	piobuf = (uint32_t *) (((char *)(dd->ipath_kregbase)) +
+			       dd->ipath_piobufbase);
+	pioincr = devdata[t].ipath_palign / sizeof(*piobuf);
+	for (i = 0; i < dd->ipath_piobcnt; i++) {
+		*piobuf = 16;	/* reasonable word count, just to init pbc */
+		piobuf += pioincr;
+	}
+	/* self-clearing */
+	ipath_kput_kreg(t, kr_sendctrl, INFINIPATH_S_ABORT);
+
+	/*
+	 * before error clears, since we expect serdes pll errors during
+	 * this, the first time after reset
+	 */
+	if (ipath_bringup_link(t)) {
+		_IPATH_INFO("Failed to bringup IB link\n");
+		ret = -ENETDOWN;
+		goto done;
+	}
+
+	/*
+	 * clear any "expected" hwerrs from reset and/or initialization
+	 * clear any that aren't enabled (at least this once), and then
+	 * set the enable mask
+	 */
+	ipath_clear_init_hwerrs(t);
+	ipath_kput_kreg(t, kr_hwerrclear, ~0ULL);
+	ipath_kput_kreg(t, kr_hwerrmask, dd->ipath_hwerrmask);
+
+	dd->ipath_maskederrs = dd->ipath_ignorederrs;
+	ipath_kput_kreg(t, kr_errorclear, ~0ULL);	/* clear all */
+	/* enable errors that are masked, at least this first time. */
+	ipath_kput_kreg(t, kr_errormask, ~dd->ipath_maskederrs);
+	/* clear any interrupts up to this point (ints still not enabled) */
+	ipath_kput_kreg(t, kr_intclear, ~0ULL);
+
+	ipath_stats.sps_lid[t] = dd->ipath_lid;
+
+	/*
+	 * allocate the shadow TID array, so we can ipath_munlock
+	 * previous entries.  It may make more sense to move the pageshadow
+	 * to the port data structure, so we only allocate memory for ports
+	 * actually in use, since we are at 8k per port, now.
+	 * Failure here is reported but is not fatal to the init.
+	 */
+	dd->ipath_pageshadow = (struct page **)
+	    vmalloc(dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+		    sizeof(struct page *));
+	if (!dd->ipath_pageshadow)
+		_IPATH_UNIT_ERROR(t,
+				  "failed to allocate shadow page * array, no expected sends!\n");
+	else
+		memset(dd->ipath_pageshadow, 0,
+		       dd->ipath_cfgports * dd->ipath_rcvtidcnt *
+		       sizeof(struct page *));
+
+	/* set up the port 0 (kernel) rcvhdr q and egr TIDs */
+	if (!(ret = ipath_create_rcvhdrq(dd->ipath_pd[0])))
+		ret = ipath_create_port0_egr(dd->ipath_pd[0]);
+	if (ret)
+		_IPATH_UNIT_ERROR(t,
+				  "failed to allocate port 0 (kernel) rcvhdrq and/or egr bufs\n");
+	else {
+		init_waitqueue_head(&ipath_sma_wait);
+		init_waitqueue_head(&ipath_sma_state_wait);
+
+		ipath_kput_kreg(pd->port_unit, kr_rcvctrl, dd->ipath_rcvctrl);
+
+		ipath_kput_kreg(t, kr_rcvbthqp, IPATH_KD_QP);
+
+		/* Enable PIO send, and update of PIOavail regs to memory. */
+		dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE
+		    | INFINIPATH_S_PIOBUFAVAILUPD;
+		ipath_kput_kreg(t, kr_sendctrl, dd->ipath_sendctrl);
+
+		/*
+		 * enable port 0 receive, and receive interrupt
+		 * other ports done as user opens and inits them
+		 */
+		dd->ipath_rcvctrl = INFINIPATH_R_TAILUPD |
+		    (1ULL << INFINIPATH_R_PORTENABLE_SHIFT) |
+		    (1ULL << INFINIPATH_R_INTRAVAIL_SHIFT);
+		ipath_kput_kreg(t, kr_rcvctrl, dd->ipath_rcvctrl);
+
+		/*
+		 * now ready for use
+		 * this should be cleared whenever we detect a reset, or
+		 * initiate one.
+		 */
+		dd->ipath_flags |= IPATH_INITTED;
+
+		/*
+		 * init our shadow copies of head from tail values, and write
+		 * head values to match
+		 */
+		val32 = ipath_kget_ureg32(t, ur_rcvegrindextail, 0);
+		(void)ipath_kput_ureg(t, ur_rcvegrindexhead, val32, 0);
+		dd->ipath_port0head = ipath_kget_ureg32(t, ur_rcvhdrtail, 0);
+		(void)ipath_kput_ureg(t, ur_rcvhdrhead, dd->ipath_port0head, 0);
+
+		/*
+		 * by now pioavail updates to memory should have occurred,
+		 * so copy them into our working/shadow registers; this is
+		 * in case something went wrong with abort, but mostly to
+		 * get the initial values of the generation bit correct
+		 */
+		for (i = 0; i < dd->ipath_pioavregs; i++) {
+			/*
+			 * Chip Errata bug 6641; even and odd qwords>3
+			 * are swapped
+			 */
+			if (i > 3) {
+				if (i & 1)
+					dd->ipath_pioavailshadow[i] =
+					    dd->ipath_pioavailregs_dma[i - 1];
+				else
+					dd->ipath_pioavailshadow[i] =
+					    dd->ipath_pioavailregs_dma[i + 1];
+			} else
+				dd->ipath_pioavailshadow[i] =
+				    dd->ipath_pioavailregs_dma[i];
+		}
+		/* can get counters, stats, etc. */
+		dd->ipath_flags |= IPATH_PRESENT;
+	}
+
+	/*
+	 * cause retrigger of pending interrupts ignored during init, even if
+	 * we had errors
+	 */
+	ipath_kput_kreg(t, kr_intclear, 0ULL);
+
+	/*
+	 * set up stats retrieval timer, even if we had errors in last
+	 * portion of setup
+	 */
+	init_timer(&dd->ipath_stats_timer);
+	dd->ipath_stats_timer.function = ipath_get_faststats;
+	dd->ipath_stats_timer.data = (unsigned long)t;
+	/* every 5 seconds; */
+	dd->ipath_stats_timer.expires = jiffies + 5 * HZ;
+	/* takes ~16 seconds to overflow at full IB 4x bandwidth */
+	add_timer(&dd->ipath_stats_timer);
+
+	dd->ipath_stats_timer_active = 1;
+
+done:
+	if (!ret) {
+		ipath_get_guid(t);
+		*dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
+		if (!ipath_sma_data_spare) {
+			/* first init, setup SMA data structs */
+			ipath_sma_data_spare =
+			    ipath_sma_data_bufs[IPATH_NUM_SMAPKTS];
+			for (i = 0; i < IPATH_NUM_SMAPKTS; i++)
+				ipath_sma_data[i].buf = ipath_sma_data_bufs[i];
+		}
+		/*
+		 * sps_nports is a global, so, we set it to the highest
+		 * number of ports of any of the chips we find; we never
+		 * decrement it, at least for now.
+		 */
+		if (dd->ipath_cfgports > ipath_stats.sps_nports)
+			ipath_stats.sps_nports = dd->ipath_cfgports;
+	}
+	/* if ret is non-zero, we probably should do some cleanup here... */
+	return ret;
+}
+
+/*
+ * Poll kernel register reg_id of unit t until every bit in bits_to_wait_for
+ * is set, yielding the cpu between reads.
+ *
+ * *valp always receives the last value read, so the caller has something
+ * to look at even on failure.
+ *
+ * Returns 0 once all the bits are set, or -ENODEV if that has not happened
+ * by the time the cycle counter has advanced by 0x10000000.
+ */
+int ipath_waitfor_complete(const ipath_type t, ipath_kreg reg_id,
+			   uint64_t bits_to_wait_for, uint64_t * valp)
+{
+	uint64_t timeout, lastval, val;
+
+	lastval = ipath_kget_kreg64(t, reg_id);
+	timeout = get_cycles() + 0x10000000ULL;	/* <- ridiculously long time */
+	do {
+		val = ipath_kget_kreg64(t, reg_id);
+		*valp = val;	/* so they have something, even on failures. */
+		if ((val & bits_to_wait_for) == bits_to_wait_for)
+			return 0;
+		if (val != lastval) {
+			_IPATH_VDBG
+			    ("Changed from %llx to %llx, waiting for %llx bits\n",
+			     lastval, val, bits_to_wait_for);
+			/* report each change once, relative to the last one */
+			lastval = val;
+		}
+		yield();
+		if (get_cycles() > timeout) {
+			_IPATH_DBG
+			    ("Didn't get bits %llx in register 0x%x, got %llx\n",
+			     bits_to_wait_for, reg_id, *valp);
+			/* negative errno, like the rest of the driver */
+			return -ENODEV;
+		}
+	} while (1);
+}
+
+/*
+ * like ipath_waitfor_complete(), but we wait for the CMDVALID bit of the
+ * mdio register to go away, indicating the last command has completed.
+ * It doesn't return data.
+ *
+ * Returns 0 once IPATH_MDIO_CMDVALID is clear, or -ENODEV if it is still
+ * set by the time the cycle counter has advanced by 0x10000000.
+ */
+int ipath_waitfor_mdio_cmdready(const ipath_type t)
+{
+	uint64_t timeout;
+	uint64_t val;
+
+	timeout = get_cycles() + 0x10000000ULL;	/* <- ridiculously long time */
+	do {
+		val = ipath_kget_kreg64(t, kr_mdio);
+		if (!(val & IPATH_MDIO_CMDVALID))
+			return 0;
+		yield();
+		if (get_cycles() > timeout) {
+			_IPATH_DBG("CMDVALID stuck in mdio reg? (%llx)\n", val);
+			/* negative errno, like the rest of the driver */
+			return -ENODEV;
+		}
+	} while (1);
+}
+
+/*
+ * Request an IB link state change on unit t, by updating the shadow copy
+ * of the IBC control register and writing it to the chip.
+ *
+ * which is either one of the link commands INFINIPATH_IBCC_LINKCMD_INIT,
+ * _ARMED or _ACTIVE, or a linkinitcmd value that the caller has already
+ * shifted into place with INFINIPATH_IBCC_LINKINITCMD_SHIFT (down,
+ * disable, sleep, etc.).  Anything else is reported as an unknown
+ * transition, but the register is still written.
+ *
+ * The IPATH_LINK_TOARMED / IPATH_LINK_TOACTIVE / IPATH_LINK_SLEEPING and
+ * IPATH_LINKDOWN bits in dd->ipath_flags are updated to match the request.
+ */
+void ipath_set_ib_lstate(const ipath_type t, int which)
+{
+	ipath_devdata *dd = &devdata[t];
+	char *what;
+
+	/*
+	 * For all cases, we'll either be setting a new value of linkcmd, or
+	 * we want it to be NOP, so clear it here.
+	 * Similarly, we want the linkinitcmd to be NOP for everything
+	 * other than explicitly changing linkinitcmd,
+	 * and for that case, we want to first clear any existing bits
+	 */
+	dd->ipath_ibcctrl &= ~((INFINIPATH_IBCC_LINKCMD_MASK <<
+				INFINIPATH_IBCC_LINKCMD_SHIFT) |
+			       (INFINIPATH_IBCC_LINKINITCMD_MASK <<
+				INFINIPATH_IBCC_LINKINITCMD_SHIFT));
+
+	if (which == INFINIPATH_IBCC_LINKCMD_INIT) {
+		dd->ipath_flags &= ~(IPATH_LINK_TOARMED | IPATH_LINK_TOACTIVE
+				     | IPATH_LINK_SLEEPING);
+		/* so we can watch for a transition */
+		dd->ipath_flags |= IPATH_LINKDOWN;
+		what = "INIT";
+	} else if (which == INFINIPATH_IBCC_LINKCMD_ARMED) {
+		dd->ipath_flags |= IPATH_LINK_TOARMED;
+		dd->ipath_flags &= ~(IPATH_LINK_TOACTIVE | IPATH_LINK_SLEEPING);
+		/*
+		 * this is mainly for loopback testing.  If INITCMD is
+		 * NOP or SLEEP, the link won't ever come up in loopback...
+		 */
+		if (!(dd->ipath_flags & (IPATH_LINKINIT | IPATH_LINKARMED |
+					 IPATH_LINKACTIVE))) {
+			_IPATH_SMADBG
+			    ("going to armed, but link not yet up, set POLL\n");
+			dd->ipath_ibcctrl |=
+			    INFINIPATH_IBCC_LINKINITCMD_POLL <<
+			    INFINIPATH_IBCC_LINKINITCMD_SHIFT;
+		}
+		what = "ARMED";
+	} else if (which == INFINIPATH_IBCC_LINKCMD_ACTIVE) {
+		dd->ipath_flags |= IPATH_LINK_TOACTIVE;
+		dd->ipath_flags &= ~(IPATH_LINK_TOARMED | IPATH_LINK_SLEEPING);
+		what = "ACTIVE";
+	} else if (which & (INFINIPATH_IBCC_LINKINITCMD_MASK << INFINIPATH_IBCC_LINKINITCMD_SHIFT)) {	/* down, disable, etc. */
+		dd->ipath_flags &= ~(IPATH_LINK_TOARMED | IPATH_LINK_TOACTIVE);
+		/*
+		 * which holds the linkinitcmd already shifted into place,
+		 * so shift it down before masking; masking first with the
+		 * unshifted mask would never see the field.
+		 */
+		if (((which >> INFINIPATH_IBCC_LINKINITCMD_SHIFT) &
+		     INFINIPATH_IBCC_LINKINITCMD_MASK) ==
+		    INFINIPATH_IBCC_LINKINITCMD_SLEEP) {
+			dd->ipath_flags |= IPATH_LINK_SLEEPING | IPATH_LINKDOWN;
+		} else
+			dd->ipath_flags |= IPATH_LINKDOWN;
+		dd->ipath_ibcctrl |=
+		    which & (INFINIPATH_IBCC_LINKINITCMD_MASK <<
+			     INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+		what = "DOWN";
+	} else {
+		what = "UNKNOWN";
+		_IPATH_INFO("Unknown link transition requested (which=0x%x)\n",
+			    which);
+	}
+
+	/* the low bits of which go into the linkcmd field in every case */
+	dd->ipath_ibcctrl |= ((uint64_t) which & INFINIPATH_IBCC_LINKCMD_MASK)
+	    << INFINIPATH_IBCC_LINKCMD_SHIFT;
+
+	_IPATH_SMADBG("Trying to move unit %u to %s, current ltstate is %s\n",
+		      t, what, ipath_ibcstatus_str[(ipath_kget_kreg64(t, kr_ibcstatus)
+			>> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)
+			& INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
+	ipath_kput_kreg(t, kr_ibcctrl, dd->ipath_ibcctrl);
+}
+
+/*
+ * Configure the IBC (IB control block) of unit t and bring up the SerDes.
+ *
+ * The IBC is held in reset while its control register is programmed; it is
+ * only enabled again if ipath_bringup_serdes() succeeds.  The value kept in
+ * the dd->ipath_ibcctrl shadow has no linkinitcmd set, while the value
+ * written to the chip asks for SLEEP.
+ *
+ * Returns 0 on success, or the error from ipath_bringup_serdes().
+ */
+static int ipath_bringup_link(const ipath_type t)
+{
+	ipath_devdata *dd = &devdata[t];
+	uint64_t pktbytes, ctrl;
+	int err;
+
+	/* hold IBC in reset */
+	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
+	ipath_kput_kreg(t, kr_control, dd->ipath_control);
+
+	/*
+	 * Note that prior to try 14 or 15 of IB, the credit scaling
+	 * wasn't working, because it was swapped for writes with the
+	 * 1 bit default linkstate field
+	 */
+
+	/* size of a pio buffer, ignoring the pbc and align word */
+	pktbytes = dd->ipath_piosize - 2 * sizeof(uint32_t);
+	/*
+	 * for ICRC, which we only send in diag test pkt mode, and we don't
+	 * need to worry about that for mtu
+	 * NOTE(review): this adds one byte, which the division by the word
+	 * size below discards again - confirm whether a word was meant.
+	 */
+	pktbytes += 1;
+
+	ctrl =
+	    /*
+	     * set the IBC maxpktlength to the size of our pio buffers;
+	     * the maxpktlength is in words.  This is *not* the IB data MTU
+	     */
+	    ((pktbytes / sizeof(uint32_t)) << INFINIPATH_IBCC_MAXPKTLEN_SHIFT)
+	    /* flow control watermark, in KB */
+	    | (0x5ULL << INFINIPATH_IBCC_FLOWCTRLWATERMARK_SHIFT)
+	    /*
+	     * how often flowctrl is sent, more or less in usecs; balance
+	     * against watermark value, so that in theory senders always
+	     * get a flow control update in time to not let the IB link
+	     * go idle.
+	     */
+	    | (0x3ULL << INFINIPATH_IBCC_FLOWCTRLPERIOD_SHIFT)
+	    /* max error tolerance */
+	    | (0xfULL << INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT)
+	    /* use "real" buffer space for IB credit flow control. */
+	    | (4ULL << INFINIPATH_IBCC_CREDITSCALE_SHIFT)
+	    | (0xfULL << INFINIPATH_IBCC_OVERRUNTHRESHOLD_SHIFT);
+
+	/* don't put linkinitcmd in ipath_ibcctrl, want that to stay a NOP */
+	dd->ipath_ibcctrl = ctrl;
+
+	/* initially come up waiting for TS1, without sending anything. */
+	dd->ipath_flags |= IPATH_LINK_SLEEPING;
+	ipath_kput_kreg(t, kr_ibcctrl,
+			ctrl | (INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
+				INFINIPATH_IBCC_LINKINITCMD_SHIFT));
+
+	err = ipath_bringup_serdes(t);
+	if (err) {
+		_IPATH_INFO("Could not initialize SerDes, not usable\n");
+		return err;
+	}
+
+	/* enable IBC */
+	dd->ipath_control |= INFINIPATH_C_LINKENABLE;
+	ipath_kput_kreg(t, kr_control, dd->ipath_control);
+
+	return err;
+}
+
+/*
+ * Request that the IB link of unit t go down, by handing the SLEEP
+ * link-init command to ipath_set_ib_lstate().
+ *
+ * Called from ipath_shutdown_link(), and from sma doing a LINKDOWN.
+ * Left as a separate function for historical reasons, and may want
+ * it to do more than just call ipath_set_ib_lstate() again sometime
+ * in the future.
+ *
+ * NOTE(review): the value passed is already shifted into the
+ * LINKINITCMD field position; confirm that this is the encoding
+ * ipath_set_ib_lstate() expects for its second argument.
+ */
+void ipath_down_link(const ipath_type t)
+{
+ ipath_set_ib_lstate(t, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
+ INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+}
+
+/*
+ * Shut down the IB link of unit t.  Do this when the driver is being
+ * unloaded, or perhaps for diags, and maybe when we get an interrupt
+ * of a fatal link error that requires bringing the link down and
+ * back up.
+ *
+ * In order: request link down, notify the layered driver (if it
+ * registered an l_intr callback), clear LINKENABLE, clear the IB
+ * status bits, quiet the SerDes, turn both primary port LEDs off,
+ * stop the stats timer, and finally, if the chip was marked present,
+ * mark it absent and drop the register base pointers (and the MTRR
+ * entry when CONFIG_MTRR is set).
+ *
+ * Always returns 0; ret is never changed after its initialization.
+ */
+static int ipath_shutdown_link(const ipath_type t)
+{
+ uint64_t val;
+ ipath_devdata *dd = &devdata[t];
+ int ret = 0;
+
+ _IPATH_DBG("Shutting down the link\n");
+ ipath_down_link(t);
+
+ /*
+ * we are shutting down, so tell the layered driver. We don't
+ * do this on just a link state change, much like ethernet,
+ * a cable unplug, etc. doesn't change driver state
+ */
+ if (dd->ipath_layer.l_intr)
+ dd->ipath_layer.l_intr(t, IPATH_LAYER_INT_IF_DOWN);
+
+ dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; /* disable IBC */
+ ipath_kput_kreg(t, kr_control, dd->ipath_control);
+
+ *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF | IPATH_STATUS_IB_READY);
+
+ /*
+ * clear SerdesEnable and turn the leds off; do this here because
+ * we are unloading, so don't count on interrupts to move along
+ */
+
+ ipath_quiet_serdes(t);
+ val = dd->ipath_extctrl &
+ ~(INFINIPATH_EXTC_LEDPRIPORTGREENON |
+ INFINIPATH_EXTC_LEDPRIPORTYELLOWON);
+ dd->ipath_extctrl = val; /* keep the shadow copy in step with the register */
+ ipath_kput_kreg(t, kr_extctrl, val);
+
+ if (dd->ipath_stats_timer_active) {
+ del_timer_sync(&dd->ipath_stats_timer);
+ dd->ipath_stats_timer_active = 0;
+ }
+ if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
+ /* can't do anything more with chip */
+ /* needs re-init */
+ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
+ if (dd->ipath_kregbase) {
+ /*
+ * if we haven't already cleaned up before these
+ * are to ensure any register reads/writes "fail"
+ * until re-init
+ */
+ dd->ipath_kregbase = NULL;
+ dd->ipath_kregvirt = NULL;
+ dd->ipath_uregbase = 0ULL;
+ dd->ipath_sregbase = 0ULL;
+ dd->ipath_cregbase = 0ULL;
+ dd->ipath_kregsize = 0;
+ }
+#ifdef CONFIG_MTRR
+ if (dd->ipath_mtrr) {
+ _IPATH_VDBG("undoing WCCOMB on pio buffers\n");
+ mtrr_del(dd->ipath_mtrr, 0, 0);
+ dd->ipath_mtrr = 0;
+ }
+#endif
+ }
+
+ return ret;
+}
+
+/*
+ * When closing, free up the data allocated for one port.
+ *
+ * dd:       device the port belongs to
+ * port:     index into dd->ipath_pd[]; 0 is handled specially below
+ * freehdrq: if nonzero, also free the receive header queue, the tid
+ *           page list and the portdata itself, and clear the
+ *           dd->ipath_pd[port] slot; if zero, those are kept
+ *
+ * The eager buffers (nonzero ports) or the port 0 skb array are
+ * released regardless of freehdrq.  Does nothing if the slot is
+ * already NULL.
+ * Note: with freehdrq set this also frees the portdata itself!
+ */
+void ipath_free_pddata(ipath_devdata * dd, uint32_t port, int freehdrq)
+{
+ ipath_portdata *pd = dd->ipath_pd[port];
+
+ if (!pd)
+ return;
+ if (freehdrq)
+ /*
+ * only clear and free portdata if we are going to
+ * also release the hdrq, otherwise we leak the hdrq on each
+ * open/close cycle
+ */
+ dd->ipath_pd[port] = NULL;
+ /* cleanup locked pages private data structures */
+ ipath_mlock_cleanup(pd);
+ if (freehdrq && pd->port_rcvhdrq) {
+ int i, n = 1 << pd->port_rcvhdrq_order; /* number of pages in the hdrq */
+ _IPATH_VDBG("free closed port %d rcvhdrq @ %p (order=%u)\n",
+ pd->port_port, pd->port_rcvhdrq,
+ pd->port_rcvhdrq_order);
+ for (i = 0; i < n; i++) /* clear the reserved flag on every page before freeing */
+ ClearPageReserved(virt_to_page
+ (pd->port_rcvhdrq + (i * PAGE_SIZE)));
+ free_pages((unsigned long)pd->port_rcvhdrq,
+ pd->port_rcvhdrq_order);
+ pd->port_rcvhdrq = NULL;
+ }
+ if (port && pd->port_rcvegrbuf_pages) { /* always free this, however */
+ void *virt;
+ unsigned e, i, n = 1 << pd->port_rcvegrbuf_order; /* pages per chunk */
+ if (pd->port_rcvegrbuf_virt) {
+ for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
+ virt = pd->port_rcvegrbuf_virt[e];
+ for (i = 0; i < n; i++)
+ ClearPageReserved(virt_to_page
+ (virt +
+ (i * PAGE_SIZE)));
+ _IPATH_VDBG
+ ("egrbuf free_pages(%p, %x), chunk %u/%u\n",
+ virt, pd->port_rcvegrbuf_order, e,
+ pd->port_rcvegrbuf_chunks);
+ free_pages((unsigned long)virt,
+ pd->port_rcvegrbuf_order);
+ }
+ vfree(pd->port_rcvegrbuf_virt);
+ pd->port_rcvegrbuf_virt = NULL;
+ }
+ pd->port_rcvegrbuf_chunks = 0;
+ _IPATH_VDBG("free closed port %d rcvegrbufs ptr array\n",
+ pd->port_port);
+ /* now the pointer array. */
+ vfree(pd->port_rcvegrbuf_pages);
+ pd->port_rcvegrbuf_pages = NULL;
+ } else if (port == 0 && dd->ipath_port0_skbs) { /* port 0 uses an skb array instead */
+ unsigned e;
+ struct sk_buff **skbs = dd->ipath_port0_skbs;
+
+ dd->ipath_port0_skbs = NULL; /* detach the array before freeing its entries */
+ _IPATH_VDBG("free closed port %d ipath_port0_skbs @ %p\n",
+ pd->port_port, skbs);
+ for (e = 0; e < dd->ipath_rcvegrcnt; e++)
+ if (skbs[e])
+ dev_kfree_skb(skbs[e]);
+ vfree(skbs);
+ }
+ if (freehdrq) { /* pd must not be used after this point */
+ kfree(pd->port_tid_pg_list);
+ kfree(pd);
+ }
+}
+/*
+ * Module load entry point: sets up the picotime conversion, the sysctl
+ * table, the per-unit status pointers and the global spinlocks, then
+ * registers the PCI driver, the "version" driver attribute and the
+ * character device.
+ *
+ * Always returns 0, even if a registration step failed (see the
+ * comment before the return).
+ *
+ * NOTE(review): the return values of pci_register_driver() and
+ * driver_create_file() are not looked at, and r from
+ * register_chrdev() is only logged -- confirm that cleanup copes with
+ * each of these having failed.
+ */
+int __init infinipath_init(void)
+{
+ int r = 0, i;
+
+ _IPATH_DBG(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);
+
+ ipath_init_picotime(); /* init cycles -> pico conversion */
+
+ if (!ipath_ctl_header) { /* should be always */
+ if (!(ipath_ctl_header = register_sysctl_table(ipath_ctl, 1)))
+ _IPATH_INFO("Couldn't register sysctl interface\n");
+ }
+
+ /*
+ * initialize the statusp to temporary storage so we can use it
+ * everywhere without first checking. When we "really" assign it,
+ * we copy from _ipath_status
+ */
+ for (i = 0; i < infinipath_max; i++)
+ devdata[i].ipath_statusp = &devdata[i]._ipath_status;
+
+ /*
+ * init these early, in case we take an interrupt as soon as the irq
+ * is setup. Saw a spinlock panic once that appeared to be due to that
+ * problem, when they were initted later on.
+ */
+ spin_lock_init(&ipath_pioavail_lock);
+ spin_lock_init(&ipath_sma_lock);
+
+ pci_register_driver(&infinipath_driver);
+
+ driver_create_file(&(infinipath_driver.driver), &driver_attr_version);
+
+ if ((r = register_chrdev(ipath_major, MODNAME, &ipath_fops)))
+ _IPATH_ERROR("Unable to register %s device\n", MODNAME);
+
+
+ /*
+ * never return an error, since we could have stuff registered,
+ * resources used, etc., even if no hardware found. This way we
+ * can clean up through unload.
+ */
+ return 0;
+}
+
+/*
+ * Module unload entry point.  Removes the "version" driver attribute,
+ * the sysctl table and the character device, then walks every unit:
+ * for units that still have a register base it stops receive and
+ * send, masks interrupts, shuts the link down, clears pending
+ * errors/interrupts and frees the pioavail registers copy, the expTID
+ * page shadow array and all port data; for every unit it then frees
+ * the irq, unregisters the PCI driver if that unit is flagged as
+ * registered, and runs the per-chip cleanup.  Finishes with the
+ * driver-wide chip and locked-page cleanup.
+ *
+ * note: if for some reason the unload fails after this routine, and leaves
+ * the driver enterable by user code, we'll almost certainly crash and burn...
+ *
+ * NOTE(review): pci_unregister_driver() is called from inside the
+ * per-unit loop, guarded only by that unit's pci_registered flag;
+ * confirm it cannot run more than once when several units are present.
+ */
+static void __exit infinipath_cleanup(void)
+{
+ int r, m, port;
+
+ driver_remove_file(&(infinipath_driver.driver), &driver_attr_version);
+ if (ipath_ctl_header) {
+ unregister_sysctl_table(ipath_ctl_header);
+ ipath_ctl_header = NULL;
+ } else
+ _IPATH_DBG("No sysctl unregister, not registered OK\n");
+ if ((r = unregister_chrdev(ipath_major, MODNAME)))
+ _IPATH_DBG("unregister of device failed: %d\n", r);
+
+
+ /*
+ * turn off rcv, send, and interrupts for all ports, all drivers
+ * should also hard reset the chip here?
+ * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
+ * for all versions of the driver, if they were allocated
+ */
+ for (m = 0; m < infinipath_max; m++) {
+ uint64_t val;
+ ipath_devdata *dd = &devdata[m];
+ if (dd->ipath_kregbase) {
+ /* in case unload fails, be consistent */
+ dd->ipath_rcvctrl = 0U;
+ ipath_kput_kreg(m, kr_rcvctrl, dd->ipath_rcvctrl);
+
+ /*
+ * gracefully stop all sends allowing any in
+ * progress to trickle out first.
+ */
+ ipath_kput_kreg(m, kr_sendctrl, 0ULL);
+ val = ipath_kget_kreg64(m, kr_scratch); /* flush it */
+ /*
+ * enough for anything that's going to trickle
+ * out to have actually done so.
+ */
+ udelay(5);
+
+ /*
+ * abort any armed or launched PIO buffers that
+ * didn't go. (self clearing). Will cause any
+ * packet currently being transmitted to go out
+ * with an EBP, and may also cause a short packet
+ * error on the receiver.
+ */
+ ipath_kput_kreg(m, kr_sendctrl, INFINIPATH_S_ABORT);
+
+ /* mask interrupts, but not errors */
+ ipath_kput_kreg(m, kr_intmask, 0ULL);
+ ipath_shutdown_link(m);
+
+ /*
+ * clear all interrupts and errors. Next time
+ * driver is loaded, we know that whatever is
+ * set happened while we were unloaded
+ *
+ * NOTE(review): ipath_shutdown_link() above sets
+ * dd->ipath_kregbase to NULL when the chip was
+ * marked present, with the stated aim of making
+ * register accesses "fail"; confirm these three
+ * writes still reach the hardware.
+ */
+ ipath_kput_kreg(m, kr_hwerrclear, ~0ULL);
+ ipath_kput_kreg(m, kr_errorclear, ~0ULL);
+ ipath_kput_kreg(m, kr_intclear, ~0ULL);
+ if (dd->__ipath_pioavailregs_base) {
+ kfree((void *)dd->__ipath_pioavailregs_base);
+ dd->__ipath_pioavailregs_base =
+ dd->ipath_pioavailregs_dma = 0;
+ }
+
+ if (dd->ipath_pageshadow) {
+ struct page **tmpp = dd->ipath_pageshadow;
+ int i, cnt = 0; /* cnt: shadow entries found still set */
+
+ _IPATH_VDBG
+ ("Unlocking any expTID pages still locked\n");
+ for (port = 0; port < dd->ipath_cfgports;
+ port++) {
+ int port_tidbase =
+ port * dd->ipath_rcvtidcnt; /* first shadow index of this port */
+ int maxtid =
+ port_tidbase + dd->ipath_rcvtidcnt; /* one past this port's last index */
+ for (i = port_tidbase; i < maxtid; i++) {
+ if (tmpp[i]) {
+ ipath_munlock(1,
+ &tmpp[i]);
+ tmpp[i] = 0;
+ cnt++;
+ }
+ }
+ }
+ if (cnt) {
+ ipath_stats.sps_pageunlocks += cnt;
+ _IPATH_VDBG
+ ("There were still %u expTID entries locked\n",
+ cnt);
+ }
+ if (ipath_stats.sps_pagelocks
+ || ipath_stats.sps_pageunlocks)
+ _IPATH_VDBG
+ ("%llu pages locked, %llu unlocked via ipath_m{un}lock\n",
+ ipath_stats.sps_pagelocks,
+ ipath_stats.sps_pageunlocks);
+
+ _IPATH_VDBG
+ ("Free shadow page tid array at %p\n",
+ dd->ipath_pageshadow);
+ vfree(dd->ipath_pageshadow);
+ dd->ipath_pageshadow = NULL;
+ }
+
+ /*
+ * free any resources still in use (usually just
+ * kernel ports) at unload
+ */
+ for (port = 0; port < dd->ipath_cfgports; port++)
+ ipath_free_pddata(dd, port, 1);
+ kfree(dd->ipath_pd);
+ /*
+ * debuggability, in case some cleanup path
+ * tries to use it after this
+ */
+ dd->ipath_pd = NULL;
+ }
+
+ if (dd->pcidev) {
+ if (dd->pcidev->irq) {
+ _IPATH_VDBG("unit %u free_irq of irq %x\n", m,
+ dd->pcidev->irq);
+ free_irq(dd->pcidev->irq, dd);
+ } else
+ _IPATH_DBG
+ ("irq is 0, not doing free_irq for unit %u\n",
+ m);
+ dd->pcidev = NULL;
+ }
+ if (dd->pci_registered) {
+ _IPATH_VDBG
+ ("Unregistering pci infrastructure unit %u\n", m);
+ pci_unregister_driver(&infinipath_driver);
+ dd->pci_registered = 0;
+ } else
+ _IPATH_VDBG
+ ("unit %u: no pci unreg, wasn't registered\n", m);
+ ipath_chip_cleanup(dd); /* clean up any per-chip chip-specific stuff */
+ }
+ /*
+ * clean up any chip-specific stuff for now, only one type of chip
+ * for any given driver
+ */
+ ipath_chip_done();
+
+ /* cleanup all our locked pages private data structures */
+ ipath_mlock_cleanup(NULL);
+}
+
+/*
+ * Fill name (a buffer of namelen bytes) with the board name of unit t.
+ *
+ * This is a generic function here, so it can return device-specific
+ * info. This allows keeping in sync with the version that supports
+ * multiple chip types.  In this driver it simply forwards to
+ * ipath_ht_get_boardname().
+ */
+void ipath_get_boardname(const ipath_type t, char *name, size_t namelen)
+{
+ ipath_ht_get_boardname(t, name, namelen);
+}
+
+module_init(infinipath_init); /* entry point run when the module is loaded */
+module_exit(infinipath_cleanup); /* exit point run when the module is unloaded */
+
+EXPORT_SYMBOL(infinipath_debug); /* make the debug variable visible to other modules */
+EXPORT_SYMBOL(ipath_get_boardname); /* make the board name query visible to other modules */
--
0.99.9n
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/



Relevant Pages

  • [PATCH 8/8] scsi: megaraid_sas - Update version and changelog
    ... Added the megasas_reset_timer routine to intercept cmd timeout and throttle io. ... When MegaSAS driver receives reset call from OS, ... Added new memory management module to support the IOCTL memory allocation. ... For IOCTL we try to allocate from the memory pool created during driver initialization. ...
    (Linux-Kernel)
  • [PATCH 6/6] scsi: megaraid_sas - Update version and changelog
    ... Added the megasas_reset_timer routine to intercept cmd timeout and throttle io. ... When MegaSAS driver receives reset call from OS, ... Added new memory management module to support the IOCTL memory allocation. ... For IOCTL we try to allocate from the memory pool created during driver initialization. ...
    (Linux-Kernel)
  • [PATCH 8/8] scsi: megaraid_sas - Update version and changelog
    ... Added the megasas_reset_timer routine to intercept cmd timeout and throttle io. ... When MegaSAS driver receives reset call from OS, ... Added new memory management module to support the IOCTL memory allocation. ... For IOCTL we try to allocate from the memory pool created during driver initialization. ...
    (Linux-Kernel)
  • [PATCH 10 of 20] ipath - core driver, part 3 of 4
    ... + * receive a packet for the layered driver. ... + * Update our shadow copy of the PIO availability register map, ... + * the tid and returns it to the caller. ...
    (Linux-Kernel)