alloc_percpu: Use __get_cpu_ptr in networking

Impact: slight efficiency improvement on some archs.

Let's use __get_cpu_ptr and get_cpu_ptr now, rather than
per_cpu_ptr(..., smp_processor_id()).

(The nf_conntrack code seems a bit confused about when raw_smp_processor_id()
 is needed: not at these call sites, since BHs are disabled there).

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
Cc: Christoph Raisch <raisch@de.ibm.com>
Cc: netdev@vger.kernel.org
---
 drivers/infiniband/hw/ehca/ehca_irq.c       |    3 +--
 drivers/net/chelsio/sge.c                   |    5 ++---
 drivers/net/loopback.c                      |    4 ++--
 drivers/net/veth.c                          |    7 +++----
 include/net/netfilter/nf_conntrack.h        |    4 ++--
 include/net/netfilter/nf_conntrack_ecache.h |    2 +-
 net/core/sock.c                             |    3 +--
 net/netfilter/nf_conntrack_ecache.c         |    4 ++--
 net/xfrm/xfrm_ipcomp.c                      |   20 ++++++++------------
 9 files changed, 22 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -827,8 +827,7 @@ static void __cpuinit take_over_work(str
 		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
 
 		list_del(&cq->entry);
-		__queue_comp_task(cq, per_cpu_ptr(pool->cpu_comp_tasks,
-						  smp_processor_id()));
+		__queue_comp_task(cq, __get_cpu_ptr(pool->cpu_comp_tasks));
 	}
 
 	spin_unlock_irqrestore(&cct->task_lock, flags_cct);
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1378,7 +1378,7 @@ static void sge_rx(struct sge *sge, stru
 	}
 	__skb_pull(skb, sizeof(*p));
 
-	st = per_cpu_ptr(sge->port_stats[p->iff], smp_processor_id());
+	st = __get_cpu_ptr(sge->port_stats[p->iff]);
 
 	skb->protocol = eth_type_trans(skb, adapter->port[p->iff].dev);
 	if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff &&
@@ -1780,8 +1780,7 @@ int t1_start_xmit(struct sk_buff *skb, s
 {
 	struct adapter *adapter = dev->ml_priv;
 	struct sge *sge = adapter->sge;
-	struct sge_port_stats *st = per_cpu_ptr(sge->port_stats[dev->if_port],
-						smp_processor_id());
+	struct sge_port_stats *st = __get_cpu_ptr(sge->port_stats[dev->if_port]);
 	struct cpl_tx_pkt *cpl;
 	struct sk_buff *orig_skb = skb;
 	int ret;
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -76,9 +76,9 @@ static int loopback_xmit(struct sk_buff 
 
 	skb->protocol = eth_type_trans(skb,dev);
 
-	/* it's OK to use per_cpu_ptr() because BHs are off */
+	/* it's OK to use __get_cpu_ptr() because BHs are off */
 	pcpu_lstats = dev->ml_priv;
-	lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id());
+	lb_stats = __get_cpu_ptr(pcpu_lstats);
 	lb_stats->bytes += skb->len;
 	lb_stats->packets++;
 
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -148,7 +148,7 @@ static int veth_xmit(struct sk_buff *skb
 	struct net_device *rcv = NULL;
 	struct veth_priv *priv, *rcv_priv;
 	struct veth_net_stats *stats;
-	int length, cpu;
+	int length;
 
 	skb_orphan(skb);
 
@@ -156,8 +156,7 @@ static int veth_xmit(struct sk_buff *skb
 	rcv = priv->peer;
 	rcv_priv = netdev_priv(rcv);
 
-	cpu = smp_processor_id();
-	stats = per_cpu_ptr(priv->stats, cpu);
+	stats = __get_cpu_ptr(priv->stats);
 
 	if (!(rcv->flags & IFF_UP))
 		goto outf;
@@ -178,7 +177,7 @@ static int veth_xmit(struct sk_buff *skb
 	stats->tx_bytes += length;
 	stats->tx_packets++;
 
-	stats = per_cpu_ptr(rcv_priv->stats, cpu);
+	stats = __get_cpu_ptr(rcv_priv->stats);
 	stats->rx_bytes += length;
 	stats->rx_packets++;
 
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -290,11 +290,11 @@ extern int nf_conntrack_max;
 extern int nf_conntrack_max;
 
 #define NF_CT_STAT_INC(net, count)	\
-	(per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++)
+	(__get_cpu_ptr((net)->ct.stat)->count++)
 #define NF_CT_STAT_INC_ATOMIC(net, count)		\
 do {							\
 	local_bh_disable();				\
-	per_cpu_ptr((net)->ct.stat, raw_smp_processor_id())->count++;	\
+	__get_cpu_ptr((net)->ct.stat)->count++;		\
 	local_bh_enable();				\
 } while (0)
 
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -39,7 +39,7 @@ nf_conntrack_event_cache(enum ip_conntra
 	struct nf_conntrack_ecache *ecache;
 
 	local_bh_disable();
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
+	ecache = __get_cpu_ptr(net->ct.ecache);
 	if (ct != ecache->ct)
 		__nf_ct_event_cache_init(ct);
 	ecache->events |= event;
diff --git a/net/core/sock.c b/net/core/sock.c
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1946,8 +1946,7 @@ static DECLARE_BITMAP(proto_inuse_idx, P
 #ifdef CONFIG_NET_NS
 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
 {
-	int cpu = smp_processor_id();
-	per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val;
+	__get_cpu_ptr(net->core.inuse)->val[prot->inuse_idx] += val;
 }
 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
 
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -60,7 +60,7 @@ void nf_ct_deliver_cached_events(const s
 	struct nf_conntrack_ecache *ecache;
 
 	local_bh_disable();
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
+	ecache = __get_cpu_ptr(net->ct.ecache);
 	if (ecache->ct == ct)
 		__nf_ct_deliver_cached_events(ecache);
 	local_bh_enable();
@@ -74,7 +74,7 @@ void __nf_ct_event_cache_init(struct nf_
 	struct nf_conntrack_ecache *ecache;
 
 	/* take care of delivering potentially old events */
-	ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
+	ecache = __get_cpu_ptr(net->ct.ecache);
 	BUG_ON(ecache->ct == ct);
 	if (ecache->ct)
 		__nf_ct_deliver_cached_events(ecache);
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -45,9 +45,8 @@ static int ipcomp_decompress(struct xfrm
 	const int plen = skb->len;
 	int dlen = IPCOMP_SCRATCH_SIZE;
 	const u8 *start = skb->data;
-	const int cpu = get_cpu();
-	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	u8 *scratch = *get_cpu_ptr(ipcomp_scratches);
+	struct crypto_comp *tfm = *__get_cpu_ptr(ipcd->tfms);
 	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
 	int len;
 
@@ -101,7 +100,7 @@ static int ipcomp_decompress(struct xfrm
 	err = 0;
 
 out:
-	put_cpu();
+	put_cpu_ptr(ipcomp_scratches);
 	return err;
 }
 
@@ -139,9 +138,8 @@ static int ipcomp_compress(struct xfrm_s
 	const int plen = skb->len;
 	int dlen = IPCOMP_SCRATCH_SIZE;
 	u8 *start = skb->data;
-	const int cpu = get_cpu();
-	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+	u8 *scratch = *get_cpu_ptr(ipcomp_scratches);
+	struct crypto_comp *tfm = *__get_cpu_ptr(ipcd->tfms);
 	int err;
 
 	local_bh_disable();
@@ -162,7 +160,7 @@ static int ipcomp_compress(struct xfrm_s
 	return 0;
 
 out:
-	put_cpu();
+	put_cpu_ptr(ipcomp_scratches);
 	return err;
 }
 
@@ -274,14 +272,12 @@ static struct crypto_comp **ipcomp_alloc
 	struct crypto_comp **tfms;
 	int cpu;
 
-	/* This can be any valid CPU ID so we don't need locking. */
-	cpu = raw_smp_processor_id();
-
 	list_for_each_entry(pos, &ipcomp_tfms_list, list) {
 		struct crypto_comp *tfm;
 
 		tfms = pos->tfms;
-		tfm = *per_cpu_ptr(tfms, cpu);
+		/* This can be any valid CPU ID so we don't need locking. */
+		tfm = *__raw_get_cpu_ptr(tfms);
 
 		if (!strcmp(crypto_comp_name(tfm), alg_name)) {
 			pos->users++;
